# Approach 3: Multi-Armed Bandit (MAB)

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from tqdm import tqdm

In [16]:
# Pin NumPy's global (legacy) random state so that the stochastic cells below are repeatable
SEED = 0
np.random.seed(seed=SEED)

## Training

###  Data Preprocessing

In [17]:
# Location of the training split, relative to the notebook's working directory
TRAIN_PATH = 'data/train.csv'

# Read the training split into a DataFrame
df_train = pd.read_csv(filepath_or_buffer=TRAIN_PATH)

In [18]:
# Announce the preview, then let the notebook render the first five rows
print("> Quick visual check of dataframe:")
df_train.head(n=5)

> Quick visual check of dataframe:


Unnamed: 0,user_id,item_id,rating,user_age,user_gender,user_occupation,user_zip,release_date,genre
0,1,1,5,24,M,technician,Arizona,1995.0,Animation
1,1,2,3,24,M,technician,Arizona,1995.0,Action
2,1,3,4,24,M,technician,Arizona,1995.0,Thriller
3,1,4,3,24,M,technician,Arizona,1995.0,Action
4,1,5,3,24,M,technician,Arizona,1995.0,Crime


In [19]:
# Boolean row masks for the two demographic splits (age below 30 counts as "young")
mask_young = df_train['user_age'].lt(30)
mask_male = df_train['user_gender'].eq("M")

# One sub-frame per (gender, age) combination; every row whose gender is not "M"
# ends up in one of the two f* groups
df_user_my = df_train.loc[mask_male & mask_young]
df_user_mo = df_train.loc[mask_male & ~mask_young]
df_user_fy = df_train.loc[~mask_male & mask_young]
df_user_fo = df_train.loc[~mask_male & ~mask_young]

# Lookup of the four sub-frames by their short group key
df_user_groups = dict(
    my=df_user_my,
    mo=df_user_mo,
    fy=df_user_fy,
    fo=df_user_fo,
)

In [20]:
# Display the 'my' sub-frame (rows selected by mask_male & mask_young)
df_user_groups['my']

Unnamed: 0,user_id,item_id,rating,user_age,user_gender,user_occupation,user_zip,release_date,genre
0,1,1,5,24,M,technician,Arizona,1995.0,Animation
1,1,2,3,24,M,technician,Arizona,1995.0,Action
2,1,3,4,24,M,technician,Arizona,1995.0,Thriller
3,1,4,3,24,M,technician,Arizona,1995.0,Action
4,1,5,3,24,M,technician,Arizona,1995.0,Crime
...,...,...,...,...,...,...,...,...,...
90565,943,1047,2,22,M,student,Texas,1996.0,Comedy
90566,943,1074,4,22,M,student,Texas,1994.0,Comedy
90567,943,1188,3,22,M,student,Texas,1990.0,Action
90568,943,1228,3,22,M,student,Texas,1995.0,Action


In [21]:
# TODO generalise to all four groups. For development purposes we start by restricting ourselves to only the `my` dataframe.

In [22]:
# Working frame for development: the 'my' group, fetched through the group lookup
df = df_user_groups['my']

#### Define MAB Structure

In [23]:
class EpsilonTrainer(object):
    """Epsilon-greedy multi-armed bandit trained by offline replay of logged ratings.

    Every distinct item in ``reward_database`` is one arm. ``replay`` repeatedly
    draws a random user and a policy-chosen item; draws for which the database
    holds no rating are rejected, while matching draws yield a binary reward
    (1 if rating >= ``like_threshold`` else 0) that updates the running mean
    reward of the chosen item.

    Item ids are translated to positions in ``self.items`` before the per-item
    arrays are indexed, so ids do not have to be contiguous or zero-based.
    """

    def __init__(self, epsilon,
                 reward_database, item_col_name, user_col_name,
                 n_recommendations,
                 n_iterations = 1,
                 rating_col_name = 'rating',
                 like_threshold = 4):
        """Store the configuration and derive the sets of items and users.

        Args:
            epsilon: probability of exploring (choosing a uniformly random item).
            reward_database: DataFrame holding one logged rating per row.
            item_col_name: header name of the item column in that frame.
            user_col_name: header name of the user column in that frame.
            n_recommendations: number of accepted recommendations per iteration.
            n_iterations: number of independent replay runs (to reduce variance).
            rating_col_name: header name of the rating column; only read by ``replay``.
            like_threshold: a rating >= this value counts as a like (reward 1).
        """
        # Save arguments on the instance
        self.epsilon = epsilon  # exploration probability
        self.reward_database = reward_database  # used to access the ratings
        self.item_col_name = item_col_name  # header name of item column in that db
        self.user_col_name = user_col_name  # header name of user column in that db
        self.n_recommendations = n_recommendations  # accepted recommendations per iteration
        self.n_iterations = n_iterations  # independent runs, to investigate statistical differences
        self.rating_col_name = rating_col_name  # header name of rating column in that db
        self.like_threshold = like_threshold  # minimum rating that counts as a like

        # Sub-views of interest
        self.items = self.reward_database[self.item_col_name].unique()
        self.n_items = len(self.items)

        self.users = self.reward_database[self.user_col_name].unique()
        self.n_users = len(self.users)

        # item_id -> position in self.items (and therefore in the per-item arrays)
        self._item_index = {item_id: idx for idx, item_id in enumerate(self.items)}

        # Start from empty estimates so select_item() is usable before replay()
        self.reset()

    def reset(self):
        """(Re-)initialise the learnt per-item statistics for the next iteration."""
        self.item_counter_arr = np.zeros(self.n_items)  # how often each item was recommended
        self.item_reward_arr = np.zeros(self.n_items)  # running mean reward per item (in [0,1] range)

    def random_user(self):
        """Return the user_id of a uniformly random user of the reward database."""
        user_idx = np.random.randint(self.n_users)
        return self.users[user_idx]

    def select_item(self):
        """Select an item_id from the database with the epsilon-greedy strategy."""
        outcome = np.random.uniform(low=0.0, high=1.0)

        if outcome < self.epsilon:
            # Explore: any item, uniformly at random
            item_idx = np.random.randint(self.n_items)
        else:
            # Exploit: argmax yields a *position* in the per-item array (first one on ties),
            # which is translated to the actual item id below
            item_idx = np.argmax(self.item_reward_arr)

        return self.items[item_idx]

    def _build_rating_lookup(self):
        """Return a {(user_id, item_id): rating} dict for constant-time replay lookups.

        If a (user, item) pair occurs more than once, the last row wins.
        """
        db = self.reward_database
        pairs = zip(db[self.user_col_name].tolist(), db[self.item_col_name].tolist())
        return dict(zip(pairs, db[self.rating_col_name].tolist()))

    def replay(self):
        """Train the bandit by replaying logged ratings.

        Returns:
            A list with one dict per accepted recommendation, with the keys
            'iteration', 'rec_idx', 'user_id', 'item_id', 'reward',
            'total_reward' and 'like_fraction' (likes so far / recommendations so far).

        Raises:
            ValueError: if the reward database contains no users or no items.
        """
        if self.n_users == 0 or self.n_items == 0:
            raise ValueError("reward_database contains no users or no items to replay")

        rating_lookup = self._build_rating_lookup()
        log = []  # results of every accepted recommendation, over all iterations

        for iteration in tqdm(range(0, self.n_iterations)):
            # Each iteration learns from scratch
            self.reset()

            total_rewards = 0
            like_fraction = np.zeros(self.n_recommendations)

            for rec_idx in range(0, self.n_recommendations):
                # Rejection sampling: redraw until the logged data contains a
                # rating for the drawn (user, item) pair
                rating = None
                while rating is None:
                    user_id = self.random_user()
                    item_id = self.select_item()
                    rating = rating_lookup.get((user_id, item_id))

                reward = int(rating >= self.like_threshold)
                self.record_result(item_id, reward)

                total_rewards += reward
                like_fraction[rec_idx] = total_rewards / (rec_idx + 1)

                log.append({
                    'iteration': iteration,
                    'rec_idx': rec_idx,
                    'user_id': user_id,
                    'item_id': item_id,
                    'reward': reward,
                    'total_reward': total_rewards,
                    'like_fraction': float(like_fraction[rec_idx]),
                })

        return log

    def record_result(self, item_id = None, reward = None):
        """Fold one observed reward into the statistics of the given item.

        Called without arguments this is a no-op, so pre-existing argument-less
        calls keep working.
        """
        if item_id is None or reward is None:
            return

        item_idx = self._item_index[item_id]
        self.item_counter_arr[item_idx] += 1

        # Incremental mean: new_mean = old_mean + (reward - old_mean) / n
        step = 1.0 / self.item_counter_arr[item_idx]
        self.item_reward_arr[item_idx] += step * (reward - self.item_reward_arr[item_idx])
    