# Third attempt at learning

In [None]:
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objs as go
plotly.offline.init_notebook_mode()
import os
import random
import math
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA
from scipy.stats import spearmanr
import pymc3 as pm
import theano
import datetime
theano.config.compute_test_value = 'raise'
%matplotlib inline
from multiprocessing import Pool, cpu_count
from functools import partial

In [None]:
# Directory (relative to the notebook) that holds the input CSV files.
SELECTED_DATA_DIR = "../selected-data/"
# File name of the movie table read into `movies_raw`.
MOVIES_FILE = "best_movie_ratings_features_engineered.csv"
# File name of the per-user ratings table read into `users`.
USERS_FILE = "users_ratings.csv"

## Read raw data

In [None]:
# Load the movie table; the first CSV column becomes the index.
movies_path = SELECTED_DATA_DIR + MOVIES_FILE
movies_raw = pd.read_csv(movies_path, index_col=0)
# Divide ratings by 10 (presumably a 0-10 scale brought to 0-1 -- confirm
# against the CSV).
movies_raw["rating"] = movies_raw["rating"] / 10
# Show one random row as a sanity check.
movies_raw.sample()

In [None]:
# Load the per-user ratings table; the first CSV column becomes the index.
users_path = SELECTED_DATA_DIR + USERS_FILE
users = pd.read_csv(users_path, index_col=0)
# Same 1/10 rescaling as applied to the movie ratings.
users["rating"] = users["rating"] / 10
# Show one random row as a sanity check.
users.sample()

In [None]:
# Display (rows, columns) of the user ratings table.
users.shape

## Reduce data dimension (PCA)

In [None]:
# Number of principal components kept by the PCA below; the same value is used
# by get_movie_features to slice the trailing feature columns of a movie row.
WANTED_DIM = 20

In [None]:
# Every column after the first two is treated as a feature and projected onto
# WANTED_DIM principal components; the result keeps the movie index.
feature_columns = list(movies_raw.columns[2:])
pca = PCA(n_components=WANTED_DIM)
projected = pca.fit_transform(movies_raw[feature_columns])
pca_df = pd.DataFrame(projected, index=movies_raw.index)

In [None]:
# Display (rows, columns) of the raw movie table, before dimensionality reduction.
movies_raw.shape

Fraction of the dataset's variance preserved by the PCA projection

In [None]:
# Sum of the explained-variance ratios of the retained components.
pca.explained_variance_ratio_.sum()

In [None]:
# Final movie table: the first two raw columns side by side with the PCA
# components (aligned on the shared movie index).
leading_columns = list(movies_raw.columns[:2])
movies = pd.concat([movies_raw[leading_columns], pca_df], axis=1)

### Collaborative data

In [None]:
# Join every user rating with the features of the rated movie (both frames are
# joined on their index). Both frames carry a `rating` column, so the merged
# frame exposes `rating_x` (left = movies) and `rating_y` (right = users).
# NOTE(review): the cells below weight by `rating_x`, i.e. the movie-side
# rating, not the user's own rating -- confirm this is intended.
collabo = movies.merge(users, left_index=True, right_index=True)

In [None]:
# Multiply each PCA component by `rating_x` twice, i.e. by the rating squared.
for component in range(WANTED_DIM):
    weighted_once = collabo[component] * collabo['rating_x']
    collabo[component] = weighted_once * collabo['rating_x']

In [None]:
# Collapse to one row per user by averaging every column over that user's rows.
collabo = collabo.groupby(collabo.user).aggregate(np.average)

In [None]:
# Scale each averaged component by the user's mean `rating_x` so that user
# profiles can be compared with one another.
for component in range(WANTED_DIM):
    collabo[component] = collabo[component].mul(collabo['rating_x'])

In [None]:
# Keep only the WANTED_DIM component columns (labelled 0 .. WANTED_DIM - 1).
component_labels = list(range(WANTED_DIM))
collabo = collabo[component_labels]

In [None]:
# Display five random user profiles as a sanity check.
collabo.sample(5)

## Actions selection function

In [None]:
def random_choice(user_features, movies, epoch, s):
    """Pure-exploration selector: pick one movie uniformly at random.

    `user_features`, `epoch` and `s` are accepted only so the function shares
    the signature of the other selectors; they are not used.

    Returns a one-row DataFrame sampled from `movies`.
    """
    picked = movies.sample(n=1)
    return picked

In [None]:
def greedy_choice_contentbased(user_features, movies, epoch, s):
    """Epsilon-greedy content-based selector with a decaying exploration rate.

    The exploration probability is ``1 / sqrt(epoch + 1)``; the actual
    explore/exploit decision is delegated to
    ``greedy_choice_no_t_contentbased``.

    Returns whatever the delegate returns (a DataFrame holding the chosen
    movie).
    """
    epsilon = 1 / math.sqrt(epoch + 1)
    # The original called `greedy_choice_no_t`, which is not defined anywhere
    # in this notebook (NameError); the content-based variant is the sibling
    # that matches this function's name and signature.
    return greedy_choice_no_t_contentbased(user_features, movies, epoch, s, epsilon)

def greedy_choice_no_t_contentbased(user_features, movies, epoch, s, epsilon=0.3 ):
    """Epsilon-greedy content-based selector with a fixed exploration rate.

    A uniform draw in [0, 1) is compared with `epsilon`: when the draw does not
    exceed `epsilon` a random movie is returned, otherwise the movie with the
    highest content-based utility is returned.
    """
    explore = random.random() <= epsilon
    if explore:
        return movies.sample()
    return best_contentbased_recommandation(user_features, movies, epoch, s)
    
def greedy_choice_UCB(user_features, movies, epoch, s):
    """Epsilon-greedy selector whose exploitation step adds a UCB bonus.

    Exploration probability is ``1 / sqrt(epoch + 1)``. When exploiting, the
    content-based recommender is called with its UCB flag enabled.
    """
    exploration_rate = 1 / math.sqrt(epoch + 1)
    if random.random() <= exploration_rate:
        return movies.sample()
    return best_contentbased_recommandation(user_features, movies, epoch, s, True)
    

In [None]:
def greedy_choice_collaborative(user_features, movies, epoch, s):
    """Epsilon-greedy collaborative selector with a decaying exploration rate.

    Exploration probability is ``1 / sqrt(epoch + 1)``. When exploiting, the
    collaborative recommender picks the movie.
    """
    exploration_rate = 1 / math.sqrt(epoch + 1)
    if random.random() <= exploration_rate:
        return movies.sample()
    return best_collaborative_recommandation(user_features, movies, epoch, s)

def greedy_choice_no_t_collaborative(user_features, movies, epoch, s, epsilon=0.3 ):
    """Epsilon-greedy collaborative selector with a fixed exploration rate.

    A uniform draw in [0, 1) is compared with `epsilon`: when the draw does not
    exceed `epsilon` a random movie is returned, otherwise the collaborative
    recommender picks the movie.
    """
    explore = random.random() <= epsilon
    if explore:
        return movies.sample()
    return best_collaborative_recommandation(user_features, movies, epoch, s)

In [None]:
def bayes_UCB(user_features, movies, epoch, s):
    """Bayesian selector: sample a rating per movie from a PyMC3 model and
    return the movie with the highest sampled rating.

    Parameters mirror the other selectors (`user_features`, `movies`, `epoch`,
    `s`). Returns the rows of `movies` whose index equals the best movie's.

    NOTE(review): the `s` argument is rebound to a Gamma random variable inside
    the model, so the caller's value is never used -- confirm this is intended.
    NOTE(review): `rating.mu = mu` only sets an attribute on the observed
    variable; whether that actually changes the sampled model should be
    verified against the PyMC3 documentation.
    """
    # Hyperparameters
    c0 = 10
    d0 = 3
    e0 = 0.01
    f0 = 0.001
    g0 = 0.001
    # function
    I = np.eye(user_features.size)
    ratings = np.zeros(movies.shape[0])
    with pm.Model():
        s = pm.Gamma('s', d0, e0)
        sigma = pm.InverseGamma('sigma', f0, g0)
        theta = pm.MvNormal('theta', mu=0.5, cov=c0 * sigma * I)
        rating = pm.Normal('rating', mu=0, sd=sigma, observed=user_features)

        for i, (title, movie) in tqdm(enumerate(movies.iterrows())):
            # Use the features of the movie being iterated. The original
            # passed the whole `movies` frame, which always resolved to the
            # first row and made every iteration score the same movie.
            movies_features = get_movie_features(movie)
            # Expected value of outcome
            mu = user_features.dot(movies_features) * (1 - np.exp(-epoch/s))
            # Likelihood (sampling distribution) of observations
            rating.mu = mu

            step = pm.Metropolis()
            trace = pm.sample(1000, step=step, njobs=1, progressbar=False)
            ratings[i] = rating.distribution.random()[0]
    return movies[movies.index == movies.index[ratings.argmax()]]

## Learning utilities

In [None]:
def compute_utility(user_features, movie_features, epoch, s):
    """Utility of a movie for a user.

    The dot product of the two feature vectors is scaled by the factor
    ``1 - exp(-epoch / s)``, which is 0 at ``epoch == 0`` and approaches 1 as
    ``epoch`` grows relative to ``s``.
    """
    saturation = 1 - math.exp(-epoch / s)
    affinity = user_features.dot(movie_features)
    return affinity * saturation

def compute_UCB(epoch, Nt):
    """Upper-confidence exploration bonus ``sqrt(2 * ln(epoch) / Nt)``.

    `epoch` is the current step (must be >= 1 for the logarithm to be
    non-negative); `Nt` is the number of times the movie has been recommended.

    A movie that has never been recommended (``Nt <= 0``) gets an infinite
    bonus so that it is tried before any already-played movie. The original
    expression divided by zero in that case (ZeroDivisionError with Python
    numbers, NaN/inf with numpy floats).
    """
    if Nt <= 0:
        return math.inf
    return math.sqrt((2 * math.log(epoch)) / Nt)

def get_movie_features(movie):
    """Extract the feature vector of a movie.

    A Series yields its last WANTED_DIM entries. A DataFrame is reduced to the
    row selected by its first index label and processed again. Any other type
    raises TypeError.
    """
    if isinstance(movie, pd.DataFrame):
        first_label = movie.index[0]
        return get_movie_features(movie.loc[first_label])
    if isinstance(movie, pd.Series):
        return movie[-WANTED_DIM:]
    raise TypeError("{} should be a Series or DataFrame".format(movie))
    
def best_contentbased_recommandation(user_features, movies, epoch, s, UCB=False):
    """Return the rows of `movies` belonging to the highest-scoring movie.

    Each movie is scored with compute_utility, using the time elapsed since it
    was last recommended (``epoch - last_t``). When `UCB` is true the
    compute_UCB bonus, based on the movie's `Nt` counter, is added to the
    score. Ties go to the first movie in iteration order.
    """
    scores = np.zeros(movies.shape[0])
    for position, (_, row) in enumerate(movies.iterrows()):
        elapsed = epoch - row.last_t
        score = compute_utility(user_features, get_movie_features(row), elapsed, s)
        if UCB:
            score += compute_UCB(epoch, row.Nt)
        scores[position] = score
    best_label = movies.index[scores.argmax()]
    return movies[movies.index == best_label]

def best_collaborative_recommandation(user_features, user_movies, epoch, s):
    """Recommend among the movies rated by the most similar users.

    Similarity between the current user and every profile in the global
    `collabo` table is the dot product of the two feature vectors (what
    ``np.correlate`` computes for equal-length inputs in its default mode).
    The movies rated by the 10 most similar users (looked up in the global
    `users` table) are intersected with `user_movies`, and the best
    content-based recommendation within that subset is returned.

    Falls back to a content-based recommendation over all of `user_movies`
    when the intersection is empty.

    Returns a DataFrame holding the chosen movie.
    """
    n_neighbours = 10
    # TODO: exclude the current user from `collabo` (no user id is available
    # here, so the user may be matched with their own profile).
    similarity = collabo.values.dot(np.asarray(user_features, dtype=float))
    # Stable sort on the negated scores keeps the lowest position first among
    # ties, like argmax. The original loop reset `corruser[idxmax]` instead of
    # masking `corr[idxmax]`, so argmax returned the same user every time and
    # only one real neighbour was ever used.
    nearest_positions = np.argsort(-similarity, kind="stable")[:n_neighbours]
    nearest_users = collabo.index[nearest_positions]
    neighbour_movies = users[users.user.isin(nearest_users)].index
    # Intersect with the movies available to this user. Filtering with `isin`
    # avoids `.loc` raising on labels that are absent from `user_movies`.
    subsetmovie = user_movies[user_movies.index.isin(neighbour_movies)].dropna()
    if subsetmovie.empty:
        print("WARNING : no jointure btw user")
        return best_contentbased_recommandation(user_features, user_movies, epoch, s)
    # TODO: check that the movie has not already been seen.
    return best_contentbased_recommandation(user_features, subsetmovie, epoch, s)

def all_recommandation(user_features, movies, epoch=1000, s=500):
    """Return a copy of `movies` with a `utilities` column, sorted ascending.

    `epoch` and `s` are forwarded to compute_utility. The original call passed
    only three arguments (``..., 1000``) to a four-parameter function and
    always raised TypeError. The factor derived from `epoch` and `s` is the
    same for every movie, so these defaults rescale the utilities without
    changing the ordering; ``s=500`` matches the default of rl_multiple_users.

    The highest-utility movie is the last row of the result.
    """
    movies = movies.copy()
    movies['utilities'] = movies.apply(
        lambda mov: compute_utility(user_features, get_movie_features(mov), epoch, s),
        axis=1,
    )
    return movies.sort_values(by="utilities")
        
def iterative_mean(old, new, t):
    """Running-mean update: blend the previous mean `old` with the `t`-th
    observation `new`, giving weights ``(t - 1) / t`` and ``1 / t``."""
    weight_old = (t - 1) / t
    weight_new = 1 / t
    return weight_old * old + weight_new * new
    
def update_features(user_features, movie_features, rating, t):
    """Fold one rated movie into the user's preference vector.

    The observation is the movie's feature vector scaled by the rating; it is
    merged into `user_features` as observation number ``t + 1`` of a running
    mean.
    """
    observation = movie_features * rating
    return iterative_mean(user_features, observation, t + 1)

## Simulate with one selector (greedy or random or bayes or ...)

And return scoring

In [None]:
def reinforcement_learning(user, moviestc, choicef, s, numberSimulation):
    """Simulate `numberSimulation` recommendation steps for one user.

    Parameters
    ----------
    user : DataFrame with a `rating` column, looked up by movie label
        (presumably one user's ratings indexed by movie -- confirm with caller).
    moviestc : DataFrame of candidate movies; all columns except "votes" and
        "rating" are kept as features.
    choicef : selector called as ``choicef(user_features, movies, t + 1, s)``
        and expected to return a DataFrame (or Series) for the chosen movie.
    s : time-scale parameter forwarded to the selector and compute_utility.
    numberSimulation : number of steps to run.

    Returns
    -------
    dict with the lists 'cumregret' (running sum of rating - utility),
    'accuracy_rmse' (squared error of each step), 'avg_rating' (rating obtained
    at each step) and 'timediff' (seconds spent per step). The first three
    start with a seed value 0; 'timediff' does not.

    Raises
    ------
    KeyError if `user` has no rating for the recommended movie.
    """
    if s<200:
        print("WARNING : s is really small, movies will get often repeated")
    user_features = np.zeros(moviestc.shape[1] - 2)
    movies = moviestc.copy()
    movies = movies[movies.columns.difference(["votes", "rating"])]
    # Bookkeeping columns are inserted at the front so that the feature columns
    # stay last, where get_movie_features expects them.
    movies.insert(0, 'last_t', np.ones(movies.shape[0]).astype(np.int64))
    movies.insert(0, 't', list(range(movies.shape[0])))
    movies.insert(0, 'rating', user.rating)
    movies.insert(0, 'Nt', np.zeros(movies.shape[0]))
    cumregret = [0]
    accuracy_rmse = [0]
    avg_rating = [0]
    timestamp = []
    for t in range(numberSimulation):
        now = datetime.datetime.now()
        recommandation = choicef(user_features, movies, t+1, s)
        recommandation_features = get_movie_features(recommandation)
        # Resolve the movie label explicitly instead of relying on a bare
        # `except:` (which hid the real error behind debug prints): a Series
        # carries the label in `.name`, a DataFrame in its index.
        if isinstance(recommandation, pd.Series):
            movie_id = recommandation.name
        else:
            movie_id = recommandation.index[0]
        user_rating = user.loc[movie_id].rating
        user_features = update_features(user_features, recommandation_features, user_rating, t)
        # The utility is evaluated with the already-updated user features.
        utility = compute_utility(user_features, recommandation_features, t, s)
        error = user_rating - utility
        cumregret.append(cumregret[-1] + error)
        accuracy_rmse.append(error ** 2)
        avg_rating.append(user_rating)
        chosen = movies.index == movie_id
        movies.loc[chosen, 'last_t'] = t
        movies.loc[chosen, 'Nt'] += 1
        timestamp.append((datetime.datetime.now() - now).total_seconds())
    return {'cumregret': cumregret, 'accuracy_rmse':accuracy_rmse, 'avg_rating':avg_rating, 'timediff':timestamp}

## Simulate many

In [None]:
def rl_multiple_users(users, movies, algorithms, s=500, N=20, N_USER=50):
    """Run every algorithm on the same random sample of users.

    Parameters
    ----------
    users : DataFrame of ratings with a `user` column, indexed by movie label.
    movies : DataFrame of movies sharing that index.
    algorithms : iterable of selector functions passed to
        reinforcement_learning.
    s, N : forwarded to reinforcement_learning as `s` and `numberSimulation`.
    N_USER : number of distinct users to sample; clamped to the number of users
        available (the original raised ValueError when asking for more).

    Returns
    -------
    A list with one entry per algorithm, each a list with one
    reinforcement_learning result per sampled user.
    """
    user_ids = pd.Series(users.user.unique())
    sampled_ids = user_ids.sample(min(N_USER, len(user_ids)))
    users_sample = users[users.user.isin(sampled_ids)].copy()
    print(users_sample.shape)
    movies_sample = movies[movies.index.isin(users_sample.index.unique())].copy()
    results_all = []
    for algo in algorithms:
        res_algo = []
        for name in users_sample.user.unique():
            user = users[users.user == name]
            # Restrict the candidates to movies this user has actually rated.
            movies_user = movies_sample[movies_sample.index.isin(user.index)]
            res_algo.append(reinforcement_learning(user, movies_user, algo, s, N))
        results_all.append(res_algo)
    return results_all

# Results

## Select multiple users and algo and plot results

In [None]:
# Selectors to compare. The display names are derived from the functions
# themselves so the two lists cannot drift apart (the hand-written list had the
# content-based and collaborative names swapped, mislabelling the plots).
ALGOS = [
    greedy_choice_no_t_contentbased,
    greedy_choice_no_t_collaborative,
    random_choice,
    greedy_choice_UCB,
]
ALGOS_NAME = [algo.__name__ for algo in ALGOS]
assert(len(ALGOS) == len(ALGOS_NAME))

In [None]:
# Keys of the dict returned by reinforcement_learning, in plotting order.
METRICS = ['cumregret', 'accuracy_rmse', 'avg_rating', 'timediff']
# One plot title per metric (the 'avg_rating' title used to duplicate the
# accuracy title).
TITLE_GRAPH = [
    'Average cumulative regret for each algorithm',
    'Average accuracy for each algorithm',
    'Average rating for each algorithm',
    'Average running time for each algorithm',
]
# One value-axis label per metric. Despite its name, this list is used as the
# y-axis title by the plotting cell.
X_AXIS = ['Cumulative regret', 'Accuracy (root mean square error)', 'Rating', 'Time']
assert(len(METRICS) == len(TITLE_GRAPH) == len(X_AXIS))

In [None]:
# Run every selector in ALGOS for 500 steps on 3 randomly sampled users
# (s=200) and report the wall time of the whole run (IPython `%time` magic).
%time res = rl_multiple_users(users, movies, ALGOS, N=500, N_USER=3, s=200)

In [None]:
# One figure per metric, one curve per algorithm: each curve is the per-step
# average over the sampled users.
for metric, tgraph, xaxix in zip(METRICS, TITLE_GRAPH, X_AXIS):
    data = []
    for algo, algon in enumerate(ALGOS_NAME):
        runs = np.array([run[metric] for run in res[algo]])
        # Drop the first element of the averaged series, as the original did.
        # NOTE(review): 'timediff' has no leading seed value, so this discards
        # its first real measurement -- confirm whether that is intended.
        temp = np.average(runs, axis=0)[1:]
        data.append(go.Scatter(
            x=list(range(len(temp))),
            y=temp,
            name=algon
        ))
    layout = dict(
        title=tgraph,
        # The x axis is the simulation step; the original repeated the graph
        # title here.
        xaxis=dict(title='Simulation step'),
        yaxis=dict(title=xaxix),
    )
    fig = dict(data=data, layout=layout)
    plotly.offline.iplot(fig)