# Second attempt at learning

In [None]:
import pandas as pd
import numpy as np
import plotly.offline as plt
import plotly.graph_objs as go
plt.init_notebook_mode()
import os
import random
import math
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr
import pymc3 as pm
import theano
theano.config.compute_test_value = 'raise'
%matplotlib inline
from multiprocessing import Pool, cpu_count

# Directory (relative to the working directory) that holds the input CSV files.
SELECTED_DATA_DIR = "../selected-data/"
# File name of the movie table, loaded into `movies`.
MOVIES_FILE = "best_movie_ratings_features_engineered.csv"
# File name of the user ratings table, loaded into `users`.
USERS_FILE = "users_ratings.csv"

## Read data

In [None]:
# Load the movie table; the first CSV column becomes the index.
movies = pd.read_csv(SELECTED_DATA_DIR + MOVIES_FILE, index_col=0)
# Rescale ratings by 1/10 (presumably from a 0-10 scale to 0-1 -- TODO confirm).
movies.rating = movies.rating/10
# Display one random row as a quick sanity check.
movies.sample()

In [None]:
# Load the user ratings table; the first CSV column becomes the index.
users = pd.read_csv(SELECTED_DATA_DIR + USERS_FILE, index_col=0)
# Rescale ratings by 1/10, same as for the movie table.
users.rating = users.rating/10
# Display one random row as a quick sanity check.
users.sample()

## Learning

In [None]:
def compute_utility(user_features, movie_features, epoch, s):
    """Utility of a movie for a user, damped by elapsed time.

    The raw affinity is the dot product of the two feature vectors. It is
    multiplied by ``1 - exp(-epoch / s)``, which is 0 when ``epoch`` is 0 and,
    for positive ``s``, approaches 1 as ``epoch / s`` grows.
    """
    affinity = user_features.dot(movie_features)
    recovery = 1 - math.exp(-epoch / s)
    return affinity * recovery

def get_movie_features(movie, n_features=50):
    """Return the feature values of a movie.

    The features are the last ``n_features`` entries (by position) of a movie
    row.

    Args:
        movie: a ``pd.Series`` (one movie row) or a ``pd.DataFrame``, in which
            case only its first row is used.
        n_features: number of trailing entries that make up the feature
            vector. Defaults to 50, the value previously hard-coded.

    Returns:
        A ``pd.Series`` holding the last ``n_features`` entries of the row.

    Raises:
        TypeError: if ``movie`` is neither a Series nor a DataFrame.
    """
    if isinstance(movie, pd.Series):
        return movie.iloc[-n_features:]
    if isinstance(movie, pd.DataFrame):
        # Select the first row by position. ``movie.loc[movie.index[0]]``
        # returns a DataFrame again when the first label is duplicated,
        # which made the recursion never terminate.
        return get_movie_features(movie.iloc[0], n_features)
    raise TypeError("{} should be a Series or DataFrame".format(movie))
    
def best_recommandation(user_features, movies, epoch, s):
    """Pick the candidate movie with the largest utility.

    Each row's utility is evaluated at ``epoch - row.last_t``. The result is
    the slice of ``movies`` (a DataFrame) whose index label equals the label
    of the top-scoring row.
    """
    scores = np.fromiter(
        (
            compute_utility(user_features, get_movie_features(row), epoch - row.last_t, s)
            for _, row in movies.iterrows()
        ),
        dtype=float,
        count=movies.shape[0],
    )
    top_label = movies.index[scores.argmax()]
    return movies[movies.index == top_label]

def all_recommandation(user_features, movies, epoch=1000, s=200):
    """Return a copy of ``movies`` with a ``utilities`` column, sorted by it.

    The previous version called ``compute_utility`` without its ``s``
    argument and therefore always raised ``TypeError``.

    Args:
        user_features: user preference vector.
        movies: DataFrame of movies; it is not modified.
        epoch: elapsed time passed to ``compute_utility`` (1000, the value
            previously hard-coded, by default).
        s: recovery-rate parameter passed to ``compute_utility``.

    Returns:
        A new DataFrame sorted by ascending utility (best movie last).
    """
    movies = movies.copy()
    movies['utilities'] = movies.apply(
        lambda mov: compute_utility(user_features, get_movie_features(mov), epoch, s),
        axis=1,
    )
    return movies.sort_values(by="utilities")

def random_choice(user_features, movies, epoch, s):
    """Pure-exploration policy: recommend one movie drawn at random.

    ``user_features``, ``epoch`` and ``s`` are accepted only so the signature
    matches the other choice functions; they are not used.
    """
    picked = movies.sample(n=1)
    return picked

def greedy_choice(user_features, movies, epoch, s):
    """Epsilon-greedy policy with a decaying exploration rate.

    With probability ``1 / sqrt(epoch + 1)`` a random movie is returned;
    otherwise the movie with the highest utility is returned.
    """
    explore_prob = 1 / math.sqrt(epoch + 1)
    if random.random() <= explore_prob:
        # explore
        return movies.sample()
    # exploit
    return best_recommandation(user_features, movies, epoch, s)

def greedy_choice_no_t(user_features, movies, epsilon=0.5):
    """Epsilon-greedy policy with a fixed epsilon and no time damping.

    The previous version called ``best_recommandation`` without its
    ``epoch`` and ``s`` arguments, so the exploit branch always raised
    ``TypeError``. The best movie is now the one with the largest plain dot
    product between ``user_features`` and the movie features.

    Args:
        user_features: user preference vector.
        movies: DataFrame of candidate movies.
        epsilon: probability of returning a random movie instead of the best.

    Returns:
        A DataFrame slice of ``movies`` holding the chosen movie.
    """
    if random.random() > epsilon:  # choose the best
        utilities = np.fromiter(
            (user_features.dot(get_movie_features(movie)) for _, movie in movies.iterrows()),
            dtype=float,
            count=movies.shape[0],
        )
        return movies[movies.index == movies.index[utilities.argmax()]]
    return movies.sample()
        
def iterative_mean(old, new, t):
    """Fold a new observation into a running mean.

    ``old`` is weighted by ``(t - 1) / t`` and ``new`` by ``1 / t``.
    """
    keep = (t - 1) / t
    gain = 1 / t
    return keep * old + gain * new
    
def update_features(user_features, movie_features, rating, t):
    """Update the user preference vector after one rated recommendation.

    The movie features are scaled by the rating and folded into the running
    mean of the user features, using ``t + 1`` as the observation count.
    """
    observation = movie_features * rating
    n_seen = t + 1
    return iterative_mean(user_features, observation, n_seen)

def reinforcement_learning(user, moviestc, choicef=greedy_choice, s=200, N=20):
    """Simulate ``N`` rounds of recommendation for one user and return the regret.

    Args:
        user: DataFrame of one user's ratings, with a ``rating`` column. Its
            index is looked up with the index labels of ``moviestc``.
        moviestc: DataFrame of candidate movies; it is not modified.
        choicef: policy called as ``choicef(user_features, movies, epoch, s)``;
            it must return a DataFrame slice of ``movies``.
        s: recovery-rate parameter forwarded to the policy and to
            ``compute_utility``.
        N: number of rounds.

    Returns:
        The sum over rounds of ``user_rating - utility``.
    """
    # NOTE(review): assumes every column of `moviestc` except two is a feature,
    # and that this count matches what get_movie_features returns -- confirm.
    user_features = np.zeros(moviestc.shape[1] - 2)
    movies = moviestc.copy()
    movies = movies[movies.columns.difference(["votes", "rating"])]
    # Bookkeeping columns are inserted at the front so the features stay last.
    movies.insert(0, 'last_t', np.ones(movies.shape[0]).astype(np.int64))
    movies.insert(0, 't', list(range(movies.shape[0])))
    movies.insert(0, 'rating', user.rating)
    regret = 0
    for t in range(N):
        recommandation = choicef(user_features, movies, t+1, s)
        recommandation_features = get_movie_features(recommandation)
        # `DataFrame.get_value` was removed in pandas 1.0; `.at` is the
        # scalar label-based accessor that replaces it.
        user_rating = user.at[recommandation.index[0], "rating"]
        user_features = update_features(user_features, recommandation_features, user_rating, t)
        # The utility is evaluated with the features already updated above.
        utility = compute_utility(user_features, recommandation_features, t, s)
        regret += user_rating - utility
        # Record when this movie was last recommended.
        movies.loc[movies.index.isin(recommandation.index),'last_t'] = t
    return regret

## One user

### Selecting one user at random

In [None]:
# Draw one random row of `users` and keep every row belonging to that user.
user = users[users.user.isin(users.user.sample())]
# Display the shape of the selection.
user.shape

In [None]:
# get only movies that this user rated
movies_user = movies[movies.index.isin(user.index)]
movies_user.shape

### Score : regret

In [None]:
# Regret of the epsilon-greedy policy and of the random policy for the
# selected user, each with the default s and N of reinforcement_learning.
regret_greedy = reinforcement_learning(user, movies_user, greedy_choice)
regret_random = reinforcement_learning(user, movies_user, random_choice)
print(regret_greedy, regret_random)

## Multiple users

In [None]:
def wrapper_rl_one_user(args):
    """Call ``reinforcement_learning`` with a packed sequence of arguments.

    Presumably meant for APIs that pass a single argument per task, such as
    ``Pool.map`` -- it is not called anywhere in the visible code.
    """
    return reinforcement_learning(*args)

def rl_multiple_users(users, movies, algorithms=None, s=200, N=20, N_USER=50):
    """Average the regret of several policies over a random sample of users.

    Args:
        users: DataFrame of ratings with a ``user`` column. Its index is
            matched against the index of ``movies``.
        movies: DataFrame of movies.
        algorithms: iterable of choice functions to compare. ``None`` (the
            default) means ``(greedy_choice, random_choice)``.
        s: recovery-rate parameter forwarded to ``reinforcement_learning``.
        N: number of rounds per user.
        N_USER: number of distinct users to sample.

    Returns:
        A list with one mean regret per algorithm, in the order given.

    Raises:
        ZeroDivisionError: if no user is sampled while at least one
            algorithm is given.
    """
    if algorithms is None:
        # Resolved here rather than in the signature to avoid a mutable default.
        algorithms = (greedy_choice, random_choice)

    sampled_names = pd.Series(users.user.unique()).sample(N_USER)
    users_sample = users[users.user.isin(sampled_names)].copy()
    print(users_sample.shape)
    movies_sample = movies[movies.index.isin(users_sample.index.unique())].copy()

    # The per-user frames do not depend on the algorithm, so build them once.
    per_user = []
    for name in users_sample.user.unique():
        user = users_sample[users_sample.user == name]
        movies_user = movies_sample[movies_sample.index.isin(user.index)]
        per_user.append((user, movies_user))

    regrets = []
    for algo in algorithms:
        regret_algo = [
            reinforcement_learning(user, movies_user, algo, s, N)
            for user, movies_user in per_user
        ]
        regrets.append(sum(regret_algo) / len(regret_algo))
    return regrets

In [None]:
# Compare the mean regrets for several values of the recovery rate s.
# The loop value must be passed explicitly; otherwise every iteration runs
# with the default s=200 of rl_multiple_users.
for s in [0.05, 0.5, 1, 2, 3, 4, 5, 10, 100, 1000]:
    regrets = rl_multiple_users(users, movies, s=s)
    print(s, " : ", *regrets)

An s between 1 and 2 seems optimal.

## Try Bayesian inference

### With scoring to evaluate performance

In [None]:
# Prior hyper-parameters read by bayes_inference.
# c0 scales the covariance of the prior on the user features
# (cov = c0 * sigma * I).
c0 = 10
# d0, e0: the two positional parameters of the Gamma prior on s
# (presumably shape and rate -- confirm against the pymc3 version in use).
d0 = 3
e0 = 0.01
# f0, g0: the two positional parameters of the InverseGamma prior on sigma.
f0 = 0.001
g0 = 0.001

def bayes_inference(Dl, x):
    """Draw one utility for ``x`` from a model fitted on ``Dl``.

    ``x`` is split into a feature vector (all entries but the last) and an
    epoch (the last entry). A pymc3 model is built with priors on ``sigma``,
    on the user feature vector ``theta`` and on the recovery rate ``s``; the
    observed ratings ``Dl`` are modelled as Normal around
    ``theta . features * (1 - exp(-epoch / s))``. 1000 draws are taken with a
    Metropolis step, a utility is computed for each draw, and one of those
    utilities is returned at random.

    NOTE(review): UCB_choice passes a whole row of ``movies`` as ``x`` and a
    DataFrame slice as ``Dl``; verify that the last entry of the row really
    is the epoch and that ``Dl`` holds the observed ratings.
    """
    N = x.size - 1
    I = np.eye(N)
    features = x[:N]
    epoch = x[N]
    with pm.Model():
        # sigma scales the covariance of theta below and is also used as the
        # sd of the rating likelihood.
        sigma = pm.InverseGamma('sigma', f0, g0)
        # distribution of user features content
        theta = pm.MvNormal('theta', mu=0.5, cov=c0 * sigma * I, shape=(N,))
        # distribution of user recovery rate
        s = pm.Gamma('s', d0, e0)
        
        # Expected rating: same formula as compute_utility, with theta and s
        # as random variables.
        mu = theta.dot(features) * (1 - np.exp(-epoch/s))

        rating = pm.Normal('rating', mu=mu, sd=sigma, observed=Dl)

        step = pm.Metropolis()
        trace = pm.sample(1000, step=step, progressbar=False)

    # One utility per posterior draw of (theta, s).
    rating_distribution = [compute_utility(trace.theta[i], features, epoch, trace.s[i]) for i in range(len(trace.theta))]
    #plt.plot([go.Histogram(x=rating_distribution)])
    # Return a single random draw rather than a summary of the distribution.
    return random.choice(rating_distribution)

def UCB_choice(user, movies, epoch, s):
    """Recommend the movie with the highest sampled Bayesian utility.

    The rows whose labels are among the first ``epoch`` index labels are
    passed to ``bayes_inference`` as observed data, once per candidate row.
    The result is the slice of ``movies`` whose label matches the row with
    the largest returned value. ``user`` and ``s`` are not used.
    """
    observed = movies[movies.index.isin(movies.index[:epoch])]
    sampled = np.fromiter(
        (bayes_inference(observed, row) for _, row in movies.iterrows()),
        dtype=float,
        count=movies.shape[0],
    )
    best_label = movies.index[sampled.argmax()]
    return movies[movies.index == best_label]

In [None]:
#%time UCB_choice(user, movies_user, 5, 1)

In [None]:
# Time a small comparison of the three policies: 4 sampled users, 10 rounds each.
%time rl_multiple_users(users, movies, [random_choice, greedy_choice, UCB_choice], N=10, N_USER=4)