# Reinforcement Learning

# 7. Parametric Bandits

The objective of this lab is to recommend contents (here movies) using **parametric bandits**. The rewards are binary (like or dislike).


## Imports

In [1]:
import numpy as np
import pandas as pd

You will need ``ipywidgets`` to simulate the interactions with the user.

In [None]:
#!pip install ipywidgets

In [2]:
from ipywidgets import AppLayout, Button, GridspecLayout, Image, Layout

In [None]:
#!pip install scikit-learn

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer

## Data

We work on a catalogue of 1037 movies available in 2015.

In [4]:
catalogue = pd.read_pickle('movie_database.pickle')

In [None]:
len(catalogue)

In [None]:
catalogue.head()

The features are the following:

|Column|Description|Type|
|:---|:---|:---|
|Actors| Actors starring | list of strings|
|Awards| Awards received| string|
|Country| Country of origin| list of strings|
|Director| Director(s) of the movie|  list of strings|
|Genre| Genres (Action, ...) | list of strings|
|Language| Language(s) spoken |list of strings|
|Rated| Public rating (G = General, R = Restricted, ...)| list of strings|
|Released| Date of the movie| date|
|Title|Title of the movie|string|
|imdbID| IMDB id| string|
|imdbRating| IMDB rating (between 0 and 10)| float|
|Metascore| Metacritic score (between 0 and 100)|float|
|Box_office| Total money generated|float|
|imdbVotes| Number of IMDB votes| float|
|Runtime| Duration of the movie (in minutes)|float|
|poster| Poster of the movie (jpg)| binary string|

In [7]:
# Display the posters

def get_poster(k, scale=1):
    """Build an image widget showing the poster of one movie.

    Parameters
    ----------
    k : label of the movie in ``catalogue`` (looked up with ``.loc``).
    scale : multiplier applied to the base size of 130 x 200.

    Returns
    -------
    An ``Image`` widget in 'jpg' format holding the movie's ``poster`` value.
    """
    base_width, base_height = 130, 200
    poster_data = catalogue.loc[k].poster
    return Image(
        value=poster_data,
        format='jpg',
        width=base_width * scale,
        height=base_height * scale,
    )

def display_posters(index=None, n_col=5, n_rows=4):
    """Lay out movie posters on a grid, filled row by row.

    Parameters
    ----------
    index : sequence of movie labels to show, in display order. When None,
        the positions 0..len(catalogue)-1 are used.
    n_col : number of posters per row.
    n_rows : maximum number of rows; fewer rows are used when ``index`` is short.

    Returns
    -------
    The populated ``GridspecLayout``, or None when ``index`` is empty.
    """
    if index is None:
        index = np.arange(len(catalogue))
    if not len(index):
        return None

    # Never allocate more rows than needed to show every requested poster.
    n_rows = min(n_rows, int(np.ceil(len(index) / n_col)))
    grid = GridspecLayout(n_rows, n_col)

    n_shown = min(len(index), n_rows * n_col)
    for position in range(n_shown):
        row, col = divmod(position, n_col)
        grid[row, col] = get_poster(index[position])
    return grid

In [None]:
display_posters()

## Features

We will describe each movie by some features, for instance its genre.

In [9]:
mlb = MultiLabelBinarizer()

In [10]:
movies = pd.DataFrame(mlb.fit_transform(catalogue['Genre']), columns=mlb.classes_)

In [None]:
movies.head()

In [None]:
movies.columns

## User

Each user will be modeled by a vector of weights (positive or negative) on each feature. 

In [13]:
# Zero weight on every feature, except for the few genres this user cares about
# (positive = likes, negative = dislikes).
user = pd.DataFrame(0, index=[0], columns=movies.columns).assign(
    **{'Action': 2, 'Crime': 1, 'Sci-Fi': -2}
)

## To do

* Display the favorite movies of this user. 
* Test another user, and quantify their similarity (e.g., proportion of common top-100 movies).

Favorite movies of this user

In [None]:
# Score of a movie = dot product between the user's weights and the movie's features.
score_user = (user.values @ movies.values.T)[0]
# Movies sorted from highest to lowest score.
ranking_user = score_user.argsort()[::-1]
display_posters(ranking_user)

Set another user

In [31]:
# Second user: same construction as the first one, with different genre weights.
user2 = pd.DataFrame(0, index=[0], columns=movies.columns).assign(
    **{'Action': 2, 'Adventure': 2, 'Crime': -1, 'Sci-Fi': -1}
)

In [None]:
score_user2 = (user2.values @ movies.values.T)[0]
display_posters(score_user2.argsort()[::-1])

Calculate similarity

In [None]:
# Top-100 movies of each user (highest score first).
user1_top100 = np.argsort(score_user)[::-1][:100]
user2_top100 = np.argsort(score_user2)[::-1][:100]
# Similarity = proportion of movies shared by the two top-100 lists.
shared_top100 = set(user1_top100).intersection(user2_top100)
similarity = len(shared_top100) / 100
similarity

## Offline learning

We start with offline learning. There are 2 steps: 
1. Collect the user's opinion on a few movies (e.g., 10)
2. Rank the other movies by logistic regression.

Let's test that.

In [34]:
# Add a column to record the user's opinion (like / dislike)
movies = movies.assign(like=None)

In [35]:
# Select a random movie (not yet seen by the user)
    
def select_random_movie():
    """Draw a movie uniformly at random among those without an opinion yet.

    Reads the global ``movies`` frame. When every movie already has an opinion,
    the draw is made over all positions ``0..len(movies)-1`` instead.
    """
    unseen = np.flatnonzero(movies.like.isna())
    # np.random.choice accepts either an array of candidates or an integer upper bound.
    pool = unseen if len(unseen) else len(movies)
    return np.random.choice(pool)

In [None]:
# Create buttons

def create_expanded_button(description, button_style):
    """Create a button widget with the given label and style, using a default Layout."""
    options = dict(
        description=description,
        button_style=button_style,
        layout=Layout(),
    )
    return Button(**options)

def update_likes(button):
    """Store the opinion carried by the clicked button for the movie currently shown.

    The opinion is True when the button's description is 'like', False otherwise.
    It is written in the 'like' column of the global ``movies`` frame, at row ``movie_id``.
    """
    liked = button.description == 'like'
    movies.loc[movie_id, 'like'] = liked
    
def update_poster():
    """Show the poster of the current movie (global ``movie_id``) in the ``img`` widget."""
    img.value = catalogue.loc[movie_id, 'poster']
    
def on_button_clicked(button):
    """Handle a click on the like / dislike button.

    Records the opinion for the movie currently shown, draws a new movie with
    ``select_random_movie`` and refreshes the poster widget.
    """
    global movie_id  # rebound below so that the other callbacks see the new movie
    update_likes(button)
    movie_id = select_random_movie()
    update_poster()

# Setting the buttons: both share the same callback, which tells them apart
# through the button description ('like' / 'dislike').
left_button = create_expanded_button('like', 'success')
right_button = create_expanded_button('dislike', 'danger')
left_button.on_click(on_button_clicked)
right_button.on_click(on_button_clicked)

# Setting the movie poster: `movie_id` and `img` are globals read by the callbacks.
movie_id = select_random_movie()
img = get_poster(movie_id, scale=1.5)

# Display: like button on the left, poster in the center, dislike button on the right.
AppLayout(
    left_sidebar=left_button,
    right_sidebar=right_button, 
    center=img,
    pane_widths=[0.3, 0.4, 0.3]
)

## To do

* Give your opinion on some movies (e.g., 10), making sure that you get a few likes and a few dislikes.
* Apply logistic regression and display the other movies in order of preference (top movies first).
* Give your top-3 and bottom-3 genres, as predicted by the model.

In [84]:
# Start from a clean slate, then hard-code a few opinions so that the cell is
# reproducible without clicking on the widget.
movies = movies.assign(like=None)
manual_opinions = {
    True: [101, 356, 655],   # likes
    False: [901, 456, 255],  # dislikes
}
for liked, movie_labels in manual_opinions.items():
    for index in movie_labels:
        movies.at[index, 'like'] = liked

In [None]:
# likes
likes = np.flatnonzero(movies.like==True)
display_posters(likes)

In [None]:
# dislikes
dislikes = np.flatnonzero(movies.like==False)
display_posters(dislikes)

Other movies in order of preference

In [None]:
# Learning: fit a logistic regression (no intercept) on the movies that have an opinion.
model = LogisticRegression(fit_intercept=False)
rated_movies = movies.dropna()
X_train = rated_movies.drop(columns='like').values
y_train = rated_movies.like.astype(int).values
model.fit(X_train, y_train)

# Prediction: probability of 'like' (second column of predict_proba) for the other movies.
unrated_movies = movies[movies.like.isna()]
X_test = unrated_movies.drop(columns='like').values
predicted_probabilities = model.predict_proba(X_test)[:, 1]

# Rank the unrated movies, most likely to be liked first.
unrated_movies = (
    unrated_movies
    .assign(predicted_probabilities=predicted_probabilities)
    .sort_values('predicted_probabilities', ascending=False)
)
print('Top 20 preferred movies:')
display_posters(unrated_movies.index[:20])

In [None]:
# Attach the titles from the catalogue (aligned on the movie index) to the ranked movies.
ranked_titles = catalogue.loc[unrated_movies.index].Title.values
unrated_movies = unrated_movies.assign(Title=ranked_titles)
unrated_movies[['Title', 'predicted_probabilities']].head(20)

Top-3 genres

In [None]:
# One learned weight per genre, sorted from most liked to most disliked.
genres_weights = model.coef_[0]
genre_names = rated_movies.drop(columns='like').columns
sorted_genres = pd.Series(genres_weights, index=genre_names).sort_values(ascending=False)
top_genres = sorted_genres.head(3)
top_genres_df = pd.DataFrame({'Genre': top_genres.index, 'Weight': top_genres.values})
top_genres_df

Bottom-3 genres

In [None]:
# The last three entries of the descending ranking are the least liked genres.
bottom_genres = sorted_genres.tail(3)
bottom_genres_df = pd.DataFrame({'Genre': bottom_genres.index, 'Weight': bottom_genres.values})
bottom_genres_df

## Online learning

We now learn the user preferences online, as they come. For that, we use a Bayesian algorithm inspired by Thompson sampling. 

On each feedback provided by the user:
1. (Learning) The parameter (vector of weights) is learned.
2. (Sampling) A new parameter is sampled, assuming a Gaussian distribution.
3. (Action) The top movie for this new parameter, among movies not yet seen by the user, is proposed. 

Note that:
* In step 1, we retrain the estimator **from scratch**, using logistic regression on all training data samples (**no** online estimation).
* In step 2, we discard correlations (**diagonal** covariance matrix).

## To do

* Complete the function ``select_bayes`` below.
* Test it on some movies (e.g., 10), until you get a few likes and a few dislikes.
* Display the other movies in order of preference (top movies first).

In [150]:
def select_bayes(movies, model):
    """Propose the next movie with a Bayesian rule inspired by Thompson sampling.

    Parameters
    ----------
    movies : DataFrame with one row per movie, feature columns and a 'like'
        column holding True / False for rated movies and a missing value otherwise.
    model : estimator exposing ``fit(X, y)``, ``coef_`` and ``predict_proba(X)``.
        It is refitted in place on the rated movies.

    Returns
    -------
    The index label (as used by ``movies.loc``) of the proposed movie.
    When learning is possible (at least one like, one dislike and one unrated
    movie), this is the unrated movie with the highest score under a sampled
    parameter. Otherwise a movie is drawn at random among the unrated movies of
    ``movies`` (or among all movies if none is left).
    """
    unrated_mask = movies.like.isna().values
    features = movies.drop(columns='like').values
    y_train = movies.like[~unrated_mask].astype(int).values

    # Logistic regression needs both classes; sampling needs a candidate to propose.
    can_learn = unrated_mask.any() and 0 < y_train.sum() < len(y_train)

    if can_learn:
        # Learning: refit from scratch on all rated movies.
        X_train = features[~unrated_mask]
        model.fit(X_train, y_train)

        # Sampling: Gaussian centered on the estimate, with covariance given by the
        # inverse Hessian of the regularized log-loss (identity = unit L2 penalty).
        # NOTE(review): the lab text asks for a diagonal covariance; the full inverse
        # Hessian is kept here, as in the original implementation.
        mu = model.coef_[0]
        proba = model.predict_proba(X_train)[:, 1]
        curvature = proba * (1 - proba)
        hessian = X_train.T @ (X_train * curvature[:, None]) + np.eye(len(mu))
        gamma = np.linalg.inv(hessian)
        theta_sampled = np.random.multivariate_normal(mu, gamma)

        # Action: best unrated movie for the sampled parameter. The position within
        # the unrated subset must be mapped back to the movie's own index label.
        scores = features[unrated_mask] @ theta_sampled
        return movies.index[unrated_mask][scores.argmax()]

    # Not enough feedback yet: explore at random within the frame that was passed in.
    candidates = movies.index[unrated_mask]
    if len(candidates) == 0:
        candidates = movies.index
    return np.random.choice(candidates.to_numpy())

In [96]:
# reset
movies = movies.assign(like=None)

In [97]:
def on_button_clicked(button):
    """Handle a click on the like / dislike button, using the Bayesian selection.

    Records the opinion for the movie currently shown, asks ``select_bayes`` for
    the next movie and refreshes the poster widget. ``select_bayes`` requires the
    movie frame and the estimator; the globals ``movies`` and ``model`` are passed.
    """
    global movie_id  # rebound below so that the other callbacks see the new movie
    update_likes(button)
    movie_id = select_bayes(movies, model)
    update_poster()

In [None]:
# Setting the buttons: both share the same callback, which tells them apart
# through the button description ('like' / 'dislike').
left_button = create_expanded_button('like', 'success')
right_button = create_expanded_button('dislike', 'danger')
left_button.on_click(on_button_clicked)
right_button.on_click(on_button_clicked)

# Setting the movie poster: the first movie is drawn at random since there is
# no feedback yet; `movie_id` and `img` are globals read by the callbacks.
movie_id = select_random_movie()
img = get_poster(movie_id, scale=1.5)

# Display: like button on the left, poster in the center, dislike button on the right.
AppLayout(
    left_sidebar=left_button,
    right_sidebar=right_button, 
    center=img,
    pane_widths=[0.3, 0.4, 0.3]
)

## Analysis

Finally, we would like to assess the quality of our bandit algorithm.

## To do

* Choose a user, that is a parameter $\theta$ (vector of weights).
* Provide the answers of this user to the movies proposed by the algorithm, assuming binary rewards, with mean
$$
q(a) = \frac 1 {1 + e^{-\theta^T a}}
$$
where $a$ is the action (= movie proposed by the algorithm).
* Make sure that a reasonable fraction of movies are liked (e.g., between 10\% and 90\%). Otherwise, update $\theta$.
* Simulate an interaction of this user with the recommender system over 100 movies.
* Compute the [Spearman's correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) of the ranking of the unseen movies provided by the algorithm, compared to the ground-truth ranking.
* Plot the evolution of this coefficient with respect to the number of movies seen by the user, from 1 to 100.
* Give the top-3 and bottom-3 genres, as predicted by the model, and compare to the ground-truth.
* Do the same experiments with other features (e.g., actors, actors + genres, actors + director + genres).

In [151]:
from scipy.stats import spearmanr
import matplotlib.pyplot as plt

Generate $\theta$ parameter randomly for a user

In [None]:
movies = movies.assign(like=None)
n_features = movies.shape[1] - 1  # every column except 'like'

# The attributes below are set by hand so that predict_proba can be called
# before the first fit (all probabilities are then 0.5).
model = LogisticRegression(fit_intercept=False)
model.coef_ = np.zeros((1, n_features))
model.classes_ = np.array([0, 1])
model.intercept_ = np.zeros(1)

# Draw a random user until a reasonable fraction of the movies is liked on average.
feature_matrix = movies.drop(columns='like').values
while True:
    theta_user = np.random.randn(n_features)
    q_a = 1 / (1 + np.exp(- theta_user @ feature_matrix.T))
    if 0.1 <= np.mean(q_a) <= 0.9:
        break

print(f"Parameter: {theta_user}")
print(f"Fraction of liked movies: {np.mean(q_a)}")

Simulate the interaction of this user

In [153]:
def user_feedback(movie, theta_user):
    """Simulate the binary answer of a user to a proposed movie.

    Parameters
    ----------
    movie : feature vector of the proposed movie.
    theta_user : weight vector of the user.

    Returns
    -------
    True with probability 1 / (1 + exp(-theta_user @ movie)), False otherwise.
    """
    logit = theta_user @ movie
    like_probability = 1 / (1 + np.exp(-logit))
    return np.random.rand() < like_probability

rated_movies_list = []
spearman_correlations = []
n_rated = 100
x_list = []

# The feature columns never change during the simulation (only 'like' is updated),
# so they are extracted once instead of at every iteration.
genre_features = movies.drop(columns='like')

for i in range(n_rated):
    # The algorithm proposes a movie and the simulated user answers.
    movie_id = select_bayes(movies, model)
    feedback = user_feedback(genre_features.loc[movie_id].values, theta_user)
    movies.loc[movie_id, 'like'] = feedback
    rated_movies_list.append(movie_id)

    # Compare the model's ranking of the unseen movies with the ground truth.
    unrated_features = genre_features[movies.like.isna()].values
    algo_ranking = model.predict_proba(unrated_features)[:, 1]
    gt_ranking = unrated_features @ theta_user

    # The correlation is undefined while the model gives the same score to every movie.
    if np.all(algo_ranking == algo_ranking[0]):
        continue

    spearman_corr, _ = spearmanr(algo_ranking, gt_ranking)
    x_list.append(i + 1)  # i is 0-based: i + 1 movies have been rated at this point
    spearman_correlations.append(spearman_corr)

Plot the Spearman's correlation

In [None]:
# Explicit figure / axes interface; same content as the pyplot state-machine calls.
fig, ax = plt.subplots()
ax.plot(x_list, spearman_correlations)
ax.set_xlabel('Number of rated movies')
ax.set_ylabel('Spearman\'s correlation')
ax.set_title('Evolution of the Spearman\'s correlation (only genres)')
plt.show()

Top-3 genres

In [None]:
# Feature positions sorted by learned weight: ascending order gives the bottom
# genres directly, its reverse gives the top genres.
ascending_algo = model.coef_[0].argsort()
genres_weights_algo = ascending_algo[::-1]
top_3_algo = genres_weights_algo[:3]
bottom_3_algo = ascending_algo[:3]

# Same ranking for the ground-truth parameter of the simulated user.
ascending_gt = theta_user.argsort()
genres_weights_gt = ascending_gt[::-1]
top_3_gt = genres_weights_gt[:3]
bottom_3_gt = ascending_gt[:3]

feature_names = movies.columns
print(f"Top-3 genres predicted by the model: {feature_names[top_3_algo].values}")
print(f"Top-3 genres in the ground truth: {feature_names[top_3_gt].values}\n")
print(f"Bottom-3 genres predicted by the model: {feature_names[bottom_3_algo].values}")
print(f"Bottom-3 genres in the ground truth: {feature_names[bottom_3_gt].values}")

### Actors

In [156]:
# Binary encoding of the actors: one row per movie, one column per entry of mlb.classes_.
movies_actors = pd.DataFrame(mlb.fit_transform(catalogue['Actors']), columns=mlb.classes_)

In [None]:
movies_actors = movies_actors.assign(like=None)
n_features = movies_actors.shape[1] - 1  # every column except 'like'

# The attributes below are set by hand so that predict_proba can be called
# before the first fit (all probabilities are then 0.5).
model = LogisticRegression(fit_intercept=False)
model.coef_ = np.zeros((1, n_features))
model.classes_ = np.array([0, 1])
model.intercept_ = np.zeros(1)

# Draw a random user until a reasonable fraction of the movies is liked on average.
feature_matrix = movies_actors.drop(columns='like').values
while True:
    theta_user = np.random.randn(n_features)
    q_a = 1 / (1 + np.exp(- theta_user @ feature_matrix.T))
    if 0.1 <= np.mean(q_a) <= 0.9:
        break

print(f"Parameter: {theta_user}")
print(f"Fraction of liked movies: {np.mean(q_a)}")

In [None]:
rated_movies_list = []
spearman_correlations = []
n_rated = 100
x_list = []

# The feature columns never change during the simulation (only 'like' is updated),
# so they are extracted once instead of at every iteration.
actor_features = movies_actors.drop(columns='like')

for i in range(n_rated):
    # The algorithm proposes a movie and the simulated user answers.
    movie_id = select_bayes(movies_actors, model)
    feedback = user_feedback(actor_features.loc[movie_id].values, theta_user)
    movies_actors.loc[movie_id, 'like'] = feedback
    rated_movies_list.append(movie_id)

    # Compare the model's ranking of the unseen movies with the ground truth.
    unrated_features = actor_features[movies_actors.like.isna()].values
    algo_ranking = model.predict_proba(unrated_features)[:, 1]
    gt_ranking = unrated_features @ theta_user

    # The correlation is undefined while the model gives the same score to every movie.
    if np.all(algo_ranking == algo_ranking[0]):
        continue

    spearman_corr, _ = spearmanr(algo_ranking, gt_ranking)
    x_list.append(i + 1)  # i is 0-based: i + 1 movies have been rated at this point
    spearman_correlations.append(spearman_corr)

In [None]:
# Explicit figure / axes interface; same content as the pyplot state-machine calls.
fig, ax = plt.subplots()
ax.plot(x_list, spearman_correlations)
ax.set_xlabel('Number of rated movies')
ax.set_ylabel('Spearman\'s correlation')
ax.set_title('Evolution of the Spearman\'s correlation (only actors)')
plt.show()

In [None]:
# Feature positions sorted from highest to lowest learned weight.
# (The variable names still mention genres, but the features here are actors.)
genres_weights_algo = model.coef_[0].argsort()[::-1]
top_3_algo = genres_weights_algo[:3]
bottom_3_algo = genres_weights_algo[:-4:-1]  # last three, lowest weight first

# Same ranking for the ground-truth parameter of the simulated user.
genres_weights_gt = theta_user.argsort()[::-1]
top_3_gt = genres_weights_gt[:3]
bottom_3_gt = genres_weights_gt[:-4:-1]

# The columns of movies_actors are actors, so the output is labelled accordingly.
print(f"Top-3 actors predicted by the model: {movies_actors.columns[top_3_algo].values}")
print(f"Top-3 actors in the ground truth: {movies_actors.columns[top_3_gt].values}\n")
print(f"Bottom-3 actors predicted by the model: {movies_actors.columns[bottom_3_algo].values}")
print(f"Bottom-3 actors in the ground truth: {movies_actors.columns[bottom_3_gt].values}")

### Actors + genres

In [None]:
# Encode each catalogue column with the shared binarizer; `classes_` is read right
# after each fit so that the column names match the freshly encoded matrix.
encoded_frames = []
for source_column in ('Actors', 'Genre'):
    indicator = mlb.fit_transform(catalogue[source_column])
    encoded_frames.append(pd.DataFrame(indicator, columns=mlb.classes_))
movies_actors, movies_genres = encoded_frames
movies_actors_genres = pd.concat(encoded_frames, axis=1)

In [None]:
movies_actors_genres = movies_actors_genres.assign(like=None)
n_features = movies_actors_genres.shape[1] - 1  # every column except 'like'

# The attributes below are set by hand so that predict_proba can be called
# before the first fit (all probabilities are then 0.5).
model = LogisticRegression(fit_intercept=False)
model.coef_ = np.zeros((1, n_features))
model.classes_ = np.array([0, 1])
model.intercept_ = np.zeros(1)

# Draw a random user until a reasonable fraction of the movies is liked on average.
feature_matrix = movies_actors_genres.drop(columns='like').values
while True:
    theta_user = np.random.randn(n_features)
    q_a = 1 / (1 + np.exp(- theta_user @ feature_matrix.T))
    if 0.1 <= np.mean(q_a) <= 0.9:
        break

print(f"Parameter: {theta_user}")
print(f"Fraction of liked movies: {np.mean(q_a)}")

In [164]:
rated_movies_list = []
spearman_correlations = []
n_rated = 100
x_list = []

# The feature columns never change during the simulation (only 'like' is updated),
# so they are extracted once instead of at every iteration.
actor_genre_features = movies_actors_genres.drop(columns='like')

for i in range(n_rated):
    # The algorithm proposes a movie and the simulated user answers.
    movie_id = select_bayes(movies_actors_genres, model)
    feedback = user_feedback(actor_genre_features.loc[movie_id].values, theta_user)
    movies_actors_genres.loc[movie_id, 'like'] = feedback
    rated_movies_list.append(movie_id)

    # Compare the model's ranking of the unseen movies with the ground truth.
    unrated_features = actor_genre_features[movies_actors_genres.like.isna()].values
    algo_ranking = model.predict_proba(unrated_features)[:, 1]
    gt_ranking = unrated_features @ theta_user

    # The correlation is undefined while the model gives the same score to every movie.
    if np.all(algo_ranking == algo_ranking[0]):
        continue

    spearman_corr, _ = spearmanr(algo_ranking, gt_ranking)
    x_list.append(i + 1)  # i is 0-based: i + 1 movies have been rated at this point
    spearman_correlations.append(spearman_corr)

In [None]:
# Explicit figure / axes interface; same content as the pyplot state-machine calls.
fig, ax = plt.subplots()
ax.plot(x_list, spearman_correlations)
ax.set_xlabel('Number of rated movies')
ax.set_ylabel('Spearman\'s correlation')
ax.set_title('Evolution of the Spearman\'s correlation (actors + genres)')
plt.show()

In [None]:
# Feature positions sorted from highest to lowest learned weight.
# (The variable names still mention genres, but the features mix actors and genres.)
genres_weights_algo = model.coef_[0].argsort()[::-1]
top_3_algo = genres_weights_algo[:3]
bottom_3_algo = genres_weights_algo[:-4:-1]  # last three, lowest weight first

# Same ranking for the ground-truth parameter of the simulated user.
genres_weights_gt = theta_user.argsort()[::-1]
top_3_gt = genres_weights_gt[:3]
bottom_3_gt = genres_weights_gt[:-4:-1]

# The columns are actors and genres, so the output is labelled as generic features.
print(f"Top-3 features predicted by the model: {movies_actors_genres.columns[top_3_algo].values}")
print(f"Top-3 features in the ground truth: {movies_actors_genres.columns[top_3_gt].values}\n")
print(f"Bottom-3 features predicted by the model: {movies_actors_genres.columns[bottom_3_algo].values}")
print(f"Bottom-3 features in the ground truth: {movies_actors_genres.columns[bottom_3_gt].values}")

### Actors + directors + genres

In [None]:
# Encode each catalogue column with the shared binarizer; `classes_` is read right
# after each fit so that the column names match the freshly encoded matrix.
encoded_frames = []
for source_column in ('Actors', 'Director', 'Genre'):
    indicator = mlb.fit_transform(catalogue[source_column])
    encoded_frames.append(pd.DataFrame(indicator, columns=mlb.classes_))
movies_actors, movies_directors, movies_genres = encoded_frames
movies_actors_directors_genres = pd.concat(encoded_frames, axis=1)

In [None]:
movies_actors_directors_genres = movies_actors_directors_genres.assign(like=None)
n_features = movies_actors_directors_genres.shape[1] - 1  # every column except 'like'

# The attributes below are set by hand so that predict_proba can be called
# before the first fit (all probabilities are then 0.5).
model = LogisticRegression(fit_intercept=False)
model.coef_ = np.zeros((1, n_features))
model.classes_ = np.array([0, 1])
model.intercept_ = np.zeros(1)

# Draw a random user until a reasonable fraction of the movies is liked on average.
feature_matrix = movies_actors_directors_genres.drop(columns='like').values
while True:
    theta_user = np.random.randn(n_features)
    q_a = 1 / (1 + np.exp(- theta_user @ feature_matrix.T))
    if 0.1 <= np.mean(q_a) <= 0.9:
        break

print(f"Parameter: {theta_user}")
print(f"Fraction of liked movies: {np.mean(q_a)}")

In [170]:
rated_movies_list = []
spearman_correlations = []
n_rated = 100
x_list = []

# The feature columns never change during the simulation (only 'like' is updated),
# so they are extracted once instead of at every iteration.
all_features = movies_actors_directors_genres.drop(columns='like')

for i in range(n_rated):
    # The algorithm proposes a movie and the simulated user answers.
    movie_id = select_bayes(movies_actors_directors_genres, model)
    feedback = user_feedback(all_features.loc[movie_id].values, theta_user)
    movies_actors_directors_genres.loc[movie_id, 'like'] = feedback
    rated_movies_list.append(movie_id)

    # Compare the model's ranking of the unseen movies with the ground truth.
    unrated_features = all_features[movies_actors_directors_genres.like.isna()].values
    algo_ranking = model.predict_proba(unrated_features)[:, 1]
    gt_ranking = unrated_features @ theta_user

    # The correlation is undefined while the model gives the same score to every movie.
    if np.all(algo_ranking == algo_ranking[0]):
        continue

    spearman_corr, _ = spearmanr(algo_ranking, gt_ranking)
    x_list.append(i + 1)  # i is 0-based: i + 1 movies have been rated at this point
    spearman_correlations.append(spearman_corr)

In [None]:
# Explicit figure / axes interface; same content as the pyplot state-machine calls.
fig, ax = plt.subplots()
ax.plot(x_list, spearman_correlations)
ax.set_xlabel('Number of rated movies')
ax.set_ylabel('Spearman\'s correlation')
ax.set_title('Evolution of the Spearman\'s correlation (actors + directors + genres)')
plt.show()

In [None]:
# Feature positions sorted from highest to lowest learned weight.
# (The variable names still mention genres, but the features mix actors,
# directors and genres.)
genres_weights_algo = model.coef_[0].argsort()[::-1]
top_3_algo = genres_weights_algo[:3]
bottom_3_algo = genres_weights_algo[:-4:-1]  # last three, lowest weight first

# Same ranking for the ground-truth parameter of the simulated user.
genres_weights_gt = theta_user.argsort()[::-1]
top_3_gt = genres_weights_gt[:3]
bottom_3_gt = genres_weights_gt[:-4:-1]

# The columns are actors, directors and genres, so the output is labelled as generic features.
print(f"Top-3 features predicted by the model: {movies_actors_directors_genres.columns[top_3_algo].values}")
print(f"Top-3 features in the ground truth: {movies_actors_directors_genres.columns[top_3_gt].values}\n")
print(f"Bottom-3 features predicted by the model: {movies_actors_directors_genres.columns[bottom_3_algo].values}")
print(f"Bottom-3 features in the ground truth: {movies_actors_directors_genres.columns[bottom_3_gt].values}")