# Reinforcement Learning

# 7. Parametric Bandits

The objective of this lab is to recommend content (here, movies) using **parametric bandits**. The rewards are binary (like or dislike).


## Imports

In [323]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import spearmanr

You will need ``ipywidgets`` to simulate the interactions with the user.

In [324]:
#!pip install ipywidgets

In [325]:
from ipywidgets import AppLayout, Button, GridspecLayout, Image, Layout

In [326]:
#!pip install scikit-learn

In [327]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer

## Data

We work on a catalogue of 1037 movies available in 2015.

In [328]:
# Load the movie catalogue from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
catalogue = pd.read_pickle('movie_database.pickle')

In [None]:
# Number of movies in the catalogue
len(catalogue)

In [None]:
# Preview the first rows of the catalogue
catalogue.head()

The features are the following:

|Column|Description|Type|
|:---|:---|:---|
|Actors| Starring actors | list of strings|
|Awards| Awards received| string|
|Country| Country of origin| list of strings|
|Director| Director(s) of the movie|  list of strings|
|Genre| Genres (Action, ...) | list of strings|
|Language| Language(s) spoken |list of strings|
|Rated| Public rating (G = General, R = Restricted, ...)| list of strings|
|Released| Date of the movie| date|
|Title|Title of the movie|string|
|imdbID| IMDB id| string|
|imdbRating| IMDB rating (between 0 and 10)| float|
|Metascore| Metacritic score (between 0 and 100)|float|
|Box_office| Total money generated|float|
|imdbVotes| Number of IMDB votes| float|
|Runtime| Duration of the movie (in minutes)|float|
|poster| Poster of the movie (jpg)| binary string|

In [331]:
# Display the posters

def get_poster(k, scale=1):
    """Build an image widget showing the poster of movie ``k``.

    ``k`` is looked up with ``catalogue.loc``; ``scale`` multiplies the base
    widget size of 130 (width) by 200 (height).
    """
    poster_bytes = catalogue.loc[k].poster
    widget_size = {'width': 130 * scale, 'height': 200 * scale}
    return Image(value=poster_bytes, format='jpg', **widget_size)

def display_posters(index=None, n_col=5, n_rows=4):
    """Lay out movie posters in a grid and return the grid widget.

    index: sequence of movie ids to show; defaults to the whole catalogue.
    n_col: number of posters per row.
    n_rows: maximum number of rows; fewer are used when ``index`` is short.

    Returns the grid, or None when ``index`` is empty. Ids beyond
    ``n_rows * n_col`` are not shown.
    """
    if index is None:
        index = np.arange(len(catalogue))
    if not len(index):
        return None

    n_rows = min(n_rows, int(np.ceil(len(index) / n_col)))
    grid = GridspecLayout(n_rows, n_col)
    # Fill the grid row by row until either the ids or the cells run out.
    n_shown = min(len(index), n_rows * n_col)
    for k in range(n_shown):
        row, col = divmod(k, n_col)
        grid[row, col] = get_poster(index[k])
    return grid

In [None]:
# With the default arguments this shows the first 4 rows of 5 posters of the catalogue
display_posters()

## Features

We will describe each movie by some features, for instance its genre.

In [333]:
# Encoder turning each movie's list of labels into a binary indicator row
mlb = MultiLabelBinarizer()

In [334]:
# One row per movie, one 0/1 column per genre.
# NOTE(review): the frame gets a fresh 0..n-1 index, while posters are fetched with
# catalogue.loc[k]; this presumably relies on catalogue also being indexed 0..n-1 -- confirm.
movies = pd.DataFrame(mlb.fit_transform(catalogue['Genre']), columns=mlb.classes_)

In [None]:
# Preview the genre indicator matrix
movies.head()

In [None]:
# List of genres used as features
movies.columns

## User

Each user will be modeled by a vector of weights (positive or negative) on each feature. 

In [337]:
# A user is a single row of weights, one per genre; all genres start at 0 (indifferent).
user = pd.DataFrame(0, index = [0], columns=movies.columns)
# Positive weights mark preferred genres, negative weights disliked ones.
user['Action'] = 2
user['Crime'] = 1
user['Sci-Fi'] = -2

## To do

* Display the favorite movies of this user. 
* Test another user, and quantify their similarity (e.g., proportion of common top-100 movies).

In [338]:
def get_top_5_movies(user, movies, catalogue, n=5):
    """Return the positions of the ``n`` movies with the highest user score.

    The score of a movie is the dot product of its feature row with the
    user's weight vector.

    user: one-row DataFrame of weights; its columns name the features used.
    movies: DataFrame with (at least) the same feature columns, one row per
        movie. Extra columns such as 'like' are ignored.
    catalogue: unused; kept so existing calls keep working.
    n: number of movies to return (default 5).

    Returns an array of row positions in ``movies``, best first.
    """
    # Select the features by name so that every weight is used. Slicing off
    # the last column would silently drop the last genre when ``movies`` has
    # no trailing 'like' column.
    feature_columns = user.columns
    movie_features = movies[feature_columns].to_numpy(dtype=float)
    user_weights = user.iloc[0].to_numpy(dtype=float)

    scores = movie_features @ user_weights
    return np.argsort(scores)[::-1][:n]

In [None]:
# Posters of the user's five best-scoring movies
display_posters(get_top_5_movies(user, movies, catalogue))

In [None]:
# A second user who mildly likes four genres (all other weights stay at 0)
user2 = pd.DataFrame(0, index = [0], columns=movies.columns)
user2['Action'] = 1
user2['Crime'] = 1
user2['Sci-Fi'] = 1
user2['Adventure'] = 1

# Posters of this user's five best-scoring movies
display_posters(get_top_5_movies(user2, movies, catalogue))

## Offline learning

We start with offline learning. There are 2 steps: 
1. Collect the user's opinion on a few movies (e.g., 10)
2. Rank the other movies by logistic regression.

Let's test that.

In [341]:
# Add a column to record the user's opinion (like / dislike).
# None marks a movie the user has not rated yet; rated movies get True or False.
movies = movies.assign(like=None)

In [342]:
# Select a random movie (not yet seen by the user)
    
def select_random_movie():
    """Pick a movie uniformly at random among those without feedback.

    When every movie already has a value in ``movies.like``, any movie may be
    returned.
    """
    unseen = np.flatnonzero(movies.like.isna())
    # np.random.choice accepts either an array of candidates or a population size.
    pool = unseen if len(unseen) else len(movies)
    return np.random.choice(pool)

In [343]:
# Create buttons

def create_expanded_button(description, button_style):
    """Return a Button with the given label and style and a default Layout."""
    default_layout = Layout()
    return Button(description=description, button_style=button_style, layout=default_layout)

def update_likes(button):
    """Store the clicked button's verdict for the movie currently displayed.

    The 'like' cell of row ``movie_id`` becomes True when the button is
    labelled 'like' and False otherwise.
    """
    user_liked_it = (button.description == 'like')
    movies.loc[movie_id, 'like'] = user_liked_it
    
def update_poster():
    """Swap the displayed image for the poster of the current ``movie_id``."""
    current_poster = catalogue.loc[movie_id].poster
    img.value = current_poster
    
def on_button_clicked(button):
    """Click handler: record the feedback, then show a new random movie."""
    global movie_id
    # The feedback must be stored before movie_id moves on to the next movie.
    update_likes(button)
    next_movie = select_random_movie()
    movie_id = next_movie
    update_poster()

In [None]:
# Setting the buttons
left_button = create_expanded_button('like', 'success')
right_button = create_expanded_button('dislike', 'danger')
left_button.on_click(on_button_clicked)
right_button.on_click(on_button_clicked)

# Setting the movie poster
movie_id = select_random_movie()
img = get_poster(movie_id, scale=1.5)

# Display the first movie between the two buttons. The layout is built once
# and passed to display(): an AppLayout created as a bare statement in the
# middle of a cell is never rendered.
display(AppLayout(
    left_sidebar=left_button,
    right_sidebar=right_button,
    center=img,
    pane_widths=[0.3, 0.4, 0.3]
))

## To do

* Give your opinion on some movies (e.g., 10), making sure that you get a few likes and a few dislikes.
* Apply logistic regression and display the other movies in order of preference (top movies first).
* Give your top-3 and bottom-3 genres, as predicted by the model.

In [345]:
# Posters of the movies marked as liked so far
likes = np.flatnonzero(movies['like'].eq(True))
display_posters(likes)

In [346]:
# Posters of the movies marked as disliked so far
dislikes = np.flatnonzero(movies['like'].eq(False))
display_posters(dislikes)

In [347]:
# Hard-coded feedback: ids of the movies recorded as liked / disliked.
liked_indexes = [
    0, 19, 183, 204, 250, 313, 332, 388, 443, 574,
    575, 610, 685, 695, 730, 748, 781, 784, 811, 813,
    846, 867, 908, 941, 1013, 1017, 16, 17, 44, 2,
    39, 42, 37, 7, 22, 32, 20, 45, 46, 31,
    3, 18, 10, 12, 50, 28, 6, 41, 21,
]
disliked_indexes = [
    9, 25, 205, 249, 255, 456, 518, 567, 711, 771,
    792, 801, 901, 904, 991, 1036, 11, 14, 27, 13,
    34, 8, 48, 49, 5, 1, 15, 24, 43, 23,
    30, 38, 33, 47, 26, 36, 4, 40, 35, 29,
]

# Write both sets into the 'like' column, likes first and then dislikes.
for _flag, _ids in ((True, liked_indexes), (False, disliked_indexes)):
    for index in _ids:
        movies.at[index, 'like'] = _flag

In [None]:
# Train on the movies the user has rated so far.
movies_labeled = movies.dropna(subset=['like'])

X_train = movies_labeled.drop(columns=['like'])
y_train = movies_labeled['like'].astype(int)

model = LogisticRegression()
model.fit(X_train, y_train)

# Score only the movies the user has NOT rated yet: recommending a movie that
# was already rated is useless, and its label was part of the training data.
movies_unseen = movies[movies['like'].isna()]
X_test = movies_unseen.drop(columns=['like'])
probabilities = model.predict_proba(X_test)[:, 1]

# argsort yields positions within X_test; map them back to movie ids, best first.
ranked_indices = X_test.index[np.argsort(probabilities)[::-1]]
top_5_indices = ranked_indices[:5]

display_posters(top_5_indices)

In [None]:
# Share of liked movies among the rated movies of each genre.
# A plain sum of likes per genre is a count, not a ratio: it mostly reflects how
# often a genre was shown, so rarely rated genres would always look "least favorite".
liked = movies_labeled['like'].astype(int)
genre_features = movies_labeled.drop(columns=['like'])

like_counts = genre_features.multiply(liked, axis=0).sum()
rated_counts = genre_features.sum()

# Genres without any rated movie have no defined ratio; leave them out of the ranking.
genre_like_ratios = (like_counts / rated_counts.where(rated_counts > 0)).dropna()
genre_like_ratios = genre_like_ratios.sort_values(ascending=True)

top_3_genres = genre_like_ratios.tail(3)
top_3_genres = top_3_genres[::-1]  # best genre first
bottom_3_genres = genre_like_ratios.head(3)

print("Top 3 Favorite Genres:")
print(top_3_genres)

print("\n3 Least Favorite Genres:")
print(bottom_3_genres)

plt.figure(figsize=(12, 6))
plt.bar(top_3_genres.index, top_3_genres.values, color='green', label='Top 3 Genres')
plt.bar(bottom_3_genres.index, bottom_3_genres.values, color='red', label='Least Favorite Genres')

plt.title("Top 3 Favorite and Least Favorite Genres", fontsize=16)
plt.ylabel("Like Ratio", fontsize=12)
plt.xlabel("Genres", fontsize=12)
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

## Online learning

We now learn the user preferences online, as they come. For that, we use a Bayesian algorithm inspired by Thompson sampling. 

On each feedback provided by the user:
1. (Learning) The parameter (vector of weights) is learned.
2. (Sampling) A new parameter is sampled, assuming a Gaussian distribution.
3. (Action) The top movie for this new parameter, among movies not yet seen by the user, is proposed. 

Note that:
* In step 1, we retrain the estimator **from scratch**, using logistic regression on all training data samples (**no** online estimation).
* In step 2, we discard correlations (**diagonal** covariance matrix).

## To do

* Complete the function ``select_bayes`` below.
* Test it on some movies (e.g., 10), until you get a few likes and a few dislikes.
* Display the other movies in order of preference (top movies first).

In [350]:
def select_bayes(data=None, std_dev=1.0):
    """Propose the next movie with a Thompson-sampling-like rule.

    1. (Learning) Fit a logistic regression without intercept on all rated movies.
    2. (Sampling) Draw a weight vector from independent Gaussians centred on
       the fitted coefficients, each with standard deviation ``std_dev``.
    3. (Action) Return the unseen movie with the highest score under the
       sampled weights.

    data: DataFrame of feature columns plus a 'like' column whose missing
        values mark unseen movies. Defaults to the module-level ``movies``.
    std_dev: standard deviation of the sampling noise (default 1.0).

    Returns the index label of the proposed movie. While the rated movies do
    not yet contain both a like and a dislike, or when no movie is left
    unseen, a random movie is returned instead (an unseen one if any exists).

    Side effect: stores the fitted coefficients in the global ``mean_weights``.
    """
    global mean_weights
    if data is None:
        data = movies

    rated_mask = data['like'].notna()
    labeled_movies = data[rated_mask]
    unseen_movies = data[~rated_mask]
    y_train = labeled_movies['like'].astype(int)

    # The classifier needs both classes, and there must be something to propose.
    # Counting distinct labels works whatever marks a missing value (None or NaN)
    # and whether feedback is stored as booleans or as 0/1 integers.
    if y_train.nunique() < 2 or unseen_movies.empty:
        pool = unseen_movies.index if len(unseen_movies) else data.index
        return np.random.choice(pool.to_numpy())

    model = LogisticRegression(fit_intercept=False)
    model.fit(labeled_movies.drop('like', axis=1), y_train)

    mean_weights = model.coef_[0]
    # Independent noise per weight: correlations between weights are ignored.
    sampled_weights = np.random.normal(mean_weights, std_dev)

    scores = unseen_movies.drop('like', axis=1).dot(sampled_weights)
    return scores.idxmax()

In [351]:
# Reset: clear all recorded feedback so that every movie counts as unseen again
movies = movies.assign(like=None)

In [352]:
# Hard-coded feedback applied after the reset: ids of liked / disliked movies.
liked_indexes = [
    0, 19, 183, 204, 250, 313, 332, 388, 443, 574,
    575, 610, 685, 695, 730, 748, 781, 784, 811, 813,
    846, 867, 908, 941, 1013, 1017, 16, 17, 44, 2,
    39, 42, 37, 7, 22, 32, 20, 45, 46, 31,
    3, 18, 10, 12, 50, 28, 6, 41, 21,
]
disliked_indexes = [
    9, 25, 205, 249, 255, 456, 518, 567, 711, 771,
    792, 801, 901, 904, 991, 1036, 11, 14, 27, 13,
    34, 8, 48, 49, 5, 1, 15, 24, 43, 23,
    30, 38, 33, 47, 26, 36, 4, 40, 35, 29,
]

# Write both sets into the 'like' column, likes first and then dislikes.
for _flag, _ids in ((True, liked_indexes), (False, disliked_indexes)):
    for index in _ids:
        movies.at[index, 'like'] = _flag

In [353]:
def on_button_clicked(button):
    """Click handler for online learning: record the feedback, then show the
    movie proposed by ``select_bayes``."""
    global movie_id
    # The feedback must be stored before movie_id moves on to the next movie.
    update_likes(button)
    next_movie = select_bayes()
    movie_id = next_movie
    update_poster()

In [None]:
# Setting the buttons (both share the same click handler; the handler reads the
# button's description to tell a like from a dislike)
left_button = create_expanded_button('like', 'success')
right_button = create_expanded_button('dislike', 'danger')
left_button.on_click(on_button_clicked)
right_button.on_click(on_button_clicked)

# Setting the movie poster: the first movie is drawn at random; later ones are
# chosen by the click handler
movie_id = select_random_movie()
img = get_poster(movie_id, scale=1.5)

# Display (rendered because the layout is the last expression of the cell)
AppLayout(
    left_sidebar=left_button,
    right_sidebar=right_button,
    center=img,
    pane_widths=[0.3, 0.4, 0.3]
)

In [None]:
# Rank remaining movies by predicted scores
def rank_remaining_movies(n=10):
    """Print the ``n`` best unseen movies and return a grid of their posters.

    A logistic regression without intercept is fitted on every rated movie
    and the unseen movies are ordered by their linear score, best first.
    Fitting here means the ranking uses all feedback recorded so far and
    does not depend on a previous interactive call to ``select_bayes``.

    n: number of movies to list and display (default 10).

    Returns the poster grid built by ``display_posters``.
    """
    unseen_mask = movies.like.isna()

    labeled_movies = movies[~unseen_mask]
    model = LogisticRegression(fit_intercept=False)
    model.fit(labeled_movies.drop('like', axis=1), labeled_movies['like'].astype(int))

    X_test = movies[unseen_mask].drop('like', axis=1)
    scores = X_test.dot(model.coef_[0])

    # Only the title is needed for the listing; skip the other catalogue columns.
    ranked_movies = catalogue.loc[unseen_mask, ['Title']].copy()
    ranked_movies['Predicted Score'] = scores
    ranked_movies = ranked_movies.sort_values(by='Predicted Score', ascending=False)

    print(f"Top {n} Movies by Predicted Score:")
    print(ranked_movies[['Title', 'Predicted Score']].head(n))

    # Return the grid: a widget built inside a function is only rendered when
    # it reaches the end of the cell (or is passed to display()).
    top_indices = ranked_movies.index[:n]
    return display_posters(top_indices)

rank_remaining_movies()

## Analysis

Finally, we would like to assess the quality of our bandit algorithm.

## To do

* Choose a user, that is a parameter $\theta$ (vector of weights).
* Provide the answers of this user to the movies proposed by the algorithm, assuming binary rewards, with mean
$$
q(a) = \frac 1 {1 + e^{-\theta^T a}}
$$
where $a$ is the action (= movie proposed by the algorithm).
* Make sure that a reasonable fraction of movies are liked (e.g., between 10\% and 90\%). Otherwise, update $\theta$.
* Simulate an interaction of this user with the recommender system over 100 movies.
* Compute the [Spearman's correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) of the ranking of the unseen movies provided by the algorithm, compared to the ground-truth ranking.
* Plot the evolution of this coefficient with respect to the number of movies seen by the user, from 1 to 100.
* Give the top-3 and bottom-3 genres, as predicted by the model, and compare to the ground-truth.
* Do the same experiments with other features (e.g., actors, actors + genres, actors + director + genres).

In [356]:
def q(theta, a):
    """Draw a binary reward (1 = like, 0 = dislike) for action ``a``.

    The like probability is the logistic function of ``theta . a``; the
    return value is a single Bernoulli draw with that probability, not the
    probability itself.
    """
    logit = np.dot(theta, a)
    like_probability = 1 / (1 + np.exp(-logit))
    return np.random.binomial(1, like_probability)

In [357]:
def get_liked_fraction(data=None):
    """Return the fraction of rated movies that were liked.

    data: DataFrame with a 'like' column whose missing values mark unrated
        movies. Defaults to the module-level ``movies``.

    Returns a float between 0 and 1, or NaN when no movie has been rated.
    """
    if data is None:
        data = movies
    # Only rated movies count: dividing the number of rated movies by the
    # catalogue size would measure coverage, not how much the user likes.
    rated = data['like'].dropna()
    if rated.empty:
        return float('nan')
    return float(rated.astype(bool).mean())

In [358]:
def calculate_spearman(movie_data, user_preferences, prediction_model, correlation_results):
    """Append the Spearman correlation between true and predicted rankings.

    Only movies whose 'like' value is missing are compared. The true score
    of a movie is the dot product of all its columns but the last with
    ``user_preferences``; the predicted score is the positive-class
    probability given by ``prediction_model.predict_proba``.

    The coefficient is appended to ``correlation_results``; nothing is returned.
    """
    not_rated = movie_data['like'].isna()
    candidates = movie_data.loc[not_rated]

    true_scores = np.dot(candidates.iloc[:, :-1].values, user_preferences)

    feature_frame = candidates.drop(columns=['like']).fillna(0)
    like_probabilities = prediction_model.predict_proba(feature_frame)[:, 1]

    rho = spearmanr(true_scores, like_probabilities)[0]
    correlation_results.append(rho)


In [359]:
def display_correlation_chart(correlations, title='Evolution of Spearman\'s Correlation for Genre Preferences'):
    """Plot the Spearman coefficient against the number of rated movies.

    correlations: sequence of coefficients, one per simulated rating.
    title: figure title (the default refers to the genre features).
    """
    # Take the x-axis from the data itself: relying on a global step count
    # breaks as soon as the two lengths differ.
    n_rated = range(1, len(correlations) + 1)
    plt.plot(n_rated, correlations)
    plt.xlabel('Number of Rated Movies')
    plt.ylabel('Spearman\'s Correlation Coefficient')
    plt.title(title)
    plt.show()

In [360]:
# Fresh genre indicator matrix for the simulation (one 0/1 column per genre).
movie_genres = pd.DataFrame(mlb.fit_transform(catalogue['Genre']), columns=mlb.classes_)
# Ground-truth user: one random integer weight in [-2, 2] per genre.
# NOTE(review): no random seed is set in this cell, so the weights and the
# selection below change on every run.
genre_preferences = {
    genre: np.random.randint(-2, 3) for genre in movie_genres.columns
}
# Feedback column; None marks a movie the simulated user has not rated yet.
movie_genres = movie_genres.assign(like=None)

# Weight vector in the same order as the genre columns.
preference_weights = np.array(list(genre_preferences.values()))
# NOTE(review): despite its name, this is used below as the fraction of the
# catalogue that is pre-selected, not as a probability of liking a movie.
like_probability = 0.35
# Number of rounds of the simulated interaction.
total_movies = 100

# Random subset of distinct movie positions, sized by like_probability.
selected_indices = np.random.choice(len(movie_genres), size=int(like_probability * len(movie_genres)), replace=False)


In [None]:
# Genre features as a float matrix; the features do not change during the simulation.
genre_matrix = movie_genres.drop(columns=['like']).to_numpy(dtype=float)

# Warm start: the simulated user rates the pre-selected movies.
for idx in selected_indices:
    movie_genres.loc[idx, 'like'] = q(preference_weights, genre_matrix[idx])

user_likes, spearman_values = [], []

# select_bayes() reads the module-level `movies` frame, whereas the simulated
# feedback is written to `movie_genres`. Point `movies` at the simulation data
# while the loop runs so that the bandit actually learns from the simulated
# answers, then restore it.
movies_before_simulation = movies
movies = movie_genres
try:
    for _ in range(total_movies):
        chosen_movie_id = select_bayes()
        user_like = q(preference_weights, genre_matrix[chosen_movie_id])
        movie_genres.loc[chosen_movie_id, 'like'] = user_like
        user_likes.append(user_like)

        # Retrain from scratch on everything rated so far, so that the
        # correlation reflects what has been learned up to this round
        # rather than an unrelated, previously fitted model.
        rated = movie_genres[movie_genres['like'].notna()]
        model = LogisticRegression(fit_intercept=False)
        model.fit(rated.drop(columns=['like']), rated['like'].astype(int))
        calculate_spearman(movie_genres, preference_weights, model, spearman_values)
finally:
    movies = movies_before_simulation

display_correlation_chart(spearman_values)

In [None]:
# Fit on the simulated user's feedback: the coefficients compared with the
# ground truth must come from a model trained on `movie_genres`, not from a
# model fitted earlier on other labels.
rated_mask = movie_genres['like'].notna()
rated_genres = movie_genres.drop(columns=['like']).loc[rated_mask]
rated_labels = movie_genres.loc[rated_mask, 'like'].astype(int)

simulation_model = LogisticRegression(fit_intercept=False)
simulation_model.fit(rated_genres, rated_labels)

predicted_coefficients = pd.Series(simulation_model.coef_[0], index=rated_genres.columns)
top_predicted_genres = predicted_coefficients.nlargest(3)
bottom_predicted_genres = predicted_coefficients.nsmallest(3)
true_coefficients = pd.Series(preference_weights, index=rated_genres.columns)
top_true_genres = true_coefficients.nlargest(3)
bottom_true_genres = true_coefficients.nsmallest(3)

print("Top-3 Predicted Genres:", ", ".join(f"{genre}" for genre in top_predicted_genres.index))
print("Top-3 True Genres:", ", ".join(f"{genre}" for genre in top_true_genres.index))
print("Bottom-3 Predicted Genres:", ", ".join(f"{genre}" for genre in bottom_predicted_genres.index))
print("Bottom-3 True Genres:", ", ".join(f"{genre}" for genre in bottom_true_genres.index))