The notebook includes an online evaluation simulated on the MovieLens dataset using multi-armed bandit strategies. We'll use Îµ-greedy and Thompson Sampling to compare two models: cosine similarity user-based and pearson similarity user-based. Also, we'll estimate static policy results to evaluate against multi-armed bandit strategies. As a comparison metric, we'll use CTR.

The results are shown at the end of the notebook.

The results analysis and interpretation are in the final report.

In [2]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parent
sys.path.append(str(PROJECT_ROOT))

In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
from collections import Counter
import warnings

warnings.filterwarnings('ignore')

In [4]:
from src.data_reading import read_ratings_file
from src.evaluation import temporal_split
from src.models.similarity_based_cf import predict_rating_cf_user_based, recommend_k

# Data preparation

In [5]:
# For similarity-based CF, we would use only the file with the movie ratings, we will not need movie metadata or users' features

ratings = read_ratings_file() 

In [6]:
# Split on train and test sets by date

train, test = temporal_split(ratings, test_ratio=0.1)

Train set size: (900188, 4)
Test set size: (100021, 4)
Train timeframe: 2000-04-25 23:05:32 - 2000-12-29 23:42:47
Test timeframe: 2000-12-29 23:43:34 - 2003-02-28 17:49:50


In [7]:
# Create user_id x movie_id matrix

train_prep = train.pivot_table(
    index='user_id',
    columns='movie_id',
    values='rating'
)
train_prep_ = train_prep.fillna(0)

In [8]:
# Calculate user similarity with cosine distance

user_sim_cos = pd.DataFrame(
    cosine_similarity(train_prep_),
    index=train_prep.index,
    columns=train_prep.index
)

# Let's calculate user similarity with pearson similarity

user_sim_pearson = pd.DataFrame(
    cosine_similarity(
        train_prep.sub(train_prep.mean(axis=1), axis=0)\
        .fillna(0)\
        .values
    ),
    index=train_prep.index,
    columns=train_prep.index
)   

In [9]:
# From the test set, let's remove users and movies missing in the train set, as similarity based collaborative filtering algorithms don't support cold-start

test_users = np.intersect1d(test.user_id.unique(), train.user_id.unique())
test_movies = np.intersect1d(test.movie_id.unique(), train.movie_id.unique())

test = test[(test.user_id.isin(test_users)) & (test.movie_id.isin(test_movies))]
print(f'New test set shape is: {test.shape}')

New test set shape is: (95723, 4)


# Simulation

In [9]:
class MovieLensBandit:
    def __init__(self, models, strategy='thompson', epsilon=0.1):
        self.models = models  # 'cos_user_cf', 'pearson_user_cf'
        self.n_arms = len(models)
        self.strategy = strategy # 'thompson', 'epsilon'
        
        # Statistics for calculating CTR
        self.clicks = np.zeros(self.n_arms)
        self.impressions = np.zeros(self.n_arms)
        
        # Thompson Sampling parameters
        self.alphas = np.ones(self.n_arms) 
        self.betas = np.ones(self.n_arms)

        # E-greedy Epsilon parameter 
        self.epsilon = epsilon

    def select_model(self):
        if self.strategy == 'epsilon':
            if np.random.rand() < self.epsilon:
                return np.random.randint(self.n_arms)
            # Pick the model with highest historical CTR
            ctr = self.clicks / (self.impressions + 1e-6)
            return np.argmax(ctr)
        
        elif self.strategy == 'thompson':
            samples = [np.random.beta(self.alphas[i], self.betas[i]) for i in range(self.n_arms)]
            return np.argmax(samples)

    def update(self, arm_idx, num_relevant, k):
        """
        num_relevant: How many of the K movies were actually clicked.
        k: Total movies recommended in this turn.
        """
        self.impressions[arm_idx] += k
        self.clicks[arm_idx] += num_relevant
        
        self.alphas[arm_idx] += num_relevant
        self.betas[arm_idx] += (k - num_relevant)

In [11]:
def run_experiment(bandit, users, truth, k=10):
    ctr_results = []
    total_relevant = 0
    total_shown = 0
    chosen_models = []
    
    for i, user_id in enumerate(users):
        # Bandit chooses which model will recommend for the user
        arm_idx = bandit.select_model()
        chosen_model = bandit.models[arm_idx]
        
        if chosen_model == 'cos_user_cf':
            recs = recommend_k(
                user_id=user_id,
                test=test,
                predict_fn=predict_rating_cf_user_based,
                train_prep=train_prep,
                sim_df=user_sim_cos,
                n=10, 
                k=10
            )
        elif chosen_model == 'pearson_user_cf':
            recs = recommend_k(
            user_id=user_id,
            test=test,
            predict_fn=predict_rating_cf_user_based,
            train_prep=train_prep,
            sim_df=user_sim_pearson,
            n=10, 
            k=10
        )
        else:
            print('Wrong model name')
        
        # Calculate the number of relevant movies out of the recommended by the model picked
        liked_movies = truth.get(user_id, set())
        relevant_count = len([m for m in recs if m in liked_movies])
        
        # Update the bandit
        bandit.update(arm_idx, relevant_count, k)
        
        # Track CTR and models chosen
        total_relevant += relevant_count
        total_shown += k
        ctr_results.append(total_relevant / total_shown)
        chosen_models.append(chosen_model)
        
    return ctr_results, chosen_models

In [11]:
# Limit the number of users to speed up the experiment

test_users = np.random.choice(test.user_id.unique(), size=300, replace=False)
test_ = test[test.user_id.isin(test_users)]

In [12]:
ground_truth = test_.groupby('user_id')['movie_id'].apply(set).to_dict()

In [14]:
# Initiate strategies

epsilon_greedy = MovieLensBandit(strategy='epsilon', epsilon=0.1, models = ['cos_user_cf', 'pearson_user_cf'])
thompson_samp = MovieLensBandit(strategy='thompson', models = ['cos_user_cf', 'pearson_user_cf'])

In [15]:
# Execute the simulation
eps_results, eps_chosen_models = run_experiment(epsilon_greedy, test_users, ground_truth)
ts_results, ts_chosen_models = run_experiment(thompson_samp, test_users, ground_truth)

In [13]:
# Let's run static policy evaluation on cosine-similarity user-based model

static_results = []

for user_id in test_.user_id.unique():
    recs = recommend_k(
        user_id=user_id,
        test=test,
        predict_fn=predict_rating_cf_user_based,
        train_prep=train_prep,
        sim_df=user_sim_cos,
        n=10, 
        k=10
    )

    liked_movies = ground_truth.get(user_id, set())
    relevant_count = len([m for m in recs if m in liked_movies])

    ctr =  relevant_count / 10

    static_results.append(ctr)

# Simulation Results

In [30]:
# E-greedy results:

print(f'Mean CTR in e-greedy bandits simulation: {np.mean(eps_results):.4f}, models distribution: {Counter(eps_chosen_models)}')

Mean CTR in e-greedy bandits simulation: 0.0927, models distribution: Counter({'cos_user_cf': 287, 'pearson_user_cf': 13})


In [31]:
# Thompson Sampling results:

print(f'Mean CTR in Thompson Sampling bandits simulation: {np.mean(ts_results):.4f}, models distribution: {Counter(ts_chosen_models)}')

Mean CTR in Thompson Sampling bandits simulation: 0.0914, models distribution: Counter({'cos_user_cf': 294, 'pearson_user_cf': 6})


In [14]:
# Static policy results:

print(f'Mean CTR in static policy: {np.mean(static_results):.4f}')

Mean CTR in static policy: 0.0917
