Dataset used:
F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19. https://doi.org/10.1145/2827872


Implementation based on paper:
Chen, Y. (2025). Contextual bandits to increase user prediction accuracy in movie recommendation system. ITM Web of Conferences, 73, 01018. https://doi.org/10.1051/itmconf/20257301018

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

## STEP 1: PREPROCESSING DATA & EXTRACTING USER-MOVIE-FEATURES

In [2]:
import pandas as pd

# df = pd.read_csv("/content/drive/MyDrive/AI Planning/u.data", sep="\t", header=None)

data_dir = 'dataset/u.data'

# The ratings file is tab-separated and has no header row, so the column names
# are supplied on read. The timestamp is not used anywhere downstream and is
# dropped; rows are then ordered by user with a fresh 0..n-1 index.
df = (
    pd.read_csv(
        data_dir,
        sep="\t",
        header=None,
        names=['user_id', 'item_id', 'rating', 'timestamp'],
    )
    .drop(columns='timestamp')
    .sort_values(['user_id'], ignore_index=True)
)
df.head()

Unnamed: 0,user_id,item_id,rating
0,1,46,4
1,1,257,4
2,1,12,5
3,1,74,1
4,1,134,4


In [3]:
# user = pd.read_csv("/content/drive/MyDrive/AI Planning/u.user", sep="|", header=None)
user_data_dir = 'dataset/u.user'

# The user file is pipe-separated with no header row. The zip code is read and
# then discarded so only id, age, gender and occupation remain.
user = pd.read_csv(
    user_data_dir,
    sep="|",
    header=None,
    names=['user_id', 'age', 'gender', 'occupation', 'zip code'],
).drop(columns='zip code')
user.head()

Unnamed: 0,user_id,age,gender,occupation
0,1,24,M,technician
1,2,53,F,other
2,3,23,M,writer
3,4,24,M,technician
4,5,33,F,other


In [4]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the two categorical user columns. Dense output is requested
# so the encoded matrix can be wrapped directly in a DataFrame whose column
# names come from the fitted encoder.
encoder = OneHotEncoder(sparse_output=False)
encoded_data = pd.DataFrame(
    encoder.fit_transform(user[['gender', 'occupation']]),
    columns=encoder.get_feature_names_out(['gender', 'occupation']),
)

# Place the indicator columns beside the existing user columns, then discard
# the raw categorical columns they replace.
user = pd.concat([user, encoded_data], axis=1).drop(columns=['gender', 'occupation'])
user.head()

Unnamed: 0,user_id,age,gender_F,gender_M,occupation_administrator,occupation_artist,occupation_doctor,occupation_educator,occupation_engineer,occupation_entertainment,...,occupation_marketing,occupation_none,occupation_other,occupation_programmer,occupation_retired,occupation_salesman,occupation_scientist,occupation_student,occupation_technician,occupation_writer
0,1,24,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2,53,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,23,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,4,24,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,33,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the age column so it is on a comparable scale to the 0/1
# one-hot columns that make up the rest of the user feature vector.
minmax = MinMaxScaler()
user['age'] = minmax.fit_transform(user[['age']])

user.head()

Unnamed: 0,user_id,age,gender_F,gender_M,occupation_administrator,occupation_artist,occupation_doctor,occupation_educator,occupation_engineer,occupation_entertainment,...,occupation_marketing,occupation_none,occupation_other,occupation_programmer,occupation_retired,occupation_salesman,occupation_scientist,occupation_student,occupation_technician,occupation_writer
0,1,0.257576,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2,0.69697,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0.242424,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,4,0.257576,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,0.393939,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# movie = pd.read_csv("/content/drive/MyDrive/AI Planning/u.item", sep='|', header=None, encoding='latin-1')
# The item file is pipe-separated, latin-1 encoded and has no header row.
movie = pd.read_csv('dataset/u.item', sep='|', header=None, encoding='latin-1')

# Five descriptive columns followed by one 0/1 indicator column per genre.
movie.columns = [
    'item_id', 'movie_title', 'release date', 'video release date', 'IMDb URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Keep only the id and the genre indicators; the descriptive columns are not
# used as features.
movie = movie.drop(columns=['movie_title', 'release date', 'video release date', 'IMDb URL'])

print(len(movie.columns))
print(movie.columns)

20
Index(['item_id', 'unknown', 'Action', 'Adventure', 'Animation', 'Children',
       'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir',
       'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'],
      dtype='object')


## STEP 3: CLUSTER MOVIES

Why? Clustering reduces the number of arms, which limits the exploration space. It also helps the model generalize across similar movies.

In [7]:
from sklearn.cluster import KMeans

# Cluster on the genre indicator columns only. 'cluster' is excluded as well
# (when it already exists) so that re-running this cell does not feed the
# previous cluster labels back into KMeans as an extra feature.
kmeans = KMeans(n_clusters=20, random_state=5)
movie_clusters = kmeans.fit_predict(
    movie.drop(columns=['item_id', 'cluster'], errors='ignore')
)

movie['cluster'] = movie_clusters

## STEP 2: BUILDING & TRAINING THE LINUCB MODEL

In [8]:
# Implementation assisted by ChatGPT

import numpy as np

class LinUCB:
    """LinUCB contextual bandit that keeps a separate linear model per arm.

    Each arm stores a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zeros). An arm's score for a context is the
    estimated reward ``theta @ x`` with ``theta = inv(A) @ b``, plus an
    exploration bonus ``alpha * sqrt(x @ inv(A) @ x)``.
    """

    def __init__(self, n_arms, context_dim, alpha):
        """Create ``n_arms`` independent models over ``context_dim`` features.

        ``alpha`` scales the exploration bonus added in ``score``.
        """
        self.n_arms = n_arms
        self.context_dim = context_dim
        self.alpha = alpha
        self.A = [np.identity(context_dim) for _ in range(n_arms)]
        self.b = [np.zeros(context_dim) for _ in range(n_arms)]

    def score(self, arm, x):
        """Return the upper-confidence score of ``arm`` for context ``x``."""
        A_inv = np.linalg.inv(self.A[arm])
        theta = A_inv @ self.b[arm]
        expected_reward = np.transpose(theta) @ x
        exploration_bonus = self.alpha * np.sqrt(np.transpose(x) @ A_inv @ x)
        return expected_reward + exploration_bonus

    def select_arm(self, x):
        """Return the index of the arm with the highest score for ``x``."""
        return np.argmax([self.score(arm, x) for arm in range(self.n_arms)])

    def update(self, arm_idx, x, r):
        """Fold one observed (context ``x``, reward ``r``) pair into ``arm_idx``."""
        self.A[arm_idx] += np.outer(x, x)
        self.b[arm_idx] += r * x

In [9]:
# Implementation assisted by ChatGPT

class ContextualEpsilonGreedy:
    """Epsilon-greedy contextual bandit with one linear model per arm.

    Each arm stores a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zeros). An arm's score for a context is
    ``theta @ x`` with ``theta = inv(A) @ b``; there is no exploration bonus.
    Exploration instead comes from picking a uniformly random arm with
    probability ``epsilon``.
    """

    def __init__(self, n_arms, context_dim, epsilon):
        """Create ``n_arms`` independent models over ``context_dim`` features.

        ``epsilon`` is the probability that ``select_arm`` explores.
        """
        self.n_arms = n_arms
        self.context_dim = context_dim
        self.epsilon = epsilon
        self.A = [np.identity(context_dim) for _ in range(n_arms)]
        self.b = [np.zeros(context_dim) for _ in range(n_arms)]

    def score(self, arm, x):
        """Return the estimated reward of ``arm`` for context ``x``."""
        A_inv = np.linalg.inv(self.A[arm])
        theta = A_inv @ self.b[arm]
        return np.transpose(theta) @ x

    def select_arm(self, x):
        """Return the index of the arm to play for context ``x``.

        With probability ``epsilon`` a uniformly random arm index is returned;
        otherwise the index of the arm with the highest estimated reward.
        """
        if np.random.rand() < self.epsilon:
            # Explore: return the arm *index*. The previous code returned the
            # random arm's score, so callers received a float, not an arm.
            return np.random.randint(self.n_arms)
        # Exploit: best arm under the current estimates.
        scores = [self.score(i, x) for i in range(self.n_arms)]
        return np.argmax(scores)

    def update(self, arm_idx, x, r):
        """Fold one observed (context ``x``, reward ``r``) pair into ``arm_idx``."""
        self.A[arm_idx] += np.outer(x, x)
        self.b[arm_idx] += r * x


In [10]:
n_training = int(df.shape[0]*0.9)

# df is ordered by user_id, so this positional 90/10 split puts the rows of
# the highest user ids into the test set.
training_df = df.iloc[:n_training]
testing_df = df.iloc[n_training:]

from sklearn.utils import shuffle

# Shuffle each split with a fixed seed so that Restart & Run All visits the
# rows in the same order every time, and renumber the rows from 0.
training_df = shuffle(training_df, random_state=5).reset_index(drop=True)
testing_df = shuffle(testing_df, random_state=5).reset_index(drop=True)

Training the LinUCB and e-Greedy models

In [11]:
num_arms = len(movie['cluster'].unique())

# Context vector = every user column except user_id, followed by every movie
# column except item_id and cluster -- hence the "-3".
context_dim = user.shape[1]+movie.shape[1]-3

linucb = LinUCB(num_arms, context_dim, 0.5)
cb_e_greedy = ContextualEpsilonGreedy(num_arms, context_dim, 0.1)

# Index both lookup tables by id once, instead of running a boolean-mask scan
# (plus a column drop) over each table for every training row.
user_by_id = user.set_index('user_id')
movie_by_id = movie.set_index('item_id')
genre_cols = movie_by_id.columns.drop('cluster')

for row in training_df.itertuples(index=False):
  movie_row = movie_by_id.loc[row.item_id]

  context = np.concatenate((
      user_by_id.loc[row.user_id].to_numpy(),
      movie_row[genre_cols].to_numpy(),
  ))

  # Ratings are divided by 5 to form the reward.
  reward = row.rating/5

  # The arm is the cluster that the rated movie belongs to.
  cluster = movie_row['cluster']

  linucb.update(cluster, context, reward)
  cb_e_greedy.update(cluster, context, reward)

## Testing

Because we want to simulate a cold-start problem, we assume that, for new users, the test data contains only the user's demographic features (age, gender, and occupation) together with the genre flags of the movie being rated.

In [12]:
def dcg(relevances):
    """Return the discounted cumulative gain of ``relevances`` in the given order.

    The item at 0-based position ``p`` with relevance ``rel`` contributes
    ``(2**rel - 1) / log2(p + 2)``. An empty sequence yields 0.
    """
    discounted_gains = (
        (2**rel - 1) / np.log2(position + 2)
        for position, rel in enumerate(relevances)
    )
    return sum(discounted_gains)

def ndcg_at_k(true_relevance, predicted_scores, k=10):
    """Return NDCG@k for one ranking.

    Items are ranked by ``predicted_scores`` in descending order and the DCG
    of the top ``k`` true relevances is divided by the DCG of the best
    possible top ``k``. Returns 0 when the ideal DCG is not positive.
    """
    top_k = np.argsort(predicted_scores)[::-1][:k]
    ranked_relevances = [true_relevance[i] for i in top_k]
    ideal_relevances = sorted(true_relevance, reverse=True)[:k]

    if dcg(ideal_relevances) > 0:
        return dcg(ranked_relevances) / dcg(ideal_relevances)
    return 0

In [13]:
# ndcg_scores = []

# for idx, row in testing_df.iterrows():
#     user_id = row['user_id']
#     true_rating = row['rating']/5
#     true_movie = row['item_id']

#     user_features = user.loc[user['user_id'] == user_id]
#     user_features = user_features.drop('user_id', axis=1)

#     movie_row = movie.loc[movie['item_id'] == true_movie]
#     movie_features = movie_row.drop(['item_id', 'cluster'], axis=1)

#     context = np.concatenate((user_features.values.flatten(), movie_features.values.flatten()))

#     true_cluster = movie_row["cluster"].values[0]

#     predicted_scores = []
#     true_relevance = []

#     for arm_idx in range(linucb.n_arms):
#         score = linucb.score(arm_idx, context)
#         predicted_scores.append(score)
#         true_relevance.append(1 if arm_idx == true_cluster else 0)

#     ndcg = ndcg_at_k(true_relevance, predicted_scores, k=10)
#     ndcg_scores.append(ndcg)

# print(f"Average NDCG@10: {np.mean(ndcg_scores):.4f}")

In [14]:
# ndcg_scores = []

# for idx, row in testing_df.iterrows():
#     user_id = row['user_id']
#     true_rating = row['rating']/5
#     true_movie = row['item_id']

#     user_features = user.loc[user['user_id'] == user_id]
#     user_features = user_features.drop('user_id', axis=1)

#     movie_row = movie.loc[movie['item_id'] == true_movie]
#     movie_features = movie_row.drop(['item_id', 'cluster'], axis=1)

#     context = np.concatenate((user_features.values.flatten(), movie_features.values.flatten()))

#     true_cluster = movie_row["cluster"].values[0]

#     predicted_scores = []
#     true_relevance = []

#     for arm_idx in range(linucb.n_arms):
#         score = cb_e_greedy.score(arm_idx, context)
#         predicted_scores.append(score)
#         true_relevance.append(1 if arm_idx == true_cluster else 0)

#     ndcg = ndcg_at_k(true_relevance, predicted_scores, k=10)
#     ndcg_scores.append(ndcg)

# print(f"Average NDCG@10: {np.mean(ndcg_scores):.4f}")

In [None]:
# Calculate NDCG for any model
def calculate_ndcg(row, model):
    """Return NDCG@10 of ``model``'s arm ranking for one test interaction.

    ``row`` must expose ``user_id`` and ``item_id`` attributes (e.g. a tuple
    from ``DataFrame.itertuples``). The context is that user's feature vector
    followed by the movie's genre flags. Every arm is scored with
    ``model.score``, and the only relevant arm is the cluster the movie
    belongs to.
    """
    user_features = user.loc[user['user_id'] == row.user_id].drop('user_id', axis=1)

    movie_row = movie.loc[movie['item_id'] == row.item_id]
    movie_features = movie_row.drop(['item_id', 'cluster'], axis=1)

    context = np.concatenate((user_features.values.flatten(), movie_features.values.flatten()))

    true_cluster = movie_row["cluster"].values[0]

    arms = range(model.n_arms)
    predicted_scores = [model.score(arm_idx, context) for arm_idx in arms]
    # Binary relevance: 1 for the movie's own cluster, 0 for every other arm.
    true_relevance = [1 if arm_idx == true_cluster else 0 for arm_idx in arms]

    return ndcg_at_k(true_relevance, predicted_scores, k=10)

Testing and printing NDCG for the LinUCB and e-Greedy models

In [16]:
from tqdm import tqdm
from joblib import Parallel, delayed
import numpy as np

# Parallel execution with progress bar
# Score every test interaction in parallel, once per model. tqdm wraps the row
# iterator, so the bar advances as rows are handed to the workers.
results_linucb = Parallel(n_jobs=-1)(
    delayed(calculate_ndcg)(row, linucb)
    for row in tqdm(
        testing_df.itertuples(index=False),
        total=len(testing_df),
        desc="LinUCB NDCG",
    )
)
results_cb_egreedy = Parallel(n_jobs=-1)(
    delayed(calculate_ndcg)(row, cb_e_greedy)
    for row in tqdm(
        testing_df.itertuples(index=False),
        total=len(testing_df),
        desc="e-Greedy NDCG",
    )
)

print(f"LinUCB Average NDCG@10: {np.mean(results_linucb):.4f}")
print(f"e-Greedy Average NDCG@10: {np.mean(results_cb_egreedy):.4f}")

LinUCB NDCG: 100%|██████████| 10000/10000 [00:04<00:00, 2187.45it/s]
e-Greedy NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3404.29it/s]


LinUCB Average NDCG@10: 0.0182
e-Greedy Average NDCG@10: 0.7296


Training new LinUCB models with alpha = 0.1, 0.001, and 0.0001

In [39]:

num_arms = len(movie['cluster'].unique())

# Context vector = every user column except user_id, followed by every movie
# column except item_id and cluster -- hence the "-3".
context_dim = user.shape[1]+movie.shape[1]-3

# Same model, three exploration strengths.
linucb_a1 = LinUCB(num_arms, context_dim, 0.1)
linucb_a2 = LinUCB(num_arms, context_dim, 0.001)
linucb_a3 = LinUCB(num_arms, context_dim, 0.0001)

# Index both lookup tables by id once, instead of running a boolean-mask scan
# (plus a column drop) over each table for every training row.
user_by_id = user.set_index('user_id')
movie_by_id = movie.set_index('item_id')
genre_cols = movie_by_id.columns.drop('cluster')

# Training
for row in training_df.itertuples(index=False):
  movie_row = movie_by_id.loc[row.item_id]

  context = np.concatenate((
      user_by_id.loc[row.user_id].to_numpy(),
      movie_row[genre_cols].to_numpy(),
  ))

  # Ratings are divided by 5 to form the reward.
  reward = row.rating/5

  # The arm is the cluster that the rated movie belongs to.
  cluster = movie_row['cluster']

  linucb_a1.update(cluster, context, reward)
  linucb_a2.update(cluster, context, reward)
  linucb_a3.update(cluster, context, reward)

In [40]:
# Testing the LinUCB (alpha=0.1)
results_linucb_a1 = Parallel(n_jobs=-1)(
    delayed(calculate_ndcg)(row, linucb_a1)
    for row in tqdm(
        testing_df.itertuples(index=False),
        total=len(testing_df),
        desc="LinUCB (alpha=0.1) NDCG",
    )
)
print(f"LinUCB (alpha=0.1) Average NDCG@10: {np.mean(results_linucb_a1):.4f}")

LinUCB (alpha=0.1) NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3830.49it/s]


LinUCB (alpha=0.1) Average NDCG@10: 0.4755


In [41]:

# Evaluate the alpha=0.001 model on the held-out interactions.
results_linucb_a2 = Parallel(n_jobs=-1)(
    delayed(calculate_ndcg)(row, linucb_a2)
    for row in tqdm(
        testing_df.itertuples(index=False),
        total=len(testing_df),
        desc="LinUCB (alpha=0.001) NDCG",
    )
)
print(f"LinUCB (alpha=0.001) Average NDCG@10: {np.mean(results_linucb_a2):.4f}")

LinUCB (alpha=0.001) NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3675.43it/s]


LinUCB (alpha=0.001) Average NDCG@10: 0.7265


In [42]:
# Evaluate the alpha=0.0001 model on the held-out interactions.
results_linucb_a3 = Parallel(n_jobs=-1)(
    delayed(calculate_ndcg)(row, linucb_a3)
    for row in tqdm(
        testing_df.itertuples(index=False),
        total=len(testing_df),
        desc="LinUCB (alpha=0.0001) NDCG",
    )
)
# The label now matches the model being reported (it previously said alpha=0.001).
print(f"LinUCB (alpha=0.0001) Average NDCG@10: {np.mean(results_linucb_a3):.4f}")

LinUCB (alpha=0.0001) NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3806.38it/s]


LinUCB (alpha=0.001) Average NDCG@10: 0.7290


In [None]:
# def train_linucb(row, model):
#     user_id = row.user_id
#     item_id = row.item_id
#     rating = row.rating
#     # user_id = row['user_id']
#     # item_id = row['item_id']

#     user_features = user.loc[user['user_id'] == user_id]
#     user_features = user_features.drop('user_id', axis=1)

#     movie_row = movie.loc[movie['item_id'] == item_id]
#     movie_features = movie_row.drop(['item_id', 'cluster'], axis=1)

#     context = np.concatenate((user_features.values.flatten(), movie_features.values.flatten()))

#     reward = rating/5

#     cluster = movie_row["cluster"].values[0]

#     model.update(cluster, context, reward)

In [None]:
# # Training new LinUCB (alpha=0.1)
# linucb_a1 = LinUCB(num_arms, context_dim, 0.1)
# results = Parallel(n_jobs=-1)(delayed(train_linucb)(row, linucb_a1) for row in tqdm(training_df.itertuples(index=False), total=len(training_df), desc="Parallel Training"))

Parallel Training: 100%|██████████| 90000/90000 [00:07<00:00, 12112.33it/s]


In [None]:
# # Testing the LinUCB (alpha=0.1)
# results_linucb_a1 = Parallel(n_jobs=-1)(delayed(calculate_ndcg)(row, linucb_a1) for row in tqdm(testing_df.itertuples(index=False), total=len(testing_df), desc="LinUCB (alpha=0.1) NDCG"))
# print(f"LinUCB (alpha=0.1) Average NDCG@10: {np.mean(results_linucb_a1):.4f}")

LinUCB (alpha=0.1) NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3969.65it/s]


LinUCB (alpha=0.1) Average NDCG@10: 0.1612


In [None]:
# # Training new LinUCB (alpha=0.001)
# linucb_a2 = LinUCB(num_arms, context_dim, 0.001)
# results = Parallel(n_jobs=-1)(delayed(train_linucb)(row, linucb_a2) for row in tqdm(training_df.itertuples(index=False), total=len(training_df), desc="Parallel Training"))

Parallel Training: 100%|██████████| 90000/90000 [00:07<00:00, 11812.29it/s]


In [None]:
# linucb_a2 = LinUCB(num_arms, context_dim, 0.001)

# # Training
# for idx, row in training_df.iterrows():
#   user_id = row['user_id']
#   item_id = row['item_id']

#   user_features = user.loc[user['user_id'] == user_id]
#   user_features = user_features.drop('user_id', axis=1)

#   movie_row = movie.loc[movie['item_id'] == item_id]
#   movie_features = movie_row.drop(['item_id', 'cluster'], axis=1)

#   context = np.concatenate((user_features.values.flatten(), movie_features.values.flatten()))

#   reward = row['rating']/5

#   cluster = movie_row["cluster"].values[0]

#   linucb_a1.update(cluster, context, reward)

KeyboardInterrupt: 

In [None]:
# # Testing the LinUCB (alpha=0.001)
# results_linucb_a2 = Parallel(n_jobs=-1)(delayed(calculate_ndcg)(row, linucb_a2) for row in tqdm(testing_df.itertuples(index=False), total=len(testing_df), desc="LinUCB (alpha=0.001) NDCG"))
# print(f"LinUCB (alpha=0.001) Average NDCG@10: {np.mean(results_linucb_a2):.4f}")

LinUCB (alpha=0.001) NDCG: 100%|██████████| 10000/10000 [00:02<00:00, 3847.49it/s]


LinUCB (alpha=0.001) Average NDCG@10: 0.1612
