In [1]:
# Install the mab2rec package into the environment of the running kernel.
# NOTE(review): the version is not pinned; consider pinning it for reproducibility.
# NOTE(review): `implicit` is imported later in this notebook but is not installed here -- confirm it is available.
%pip install mab2rec

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [3]:
# Mapping applied to the loaded CSV columns to obtain the
# (user_id, item_id, response) names used by the rest of the notebook.
rename_dict = {
    'id_user': 'user_id',
    'id_item': 'item_id',
    'rating': 'response',
}

# Read the ';'-separated interactions, order them by timestamp, rename the
# columns and keep only the three columns of interest.
df_full = (
    pd.read_csv('data/interactions-timestamp.csv', sep=';')
    .sort_values(by='timestamp')
    .rename(columns=rename_dict)
    .loc[:, ['user_id', 'item_id', 'response']]
)

# Replace user and item identifiers with integer codes; each column gets its
# own freshly fitted encoder.
for id_col in ('user_id', 'item_id'):
    df_full[id_col] = LabelEncoder().fit_transform(df_full[id_col])

# Split by position in the timestamp-sorted frame: the first 90% of the rows
# form the training set and the remaining rows form the test set.
split_at = int(len(df_full) * 0.9)
df_train, df_test = df_full.iloc[:split_at], df_full.iloc[split_at:]

In [4]:
# Example: train a single recommender that produces top-5 recommendations.

# Recommender class, learning policies and the train/score pipeline helpers
from mab2rec import BanditRecommender, LearningPolicy
from mab2rec.pipeline import train, score

# UCB1 learning policy with alpha=2, returning 5 items per recommendation
ucb1_policy = LearningPolicy.UCB1(alpha=2)
rec = BanditRecommender(ucb1_policy, top_k=5)

# Fit on the (user, item, response) interactions of the training split.
# No user features are passed to this call.
train(rec, data=df_train)

# Score recommendations for the test split; the result is kept in `df`
# and inspected in the following cells.
df = score(rec, data=df_test)


In [5]:
# Distinct values found in the `score` column of the scored test frame `df`.
df['score'].unique()

array([0.99999945])

In [6]:
# Ask the trained recommender for its top-k items without passing any contexts.
rec.recommend()

[12146, 6702, 9514, 9249, 11026]

In [7]:
# Same context-free call repeated -- presumably to check whether the returned list changes between invocations.
rec.recommend()

[12146, 6702, 9514, 9249, 11026]

In [8]:
# Epsilon-greedy learning policy with epsilon=0.1, returning 5 items per recommendation.
# Note: this rebinds `rec`, replacing the recommender created earlier.
eps_greedy_policy = LearningPolicy.EpsilonGreedy(epsilon=0.1)
rec = BanditRecommender(eps_greedy_policy, top_k=5)

# Fit on the (user, item, response) interactions of the training split.
# No user features are passed to this call.
train(rec, data=df_train)

# Score recommendations for the test split; the resulting frame has
# user_id, item_id and score columns. This rebinds `df` as well.
df = score(rec, data=df_test)

df

Unnamed: 0,user_id,item_id,score
0,1768,10667,0.993307
1,1768,10554,0.993307
2,1768,1624,0.993307
3,1768,9273,0.993307
4,1768,11349,0.993307
...,...,...,...
5125,14475,10667,0.993307
5126,14475,10554,0.993307
5127,14475,1624,0.993307
5128,14475,9273,0.993307


In [9]:
# Distinct values found in the `score` column of the scored test frame `df`.
df['score'].unique()

array([0.99330715, 0.73105616, 0.73105466, 0.73104189, 0.73103635,
       0.73101558, 0.73105414, 0.73104963, 0.73104914, 0.73103944,
       0.73103811, 0.7310447 , 0.73104185, 0.73103221, 0.73103035,
       0.73101935, 0.73105733, 0.73103137, 0.73101284, 0.7310055 ,
       0.73099072, 0.73103219, 0.73098477, 0.73097078, 0.73096736,
       0.7309599 , 0.73103351, 0.73102055, 0.73099993, 0.73099932,
       0.73098359, 0.73103197, 0.73102758, 0.7310084 , 0.73100648,
       0.73100089, 0.73103477, 0.73102584, 0.73102306, 0.73102298,
       0.73102066, 0.73105715, 0.73105418, 0.7310504 , 0.73103173,
       0.7310221 , 0.73102065, 0.73101734, 0.73100028, 0.7309842 ,
       0.73097556, 0.73104583, 0.73104232, 0.7310171 , 0.73098612,
       0.73098313, 0.73101072, 0.73099979, 0.73096864, 0.73093786,
       0.73093498, 0.73104916, 0.73103457, 0.73098242, 0.73096374,
       0.7309547 , 0.73102635, 0.73095699, 0.73092795, 0.73090739,
       0.7309013 , 0.73104297, 0.73102333, 0.73101585, 0.73100

In [10]:
# Ask the trained recommender for its top-k items without passing any contexts.
rec.recommend()

[10667, 10554, 1624, 9273, 11349]

In [11]:
# Same context-free call repeated -- presumably to check whether the returned list changes between invocations.
rec.recommend()

[10667, 10554, 1624, 9273, 11349]

In [12]:
import implicit
from scipy.sparse import csr_matrix
from implicit.nearest_neighbours import bm25_weight

# Number of distinct users and items in the full (train + test) data set;
# used below as the shape of the user-item matrix.
num_users, num_items = (df_full[id_col].nunique() for id_col in ('user_id', 'item_id'))

import numpy as np

In [13]:
# Build the sparse user x item matrix from the training split: every
# training row contributes a value of 1 at (user_id, item_id).
interaction_values = np.ones(len(df_train))
interaction_coords = (df_train['user_id'], df_train['item_id'])
sparse_matrix = csr_matrix(
    (interaction_values, interaction_coords),
    shape=(num_users, num_items),
)

# Apply BM25 weighting to the matrix before fitting.
sparse_matrix = bm25_weight(sparse_matrix, K1=100, B=0.8)

# Train the model: alternating least squares with 100 latent factors.
model = implicit.als.AlternatingLeastSquares(factors=100)
model.fit(sparse_matrix)

# Print the learned user factor matrix.
print(model.user_factors)



  0%|          | 0/15 [00:00<?, ?it/s]

[[ 0.6330413   0.0843142  -0.31236422 ... -0.05614147  0.830504
   0.22690941]
 [-0.12907347 -0.47721604 -0.05615124 ... -0.34586567  0.23010033
   0.05426725]
 [-0.18411027 -0.1761145  -0.39935857 ... -0.22506991  0.12009435
   0.09559502]
 ...
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]]


In [14]:
# Build the user feature table: one row per user seen in the training split,
# holding the user_id followed by that user's ALS latent factors (u0, u1, ...).

# Users in order of first appearance in the training split.
train_user_ids = df_train['user_id'].unique()

# Select all factor rows in one indexing operation instead of looping per user.
train_user_factors = model.user_factors[train_user_ids]

# Derive the feature column names from the width of the factor matrix rather
# than hard-coding 100, so this cell keeps working if the number of ALS
# factors is changed where the model is created.
feature_columns = [f'u{i}' for i in range(train_user_factors.shape[1])]

# Cast to float64 so the feature columns keep the dtype they had when the
# frame was assembled from Python lists of rows.
df_user_features = pd.DataFrame(train_user_factors, columns=feature_columns).astype('float64')
df_user_features.insert(0, 'user_id', train_user_ids)
df_user_features

Unnamed: 0,user_id,u0,u1,u2,u3,u4,u5,u6,u7,u8,...,u90,u91,u92,u93,u94,u95,u96,u97,u98,u99
0,11388,-0.033340,0.081630,0.152415,-0.068996,-0.096851,-0.103621,-0.470365,-0.113229,0.135332,...,-0.267792,-0.418621,-0.103733,-0.033490,0.190552,0.312165,0.164184,0.117681,-0.094189,0.023518
1,8735,0.014926,-0.079454,-0.017231,0.059773,-0.104999,0.273358,0.129280,-0.218305,0.173948,...,-0.189132,-0.102789,0.152247,-0.011060,0.038700,0.249593,-0.290637,-0.106430,-0.186493,0.021599
2,4887,-0.577694,-0.112263,-0.259985,-0.141879,0.128424,0.439804,-0.141219,-0.080905,0.381873,...,0.013544,0.328326,-0.150305,-0.016941,-0.067119,0.473229,0.249479,-0.115506,-0.283730,-0.342462
3,9000,0.117287,-0.228235,0.007831,-0.046745,-0.058583,-0.282060,-0.251520,-0.261794,-0.011107,...,0.175441,-0.252713,0.057018,-0.183095,0.128450,-0.245600,-0.488412,-0.019402,0.350610,0.362611
4,8681,0.014926,-0.079454,-0.017231,0.059773,-0.104999,0.273358,0.129280,-0.218305,0.173948,...,-0.189132,-0.102789,0.152247,-0.011060,0.038700,0.249593,-0.290637,-0.106431,-0.186493,0.021599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16837,13186,-0.002521,0.001730,0.011658,0.000952,-0.000287,0.001521,-0.001698,-0.001338,-0.001524,...,0.004274,-0.003989,-0.001443,-0.003281,0.006048,-0.004913,-0.001159,-0.010253,0.002598,-0.000484
16838,6404,0.222217,0.060577,-0.486655,-0.142497,0.140951,0.132682,-0.166684,-0.333449,-0.220422,...,-0.090139,0.174736,-0.056783,-0.085123,0.377471,0.204584,0.130715,0.167460,-0.002300,0.035537
16839,7084,0.003820,-0.009329,-0.021676,-0.011839,0.025496,-0.027309,-0.022356,-0.006933,-0.000317,...,0.002639,0.042628,0.007470,0.008246,0.021999,0.008617,0.023006,-0.045583,0.023994,0.026574
16840,3313,-0.146590,0.158111,0.164271,-0.155714,-0.000911,-0.402359,-0.195816,-0.111904,-0.140433,...,0.045718,-0.199365,0.090152,-0.529188,0.334771,0.330280,-0.260558,0.213978,-0.075255,0.060183


In [15]:
# LinUCB learning policy with alpha=1.0, returning 10 items per recommendation.
# Note: this rebinds `rec`, replacing the recommender created earlier.
linucb_policy = LearningPolicy.LinUCB(alpha=1.0)
rec = BanditRecommender(linucb_policy, top_k=10)

# Fit on the (user, item, response) interactions of the training split,
# passing the ALS-derived table `df_user_features` as the user features.
train(rec, data=df_train, user_features=df_user_features)

In [61]:

# Hit-rate evaluation of the trained recommender on the test split.
#
# Only test interactions whose user and item both occur in the training split
# and whose response equals 5 are evaluated.
# NOTE(review): items a user already interacted with in training are not
# excluded from the recommendations -- confirm this is intended.
is_known_user = df_test['user_id'].isin(df_train['user_id'])
is_known_item = df_test['item_id'].isin(df_train['item_id'])
is_top_response = df_test['response'] == 5
df_test_filtered = df_test[is_known_user & is_known_item & is_top_response].reset_index(drop=True)

num_interactions = len(df_test_filtered)

if num_interactions > 0:
    # One context row (the user's ALS factors) per evaluated interaction,
    # in the same order as the rows of df_test_filtered.
    recomended_itens = rec.recommend(model.user_factors[df_test_filtered['user_id'].values])

    # With a single context the recommender may return one flat list of items
    # instead of a list of lists; normalize so there is one list per interaction.
    if len(recomended_itens) > 0 and np.ndim(recomended_itens[0]) == 0:
        recomended_itens = [recomended_itens]
else:
    # Nothing to evaluate: skip the recommender call entirely.
    recomended_itens = []

# A hit is an evaluated interaction whose item appears in the list recommended
# for that same row; pairing by position avoids relying on index labels.
hits = sum(
    item_id in recommended
    for item_id, recommended in zip(df_test_filtered['item_id'].tolist(), recomended_itens)
)

# Guard against division by zero when no test interaction passes the filter.
hit_rate = hits / num_interactions if num_interactions > 0 else float('nan')

print(f'Qtd hits: {hits}')
print(f'Qtd interaction: {num_interactions}')
print(f'HR: {hit_rate}')

Qtd hits: 3
Qtd interaction: 715
HR: 0.004195804195804196
