In [1]:
%pip install mab2rec

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [3]:
rename_dict = {
    'id_user': 'user_id',
    'id_item': 'item_id',
    'rating': 'response',
}

df_full = pd.read_csv('data/interactions-timestamp.csv', sep=';')
df_full = df_full.sort_values(by='timestamp')
df_full = df_full.rename(columns=rename_dict)
df_full = df_full[['user_id', 'item_id', 'response']]
df_full['user_id'] = LabelEncoder().fit_transform(df_full['user_id'])
df_full['item_id'] = LabelEncoder().fit_transform(df_full['item_id'])

df_train = df_full.iloc[:int(len(df_full)*0.9)]
df_test = df_full.iloc[int(len(df_full)*0.9):]

In [4]:
# Example of how to train an singler recommender to generate top-4 recommendations

# Import 
from mab2rec import BanditRecommender, LearningPolicy
from mab2rec.pipeline import train, score

rec = BanditRecommender(LearningPolicy.UCB1(alpha=2), top_k=5)

# Train on (user, item, response) interactions in train data using user features 
train(rec, data=df_train)

# Score recommendations for users in test data. The output df holds 
# user_id, item_id, score columns for every test user for top-k items 
df = score(rec, data=df_test)


In [5]:
df['score'].unique()

array([0.99999945])

In [6]:
rec.recommend()

[12146, 6702, 9514, 9249, 11026]

In [7]:
rec.recommend()

[12146, 6702, 9514, 9249, 11026]

In [8]:
rec = BanditRecommender(LearningPolicy.EpsilonGreedy(epsilon=0.1), top_k=5)

# Train on (user, item, response) interactions in train data using user features 
train(rec, data=df_train)

# Score recommendations for users in test data. The output df holds 
# user_id, item_id, score columns for every test user for top-k items 
df = score(rec, data=df_test)

df

Unnamed: 0,user_id,item_id,score
0,1768,10667,0.993307
1,1768,10554,0.993307
2,1768,1624,0.993307
3,1768,9273,0.993307
4,1768,11349,0.993307
...,...,...,...
5125,14475,10667,0.993307
5126,14475,10554,0.993307
5127,14475,1624,0.993307
5128,14475,9273,0.993307


In [9]:
df['score'].unique()

array([0.99330715, 0.73105616, 0.73105466, 0.73104189, 0.73103635,
       0.73101558, 0.73105414, 0.73104963, 0.73104914, 0.73103944,
       0.73103811, 0.7310447 , 0.73104185, 0.73103221, 0.73103035,
       0.73101935, 0.73105733, 0.73103137, 0.73101284, 0.7310055 ,
       0.73099072, 0.73103219, 0.73098477, 0.73097078, 0.73096736,
       0.7309599 , 0.73103351, 0.73102055, 0.73099993, 0.73099932,
       0.73098359, 0.73103197, 0.73102758, 0.7310084 , 0.73100648,
       0.73100089, 0.73103477, 0.73102584, 0.73102306, 0.73102298,
       0.73102066, 0.73105715, 0.73105418, 0.7310504 , 0.73103173,
       0.7310221 , 0.73102065, 0.73101734, 0.73100028, 0.7309842 ,
       0.73097556, 0.73104583, 0.73104232, 0.7310171 , 0.73098612,
       0.73098313, 0.73101072, 0.73099979, 0.73096864, 0.73093786,
       0.73093498, 0.73104916, 0.73103457, 0.73098242, 0.73096374,
       0.7309547 , 0.73102635, 0.73095699, 0.73092795, 0.73090739,
       0.7309013 , 0.73104297, 0.73102333, 0.73101585, 0.73100

In [10]:
rec.recommend()

[10667, 10554, 1624, 9273, 11349]

In [11]:
rec.recommend()

[10667, 10554, 1624, 9273, 11349]

In [12]:
import implicit
from scipy.sparse import csr_matrix
from implicit.nearest_neighbours import bm25_weight

num_users = df_full['user_id'].nunique()
num_items = df_full['item_id'].nunique()

import numpy as np

In [13]:
# Cria a matriz esparsa
sparse_matrix = csr_matrix((np.ones(len(df_train)), (df_train['user_id'], df_train['item_id'])), shape=(num_users, num_items))
sparse_matrix = bm25_weight(sparse_matrix, K1=100, B=0.8)

# Treina o modelo
model = implicit.als.AlternatingLeastSquares(factors=100)
model.fit(sparse_matrix)

print(model.user_factors)



  0%|          | 0/15 [00:00<?, ?it/s]

[[-0.40210882 -0.16912425 -0.5583178  ... -0.16017601 -0.10320385
  -0.40288007]
 [ 0.3855679   0.05889401 -0.22290105 ... -0.13144343 -0.04424416
  -0.27088004]
 [ 0.06104115  0.0186722  -0.58104753 ... -0.7199946  -0.07642151
  -0.19675371]
 ...
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]]


In [14]:
user_features_list = []

for user_id in df_train['user_id'].unique():
    user_factors = model.user_factors[user_id]
    user_features_list.append([user_id] + list(user_factors))

df_user_features = pd.DataFrame(user_features_list, columns=['user_id'] + [f'u{i}' for i in range(100)])
df_user_features

Unnamed: 0,user_id,u0,u1,u2,u3,u4,u5,u6,u7,u8,...,u90,u91,u92,u93,u94,u95,u96,u97,u98,u99
0,11388,0.152003,-0.394933,0.542224,0.185572,0.028992,-0.148533,-0.012656,-0.321448,-0.013264,...,-0.109907,-0.049111,0.089497,-0.448419,0.473361,0.019654,0.429771,-0.093650,0.220166,0.078266
1,8735,-0.109580,0.033833,-0.178607,0.104085,0.293824,0.242556,0.148482,-0.182125,-0.106203,...,0.068668,-0.192617,-0.261302,0.010963,0.295600,-0.004168,-0.040178,-0.157377,0.079962,-0.058757
2,4887,-0.149574,-0.215119,0.237992,0.210666,0.131818,0.273594,0.160338,-0.157377,-0.040563,...,-0.130770,0.165178,-0.051746,0.081654,0.373102,-0.044136,0.745974,-0.028717,-0.474286,-0.428504
3,9000,0.020577,-0.209371,-0.151863,-0.014367,0.181815,-0.307750,0.172838,0.473256,-0.247174,...,0.169761,-0.133803,0.192819,-0.140820,0.315678,0.061141,0.257007,-0.219354,-0.239609,0.150926
4,8681,-0.109580,0.033833,-0.178607,0.104085,0.293824,0.242556,0.148482,-0.182125,-0.106203,...,0.068668,-0.192617,-0.261302,0.010963,0.295600,-0.004168,-0.040178,-0.157377,0.079962,-0.058757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16837,13186,0.003678,-0.003293,0.005994,-0.000141,0.011212,-0.006449,0.007356,-0.010782,-0.003087,...,-0.002945,0.007415,-0.001751,0.007136,-0.006404,0.000711,0.008261,0.006151,0.004754,0.002639
16838,6404,0.332376,-0.446171,0.076789,0.026138,0.294959,-0.043189,0.073488,0.297872,0.320538,...,0.145667,0.217302,-0.045455,0.243219,0.480914,-0.386197,-0.216364,-0.166227,-0.050695,-0.143461
16839,7084,0.020158,0.009932,0.020581,-0.006596,0.000547,0.029991,0.010869,-0.006767,-0.006718,...,-0.018102,0.011166,0.010106,-0.004126,0.002188,0.029576,-0.027060,0.039976,0.019123,-0.007104
16840,3313,-0.152187,-0.090835,-0.128128,0.043058,-0.223468,0.044747,0.117984,0.143654,0.044521,...,0.149099,0.134068,-0.516757,0.070886,0.515678,0.202795,-0.363496,0.545432,0.023426,-0.028730


In [15]:
rec = BanditRecommender(LearningPolicy.LinUCB(alpha=1.0), top_k=10)

# Train on (user, item, response) interactions in train data using user features 
train(rec, data=df_train, user_features=df_user_features)

In [29]:
hits = 0
total = 0

df_test = df_test.reset_index(drop=True)

for i, interaction in df_test.iterrows():
    print(f'{i} of {len(df_test)} ({(i/len(df_test)*100):.2f} %)', end='\r')
    user_id = interaction['user_id']
    item_id = interaction['item_id']
    response = interaction['response']
    if interaction['user_id'] in df_train['user_id'] and response == 5:
        recomended_itens = rec.recommend([model.user_factors[user_id]])
        if item_id in recomended_itens:
            hits += 1
        total += 1

print(hits)
print(total)
print(hits/total)

3266 of 7267 (99.99 %)
2101
0.0014278914802475012


In [31]:
print(hits)
print(total)
print(hits/total)

3
2101
0.0014278914802475012
