In [1]:
import numpy as np
import matplotlib.pyplot as pyplot
import pandas as pd
import scipy.sparse as sps
%matplotlib inline  

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample


In [2]:
# data_train_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_train.csv"
# data_target_user_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_target_users_test.csv"
data_train_path="data_train.csv"
data_target_user_path="data_target_users_test.csv"
data_train = pd.read_csv(data_train_path)
data_target = pd.read_csv(data_target_user_path)

In [3]:
URM_all =  data_train.pivot(index='row', columns='col', values='data').fillna(0)
item_map = {i : item for i, item in enumerate(URM_all.columns)}
user_map = {i : user for i, user in enumerate(data_target["user_id"])}
item_map_inv = {item : i for i, item in item_map.items()}
user_map_inv = {user : i for i, user in user_map.items()}
missing_index = [x for x in range(1,13025) if x not in URM_all.index.tolist()]
add_urm = pd.DataFrame(index = missing_index, columns = URM_all.columns).fillna(0)
URM_all = pd.concat([URM_all, add_urm]).sort_index()
del add_urm
del missing_index
#data_target["user_id"] = data_target["user_id"]
URM_all = URM_all.to_numpy()
URM_all = sps.csr_matrix(URM_all)
URM_all

<13024x22222 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in Compressed Sparse Row format>

In [4]:
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)



In [5]:
from sklearn.decomposition import NMF
num_factors = 10

nmf_solver = NMF(n_components  = num_factors,
                 init = "random",
                 solver = "mu", #"multiplicative_update",
                 beta_loss = "frobenius",
                 l1_ratio = 0.01,
                 shuffle = True,
                 verbose = True,
                 max_iter = 500)

In [6]:
nmf_solver.fit(URM_train)

ITEM_factors = nmf_solver.components_.copy().T
USER_factors = nmf_solver.transform(URM_all)

Epoch 10 reached after 0.110 seconds, error: 604.569816
Epoch 20 reached after 0.193 seconds, error: 600.928523
Epoch 30 reached after 0.293 seconds, error: 600.453189
Epoch 40 reached after 0.390 seconds, error: 600.315053
Epoch 50 reached after 0.493 seconds, error: 600.259206
Epoch 10 reached after 0.029 seconds, error: 667.866017
Epoch 20 reached after 0.041 seconds, error: 667.852889


In [7]:
user_item_similarity = np.multiply(np.dot(USER_factors, ITEM_factors.T), 1-URM_all.todense())

In [8]:
prediction = np.flip(np.argsort(user_item_similarity, axis=1), axis = 1)[:,:10]
prediction

matrix([[ 35,   9, 111, ...,  67, 136,  31],
        [ 14,  10,  35, ...,  37,  53,  17],
        [ 10,  16,   0, ...,   6,   4,  41],
        ...,
        [ 65,  59,  76, ..., 137, 174, 181],
        [ 31,  76,  59, ..., 137, 174, 207],
        [ 14, 138,  35, ..., 268, 164, 253]], dtype=int64)

In [9]:
prediction_values = np.flip(np.sort(user_item_similarity, axis=1), axis = 1)[:,:10]
prediction_values

matrix([[0.28294343, 0.22136593, 0.21174182, ..., 0.13303132, 0.13069007,
         0.11883304],
        [0.38302221, 0.26318491, 0.23538385, ..., 0.19069374, 0.17888173,
         0.17501756],
        [0.01531776, 0.01393463, 0.01349276, ..., 0.01065761, 0.01015864,
         0.00994775],
        ...,
        [0.03092407, 0.02881409, 0.0287427 , ..., 0.01696586, 0.01693397,
         0.01583334],
        [0.20754692, 0.20287945, 0.20268314, ..., 0.12952236, 0.12499906,
         0.11976217],
        [0.29094696, 0.27023305, 0.22733445, ..., 0.18424269, 0.18260164,
         0.17760088]])

In [10]:
suggestions = pd.DataFrame(columns = ["user_id", "item_list", "item_values"])

for user_id in (data_target["user_id"]-1):
    suggestions.loc[len(suggestions)] = [user_id+1, " ".join([str(item_map[x]) for x in prediction[user_id].flat]), " ".join([str(x) for x in prediction_values[user_id].flat])]

suggestions
# suggestions.to_csv("outputs/NMF.csv", index = False)

Unnamed: 0,user_id,item_list,item_values
0,1,36 10 112 54 101 18 3 68 137 32,0.2829434340369058 0.2213659277880494 0.211741...
1,2,15 11 36 6 5 3 28 38 54 18,0.38302221358388455 0.26318490942849343 0.2353...
2,3,11 17 1 28 9 47 27 7 5 42,0.01531775764605701 0.013934632138844837 0.013...
3,4,28 50 9 5 35 145 56 136 104 122,0.4975431638916785 0.383577149639085 0.3511128...
4,5,4 6 77 32 34 38 116 88 51 99,0.25796632838484695 0.21235349897591843 0.1979...
...,...,...,...
10877,13020,7 10 34 112 51 87 85 88 54 29,0.5142445808931193 0.10370648051327588 0.09934...
10878,13021,32 96 138 182 208 133 139 324 344 254,0.873747040593103 0.6272023724942213 0.5726781...
10879,13022,66 60 77 32 116 148 96 138 175 182,0.030924072499808435 0.02881409110548711 0.028...
10880,13023,32 77 60 116 7 148 96 138 175 208,0.20754691576002393 0.2028794519972246 0.20268...
