## Import

In [1]:
import pandas as pd
import numpy as np
import math
import re
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity

sns.set_style("darkgrid")

### Importare dataset

In [2]:
df1 = pd.read_csv('./combined_data_1.txt', header = None, names = ['Cust_Id', 'Rating'], usecols = [0,1])

df1['Rating'] = df1['Rating'].astype(float)

print('Dataset 1 shape:',df1.shape)

# Mi genera un nuovo indice da 1
df1 = df1.reset_index()
print('Dataset 1 shape without rating null:',df1.shape)

print('-Dataset examples-')
print(df1[:5])


Dataset 1 shape: (24058263, 2)
Dataset 1 shape without rating null: (24058263, 3)
-Dataset examples-
   index  Cust_Id  Rating
0      0       1:     NaN
1      1  1488844     3.0
2      2   822109     5.0
3      3   885013     4.0
4      4    30878     4.0


### Prendo i primi 1000

In [3]:
df1 = df1.head(100000)

### Non so cosa faccia ma mi mete movie id 

In [4]:
df_nan = pd.DataFrame(pd.isnull(df1.Rating))
df_nan = df_nan[df_nan['Rating'] == True]
df_nan = df_nan.reset_index()

movie_np = []
movie_id = 1

for i,j in zip(df_nan['index'][1:],df_nan['index'][:-1]):
    # numpy approach
    temp = np.full((1,i-j-1), movie_id)
    movie_np = np.append(movie_np, temp)
    movie_id += 1

# Account for last record and corresponding length
# numpy approach
last_record = np.full((1,len(df1) - df_nan.iloc[-1, 0] - 1),movie_id)
movie_np = np.append(movie_np, last_record)

#print('Movie numpy:',movie_np)
#print('Length:',len(movie_np))

df = df1[pd.notnull(df1['Rating'])]

df['Movie_Id'] = movie_np.astype(int)
df['Cust_Id'] = df['Cust_Id'].astype(int)
print('-Dataset examples-')
print(df)


-Dataset examples-
       index  Cust_Id  Rating  Movie_Id
1          1  1488844     3.0         1
2          2   822109     5.0         1
3          3   885013     4.0         1
4          4    30878     4.0         1
5          5   823519     3.0         1
...      ...      ...     ...       ...
99995  99995   735848     4.0        30
99996  99996   254710     4.0        30
99997  99997   865725     4.0        30
99998  99998   568153     3.0        30
99999  99999  2502775     4.0        30

[99970 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Movie_Id'] = movie_np.astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Cust_Id'] = df['Cust_Id'].astype(int)


In [None]:
#df_title = pd.read_csv('./movie_titles.csv', encoding = "ISO-8859-1", header = None, names = ['Movie_Id', 'Year', 'Name'],usecols=[0,1,2])
#df_title.set_index('Movie_Id', inplace = True)
#print (df_title[:5])

In [5]:
df.loc[df['Cust_Id']==1333]

Unnamed: 0,index,Cust_Id,Rating,Movie_Id
772,772,1333,4.0,3
5549,5549,1333,3.0,8
31741,31741,1333,4.0,18
46573,46573,1333,3.0,26
53594,53594,1333,2.0,28
96010,96010,1333,3.0,30


### Creo matrice di rating

In [6]:
ratings_matrix = df.pivot(index='Cust_Id', columns='Movie_Id', values='Rating')

# Se vuoi gestire eventuali valori mancanti con uno zero, puoi utilizzare il metodo fillna
ratings_matrix = ratings_matrix.fillna(0)

# Se preferisci avere la matrice come array numpy, puoi utilizzare values
#ratings_matrix_array = ratings_matrix.values

print(ratings_matrix)

Movie_Id   1    2    3    4    5    6    7    8    9    10  ...   21   22  \
Cust_Id                                                     ...             
7         0.0  0.0  0.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  ...  0.0  0.0   
134       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
201       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
261       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
265       0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
2649331   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
2649336   0.0  0.0  0.0  0.0  0.0  0.0  0.0  4.0  0.0  0.0  ...  0.0  0.0   
2649375   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
2649378   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
2649426   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   

## Usare user based NN

#### Calcolo similarità

In [7]:
id_user_target = 1333

In [8]:
user_target = ratings_matrix.loc[id_user_target]
print(user_target)

Movie_Id
1     0.0
2     0.0
3     4.0
4     0.0
5     0.0
6     0.0
7     0.0
8     3.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
15    0.0
16    0.0
17    0.0
18    4.0
19    0.0
20    0.0
21    0.0
22    0.0
23    0.0
24    0.0
25    0.0
26    3.0
27    0.0
28    2.0
29    0.0
30    3.0
Name: 1333, dtype: float64


In [9]:
rating_medi = {}
# calcolo rating medio senza considerare gli 0
for i in ratings_matrix.iterrows():
        #print(i[0])
        count = 0
        countn = 0
        for j in i[1].values:
            if j != 0:
                  count += j
                  countn += 1
        rating_medi[i[0]] = count/countn
print(rating_medi[1333])
print(rating_medi)

3.1666666666666665
{7: 4.5, 134: 5.0, 201: 4.0, 261: 4.0, 265: 3.0, 307: 3.0, 383: 3.0, 384: 4.0, 424: 4.0, 462: 2.0, 478: 4.0, 491: 3.0, 527: 5.0, 592: 4.0, 685: 3.0, 695: 1.0, 734: 1.0, 742: 4.0, 815: 4.0, 834: 3.0, 840: 2.0, 906: 3.0, 911: 4.0, 915: 5.0, 933: 4.0, 967: 1.0, 979: 3.0, 1034: 5.0, 1086: 3.0, 1146: 3.0, 1186: 3.0, 1243: 4.0, 1310: 3.0, 1333: 3.1666666666666665, 1353: 3.0, 1409: 3.0, 1427: 3.0, 1442: 4.0, 1500: 1.0, 1527: 5.0, 1584: 3.0, 1611: 4.0, 1636: 4.0, 1650: 5.0, 1658: 3.0, 1664: 5.0, 1810: 3.0, 1811: 2.5, 1830: 5.0, 1856: 5.0, 1871: 4.0, 1878: 5.0, 1894: 4.0, 1900: 4.0, 1913: 3.0, 1918: 3.0, 1983: 3.0, 2000: 3.5, 2040: 2.0, 2133: 2.3333333333333335, 2213: 4.0, 2225: 3.0, 2264: 3.0, 2273: 3.0, 2276: 3.0, 2307: 3.0, 2363: 4.0, 2442: 3.0, 2455: 3.0, 2469: 2.0, 2555: 5.0, 2614: 3.0, 2630: 2.0, 2678: 2.0, 2757: 2.0, 2775: 5.0, 2787: 2.0, 2878: 5.0, 2905: 5.0, 2976: 3.5, 3039: 3.0, 3104: 3.0, 3168: 4.0, 3184: 2.0, 3210: 5.0, 3285: 1.0, 3292: 4.0, 3321: 2.57142857142857

In [10]:
# Definire la funzione per il calcolo della similarità tra utenti utilizzando la person coefficent
similarità = {}
def user_similarity(ratings,user_target):
    # scorro UTENTI
    for i in ratings.iterrows():
        # se l'UTENTE è diverso dall'utente target
        #print(i[0])
        if i[0] != id_user_target:
            # calcolo similarità
            #print(i[1].values[0])
            numeratore = 0
            denominatoreP1 = 0
            denominatoreP2 = 0
            for j in range(len(i[1].values)):
                numeratore += (i[1].values[j]-rating_medi[i[0]])*(user_target.values[j]-rating_medi[id_user_target])
                denominatoreP1 += (i[1].values[j]-rating_medi[i[0]])**2
                denominatoreP2 +=(user_target.values[j]-rating_medi[id_user_target])**2
            sim = numeratore/(math.sqrt(denominatoreP1)*math.sqrt(denominatoreP2))
            similarità[(i[0],id_user_target)] = sim

# Calcolare la similarità tra utenti
user_similarity(ratings_matrix,user_target)

{(7, 1333): 0.9052168987648441, (134, 1333): 0.8905260105598528, (201, 1333): 0.8905260105598534, (261, 1333): 0.9024261354225686, (265, 1333): 0.8905260105598534, (307, 1333): 0.9024261354225686, (383, 1333): 0.8905260105598534, (384, 1333): 0.9024261354225686, (424, 1333): 0.9143262602852837, (462, 1333): 0.866725760834423, (478, 1333): 0.9024261354225686, (491, 1333): 0.866725760834423, (527, 1333): 0.8905260105598528, (592, 1333): 0.8905260105598534, (685, 1333): 0.866725760834423, (695, 1333): 0.9024261354225686, (734, 1333): 0.9024261354225686, (742, 1333): 0.9024261354225686, (815, 1333): 0.8905260105598534, (834, 1333): 0.9024261354225686, (840, 1333): 0.866725760834423, (906, 1333): 0.8905260105598534, (911, 1333): 0.866725760834423, (915, 1333): 0.8667257608344224, (933, 1333): 0.9143262602852837, (967, 1333): 0.866725760834423, (979, 1333): 0.9024261354225686, (1034, 1333): 0.8905260105598528, (1086, 1333): 0.8905260105598534, (1146, 1333): 0.9143262602852837, (1186, 1333): 

In [45]:
utenteSimileUno = ratings_matrix.loc[7]
utenteSimileDue = ratings_matrix.loc[1333]
#print(utenteSimileUno)
#print(utenteSimileDue)
for i in range(len(utenteSimileUno)):
    if utenteSimileUno[i] != utenteSimileDue[i]:
        print(utenteSimileUno[i])
        print(utenteSimileDue[i])

KeyError: 0

In [11]:
# FACCIO UNA SOGLIO K DI VICINATO SULLA BASE DELLA SIMILARITÀ
sim_soglia = {}
for i in similarità:
    if similarità[i] > 0.96:
        sim_soglia[i] = similarità[i]
print(sim_soglia)

{(6460, 1333): 0.9617155540226007, (21983, 1333): 0.9706624830208426, (61765, 1333): 0.9715058580339183, (1086213, 1333): 0.9606728176724295, (1214640, 1333): 0.9632295783722342, (1331154, 1333): 0.9692508031861113, (1733406, 1333): 0.9609412121430945, (1774181, 1333): 0.9651662101927648}


#### Prediction

In [12]:
# ITEM PER IL QUALE VOGLIO PREDIRRE RATING
id_item_target = 1

In [17]:
# Definire una funzione per la predizione delle valutazioni utilizzando User-Based Collaborative Filtering
def predict_user_based(item,user_similarities):
    # Calcolare le valutazioni previste
    num = 0
    den = 0
    if ratings_matrix.loc[id_user_target].values[item] == 0:
        for i in user_similarities:
            num += (user_similarities[i]* (ratings_matrix.loc[i[0]].values[item]-rating_medi[i[0]]))
            den += user_similarities[i]
        """ print(rating_medi[id_user_target])
        print(num)
        print(den) """
        pred_ratings = rating_medi[id_user_target] + (num / den)
        return pred_ratings
    else:
        return ratings_matrix.loc[id_user_target].values[item]

# Ottenere le previsioni per tutte le valutazioni
predicted_ratings_user_based = predict_user_based(id_item_target, sim_soglia)
print(f'Rating per l item {id_item_target} è di {predicted_ratings_user_based} per l utente {id_user_target}')


Rating per l item 1 è di -0.36473040891583564 per l utente 1333


### Ranking

In [18]:
# Ottenere le previsioni per tutte le valutazioni
for m in range(len(user_target)):
    predicted_ratings_user_based = predict_user_based(m, sim_soglia)
    if predicted_ratings_user_based > 0:    
        print(f'Rating per l item {m} è di {predicted_ratings_user_based} per l utente {id_user_target}')
for m in range(len(user_target)):
    if user_target.values[m] > 0:
        print(f'Rating ESPLICITI per l item {m} è di {user_target.values[m]} per l utente {id_user_target}')


Rating per l item 2 è di 4.0 per l utente 1333
Rating per l item 7 è di 3.0 per l utente 1333
Rating per l item 17 è di 4.0 per l utente 1333
Rating per l item 25 è di 3.0 per l utente 1333
Rating per l item 27 è di 2.0 per l utente 1333
Rating per l item 29 è di 3.0 per l utente 1333
Rating ESPLICITI per l item 2 è di 4.0 per l utente 1333
Rating ESPLICITI per l item 7 è di 3.0 per l utente 1333
Rating ESPLICITI per l item 17 è di 4.0 per l utente 1333
Rating ESPLICITI per l item 25 è di 3.0 per l utente 1333
Rating ESPLICITI per l item 27 è di 2.0 per l utente 1333
Rating ESPLICITI per l item 29 è di 3.0 per l utente 1333


## Usare item based NN 

In [19]:
ratings_matrix_transpose = df.pivot(index='Movie_Id', columns='Cust_Id', values='Rating')

# Se vuoi gestire eventuali valori mancanti con uno zero, puoi utilizzare il metodo fillna
ratings_matrix_transpose = ratings_matrix_transpose.fillna(0)

# Se preferisci avere la matrice come array numpy, puoi utilizzare values
#ratings_matrix_array = ratings_matrix.values

print(ratings_matrix_transpose)

Cust_Id   7        134      201      261      265      307      383      \
Movie_Id                                                                  
1             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
2             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
3             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
4             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
5             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
6             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
7             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
8             5.0      0.0      0.0      0.0      0.0      3.0      0.0   
9             0.0      0.0      0.0      0.0      0.0      0.0      0.0   
10            0.0      0.0      0.0      0.0      0.0      0.0      0.0   
11            0.0      0.0      0.0      0.0      0.0      0.0      0.0   
12            0.0      0.

In [21]:
rating_medi_transpose = {}
# calcolo rating medio senza considerare gli 0
for i in ratings_matrix_transpose.iterrows():
        #print(i[0])
        count = 0
        countn = 0
        for j in i[1].values:
            if j != 0:
                  count += j
                  countn += 1
        rating_medi_transpose[i[0]] = count/countn
print(rating_medi_transpose)

{1: 3.749542961608775, 2: 3.5586206896551724, 3: 3.6411530815109345, 4: 2.73943661971831, 5: 3.9192982456140353, 6: 3.084396467124632, 7: 2.129032258064516, 8: 3.1898054996646548, 9: 2.6210526315789475, 10: 3.180722891566265, 11: 3.0303030303030303, 12: 3.4175824175824174, 13: 4.552, 14: 3.0254237288135593, 15: 3.286206896551724, 16: 3.0985550203779177, 17: 2.90320765334834, 18: 3.7843685879500093, 19: 3.324675324675325, 20: 3.146551724137931, 21: 3.463302752293578, 22: 2.2463054187192117, 23: 3.55609756097561, 24: 2.9939984996249063, 25: 3.9701739850869924, 26: 2.7937212079849854, 27: 3.5274725274725274, 28: 3.823254175890521, 29: 3.598470363288719, 30: 3.7582264361405464}


In [22]:
item_target = ratings_matrix_transpose.loc[id_item_target]
print(item_target)

Cust_Id
7          0.0
134        0.0
201        0.0
261        0.0
265        0.0
          ... 
2649331    0.0
2649336    0.0
2649375    0.0
2649378    0.0
2649426    0.0
Name: 1, Length: 81472, dtype: float64


In [24]:
# Definire la funzione per il calcolo della similarità tra utenti utilizzando la person coefficent
similarita_transpose = {}
def item_similarity(ratings,item_target):
    # scorro UTENTI
    for i in ratings.iterrows():
        # se l'ITEM è diverso dall'item target
        #print(i[0])
        if i[0] != id_item_target:
            # calcolo similarità
            #print(i[1].values[0])
            numeratore = 0
            denominatoreP1 = 0
            denominatoreP2 = 0
            for j in range(len(i[1].values)):
                #print(item_target.values[j])
                numeratore += (i[1].values[j]-rating_medi_transpose[i[0]])*(item_target.values[j]-rating_medi_transpose[id_item_target])
                denominatoreP1 += (i[1].values[j]-rating_medi_transpose[i[0]])**2
                denominatoreP2 +=(item_target.values[j]-rating_medi_transpose[id_item_target])**2
            sim = numeratore/(math.sqrt(denominatoreP1)*math.sqrt(denominatoreP2))
            similarita_transpose[(i[0],id_item_target)] = sim

# Calcolare la similarità tra utenti
item_similarity(ratings_matrix_transpose,item_target)
print(similarita_transpose)

{(2, 1): 0.9954090499357793, (3, 1): 0.9830503411404298, (4, 1): 0.9953791364917691, (5, 1): 0.9887715110774403, (6, 1): 0.9890314804875774, (7, 1): 0.9957679998976511, (8, 1): 0.8832617291061291, (9, 1): 0.9956843484644764, (10, 1): 0.9947121930336542, (11, 1): 0.9950302007466807, (12, 1): 0.9929214617064804, (13, 1): 0.9956240801718327, (14, 1): 0.995585929874714, (15, 1): 0.994419074651809, (16, 1): 0.9784197309631284, (17, 1): 0.9462232103390528, (18, 1): 0.924161585562065, (19, 1): 0.9928610319089957, (20, 1): 0.9956077081134255, (21, 1): 0.9949754614227478, (22, 1): 0.9948883419998076, (23, 1): 0.992471167091568, (24, 1): 0.9873116958220781, (25, 1): 0.9885139273126919, (26, 1): 0.9541656680596866, (27, 1): 0.9946458777701598, (28, 1): 0.691205360447981, (29, 1): 0.9929325130074333, (30, 1): 0.9482389750066917}


In [25]:
# FACCIO UNA SOGLIO K DI VICINATO SULLA BASE DELLA SIMILARITÀ
sim_soglia_transpose = {}
for i in similarita_transpose:
    if similarita_transpose[i] > 0.96:
        sim_soglia_transpose[i] = similarita_transpose[i]
print(sim_soglia_transpose)

{(2, 1): 0.9954090499357793, (3, 1): 0.9830503411404298, (4, 1): 0.9953791364917691, (5, 1): 0.9887715110774403, (6, 1): 0.9890314804875774, (7, 1): 0.9957679998976511, (9, 1): 0.9956843484644764, (10, 1): 0.9947121930336542, (11, 1): 0.9950302007466807, (12, 1): 0.9929214617064804, (13, 1): 0.9956240801718327, (14, 1): 0.995585929874714, (15, 1): 0.994419074651809, (16, 1): 0.9784197309631284, (19, 1): 0.9928610319089957, (20, 1): 0.9956077081134255, (21, 1): 0.9949754614227478, (22, 1): 0.9948883419998076, (23, 1): 0.992471167091568, (24, 1): 0.9873116958220781, (25, 1): 0.9885139273126919, (27, 1): 0.9946458777701598, (29, 1): 0.9929325130074333}


In [26]:
# Definire una funzione per la predizione delle valutazioni utilizzando User-Based Collaborative Filtering
def predict_item_based(item,item_similarities):
    # Calcolare le valutazioni previste
    num = 0
    den = 0
    if ratings_matrix_transpose.loc[id_item_target].values[item] == 0:
        for i in item_similarities:
            num += (item_similarities[i]* (ratings_matrix_transpose.loc[i[0]].values[item]))
            den += item_similarities[i]
        pred_ratings = (num / den)
        return pred_ratings
    else:
        return ratings_matrix.loc[id_user_target].values[item]

# Ottenere le previsioni per tutte le valutazioni
predicted_ratings_user_based = predict_item_based(id_item_target, sim_soglia_transpose)
print(f'Rating per l item {id_item_target} è di {predicted_ratings_user_based} per l utente {id_user_target}')


Rating per l item 1 è di 0.0 per l utente 1333


In [27]:
# Ottenere le previsioni per tutte le valutazioni
for m in range(len(user_target)):
    predicted_ratings_user_based = predict_item_based(m, sim_soglia_transpose)
    if predicted_ratings_user_based > 0:    
        print(f'Rating per l item {m} è di {predicted_ratings_user_based} per l utente {id_user_target}')
for m in range(len(user_target)):
    if user_target.values[m] > 0:
        print(f'Rating ESPLICITI per l item {m} è di {user_target.values[m]} per l utente {id_user_target}')

Rating per l item 14 è di 0.1299646284408879 per l utente 1333
Rating per l item 25 è di 0.04361104602450898 per l utente 1333
Rating ESPLICITI per l item 2 è di 4.0 per l utente 1333
Rating ESPLICITI per l item 7 è di 3.0 per l utente 1333
Rating ESPLICITI per l item 17 è di 4.0 per l utente 1333
Rating ESPLICITI per l item 25 è di 3.0 per l utente 1333
Rating ESPLICITI per l item 27 è di 2.0 per l utente 1333
Rating ESPLICITI per l item 29 è di 3.0 per l utente 1333


## Paradigma : User based vs item base

## Matrix factorization: SVD 

In [45]:
reader = Reader()

# get just top 100K rows for faster run time
data = Dataset.load_from_df(df[['Cust_Id', 'Movie_Id', 'Rating']][:], reader)
#data.split(n_folds=3)
trainsetData, testsetData = train_test_split(data, test_size=0.2)

In [46]:
svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'],verbose=True,return_train_measures=True,n_jobs=100)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2254  1.2131  1.2170  1.2565  1.2325  1.2289  0.0153  
MAE (testset)     0.9988  0.9996  0.9942  1.0366  1.0205  1.0099  0.0161  
RMSE (trainset)   0.5322  0.5382  0.5182  0.5279  0.5203  0.5273  0.0074  
MAE (trainset)    0.4281  0.4322  0.4164  0.4230  0.4179  0.4235  0.0060  
Fit time          0.12    0.11    0.12    0.14    0.15    0.13    0.01    
Test time         0.01    0.01    0.01    0.01    0.01    0.01    0.00    


{'test_rmse': array([1.22535503, 1.21308528, 1.21703509, 1.25645305, 1.23246136]),
 'train_rmse': array([0.53220637, 0.53822074, 0.51816037, 0.52785702, 0.52029843]),
 'test_mae': array([0.9987915 , 0.99960116, 0.99415463, 1.03657082, 1.02049136]),
 'train_mae': array([0.42813225, 0.43215188, 0.41635078, 0.4229511 , 0.41787346]),
 'fit_time': (0.12108469009399414,
  0.11477518081665039,
  0.12203168869018555,
  0.13921070098876953,
  0.14701271057128906),
 'test_time': (0.010886192321777344,
  0.00849604606628418,
  0.009485721588134766,
  0.007188558578491211,
  0.007143974304199219)}

## Metriche : Precision vs Recal vs F1 vs MAE vs RMSE vs NMAE

In [47]:
ts = data.build_full_trainset()
svd.fit(ts)

predictions = svd.test(testsetData)

y_true = [int(pred.r_ui) for pred in predictions]
y_pred = [int(pred.est) for pred in predictions]
print(y_true)
print(y_pred)


# Calcola le metriche aggiuntive
precision = precision_score(y_true, [round(pred) for pred in y_pred], average='micro')
recall = recall_score(y_true, [round(pred) for pred in y_pred], average='micro')
f1 = f1_score(y_true, [round(pred) for pred in y_pred], average='micro')
nmae = accuracy.mae(predictions) / (max(y_true) - min(y_true))
mae = accuracy.mae(predictions)

# Stampa i risultati
print(f'Precision:',precision)
print(f'Recall:',recall)
print(f'F1 Score:',f1)
print(f'NMAE:',nmae)
print(f'MAE:',mae)

[4, 1, 5, 2, 3, 5, 3, 4, 5, 5, 3, 5, 4, 4, 3, 4, 3, 2, 3, 2, 4, 3, 3, 2, 4, 3, 3, 3, 2, 5, 5, 4, 5, 5, 4, 5, 3, 2, 3, 5, 1, 4, 2, 1, 5, 4, 1, 2, 3, 5, 5, 5, 5, 4, 1, 1, 4, 3, 4, 5, 1, 4, 3, 4, 3, 2, 2, 4, 4, 2, 3, 4, 4, 1, 4, 5, 5, 4, 2, 5, 2, 3, 3, 3, 5, 5, 3, 4, 5, 3, 5, 4, 2, 5, 5, 4, 4, 4, 4, 2, 5, 5, 1, 4, 3, 4, 4, 1, 4, 3, 4, 5, 3, 4, 5, 2, 3, 4, 1, 2, 1, 4, 4, 4, 2, 5, 5, 5, 1, 4, 3, 3, 2, 3, 1, 1, 4, 2, 4, 5, 1, 4, 1, 4, 5, 3, 4, 3, 5, 1, 3, 4, 3, 3, 3, 3, 4, 1, 2, 1, 3, 5, 5, 3, 2, 5, 1, 4, 4, 5, 2, 4, 3, 1, 2, 3, 3, 1, 4, 5, 1, 3, 5, 2, 5, 3, 4, 1, 4, 5, 4, 3, 1, 4, 3, 4, 3, 4, 1, 5, 3, 3, 4, 3, 4, 5, 2, 4, 1, 4, 2, 4, 5, 2, 3, 2, 4, 2, 4, 4, 4, 1, 2, 2, 4, 3, 5, 5, 4, 5, 4, 4, 3, 2, 4, 2, 5, 4, 1, 4, 4, 2, 3, 4, 3, 5, 1, 5, 3, 2, 4, 3, 1, 4, 1, 4, 3, 2, 2, 5, 4, 3, 1, 4, 2, 1, 3, 2, 5, 1, 5, 4, 4, 3, 5, 5, 1, 4, 4, 4, 3, 4, 3, 1, 4, 3, 3, 5, 2, 5, 3, 2, 1, 5, 2, 4, 4, 1, 5, 3, 4, 3, 3, 2, 3, 4, 5, 3, 4, 3, 3, 4, 2, 3, 2, 2, 3, 1, 4, 4, 3, 3, 1, 2, 4, 5, 4, 2, 1, 5, 3, 4, 3, 

Probabilistic approch : Classificatore bayesiano 