In [1]:
import numpy as np
import pandas as pd

from helper.loader import load_ratings, load_movies, load_lists, load_500_1000_corr
from helper.preprocesser import preprocess_ratings, prepare_test_data_dense
from helper.application import application
import helper.collaborative as coll
import helper.showResults as sh

from tqdm import tqdm

# Performance

Notebook to check the performance of our different models by predicting ratings which we manually removed to have a ground truth

In [2]:
ratings = load_ratings()
movies = load_movies()
lists = load_lists()

In [3]:
ratings_new, lists_new = preprocess_ratings(ratings, lists, 500,1000)
dense_user_item = coll.get_dense_user_item(ratings_new)
popu_matrix = coll.get_popularity(lists_new, dense_user_item)

average_ratings = coll.compute_average_ratings(dense_user_item)

In [4]:
sum(dense_user_item.count())

3188952

starting with only 1/1000 of data missing, this represent 3188 ratings to predict

In [17]:
train_data,train_label = prepare_test_data_dense(dense_user_item,1/1000)

100%|██████████| 3188/3188 [00:00<00:00, 13305.83it/s]


In [6]:
#train_label

{(632, 1141): 3.0,
 (1345, 146): 4.0,
 (260, 2554): 4.0,
 (1195, 238): 4.0,
 (3513, 2417): 2.0,
 (2615, 2722): 3.0,
 (2111, 1575): 3.0,
 (2932, 2096): 4.0,
 (3697, 1494): 3.0,
 (2862, 936): 3.0,
 (881, 2428): 4.0,
 (700, 405): 4.0,
 (864, 267): 4.0,
 (2442, 1984): 3.0,
 (771, 1375): 3.0,
 (2318, 1726): 3.0,
 (3112, 1333): 4.0,
 (3899, 1801): 2.0,
 (1285, 2040): 3.0,
 (1690, 1427): 4.0,
 (3387, 1025): 4.0,
 (792, 1228): 4.0,
 (524, 2704): 3.0,
 (2884, 67): 5.0,
 (132, 2322): 3.0,
 (2228, 2111): 3.0,
 (3338, 86): 4.0,
 (2655, 1442): 2.0,
 (63, 620): 5.0,
 (1653, 1630): 3.0,
 (863, 246): 3.0,
 (1019, 2): 5.0,
 (977, 9): 4.0,
 (1774, 107): 4.0,
 (1593, 110): 4.0,
 (857, 1225): 5.0,
 (3233, 75): 2.0,
 (1185, 904): 5.0,
 (2548, 1064): 5.0,
 (2643, 1765): 1.0,
 (1207, 443): 4.0,
 (3823, 2502): 5.0,
 (3521, 707): 5.0,
 (1076, 2208): 3.0,
 (3401, 177): 3.0,
 (474, 564): 4.0,
 (2011, 2533): 3.0,
 (1577, 586): 3.0,
 (1389, 2138): 3.0,
 (3404, 369): 3.0,
 (1827, 2427): 2.0,
 (3193, 2713): 2.0,
 (1

In [10]:
#dense_user_item

Unnamed: 0,10,12,13,21,30,44,50,55,61,75,...,229011,232146,233827,234212,234623,234658,235002,235016,241947,243910
2941,,,,,,,,,,5.0,...,,,4.0,,,,,,,
15344,,,,,,,,3.0,,4.0,...,,,,,,,,,,
29237,,,4.0,,4.0,,,3.0,,5.0,...,,5.0,,,,,,,,
78410,,,,5.0,4.0,3.0,,,4.0,5.0,...,,,,,,,,,,
83575,5.0,,,,,,,,5.0,5.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99845748,4.0,,,,,,,,,,...,,,,,,,,,,
99869481,,,,,,,,2.0,4.0,5.0,...,,,,,,,,,,
99909118,,4.0,5.0,,,,5.0,,,4.0,...,,,,,,,,,,
99927639,,3.0,,,,,4.0,2.0,,5.0,...,,,,,,,,,,


In [9]:
#key1,key2 = list(train_label.keys())[0]


632

In [13]:
#dense_user_item.columns[key2]

2845

In [18]:
# Each removed ratings is for a new user -> will have to compute a lot of prediction, maybe something cleaner but didn't see

classic_norm_SSE = 0
classic_SSE = 0
hybrid_norm_SSE = 0
hybrid_SSE = 0
pop_norm_SSE = 0
pop_SSE = 0

rounded_classic_norm_error = 0
rounded_classic_error = 0
rounded_hybrid_norm_error = 0
rounded_hybrid_error = 0
rounded_pop_norm_error = 0
rounded_pop_error = 0


for key1,key2 in tqdm(train_label.keys()):

    user = train_data.index[key1]
    item = train_data.columns[key2]

    similarity = coll.get_k_dynamic_similar_users(train_data,user,40)
    popularity = coll.get_k_popularity(popu_matrix,similarity)

    ##################
    weight_classic = similarity
    
    pred_norm = coll.predict_value_norm(user,item,weight_classic,train_data,average_ratings)
    if np.isnan(pred_norm):
        pred_norm = 2.5
    pred = coll.predict_value(user,item,weight_classic,train_data,average_ratings)
    if np.isnan(pred):
        pred = 2.5

    classic_norm_SSE += (pred_norm - train_label[(key1,key2)])**2
    classic_SSE += (pred - train_label[(key1,key2)])**2

    rounded_classic_norm_error += abs(round(pred_norm) - train_label[(key1,key2)])
    rounded_classic_error += abs(round(pred) - train_label[(key1,key2)])

    ##################
    hybrid = similarity.loc[user] + popularity.loc["popularity"]
    weight_hybrid = hybrid.to_frame().transpose() 

    pred_norm = coll.predict_value_norm(user,item,weight_hybrid,train_data,average_ratings)
    if np.isnan(pred_norm):
        pred_norm = 2.5
    pred = coll.predict_value(user,item,weight_hybrid,train_data,average_ratings)
    if np.isnan(pred):
        pred = 2.5

    hybrid_norm_SSE += (pred_norm - train_label[(key1,key2)])**2
    hybrid_SSE += (pred - train_label[(key1,key2)])**2

    rounded_hybrid_norm_error += abs(round(pred_norm) - train_label[(key1,key2)])
    rounded_hybrid_error += abs(round(pred) - train_label[(key1,key2)])

    ##################
    weight_pop = popularity

    pred_norm = coll.predict_value_norm(user,item,weight_pop,train_data,average_ratings)
    if np.isnan(pred_norm):
        pred_norm = 2.5
    pred = coll.predict_value(user,item,weight_pop,train_data,average_ratings)
    if np.isnan(pred):
        pred = 2.5
        
    pop_norm_SSE += (pred_norm - train_label[(key1,key2)])**2
    pop_SSE += (pred - train_label[(key1,key2)])**2

    rounded_pop_norm_error += abs(round(pred_norm) - train_label[(key1,key2)])
    rounded_pop_error += abs(round(pred) - train_label[(key1,key2)])
    ##################



100%|██████████| 3188/3188 [37:16<00:00,  1.43it/s]


In [19]:
nbr = len(train_label)

print("For classical model: ")
print("With normalization: average SSE = {}, average rounded diff = {}".format(classic_norm_SSE/nbr,rounded_classic_norm_error/nbr))
print("Without normalization: average SSE = {}, average rounded diff = {}".format(classic_SSE/nbr,rounded_classic_error/nbr))

print("For Hybrid model: ")
print("With normalization: average SSE = {}, average rounded diff = {}".format(hybrid_norm_SSE/nbr,rounded_hybrid_norm_error/nbr))
print("Without normalization: average SSE = {}, average rounded diff = {}".format(hybrid_SSE/nbr,rounded_hybrid_error/nbr))

print("For Popularity only based model: ")
print("With normalization: average SSE = {}, average rounded diff = {}".format(pop_norm_SSE/nbr,rounded_pop_norm_error/nbr))
print("Without normalization: average SSE = {}, average rounded diff = {}".format(pop_SSE/nbr,rounded_pop_error/nbr))

For classical model: 
With normalization: average SSE = 0.698974102466047, average rounded diff = 0.6195106649937264
Without normalization: average SSE = 0.7849902717628705, average rounded diff = 0.6543287327478042
For Hybrid model: 
With normalization: average SSE = 0.7055187371775984, average rounded diff = 0.6213927227101631
Without normalization: average SSE = 0.7954297948509171, average rounded diff = 0.661543287327478
For Popularity only based model: 
With normalization: average SSE = 0.9005948464913549, average rounded diff = 0.7076537013801757
Without normalization: average SSE = 1.042836190160484, average rounded diff = 0.7688205771643664


Plotting everything for 10 removed ratings just to see

In [12]:
train_data_2,train_label_2 = prepare_test_data_dense(dense_user_item,1/300000)

100%|██████████| 10/10 [00:00<00:00, 9414.82it/s]


In [15]:
for key1,key2 in train_label_2.keys():

    user = train_data_2.index[key1]
    item = train_data_2.columns[key2]

    similarity = coll.get_k_dynamic_similar_users(train_data_2,user,40)
    popularity = coll.get_k_popularity(popu_matrix,similarity)

    ##################
    weight_classic = similarity
    
    cla_pred_norm = coll.predict_value_norm(user,item,weight_classic,train_data_2,average_ratings)
    cla_pred = coll.predict_value(user,item,weight_classic,train_data_2,average_ratings)

    ##################
    hybrid = similarity.loc[user] + popularity.loc["popularity"]
    weight_hybrid = hybrid.to_frame().transpose() 

    hyb_pred_norm = coll.predict_value_norm(user,item,weight_hybrid,train_data_2,average_ratings)
    hyb_pred = coll.predict_value(user,item,weight_hybrid,train_data_2,average_ratings)

    ##################
    weight_pop = popularity

    pop_pred_norm = coll.predict_value_norm(user,item,weight_pop,train_data_2,average_ratings)
    pop_pred = coll.predict_value(user,item,weight_pop,train_data_2,average_ratings)
    ##################

    print("For user {} and movie {}, the true ratings was {}".format(user,item,train_label_2[(key1,key2)]))
    print("The classical model predicted {} with normalization and {} without".format(cla_pred_norm,cla_pred))
    print("The hybrid model predicted {} with normalization and {} without".format(hyb_pred_norm,hyb_pred))
    print("The popularity model predicted {} with normalization and {} without".format(pop_pred_norm,pop_pred))
    print("################################")

For user 70929901 and movie 2083, the true ratings was 4.0
The classical model predicted 4.366124772998244 with normalization and 4.08230020935102 without
The hybrid model predicted 4.334833886793348 with normalization and 4.036882293536396 without
The popularity model predicted 3.4741589719285715 with normalization and 2.7876348107032083 without
################################
For user 10040458 and movie 402, the true ratings was 4.0
The classical model predicted 5 with normalization and 4.237505533225593 without
The hybrid model predicted 4.889349607306551 with normalization and 4.234722687670517 without
The popularity model predicted 4.579815126019368 with normalization and 4.227625664078463 without
################################
For user 33246005 and movie 4124, the true ratings was 3.0
The classical model predicted 2.2888367633349596 with normalization and 2.584491556826118 without
The hybrid model predicted 2.2888367633349596 with normalization and 2.584491556826118 without
Th