In [1]:
import json

with open("train.json") as training:
    training = json.loads(training.read())
    
with open("test.json") as testing:
    testing = json.loads(testing.read())

In [2]:
training_dict = {}
testing_dict = {}

for user in training.keys():
    for [item, rating] in training[user]:
        if user in training_dict:
            training_dict[user][item] = rating
        else:
            training_dict[user] = {item: rating}
            
for user in testing.keys():
    for [item, rating] in testing[user]:
        if user in testing_dict:
            testing_dict[user][item] = rating
        else:
            testing_dict[user] = {item: rating}

# print(training_dict['A12R54MKO17TW0'])

In [3]:
def sub_row_mean(ratings_dict):
    sub_row_mean_ratings_dict = {}
    for user in ratings_dict:
        list_item = list(ratings_dict[user].values())
        avg = sum(list_item) / len(list_item)
        for item in ratings_dict[user].keys():
            if user in sub_row_mean_ratings_dict:
                sub_row_mean_ratings_dict[user][item] = ratings_dict[user][item] - avg
            else:
                sub_row_mean_ratings_dict[user] = {item: ratings_dict[user][item] - avg}
    
    return sub_row_mean_ratings_dict
        
# training_dict2 = sub_row_mean(training_dict)
# print(training_dict2['A12R54MKO17TW0'])
# print(training_dict['A12R54MKO17TW0'])

In [4]:
import numpy as np
from numpy.linalg import norm
import math

def cosine_sim(ratings_dict, user):
    cosine_sim_val = {}

    for other_user in ratings_dict.keys():
        dot_prod = sum(ratings_dict[user][item]*ratings_dict[other_user].get(item, 0) for item in ratings_dict[user])
        user_norm = np.linalg.norm(np.array([rating for rating in ratings_dict[user].values()]))
        other_user_norm = np.linalg.norm(np.array([rating for rating in ratings_dict[other_user].values()]))
        norm_mul = user_norm * other_user_norm
        if norm_mul == 0:
            cos_sim = 0
        else:
            cos_sim = dot_prod / norm_mul
        if math.isnan(cos_sim):
            cos_sim = 0
        cosine_sim_val[other_user] = cos_sim
    
        if cosine_sim_val[other_user] > 1:
            cosine_sim_val[other_user] = 1
        if cosine_sim_val[other_user] < 0:

            cosine_sim_val[other_user] = 0        
    
    return cosine_sim_val

In [5]:

def predict_item_rating_avg(ratings_dict, cos_similarities_dict, k, item, user):

    cos_similarities_dict.pop(user)
    other_users = list(cos_similarities_dict.keys())
    for other_user in other_users:
        if item not in ratings_dict[other_user]:
            cos_similarities_dict.pop(other_user)
    
    list_cos_sim = sorted(cos_similarities_dict.items(), key=lambda x: x[1], reverse=True)[:k]

    avg_rating = 0
    
    if len(list_cos_sim) != 0 and list_cos_sim[0][1] != 0:
        for pair in list_cos_sim:
            avg_rating += ratings_dict[pair[0]][item]
        
        avg_rating = avg_rating / len(list_cos_sim)
    else:
        total = 0
        rating = 0
        for rat in training_dict[user]:
            rating += training_dict[user][rat]
            total += 1
        rating = rating / total
        return rating
    
    return avg_rating


    
    

In [6]:


def predict_item_rating_sim(ratings_dict, cos_similarities_dict, k, item, user):

    if user in cos_similarities_dict:
        cos_similarities_dict.pop(user)
    other_users = list(cos_similarities_dict.keys())
    for other_user in other_users:
        if item not in ratings_dict[other_user]:
            cos_similarities_dict.pop(other_user)
    
    list_cos_sim = sorted(cos_similarities_dict.items(), key=lambda x: x[1], reverse=True)[:k]
    rating = 0
    similarities = 0
    
    if len(list_cos_sim) != 0 and list_cos_sim[0][1] != 0:
        for pair in list_cos_sim:
            rating += ratings_dict[pair[0]][item] * cos_similarities_dict[pair[0]]
            similarities += cos_similarities_dict[pair[0]]
        rating = rating / similarities
    else:
        total = 0
        rating = 0
        for rat in training_dict[user]:
            rating += training_dict[user][rat]
            total += 1
        rating = rating / total
        
    if rating > 5.1:
        print("Flag")
        print("")
        rating = 4.99
    return rating

  
    

In [7]:
def predict(ratings_dict, user, items):
    predicted_ratings_dict = {}
    sub_mean_dict = sub_row_mean(ratings_dict)
    cos_sim_dict = cosine_sim(sub_mean_dict, user)
    for item in items:
        avg_rating_prediction = predict_item_rating_avg(training_dict, cos_sim_dict.copy(), k, item, user)
        sim_rating_prediction = predict_item_rating_sim(training_dict, cos_sim_dict.copy(), k, item, user)
        
        predicted_ratings_dict[item] = (avg_rating_prediction, sim_rating_prediction)
        
    return predicted_ratings_dict

In [7]:
def first_one_in_dict(dict_first_one):
    count = 0
    for item,rating in dict_first_one.items():
        if rating == 1:
            return count
        else:
            count+=1
    return -1


In [7]:
def ndcg(recommended_items_ratings_avg, recommended_items_ratings_sim, user):
    orig_reviewed_items = (sorted(testing_dict[user].items(), key=lambda x: x[1], reverse=True)).keys()
    orig_reviewed_items_hit_avg = {}
    orig_reviewed_items_hit_sim = {}
    
    for item in orig_reviewed_items:
        orig_reviewed_items_hit_avg[item] = 0
        orig_reviewed_items_hit_sim[item] = 0
    
    recommended_items_avg = []#recommended_items_ratings_avg.keys()
    recommended_items_sim = []#recommended_items_ratings_sim.keys()

    for item in recommended_items_ratings_avg:
        recommended_items_avg.append(item[0])

    for item in recommended_items_ratings_sim:
        recommended_items_sim.append(item[0])

    same_avg_items = 0
    same_sim_items = 0

    for item in recommended_items_avg:
        if item in orig_reviewed_items:
            same_avg_items += 1
            orig_reviewed_items_hit_avg[item] = 1
            
            

    for item in recommended_items_sim:
        if item in orig_reviewed_items:
            same_sim_items += 1
            orig_reviewed_items_hit_sim[item] = 1
            
    dcg_avg = 0
    idcg_avg = 0
    ndcg_avg = 0
    if same_avg_items == 0:
        ndcg_avg = 0
    else:
        first_one = first_one_in_dict(orig_reviewed_items_hit_avg)
        for i in range(same_avg_items):
            dcg_avg += (1/math.log(1 + first_one + i))
            idcg_avg += (1/math.log(1 + i))
            
        ndcg_avg = dcg_avg/idcg_avg
        
        
    dcg_sim = 0
    idcg_sim = 0
    ndcg_sim = 0
    if same_sim_items == 0:
        ndcg_sim = 0
    else:
        first_one = first_one_in_dict(orig_reviewed_items_hit_sim)
        for i in range(same_sim_items):
            dcg_sim += (1/math.log(1 + first_one + i))
            idcg_sim += (1/math.log(1 + i))
            
        ndcg_sim = dcg_sim/idcg_sim
    print('NDCG Avg: ')
    print(ndcg_avg)
    print('NDCG Sim: ')
    print(ndcg_sim)
    #return (ndcg_avg, ndcg_sim)     
    


In [8]:
def scores(recommended_items_ratings_avg, recommended_items_ratings_sim, user):

#     recommended_items_ratings_avg = recommended_items_avg
#     recommended_items_ratings_sim = recommended_items_avg

    print((user, recommended_items_ratings_avg))
    print((user, recommended_items_ratings_sim))

    orig_reviewed_items = testing_dict[user].keys()
    recommended_items_avg = []#recommended_items_ratings_avg.keys()
    recommended_items_sim = []#recommended_items_ratings_sim.keys()
    recommended_items_pair_avg = {}#recommended_items_ratings_avg.keys()
    recommended_items_pair_sim = {}#recommended_items_ratings_sim.keys()
    

    for item in recommended_items_ratings_avg:
        recommended_items_avg.append(item[0])
        recommended_items_pair_avg[item[0]] = 0 
        

    for item in recommended_items_ratings_sim:
        recommended_items_sim.append(item[0])
        recommended_items_pair_sim[item[0]] = 0 

    same_avg_items = 0
    same_sim_items = 0

    for item in recommended_items_avg:
        if item in orig_reviewed_items:
            same_avg_items += 1
            recommended_items_pair_avg[item] = 1

    for item in recommended_items_sim:
        if item in orig_reviewed_items:
            same_sim_items += 1
            recommended_items_pair_sim[item] = 1

    precision_avg = (same_avg_items/k) *100
    precision_sim = (same_sim_items/k) * 100

    print("Precision Avg: ")
    print(precision_avg)
    print('Precision Sim: ')
    print(precision_sim)


    recall_avg = (same_avg_items/len(orig_reviewed_items)) *100
    recall_sim = (same_sim_items/len(orig_reviewed_items)) * 100


    print(recall_avg)
    print(recall_sim)
    if precision_avg+recall_avg == 0:
        f_score_avg = 0
    else:
        f_score_avg = (2*precision_avg*recall_avg)/(precision_avg+recall_avg)

    if precision_sim+recall_sim == 0:
        f_score_sim = 0
    else:
        f_score_sim = (2*precision_sim*recall_sim)/(precision_sim+recall_sim)

    print('F_Score Avg: ')
    print(f_score_avg)
    print('F_Score Sim: ')
    print(f_score_sim)
    print(testing_dict[user])
    
    
    #ndcg(recommended_items_ratings_avg, recommended_items_ratings_sim, user)
    
    dcg_avg = 0
    idcg_avg = 0
    ndcg_avg = 0
    if same_avg_items != 0:
        first_one = first_one_in_dict(recommended_items_pair_avg)
        for i in range(same_avg_items):
            dcg_avg += (1/math.log2(1 + first_one + i))
            idcg_avg += (1/math.log2(1 + i))
            
        ndcg_avg = dcg_avg/idcg_avg
        
        
    dcg_sim = 0
    idcg_sim = 0
    ndcg_sim = 0
    if same_sim_items != 0:
        first_one = first_one_in_dict(recommended_items_pair_sim)
        for i in range(same_sim_items):
            dcg_sim += (1/math.log2(1 + first_one + i))
            idcg_sim += (1/math.log2(1 + i))
            
        ndcg_sim = dcg_sim/idcg_sim
    print('NDCG Avg: ')
    print(ndcg_avg)
    print('NDCG Sim: ')
    print(ndcg_sim)
    #return (ndcg_avg, ndcg_sim)     
    

    
    
    
    
    
    
    
    

In [9]:
all_items = set()
for user,list_ratings in training_dict.items():
    all_items.update(list(training_dict[user].keys()))

for user,list_ratings in testing_dict.items():
    all_items.update(list(testing_dict[user].keys()))
#print(len(all_items))

In [10]:
top_n_testing_dict = dict(sorted(testing_dict.items(), key=lambda x: len(x[1]), reverse=True)[:100])

In [11]:
#top_n_testing_dict

In [12]:
import time

k = 10
# item = ["B0048P1XXQ"]
#user = "A12R54MKO17TW0"


start = time.time()
i = 0
j = 0

for user, list_ratings in top_n_testing_dict.items():
    i += 1
    predicted_vals = predict(training_dict, user, list(all_items))

    orig_vals = training_dict[user]

    for item,rating in orig_vals.items():

        predicted_vals.pop(item)
        
    print('User Count: ' + str(i))
    print(time.time() - start)


    recommended_items_ratings_avg = sorted(predicted_vals.items(), key=lambda x: x[1][0], reverse=True)[:k]
    recommended_items_ratings_sim = sorted(predicted_vals.items(), key=lambda x: x[1][1], reverse=True)[:k]

    scores(recommended_items_ratings_avg, recommended_items_ratings_sim, user)
    





end = time.time()

print(end - start) #=> 0.0011411383748054504

User Count: 1
324.7139480113983
('A3W4D8XOGLWUN5', [('B000TEP8MM', (5.0, 3.2850467289719627)), ('B000T064EC', (5.0, 3.2850467289719627)), ('B001VFUMII', (5.0, 5.0)), ('B00FPX585Y', (5.0, 3.2850467289719627)), ('B0042UE2DU', (5.0, 5.0)), ('B0184WM2AG', (5.0, 3.2850467289719627)), ('B000W08IM0', (5.0, 5.0)), ('B00M1WRFGW', (5.0, 3.2850467289719627)), ('B00R8JSVPM', (5.0, 3.2850467289719627)), ('B002YRZ2C8', (5.0, 5.0))])
('A3W4D8XOGLWUN5', [('B001VFUMII', (5.0, 5.0)), ('B0042UE2DU', (5.0, 5.0)), ('B00DE0RN5I', (4.5, 5.0)), ('B000W08IM0', (5.0, 5.0)), ('B002YRZ2C8', (5.0, 5.0)), ('B00367S8C6', (5.0, 5.0)), ('B003ALJM0A', (5.0, 5.0)), ('B000W0YZ9U', (4.7, 5.0)), ('B001Q1O4F4', (5.0, 5.0)), ('B005KG3KYC', (3.6666666666666665, 5.0))])
Precision Avg: 
0.0
Precision Sim: 
0.0
0.0
0.0
F_Score Avg: 
0
F_Score Sim: 
0
{'B00A7299JY': 4.0, 'B000WLH8PW': 3.0, 'B00137GJZO': 4.0, 'B00JBTM2JS': 4.0, 'B007IR7RWC': 3.0, 'B00137KS2O': 4.0, 'B01CEZ5HGU': 3.0, 'B00EQRPB98': 3.0, 'B00EW0VRQ0': 2.0, 'B00136NM