In [1]:
import numpy as np
import pandas as pd

In [2]:
from similarity import pearson_similarity
from predict import prediction_function
from group import average_aggregation, least_misery_aggregation, group_recommendation

In [3]:
# Constants
# Upper bound on how many of the most similar users are kept per group member
# when the similarity dictionaries are sorted and truncated further down.
MAX_NEIGHBORS = 50

In [4]:
# Load the two CSV inputs from the local datasets folder:
# `ratings` supplies the userId / movieId / rating columns used below,
# `movies` supplies movieId and title.
ratings = pd.read_csv('./datasets/ratings.csv')
movies = pd.read_csv('./datasets/movies.csv')

In [5]:
# Axis labels: users in order of first appearance in the ratings file,
# movies in the order they are listed in the movies file.
user_ids = ratings['userId'].unique().tolist()
movie_ids = movies['movieId'].unique().tolist()

# Build the user x movie rating matrix in one vectorised pass. The previous
# version looked up every rating row three times with `.iloc` and wrote the
# cells one at a time, which is very slow for a ratings file of this size.
matrix = (
    ratings
    # If a (user, movie) pair is repeated, keep the last rating; this matches
    # the last-write-wins outcome of filling the matrix row by row.
    .drop_duplicates(subset=['userId', 'movieId'], keep='last')
    .pivot(index='userId', columns='movieId', values='rating')
    # Align to the full label sets; unrated cells become NaN. Columns follow
    # the movies file, so a rating whose movieId is not listed there is left
    # out (such an id would have no title to display anyway).
    .reindex(index=user_ids, columns=movie_ids)
    # pivot names the axes after the source columns; drop those names so the
    # frame looks like one built from plain index/columns lists.
    .rename_axis(index=None, columns=None)
    .astype(np.float32)
)

print("Matrix Shape:", matrix.shape)

Matrix Shape: (610, 9742)


In [6]:
# Lookup table keyed by movieId (the index) with a single 'title' column.
movie_map = pd.DataFrame(
    {'title': movies['title'].values},
    index=movies['movieId'].values,
)
movie_map.head()

Unnamed: 0,title
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)


In [7]:
# The group is pinned to three fixed user ids so that the run is reproducible
# (an earlier experiment drew the members at random with np.random.randint).
group = [11, 23, 249]
print(group)

# Every user outside the group; similarities are computed against these users.
group_members = frozenset(group)
other_users = [uid for uid in user_ids if uid not in group_members]
len(other_users)

[11, 23, 249]


607

In [8]:
# One empty similarity dictionary per group member, in the same order as `group`.
list_of_similarities = [{} for _member in group]

In [9]:
# For every user outside the group, store the value returned by
# pearson_similarity against each group member in that member's dictionary
# (dictionaries and members are paired positionally).
for candidate in other_users:
    for neighbour_scores, member in zip(list_of_similarities, group):
        neighbour_scores[candidate] = pearson_similarity(matrix, member, candidate)

In [10]:
# Reduce each member's dictionary to its MAX_NEIGHBORS highest-similarity
# entries, ordered from highest to lowest value. Slice assignment replaces the
# contents of the existing list object instead of rebinding the name.
list_of_similarities[:] = [
    dict(sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:MAX_NEIGHBORS])
    for scores in list_of_similarities
]

In [11]:
# Collects one prediction result per group member.
list_of_scores = []

In [12]:
# Run prediction_function once per group member, passing that member's own
# truncated neighbour dictionary and the number of matrix columns.
# The list is rebuilt from scratch rather than appended to, so running this
# cell a second time cannot leave duplicate entries for the same member.
list_of_scores = [
    prediction_function(matrix, member, neighbours, matrix.shape[1])
    for member, neighbours in zip(group, list_of_similarities)
]

In [13]:
# Collapse the per-member prediction results into one group-level result,
# once per aggregation strategy.
# NOTE(review): both helpers are imported from group.py; judging by their
# names they presumably take the mean and the minimum across members; confirm there.
avg_matrix = average_aggregation(list_of_scores)
lm_matrix = least_misery_aggregation(list_of_scores)

In [14]:
# Produce one recommendation mapping per aggregation strategy. Both calls
# receive the same title lookup and the same column count of the rating matrix.
n_matrix_columns = matrix.shape[1]
recs_avg = group_recommendation(avg_matrix, movie_map, n_matrix_columns)
recs_lm = group_recommendation(lm_matrix, movie_map, n_matrix_columns)

In [20]:
# Print the first ten entries of the average-aggregation recommendations,
# in the order the mapping yields them.
for rank, (title, score) in enumerate(recs_avg.items()):
    if rank >= 10:
        break
    print(f"Movie: {title} -> Score: {score:.5f}")

Movie: Jaws (1975) -> Score: 5.00259
Movie: Singin' in the Rain (1952) -> Score: 4.98816
Movie: Traffic (2000) -> Score: 4.97833
Movie: Tangled (2010) -> Score: 4.86329
Movie: Raising Arizona (1987) -> Score: 4.85759
Movie: Pinocchio (1940) -> Score: 4.83803
Movie: Young Frankenstein (1974) -> Score: 4.79752
Movie: Dead Poets Society (1989) -> Score: 4.77201
Movie: Bridge on the River Kwai, The (1957) -> Score: 4.76807
Movie: Close Encounters of the Third Kind (1977) -> Score: 4.75357


In [21]:
# Print the first ten entries of the least-misery recommendations,
# in the order the mapping yields them.
for rank, (title, score) in enumerate(recs_lm.items()):
    if rank >= 10:
        break
    print(f"Movie: {title} -> Score: {score:.5f}")

Movie: Traffic (2000) -> Score: 4.85268
Movie: Singin' in the Rain (1952) -> Score: 4.68310
Movie: Tangled (2010) -> Score: 4.57717
Movie: Wallace & Gromit: The Wrong Trousers (1993) -> Score: 4.55361
Movie: True Romance (1993) -> Score: 4.52735
Movie: 50 First Dates (2004) -> Score: 4.52219
Movie: Insider, The (1999) -> Score: 4.49932
Movie: Dead Poets Society (1989) -> Score: 4.47424
Movie: Dead Alive (Braindead) (1992) -> Score: 4.47134
Movie: Day of the Dead (1985) -> Score: 4.47134


In [17]:
# TODO: define another preference-aggregation strategy

In [18]:
# TODO: evaluate the fairness of the recommendations (with Kendall-Tau)