In [1]:
import numpy as np
import pandas as pd

In [2]:
from similarity import pearson_similarity
from predict import prediction_function
from group import average_aggregation, least_misery_aggregation, group_recommendation

In [3]:
# Constants
# Upper bound on how many of the most similar users are kept per group member
# when the similarity dictionaries are truncated further down.
MAX_NEIGHBORS = 50

In [4]:
# Load the raw ratings and the movie catalogue from the local datasets folder.
ratings = pd.read_csv('./datasets/ratings.csv')
movies = pd.read_csv('./datasets/movies.csv')

In [5]:
# Build the user x movie rating matrix.
# Rows are the users that appear in `ratings` (in order of first appearance),
# columns are every movie listed in `movies`; cells without a rating stay NaN.
user_ids = ratings['userId'].unique().tolist()
movie_ids = movies['movieId'].unique().tolist()

matrix = (
    ratings
    # If a (user, movie) pair occurs more than once keep the last row, which is
    # what a sequential cell-by-cell fill would leave in the matrix.
    .drop_duplicates(subset=['userId', 'movieId'], keep='last')
    .pivot(index='userId', columns='movieId', values='rating')
    # Align to the declared ids: adds all-NaN columns for movies nobody rated.
    # NOTE(review): ratings for a movieId absent from `movies` are dropped here.
    .reindex(index=user_ids, columns=movie_ids)
    .astype(np.float32)
)
# pivot() names the axes after the source columns; clear them so the frame has
# plain, unnamed axes.
matrix.index.name = None
matrix.columns.name = None

assert matrix.shape == (len(user_ids), len(movie_ids))

print("Matrix Shape:", matrix.shape)

Matrix Shape: (610, 9742)


In [6]:
# Lookup table from movieId (index) to the movie title (single 'title' column).
movie_map = pd.DataFrame(
    {'title': movies['title'].values},
    index=movies['movieId'].values,
)
movie_map.head()

Unnamed: 0,title
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)


In [7]:
# Fixed group of user ids, so every run recommends for the same users.
# To draw a random group instead, sample distinct ids from user_ids, e.g.:
# group = np.random.default_rng(seed).choice(user_ids, size=3, replace=False).tolist()
group = [11, 23, 249]
print(group)

# Every user outside the group is a candidate neighbour.
group_members = set(group)
other_users = [uid for uid in user_ids if uid not in group_members]
len(other_users)

[11, 23, 249]


607

In [8]:
# One (initially empty) similarity dictionary per group member, in group order.
list_of_similarities = [{} for _ in group]

In [9]:
# Fill each member's dictionary with the similarity between that member and
# every user outside the group (keyed by the outside user's id).
for candidate in other_users:
    for position, member in enumerate(group):
        list_of_similarities[position][candidate] = pearson_similarity(matrix, member, candidate)

In [10]:
# For every group member keep only the MAX_NEIGHBORS users with the highest
# similarity, ordered from most to least similar. Slice assignment updates the
# existing list in place.
# NOTE(review): if pearson_similarity can return NaN, the sort order is not
# well defined — confirm how missing overlaps are reported.
list_of_similarities[:] = [
    dict(sorted(sims.items(), key=lambda pair: pair[1], reverse=True)[:MAX_NEIGHBORS])
    for sims in list_of_similarities
]

In [11]:
# Container for the per-member outputs of prediction_function.
list_of_scores = []

In [12]:
# Run the prediction step once per group member, pairing each member with the
# neighbour dictionary stored at the same position.
# The list is rebuilt from scratch (instead of appended to) so re-running this
# cell cannot leave duplicated entries behind for the aggregation step.
# NOTE(review): the last argument is the number of matrix columns; confirm how
# prediction_function interprets it.
list_of_scores = [
    prediction_function(matrix, member, neighbours, matrix.shape[1])
    for member, neighbours in zip(group, list_of_similarities)
]

In [13]:
# Combine the per-member scores into one group-level result per strategy:
# first the average aggregation, then the least-misery aggregation.
avg_matrix, lm_matrix = (
    aggregate(list_of_scores)
    for aggregate in (average_aggregation, least_misery_aggregation)
)

In [14]:
# Turn each aggregated result into recommendations, using movie_map for titles.
# The column count of the rating matrix is passed as the third argument.
n_movies = matrix.shape[1]
recs_avg = group_recommendation(avg_matrix, movie_map, n_movies)
recs_lm = group_recommendation(lm_matrix, movie_map, n_movies)

In [15]:
# Show the recommendations obtained from the average-aggregation scores.
recs_avg

{'Jaws (1975)': 5.002593631502407,
 "Singin' in the Rain (1952)": 4.988163839736959,
 'Traffic (2000)': 4.978331754121855,
 'Tangled (2010)': 4.86328540018651,
 'Raising Arizona (1987)': 4.857593356690523,
 'Pinocchio (1940)': 4.83803322126573,
 'Young Frankenstein (1974)': 4.797524308264357,
 'Dead Poets Society (1989)': 4.772005082863739,
 'Bridge on the River Kwai, The (1957)': 4.76806817770367,
 'Close Encounters of the Third Kind (1977)': 4.7535659699244945,
 'True Romance (1993)': 4.739011948259553,
 'Adaptation (2002)': 4.731881007272062,
 'Ice Storm, The (1997)': 4.7166729604904996,
 '50 First Dates (2004)': 4.704493009789162,
 'Little Shop of Horrors (1986)': 4.702746536393627,
 'Insider, The (1999)': 4.701932768929688,
 'I Am Sam (2001)': 4.66156593940462,
 'October Sky (1999)': 4.639518900048125,
 'Cool Hand Luke (1967)': 4.632389415948845,
 'Wallace & Gromit: The Wrong Trousers (1993)': 4.624826694899039,
 'Misery (1990)': 4.608267644102554,
 'Gods Must Be Crazy, The (1980)

In [16]:
# Show the recommendations obtained from the least-misery-aggregation scores.
recs_lm

{'Traffic (2000)': 4.852678537368774,
 "Singin' in the Rain (1952)": 4.683098554611206,
 'Tangled (2010)': 4.577168037305687,
 'Wallace & Gromit: The Wrong Trousers (1993)': 4.553605538277057,
 'True Romance (1993)': 4.527354510512985,
 '50 First Dates (2004)': 4.522186707799054,
 'Insider, The (1999)': 4.499317469920778,
 'Dead Poets Society (1989)': 4.4742403081004944,
 'Dead Alive (Braindead) (1992)': 4.471341133117676,
 'Day of the Dead (1985)': 4.471341133117676,
 'Dawn of the Dead (2004)': 4.471341133117676,
 'Dawn of the Dead (1978)': 4.471341133117676,
 'Ice Storm, The (1997)': 4.470008562715397,
 'Moulin Rouge (2001)': 4.392952866215042,
 'Gods Must Be Crazy, The (1980)': 4.389501094818115,
 'Young Frankenstein (1974)': 4.364860127919173,
 'Big Chill, The (1983)': 4.2737603187561035,
 'Awakenings (1990)': 4.2737603187561035,
 'Misery (1990)': 4.240065266799923,
 'Devil Wears Prada, The (2006)': 4.219556800032081,
 'Raising Arizona (1987)': 4.213068246841431,
 'Beautiful Mind, 

In [17]:
# TODO: define another preference-aggregation strategy

In [18]:
# TODO: evaluate (with Kendall's tau) the fairness of the recommendations