In [1]:
import numpy as np
import pandas as pd

from preprocessing import get_matrix, get_movie_map
from similarity import pearson_similarity, weighted_pearson_similarity, norm_weights
from predict import recommend_movies

In [2]:
# Notebook-wide tuning knobs.
MAX_NEIGHBORS = 10        # number of top-ranked similar users kept after sorting
MAX_RECOMMENDATIONS = 10  # passed to recommend_movies as max_recommendations

In [3]:
# Dataset preprocessing: load the raw CSV files, then derive the structures
# used by every later cell.
ratings = pd.read_csv('./datasets/ratings.csv')
movies = pd.read_csv('./datasets/movies.csv')

# `matrix` is indexed by user (its index is iterated as user ids below);
# `movie_map` is handed to recommend_movies later on.
matrix = get_matrix(ratings, movies)
movie_map = get_movie_map(movies)

In [4]:
# Pick the target user at random from the user ids that are actually present
# in the matrix index. Drawing a random *position* (instead of a random id in
# 1..N) avoids selecting an id that does not exist when the index has gaps or
# does not start at 1; for a contiguous 1..N index the result is the same.
# NOTE: the draw is unseeded, so the selected user changes on every run; call
# np.random.seed(...) beforehand if a reproducible run is needed.
user_ids = matrix.index.tolist()
input_user = user_ids[np.random.randint(low=0, high=len(user_ids))]

# Every remaining user is a candidate neighbour for the similarity step.
other_users = [u for u in user_ids if u != input_user]

print("Input User =", input_user)

Input User = 4


# First Execution: Pearson Similarity

In [5]:
# Similarity computation: score every other user against the input user,
# then keep only the MAX_NEIGHBORS highest-scoring ones.
similarities = {
    neighbor: pearson_similarity(matrix, input_user, neighbor)
    for neighbor in other_users
}

# sorted() is stable, so users with equal scores keep their original order.
ranked_neighbors = sorted(similarities.items(), key=lambda pair: pair[1], reverse=True)
similarities = dict(ranked_neighbors[:MAX_NEIGHBORS])

In [6]:
# Recommendations derived from the top Pearson neighbours selected above.
recommendations = recommend_movies(
    matrix,
    input_user,
    similarities,
    max_recommendations=MAX_RECOMMENDATIONS,
    movie_map=movie_map,
)

In [7]:
# Build the results table in one constructor call from the (movie, score)
# pairs. Appending rows one at a time through .loc re-allocates the frame on
# every insert; a single construction is linear and yields the same 0..n-1
# index. An empty `recommendations` still produces an empty frame with the
# 'Movie' and 'Score' columns.
df_recommendations = pd.DataFrame(
    list(recommendations.items()),
    columns=['Movie', 'Score'],
)

display(df_recommendations)

Unnamed: 0,Movie,Score
0,True Romance (1993),5.830556
1,Léon: The Professional (a.k.a. The Professiona...,5.412699
2,Cool Hand Luke (1967),5.412699
3,"Boot, Das (Boat, The) (1981)",5.330556
4,Payback (1999),5.132479
5,Finding Nemo (2003),4.912698
6,Hoop Dreams (1994),4.84127
7,"Thin Blue Line, The (1988)",4.84127
8,"Sting, The (1973)",4.830556
9,Young Frankenstein (1974),4.830556


# Second Execution: Weighted Pearson Similarity

In [8]:
# Weighted similarities: same ranking procedure as the unweighted run, but
# each pairwise score also receives that user's entry from `norm_scores`
# (looked up with .get, so a missing user presumably yields None -- confirm
# against similarity.norm_weights).
norm_scores = norm_weights(input_user, matrix)

weighted_similarities = {
    neighbor: weighted_pearson_similarity(matrix, input_user, neighbor, norm_scores.get(neighbor))
    for neighbor in other_users
}

# Keep only the MAX_NEIGHBORS highest-scoring users (stable sort, descending).
ranked_weighted_neighbors = sorted(weighted_similarities.items(), key=lambda pair: pair[1], reverse=True)
weighted_similarities = dict(ranked_weighted_neighbors[:MAX_NEIGHBORS])

In [9]:
# Recommendations derived from the top weighted-Pearson neighbours.
weighted_recommendations = recommend_movies(
    matrix,
    input_user,
    weighted_similarities,
    max_recommendations=MAX_RECOMMENDATIONS,
    movie_map=movie_map,
)

In [10]:
# Build the results table in one constructor call from the (movie, score)
# pairs. Appending rows one at a time through .loc re-allocates the frame on
# every insert; a single construction is linear and yields the same 0..n-1
# index. An empty `weighted_recommendations` still produces an empty frame
# with the 'Movie' and 'Score' columns.
df_weighted_recommendations = pd.DataFrame(
    list(weighted_recommendations.items()),
    columns=['Movie', 'Score'],
)

display(df_weighted_recommendations)

Unnamed: 0,Movie,Score
0,"Remains of the Day, The (1993)",5.156599
1,Gaslight (1944),5.156599
2,"Impostors, The (1998)",5.156599
3,Guys and Dolls (1955),5.156599
4,Crossing Delancey (1988),5.156599
5,"Truly, Madly, Deeply (1991)",5.156599
6,Lady Jane (1986),5.156599
7,Whale Rider (2002),5.156599
8,Babette's Feast (Babettes gæstebud) (1987),5.156599
9,Adam's Rib (1949),5.156599
