# Recommender Demo
## Using cosine similarity

In [1]:
from src.similarity_scores import find_similar_users, get_recommendations
import pandas as pd

In [6]:
ratings = pd.read_csv('Data/Input/ratings.csv')
movies = pd.read_csv('Data/Input/movies.csv')
ratings_wide = ratings.pivot(index='userId', columns='movieId', values='rating')

In [103]:
user = 100
num_users = -1
num_recs=5

In [104]:
%%time
test = get_recommendations(ratings, user, num_users=num_users, num_recs=num_recs)

CPU times: total: 4.8 s
Wall time: 13.7 s


In [105]:
test.merge(movies, 'left', 'movieId')

Unnamed: 0,movieId,wt_rating,title,genres
0,356,2.945969,Forrest Gump (1994),Comedy|Drama|Romance|War
1,296,2.68363,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
2,318,2.678084,"Shawshank Redemption, The (1994)",Crime|Drama
3,593,2.398147,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
4,2571,2.169936,"Matrix, The (1999)",Action|Sci-Fi|Thriller


## Using LightFM

In [118]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.cross_validation import random_train_test_split
from scipy.sparse import coo_matrix
import numpy as np

In [92]:
X_train, X_test = random_train_test_split(
    coo_matrix(ratings_wide.fillna(0)), test_percentage=.2
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

Training data shape: (610, 9724)
Testing data shape: (610, 9724)


In [102]:
lfm = LightFM(no_components=20, k=10, n=10, loss='warp')
lfm.fit(X_train, epochs=10)

k=10
train_precision = precision_at_k(lfm, X_train, k=k).mean()
test_precision = precision_at_k(lfm, X_test, k=k, train_interactions=X_train).mean()
train_recall = recall_at_k(lfm, X_train, k=k).mean()
test_recall = recall_at_k(lfm, X_test, k=k, train_interactions=X_train).mean()
train_auc = auc_score(lfm, X_train).mean()
test_auc = auc_score(lfm, X_test).mean()
print(f"training precision @{k}: {train_precision}")
print(f"testing precision @{k}: {test_precision}")
print("\n")
print(f"training recall @{k}: {train_recall}")
print(f"testing recall @{k}: {test_recall}")
print("\n")
print(f"training AUC: {train_auc}")
print(f"testing AUC: {test_auc}")

training precision @10: 0.46311476826667786
testing precision @10: 0.23475411534309387


training recall @10: 0.07093122186748896
testing recall @10: 0.12004709565455385


training AUC: 0.9628549814224243
testing AUC: 0.9189186692237854


Precision @k: the proportion of the k recommendations that are relevant to the user
Recall @k: the proportion of all the possible relevant items recommended to the user
AUC: The probability that any known positive rating is higher on the list than a random negative item

In [125]:
items = np.array([i for i in range(X_train.shape[1])])
lfm.predict(1, items).s

(9724,)

9723