# Collaborative filtering

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from data import get_movielens_1m

## Load MovieLens dataset

In [4]:
movies_df, users_df, ratings_df = get_movielens_1m()

In [5]:
print(f'Users: {ratings_df["UserID"].max()}')
print(f'Movies: {ratings_df["MovieID"].max()}')

Users: 6040
Movies: 3952


## Prepare train/test data

In [6]:
def convert_to_sparse(df: pd.DataFrame) -> csr_matrix:
    rows, cols, vals = zip(*df.values)
    return csr_matrix((vals, (rows, cols)))

In [7]:
ratings_df.sort_values(by='Timestamp', inplace=True)

ratings_df['UserID'] = ratings_df['UserID'].apply(lambda x: x - 1)
ratings_df['MovieID'] = ratings_df['MovieID'].apply(lambda x: x - 1)

train_df, test_df = train_test_split(
    ratings_df.iloc[:, :3], shuffle=False, test_size=0.1)

train_sparse = convert_to_sparse(train_df)
test_sparse = convert_to_sparse(test_df)

print(train_sparse.shape)
print(test_sparse.shape)

train = train_sparse.toarray()
test = test_sparse.toarray()

(6040, 3952)
(6040, 3952)


## Use Non-negative Matrix Factorization to predict users ratings

In [8]:
def non_zero_rmse(pred, actual):
    pred = pred[actual.nonzero()].flatten()     # Ignore nonzero terms
    actual = actual[actual.nonzero()].flatten() # Ignore nonzero terms
    return mean_squared_error(pred, actual, squared=False)

In [9]:
nmf = NMF(n_components=15, alpha=0.001, l1_ratio=0.0)
res = nmf.fit_transform(train)
preds = res @ nmf.components_



In [10]:
print(non_zero_rmse(preds, test))


3.1895046191120273


## Test ready to use NFM for recommender system implementation

In [27]:
import surprise
from surprise import Dataset
from surprise.prediction_algorithms.matrix_factorization import NMF
from surprise.model_selection import cross_validate
from surprise import accuracy

from random import randint

In [16]:
movielens = Dataset.load_builtin('ml-1m', prompt=False)

train_ds, test_ds = surprise.model_selection.train_test_split(movielens, test_size=0.1)

alg = NMF()

predictions = alg.fit(train_ds).test(test_ds)

accuracy.rmse(predictions)

RMSE: 0.9136


0.9136055567011406

## Analyze prediction performance

In [30]:
USER_ID = 4177

In [24]:
movies_by = ratings_df[ratings_df['UserID'] == USER_ID]
for idx, row in movies_by.iterrows():
    movie = movies_df.loc[movies_df['MovieID'] == row['MovieID']].iloc[0]
    print(f'Movie: {movie["Title"]}, genre: {movie["Genres"]}, rating: {row["Rating"]}')

Movie: Wizard of Oz, The (1939), genre: Adventure|Children's|Drama|Musical, rating: 5
Movie: Steam: The Turkish Bath (Hamam) (1997), genre: Drama|Romance, rating: 1
Movie: Sunset Blvd. (a.k.a. Sunset Boulevard) (1950), genre: Film-Noir, rating: 4
Movie: Flintstones, The (1994), genre: Children's|Comedy, rating: 5
Movie: Wings (1927), genre: Drama|Romance|War, rating: 4
Movie: Shower (Xizhao) (1999), genre: Comedy, rating: 3
Movie: Adventures of Rocky and Bullwinkle, The (2000), genre: Animation|Children's|Comedy, rating: 2
Movie: Yellow Submarine (1968), genre: Animation|Musical, rating: 4
Movie: Crimson Pirate, The (1952), genre: Adventure|Comedy|Sci-Fi, rating: 3
Movie: Far and Away (1992), genre: Drama|Romance, rating: 4
Movie: Client, The (1994), genre: Drama|Mystery|Thriller, rating: 4
Movie: Volunteers (1985), genre: Comedy, rating: 4
Movie: Eye for an Eye (1996), genre: Drama|Thriller, rating: 5
Movie: GoldenEye (1995), genre: Action|Adventure|Thriller, rating: 4
Movie: Psycho (

In [25]:
def get_prediction(user_id: int, movie_id: int):
    user_id = str(user_id)
    movie_id = str(movie_id)

    rating = alg.predict(user_id, movie_id)
    return rating

In [29]:
for i in range(10):
    uid = USER_ID
    mid = randint(1, 3953)

    rating = get_prediction(uid, mid)

    movie = movies_df.loc[movies_df['MovieID'] == mid].iloc[0]
    print(f'Movie: {movie["Title"]}, genre: {movie["Genres"]}, rating: {rating.est}')

Movie: Night of the Creeps (1986), genre: Comedy|Horror|Sci-Fi, rating: 2.805948975152578
Movie: Mrs. Doubtfire (1993), genre: Comedy, rating: 4.026193629803376
Movie: Fools Rush In (1997), genre: Comedy|Romance, rating: 3.6302007022492107
Movie: Buddy Holly Story, The (1978), genre: Drama, rating: 4.2566646044830865
Movie: Three Musketeers, The (1993), genre: Action|Adventure|Comedy, rating: 3.7013053195174184
Movie: All Over Me (1997), genre: Drama, rating: 4.034121977378656
Movie: Teenage Mutant Ninja Turtles II: The Secret of the Ooze (1991), genre: Action|Children's|Fantasy, rating: 2.5838075035895023
Movie: Interview with the Vampire (1994), genre: Drama|Horror, rating: 3.988946699291398
Movie: Century of Cinema, A (1994), genre: Documentary, rating: 3.5813352321959413
Movie: Fan, The (1996), genre: Thriller, rating: 3.0813537441144114
