In [1]:
import os

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
from surprise import Reader
from surprise import Dataset

from surprise import SVD
from surprise import KNNBaseline, KNNWithMeans

from surprise.accuracy import rmse
from surprise import accuracy
from surprise.model_selection import train_test_split

## Prepare Data

[Source](https://grouplens.org/datasets/movielens/latest/)

In [4]:
DATA_DIR = 'data/ml-latest-small/'

In [5]:
os.listdir(DATA_DIR)

['ratings.csv', 'README.txt', 'links.csv', 'tags.csv', 'movies.csv']

In [6]:
rdf = pd.read_csv(os.path.join(DATA_DIR, 'ratings.csv'), usecols=['userId', 'movieId', 'rating'])

In [7]:
rdf.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [8]:
mdf = pd.read_csv(os.path.join(DATA_DIR, 'movies.csv'))

In [9]:
mdf.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [10]:
rmdf = pd.merge(rdf, mdf, on='movieId', how='left')

In [11]:
rmdf.groupby(by=['movieId', 'title'])['rating'].mean().sort_values(ascending=False).head(20)

movieId  title                                                         
53355    Sun Alley (Sonnenallee) (1999)                                    5.0
96608    Runaway Brain (1995)                                              5.0
33138    Palindromes (2004)                                                5.0
128087   Trinity and Sartana Are Coming (1972)                             5.0
3851     I'm the One That I Want (2000)                                    5.0
160644   Indignation (2016)                                                5.0
126921   The Fox and the Hound 2 (2006)                                    5.0
3939     Slumber Party Massacre II (1987)                                  5.0
3940     Slumber Party Massacre III (1990)                                 5.0
3941     Sorority House Massacre (1986)                                    5.0
126088   A Flintstones Christmas Carol (1994)                              5.0
3942     Sorority House Massacre II (1990)                 

In [12]:
reader = Reader(rating_scale=(0, 6))
data = Dataset.load_from_df(rmdf[['userId', 'movieId', 'rating']], reader)

In [13]:
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

## Train model

In [14]:
algo = SVD()

In [15]:
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f8e10737a20>

## Test Model

In [16]:
test_pred = algo.test(testset)

accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8824


0.8823933589555355

## Use Model (Make Predictions)

In [17]:
user_id = 100

In [18]:
user_movies = rmdf.query(f'userId == {user_id}')

In [19]:
user_movies.shape

(148, 5)

In [20]:
user_movies.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,userId,movieId,rating,title,genres
15386,100,1958,5.0,Terms of Endearment (1983),Comedy|Drama
15401,100,2423,5.0,Christmas Vacation (National Lampoon's Christm...,Comedy
15437,100,5620,5.0,Sweet Home Alabama (2002),Comedy|Romance
15355,100,1101,5.0,Top Gun (1986),Action|Romance
15425,100,4041,5.0,"Officer and a Gentleman, An (1982)",Drama|Romance
15370,100,1307,4.5,When Harry Met Sally... (1989),Comedy|Romance
15384,100,1912,4.5,Out of Sight (1998),Comedy|Crime|Drama|Romance|Thriller
15382,100,1777,4.5,"Wedding Singer, The (1998)",Comedy|Romance
15381,100,1680,4.5,Sliding Doors (1998),Drama|Romance
15380,100,1678,4.5,"Joy Luck Club, The (1993)",Drama|Romance


In [21]:
user_movies.sort_values(by='rating', ascending=False).tail(10)

Unnamed: 0,userId,movieId,rating,title,genres
15359,100,1220,3.0,"Blues Brothers, The (1980)",Action|Comedy|Musical
15335,100,594,3.0,Snow White and the Seven Dwarfs (1937),Animation|Children|Drama|Fantasy|Musical
15429,100,4571,3.0,Bill & Ted's Excellent Adventure (1989),Adventure|Comedy|Sci-Fi
15418,100,3499,2.0,Misery (1990),Drama|Horror|Thriller
15352,100,1037,2.0,"Lawnmower Man, The (1992)",Action|Horror|Sci-Fi|Thriller
15405,100,2513,2.0,Pet Sematary (1989),Horror
15410,100,2710,2.0,"Blair Witch Project, The (1999)",Drama|Horror|Thriller
15367,100,1288,2.0,This Is Spinal Tap (1984),Comedy
15304,100,19,1.0,Ace Ventura: When Nature Calls (1995),Comedy
15315,100,235,1.0,Ed Wood (1994),Comedy|Drama


In [22]:
unwatched_movies = [m for m in rdf['movieId'].unique() if m not in user_movies['movieId'].unique()]

In [23]:
scores = []
for m in unwatched_movies:
    score = algo.predict(user_id, m).est
    scores.append((m, score))

In [24]:
sdf = pd.DataFrame(scores, columns=['movieId', 'Score'])

In [25]:
sdf = sdf.merge(mdf, on='movieId', how='left')

In [26]:
sdf.sort_values(by='Score', ascending=False).head(10)

Unnamed: 0,movieId,Score,title,genres
316,1250,4.685485,"Bridge on the River Kwai, The (1957)",Adventure|Drama|War
297,904,4.681891,Rear Window (1954),Mystery|Thriller
680,4993,4.654358,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy
203,318,4.633669,"Shawshank Redemption, The (1994)",Crime|Drama
690,5952,4.617059,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy
12,260,4.612151,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
699,7153,4.602432,"Lord of the Rings: The Return of the King, The...",Action|Adventure|Drama|Fantasy
688,5618,4.601907,Spirited Away (Sen to Chihiro no kamikakushi) ...,Adventure|Animation|Fantasy
295,898,4.600192,"Philadelphia Story, The (1940)",Comedy|Drama|Romance
1017,1252,4.596181,Chinatown (1974),Crime|Film-Noir|Mystery|Thriller


In [27]:
sdf.sort_values(by='Score', ascending=False).tail(10)

Unnamed: 0,movieId,Score,title,genres
4716,2798,2.746762,Problem Child (1990),Children|Comedy
449,65,2.727629,Bio-Dome (1996),Comedy
1385,1499,2.713827,Anaconda (1997),Action|Adventure|Thriller
1389,1556,2.691316,Speed 2: Cruise Control (1997),Action|Romance|Thriller
571,435,2.674361,Coneheads (1993),Comedy|Sci-Fi
1724,5313,2.668778,The Scorpion King (2002),Action|Adventure|Fantasy|Thriller
905,1882,2.660573,Godzilla (1998),Action|Sci-Fi|Thriller
1390,1562,2.637831,Batman & Robin (1997),Action|Adventure|Fantasy|Thriller
661,2701,2.623198,Wild Wild West (1999),Action|Comedy|Sci-Fi|Western
558,374,2.536425,Richie Rich (1994),Children|Comedy
