In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt

In [60]:
# Recreate the data from the book.
# Each row is [userId, rating for movie 1, ..., rating for movie 6];
# np.nan marks a movie the user has not rated.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.

data = [
    [1, 7, 6, 7, 4, 5, 4],
    [2, 6, 7, np.nan, 4, 3, 4],
    [3, np.nan, 3, 3, 1, 1, np.nan],
    [4, 1, 2, 2, 3, 3, 4],
    [5, 1, np.nan, 1, 2, 3, 3]
]

In [61]:
# Wide user x movie ratings matrix, indexed by userId, with movie ids 1-6 as columns.
ratings = (
    pd.DataFrame(data, columns=['userId', 1, 2, 3, 4, 5, 6])
    .set_index('userId')
)
ratings

Unnamed: 0_level_0,1,2,3,4,5,6
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,7.0,6.0,7.0,4,5,4.0
2,6.0,7.0,,4,3,4.0
3,,3.0,3.0,1,1,
4,1.0,2.0,2.0,3,3,4.0
5,1.0,,1.0,2,3,3.0


In [62]:
# Turn userId back into an ordinary column; the melt step below uses it as an id variable.
ratings = ratings.reset_index(drop=False)
ratings

Unnamed: 0,userId,1,2,3,4,5,6
0,1,7.0,6.0,7.0,4,5,4.0
1,2,6.0,7.0,,4,3,4.0
2,3,,3.0,3.0,1,1,
3,4,1.0,2.0,2.0,3,3,4.0
4,5,1.0,,1.0,2,3,3.0


In [63]:
# Reshape wide -> long: one row per (userId, movieId) pair, drop the
# unrated (NaN) pairs, then store the remaining ratings as integers.
ratings = (
    ratings
    .melt(id_vars=['userId'], var_name='movieId', value_name='rating')
    .dropna()
    .astype({'rating': 'int'})
)

In [64]:
# Show the long-format ratings: one row per userId/movieId pair that has a rating.
ratings

Unnamed: 0,userId,movieId,rating
0,1,1,7
1,2,1,6
3,4,1,1
4,5,1,1
5,1,2,6
6,2,2,7
7,3,2,3
8,4,2,2
10,1,3,7
12,3,3,3


In [65]:
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

In [66]:
# Wrap the long-format ratings in a Surprise Dataset; ratings are on a 1-7 scale.
# NOTE: this rebinds `data`, which previously held the raw list of rows.
data = Dataset.load_from_df(
    ratings.loc[:, ['userId', 'movieId', 'rating']],
    reader=Reader(rating_scale=(1, 7)),
)

In [67]:
# Build the training set from the full dataset; no train/test split is made in this notebook.
trainset = data.build_full_trainset()

In [68]:
from surprise import prediction_algorithms

In [69]:
# Basic k-nearest-neighbours recommender with default settings
# (the fit log below reports the msd similarity).
algo = prediction_algorithms.KNNBasic()

In [70]:
# Fit the KNN model on the training set; the fitted estimator is the cell's displayed value.
algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x128077da0>

In [74]:
# User 1 rated movie 1 as 7 in the source table, so this estimate can be compared to a known rating.
algo.predict(uid=1, iid=1)

Prediction(uid=1, iid=1, r_ui=None, est=6.233032430625208, details={'actual_k': 4, 'was_impossible': False})

In [72]:
# User 1 rated movie 2 as 6 in the source table.
algo.predict(uid=1, iid=2)

Prediction(uid=1, iid=2, r_ui=None, est=5.974696889826041, details={'actual_k': 4, 'was_impossible': False})

In [41]:
# User 1 rated movie 3 as 7 in the source table.
algo.predict(uid=1, iid=3)

Prediction(uid=1, iid=3, r_ui=None, est=6.178568516495166, details={'actual_k': 4, 'was_impossible': False})

In [44]:
# User 3 has no rating for movie 6 in the source table, so this fills a genuinely missing entry.
algo.predict(uid=3, iid=6)

Prediction(uid=3, iid=6, r_ui=None, est=3.644653349001175, details={'actual_k': 4, 'was_impossible': False})

In [75]:
# SVD training is stochastic; fix the seed so that Restart & Run All
# reproduces the same estimates in the prediction cells below.
svd = SVD(random_state=42)

In [76]:
# Fit the SVD model on the same training set; the fitted estimator is the cell's displayed value.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x127fb5f60>

In [77]:
# User id -1 does not appear in the ratings data (users are 1-5): a prediction for an unseen user.
svd.predict(uid=-1, iid=3)

Prediction(uid=-1, iid=3, r_ui=None, est=3.4489906652030924, details={'was_impossible': False})

In [49]:
# User id -2 is likewise absent from the ratings data (users are 1-5).
svd.predict(uid=-2, iid=3)

Prediction(uid=-2, iid=3, r_ui=None, est=3.4378783059153597, details={'was_impossible': False})

In [78]:
# User 3 has no rating for movie 1 in the source table, so this fills a genuinely missing entry.
svd.predict(uid=3, iid=1)

Prediction(uid=3, iid=1, r_ui=None, est=3.092134985236464, details={'was_impossible': False})

In [52]:
# User 4 rated movie 1 as 1 in the source table, so this estimate can be compared to a known rating.
svd.predict(uid=4, iid=1)

Prediction(uid=4, iid=1, r_ui=None, est=2.711298515296551, details={'was_impossible': False})