In [47]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from pathlib import Path
from surprise import SVD, SVDpp
from surprise import accuracy
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate

In [29]:
# Dataset directory (relative to the notebook's working directory) and the
# two CSV files read from it below.
PATH_DATA = Path('dataset_small')
PATH_RATINGS = PATH_DATA / 'ratings.csv'
PATH_MOVIES = PATH_DATA / 'movies.csv'

In [30]:
# Load the ratings and movies tables into DataFrames.
ratings_df = pd.read_csv(PATH_RATINGS)
movies_df = pd.read_csv(PATH_MOVIES)

In [31]:
# Display the ratings table (pandas truncates long frames in the rendered output).
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [32]:
# Derive the rating scale from the data instead of hard-coding (1, 5): the
# ratings include half-star values, and Surprise clips every estimate to this
# scale, so a lower bound that is too high would distort low predictions.
reader = Reader(rating_scale=(ratings_df['rating'].min(), ratings_df['rating'].max()))

In [33]:
# Build the Surprise dataset from the user, item and rating columns, passed in
# that order (timestamp is left out).
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

In [119]:
# Unique (movieId, title) tuples for every movie that has at least one rating.
movies = list(
    ratings_df[['movieId']]
    .merge(movies_df)                     # joins on the shared column(s)
    [['movieId', 'title']]
    .drop_duplicates()                    # one row per rated movie
    .itertuples(index=False, name=None)   # plain tuples, no index field
)

In [120]:
# Number of distinct (movieId, title) pairs collected above.
len(movies)

9724

## SVD

In [44]:
# Instantiate the model in this cell: no earlier cell defines `algo`, so on a
# fresh kernel (Restart & Run All) the loop below would raise NameError.
algo = SVD()

# Manual 3-fold cross-validation.
kf = KFold(n_splits=3)
for trainset, testset in kf.split(data):

    # Train on the fold's training split and predict its held-out split.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)

RMSE: 0.8777
RMSE: 0.8820
RMSE: 0.8801


In [72]:
# 5-fold cross-validation of an SVD model with default hyperparameters,
# reporting RMSE and MAE per fold (verbose=True prints the summary table).
algo = SVD()
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8802  0.8652  0.8705  0.8690  0.8800  0.8730  0.0060  
MAE (testset)     0.6768  0.6649  0.6670  0.6692  0.6756  0.6707  0.0047  
Fit time          8.37    8.49    8.51    8.39    8.48    8.45    0.06    
Test time         0.52    0.31    0.43    0.30    0.29    0.37    0.09    


{'test_rmse': array([0.88017805, 0.86522076, 0.87047183, 0.8690456 , 0.87996319]),
 'test_mae': array([0.67678089, 0.66492649, 0.66697568, 0.66924319, 0.6755632 ]),
 'fit_time': (8.370809078216553,
  8.490764617919922,
  8.511718511581421,
  8.394794940948486,
  8.479740381240845),
 'test_time': (0.5156803131103516,
  0.31180691719055176,
  0.4317324161529541,
  0.2968153953552246,
  0.2918214797973633)}

In [73]:
# Refit on a trainset built from the whole dataset (no held-out split), then
# estimate the rating for user id 31 on movie id 21.
trainset = data.build_full_trainset()
algo.fit(trainset)
algo.predict(31, 21)

Prediction(uid=31, iid=21, r_ui=None, est=3.842098216497943, details={'was_impossible': False})

In [74]:
# Another spot check: estimated rating for user id 11 on movie id 22.
algo.predict(11,22)

Prediction(uid=11, iid=22, r_ui=None, est=3.879026964241314, details={'was_impossible': False})

In [127]:
def get_user_ratings(user_id, ratings_df, movies_df):
    """Return the movies a user has rated, ordered from lowest to highest rating.

    Parameters
    ----------
    user_id
        Value matched against ``ratings_df['userId']``.
    ratings_df : pandas.DataFrame
        Ratings table with ``userId``, ``movieId`` and ``rating`` columns.
    movies_df : pandas.DataFrame
        Movie table with ``movieId`` and ``title`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``rating``, sorted ascending by
        ``rating``.
    """
    is_user = ratings_df['userId'] == user_id
    user_rows = ratings_df[is_user]
    # Attach titles via the shared movieId key.
    with_titles = user_rows.merge(movies_df, on='movieId')
    selected = with_titles[['movieId', 'title', 'rating']]
    return selected.sort_values(by='rating')
    
def recommend(user_id, movies, algo, ratings_df):
    """Rank the movies a user has not rated yet by their predicted rating.

    Parameters
    ----------
    user_id
        User id, matched against ``ratings_df['userId']`` and passed to
        ``algo.predict``.
    movies : iterable of (movieId, title) tuples
        Candidate movies to score.
    algo
        Fitted model whose ``predict(user_id, movie_id)`` returns an object
        exposing the estimate as ``.est`` (e.g. a Surprise ``Prediction``).
    ratings_df : pandas.DataFrame
        Ratings table with ``userId`` and ``movieId`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``rating`` (the predicted rating),
        limited to movies the user has not rated and sorted by ``rating`` in
        descending order. The index is each movie's position in ``movies``.
    """
    # Select the column with single brackets so the set holds the movie ids.
    # Iterating a DataFrame (``df[['movieId']]``) yields column labels, which
    # would make the set {'movieId'} and leave every rated movie unfiltered.
    watched_movies = set(ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'])

    predictions = [
        {
            'movieId': m_id,
            'title': m_name,
            'rating': algo.predict(user_id, m_id).est,
        }
        for m_id, m_name in movies
    ]
    # Explicit columns keep the frame well-formed when `movies` is empty.
    pred_df = pd.DataFrame(predictions, columns=['movieId', 'title', 'rating'])

    unseen = pred_df[~pred_df['movieId'].isin(watched_movies)]
    return unseen.sort_values(by='rating', ascending=False)

In [128]:
# Ratings given by user 2, lowest first.
get_user_ratings(2, ratings_df, movies_df)

Unnamed: 0,movieId,title,rating
25,114060,The Drop (2014),2.0
20,91658,"Girl with the Dragon Tattoo, The (2011)",2.5
0,318,"Shawshank Redemption, The (1994)",3.0
23,109487,Interstellar (2014),3.0
13,77455,Exit Through the Gift Shop (2010),3.0
11,71535,Zombieland (2009),3.0
26,115713,Ex Machina (2015),3.5
5,8798,Collateral (2004),3.5
21,99114,Django Unchained (2012),3.5
19,91529,"Dark Knight Rises, The (2012)",3.5


In [129]:
# First 10 rows of recommend()'s output for user 2.
recommend(2,movies, algo, ratings_df).head(10)

Unnamed: 0,movieId,title,rating
753,4993,"Lord of the Rings: The Fellowship of the Ring,...",4.501297
722,750,Dr. Strangelove or: How I Learned to Stop Worr...,4.49846
15,260,Star Wars: Episode IV - A New Hope (1977),4.488461
166,2571,"Matrix, The (1999)",4.45115
2060,56782,There Will Be Blood (2007),4.449
1104,1223,"Grand Day Out with Wallace and Gromit, A (1989)",4.409245
774,7153,"Lord of the Rings: The Return of the King, The...",4.398688
16,296,Pulp Fiction (1994),4.392835
73,1210,Star Wars: Episode VI - Return of the Jedi (1983),4.364337
74,1213,Goodfellas (1990),4.362405


## SVDpp