In [1]:
import pandas as pd
import os
import surprise
import numpy as np

In [2]:
data_dir = r"C:\Users\fabi\Desktop\DONO\Manipal_Deloitte\Recommendation System\ml-latest-small"
os.chdir(data_dir)

In [3]:
mr = pd.read_csv("ratings.csv")
mr.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
mr.drop('timestamp', axis=1, inplace=True)

In [5]:
mr.rename(columns={'userId':'user', 'movieId':'item', 'rating':'rating'}, inplace=True)

In [6]:
reader = surprise.dataset.Reader(line_format='user item rating', rating_scale=(0,5))

In [7]:
mr_train = surprise.dataset.Dataset.load_from_df(mr, reader)

In [8]:
mr_trainset = mr_train.build_full_trainset()

In [9]:
from surprise import SVD

In [10]:
svd = SVD(n_factors=20)

In [11]:
svd.fit(mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x18d5b537700>

In [12]:
mr.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [13]:
svd.predict(uid=1, iid=31, r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.3549926143129927, details={'was_impossible': False})

In [14]:
from surprise import NMF

In [15]:
nmf = NMF(n_factors=20)

In [16]:
nmf.fit(mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x18d5cd44a60>

In [17]:
nmf.predict(uid=1, iid=31, r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.3974164329114105, details={'was_impossible': False})

In [18]:
nmf2 = NMF(n_factors=20, biased=True)
nmf2.fit(mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x18d5ce8fc40>

In [19]:
nmf2.predict(uid=1, iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=1.060015756972868, details={'was_impossible': False})

In [20]:
from surprise.model_selection import KFold
from surprise import accuracy

In [21]:
kf = KFold(n_splits=3)

In [22]:
for trainset, testset in kf.split(mr_train):
    svd.fit(trainset)
    predictions = svd.test(testset)
    
    accuracy.rmse(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

RMSE: 0.8989
MAE:  0.6925
RMSE: 0.8866
MAE:  0.6843
RMSE: 0.9053
MAE:  0.6965


In [23]:
for trainset, testset in kf.split(mr_train):
    nmf.fit(trainset)
    predictions = nmf.test(testset)
    
    accuracy.rmse(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

RMSE: 0.9555
MAE:  0.7273
RMSE: 0.9602
MAE:  0.7301
RMSE: 0.9645
MAE:  0.7359


In [24]:
from surprise.model_selection import GridSearchCV

In [25]:
param_grid = {'n_factors':[15,20,25,30]}

In [26]:
algo = SVD

In [27]:
grid_search = GridSearchCV(algo, param_grid=param_grid, measures=['rmse','mae'])

In [28]:
grid_search.fit(mr_train)

In [29]:
print(grid_search.best_params['rmse'])

{'n_factors': 15}


In [30]:
print(grid_search.best_params['mae'])

{'n_factors': 15}


In [31]:
print(grid_search.best_score['rmse'])

0.8905107009475136


In [32]:
print(grid_search.best_score['mae'])

0.6859596834068754


In [33]:
grid_search = GridSearchCV(algo_class=NMF, param_grid=param_grid, measures=['rmse','mae'])

In [34]:
grid_search.fit(mr_train)

In [35]:
print(grid_search.best_params['rmse'])

{'n_factors': 20}


In [36]:
print(grid_search.best_params['mae'])

{'n_factors': 25}


In [37]:
print(grid_search.best_score['rmse'])

0.9472215253640949


In [38]:
print(grid_search.best_score['mae'])

0.7194028944605494
