# Collaborative filtering

In [5]:
import pandas as pd
# Read the ratings file and keep only the user, item and rating columns,
# in that order, then preview the first rows.
ratings = (
    pd.read_csv("data/ratings.csv")
    .loc[:, ["userId", "movieId", "rating"]]
)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [7]:
# Create the dataset
from surprise import Dataset, Reader

# Take the rating bounds from the data instead of hard-coding (1, 5): the file
# contains half-star ratings (e.g. 2.5), so a fixed scale can misstate the
# real minimum/maximum that the Reader is told about.
reader = Reader(
    rating_scale=(
        float(ratings["rating"].min()),
        float(ratings["rating"].max()),
    )
)

# load_from_df is given the frame with its columns in (user, item, rating) order.
dataset = Dataset.load_from_df(ratings, reader)
dataset

In [None]:
# Build the trainset 

In [9]:
# Build a trainset from the full dataset (no ratings are held out here),
# then display the resulting object.
trainset = dataset.build_full_trainset()
trainset

<surprise.trainset.Trainset at 0x29d5e7bd0>

In [11]:
# Peek at the first five (user, item, rating) triples of the trainset.
# islice stops after five items instead of building a list of every rating
# only to discard all but the first five.
from itertools import islice

list(islice(trainset.all_ratings(), 5))

[(0, 0, 2.5), (0, 1, 3.0), (0, 2, 3.0), (0, 3, 2.0), (0, 4, 4.0)]

In [12]:
# Train the model 

In [13]:
from surprise import SVD

# Fix the seed so that re-running the notebook (Restart & Run All) yields the
# same fitted model and therefore the same predictions below.
svd = SVD(random_state=42)

# Fit on the full trainset built above.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x29aadb8d0>

In [14]:
# Estimate the rating user 15 would give to item 1956.
svd.predict(uid=15, iid=1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.5130574956561746, details={'was_impossible': False})

In [15]:
# Same prediction as above, keeping only the estimated rating value.
svd.predict(uid=15, iid=1956).est

3.5130574956561746

In [16]:
# Validation

from surprise import SVD, model_selection

# Cross-validate a fresh SVD instead of `svd`: the estimator passed in is
# refit on each fold's training split (with the default single-process run),
# which would leave `svd` trained on only part of the data afterwards.
# Seeding both the model and the fold splitter keeps the scores stable
# across re-runs. Five folds matches the default used previously.
model_selection.cross_validate(
    SVD(random_state=42),
    dataset,
    measures=["RMSE", "MAE"],
    cv=model_selection.KFold(n_splits=5, random_state=42),
)


{'test_rmse': array([0.89700361, 0.88973771, 0.90103654, 0.89837071, 0.89772712]),
 'test_mae': array([0.69094031, 0.68457009, 0.69355753, 0.69207813, 0.69161217]),
 'fit_time': (0.41289281845092773,
  0.4229748249053955,
  0.4323689937591553,
  0.4525160789489746,
  0.4098639488220215),
 'test_time': (0.03154397010803223,
  0.08498287200927734,
  0.032592058181762695,
  0.03434586524963379,
  0.034096717834472656)}