## Collaborative Filtering

### Load the data

In [1]:
import pandas as pd

# Location of the ratings table, relative to the notebook's working directory.
RATINGS_CSV = "movies_data/ratings.csv"

# Read the ratings into a DataFrame and show it as the cell's output.
ratings = pd.read_csv(RATINGS_CSV)
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


### Create the dataset

In [5]:
from surprise import Dataset, Reader

# Surprise clips predictions to `rating_scale`, so the scale must cover every
# rating that actually occurs. The data contains half-star ratings (e.g. 2.5),
# so a hard-coded (1, 5) is not guaranteed to fit; derive the bounds from the
# ratings themselves instead.
rating_bounds = (float(ratings["rating"].min()), float(ratings["rating"].max()))
reader = Reader(rating_scale=rating_bounds)

# Surprise expects exactly three columns, in the order: user, item, rating.
dataset = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)
dataset

<surprise.dataset.DatasetAutoFolds at 0x795fc8443310>

### Build the trainset

In [9]:
from itertools import islice

# Train on every available rating (no hold-out split).
trainset = dataset.build_full_trainset()

# Peek at the first five (inner user id, inner item id, rating) triples.
# `all_ratings()` is a generator, so slice it lazily instead of materialising
# every rating in a list just to keep five of them.
list(islice(trainset.all_ratings(), 5))

[(0, 0, 2.5), (0, 1, 3.0), (0, 2, 3.0), (0, 3, 2.0), (0, 4, 4.0)]

### Train the model

In [10]:
from surprise import SVD

# SVD initialises its factors randomly and trains with SGD; fix the seed so
# that a fresh "Restart & Run All" reproduces the same model and predictions.
svd = SVD(random_state=42)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x795fb427c850>

### Predicting ratings

In [11]:
# Estimate the rating user 671 would give movie 6365. Both are raw ids, i.e.
# the values found in the `userId` / `movieId` columns of the ratings file.
svd.predict(uid=671, iid=6365)

Prediction(uid=671, iid=6365, r_ui=None, est=3.5202045633100516, details={'was_impossible': False})

### Validate the Model

In [None]:
from surprise import SVD
from surprise import model_selection as ms

# cross_validate() fits the estimator it is given once per fold. Hand it a
# fresh SVD rather than the already-fitted `svd`, so the model trained on the
# full trainset is not refit on a fold subset as a side effect.
# Seeding both the estimator and the fold split makes the scores reproducible.
ms.cross_validate(
    SVD(random_state=42),
    dataset,
    measures=["RMSE", "MAE"],
    cv=ms.KFold(n_splits=5, random_state=42),
)