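## Collaborative Filtering

Train a matrix-factorization (SVD) recommender on user–movie ratings with the Surprise library, then evaluate it with cross-validation.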

In [1]:
import pandas as pd
# Load the raw user/movie ratings table from the working directory.
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')

# Preview the first five rows to sanity-check the columns.
ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


### Create the dataset

In [7]:
from surprise import Dataset, Reader

# Derive the rating scale from the data instead of hard-coding (1, 5): the
# ratings include half-star values (e.g. 2.5), and Surprise clips predictions
# to this range, so a scale narrower than the data would distort low estimates.
rating_scale = (float(ratings['rating'].min()), float(ratings['rating'].max()))
reader = Reader(rating_scale=rating_scale)

# load_from_df expects exactly three columns, in the order user, item, rating.
dataset = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

### Build the train set

In [9]:
# Build the training set from the entire dataset; nothing is held out here,
# so the fitted model below has seen every rating in `ratings`.
trainset = dataset.build_full_trainset()

### Train the Model

In [10]:
from surprise import SVD
# SVD initialises its factors randomly; fix the seed so that a
# Restart & Run All reproduces the same model and the same predictions.
svd = SVD(random_state=42)

# Fit on the full training set built above.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f061a732bb0>

In [13]:
# Estimate the rating for one (user, item) pair, using the raw ids
# as they appear in the ratings table.
svd.predict(uid=15, iid=1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.422326524892422, details={'was_impossible': False})

### Validation

In [14]:
from surprise import model_selection

# Evaluate a fresh, seeded SVD instance rather than the already-fitted `svd`:
# cross_validate refits the estimator it is given on every fold, which would
# leave `svd` trained on a partial split instead of the full training set.
# The seeded KFold makes the 5 shuffled splits (and the reported scores) reproducible.
model_selection.cross_validate(
    SVD(random_state=42),
    dataset,
    measures=['RMSE', 'MAE'],
    cv=model_selection.KFold(n_splits=5, random_state=42, shuffle=True),
)

{'test_rmse': array([0.90258633, 0.8873754 , 0.90074602, 0.89641594, 0.89652712]),
 'test_mae': array([0.6896685 , 0.68245156, 0.69425446, 0.69263146, 0.69165209]),
 'fit_time': (1.3421549797058105,
  1.4178175926208496,
  1.4828054904937744,
  1.3615391254425049,
  1.4688808917999268),
 'test_time': (0.23634767532348633,
  0.5086185932159424,
  0.294189453125,
  0.1854262351989746,
  0.40115785598754883)}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=451c449a-2285-4693-8243-d4184b24f59a' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>