# Collaborative-Based Filtering

## Load the data

In [None]:
import pandas as pd
# Parse only the columns the recommender needs; any other columns in the CSV
# are skipped at read time instead of being loaded and then discarded.
rating_columns = ["userId", "movieId", "rating"]
# `usecols` keeps the file's own column order, so re-select explicitly to
# guarantee the (user, item, rating) order regardless of the CSV layout.
ratings = pd.read_csv("ratings.csv", usecols=rating_columns)[rating_columns]
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


## Create the dataset

In [None]:
from surprise import Dataset, Reader

# Derive the rating scale from the data instead of hard-coding (1, 5).
# The ratings include half-star values, and Surprise clips predictions to
# this range, so a scale narrower than the data would distort estimates.
rating_min = float(ratings["rating"].min())
rating_max = float(ratings["rating"].max())
reader = Reader(rating_scale=(rating_min, rating_max))

# load_from_df expects the columns in (user, item, rating) order.
dataset = Dataset.load_from_df(ratings, reader)
dataset

<surprise.dataset.DatasetAutoFolds at 0x7f8bb2f10250>

## Build the trainset

In [None]:
# Use every available rating for training (no hold-out split at this stage).
trainset = dataset.build_full_trainset()

In [None]:
# Materialise the (user, item, rating) triples held by the trainset.
# The ids shown are the trainset's zero-based inner ids, not the raw
# userId/movieId values from the CSV.
[*trainset.all_ratings()]

[(0, 0, 2.5),
 (0, 1, 3.0),
 (0, 2, 3.0),
 (0, 3, 2.0),
 (0, 4, 4.0),
 (0, 5, 2.0),
 (0, 6, 2.0),
 (0, 7, 2.0),
 (0, 8, 3.5),
 (0, 9, 2.0),
 (0, 10, 2.5),
 (0, 11, 1.0),
 (0, 12, 4.0),
 (0, 13, 4.0),
 (0, 14, 3.0),
 (0, 15, 2.0),
 (0, 16, 2.0),
 (0, 17, 2.5),
 (0, 18, 1.0),
 (0, 19, 3.0),
 (1, 20, 4.0),
 (1, 21, 5.0),
 (1, 22, 5.0),
 (1, 23, 4.0),
 (1, 24, 4.0),
 (1, 25, 3.0),
 (1, 26, 3.0),
 (1, 27, 4.0),
 (1, 28, 3.0),
 (1, 29, 5.0),
 (1, 30, 4.0),
 (1, 31, 3.0),
 (1, 32, 3.0),
 (1, 33, 3.0),
 (1, 34, 3.0),
 (1, 35, 3.0),
 (1, 36, 3.0),
 (1, 37, 5.0),
 (1, 38, 1.0),
 (1, 39, 3.0),
 (1, 40, 3.0),
 (1, 41, 3.0),
 (1, 42, 4.0),
 (1, 43, 4.0),
 (1, 44, 5.0),
 (1, 45, 5.0),
 (1, 46, 3.0),
 (1, 47, 4.0),
 (1, 48, 3.0),
 (1, 49, 4.0),
 (1, 50, 3.0),
 (1, 51, 4.0),
 (1, 52, 2.0),
 (1, 53, 1.0),
 (1, 54, 3.0),
 (1, 55, 4.0),
 (1, 56, 4.0),
 (1, 57, 3.0),
 (1, 58, 3.0),
 (1, 59, 3.0),
 (1, 60, 3.0),
 (1, 61, 2.0),
 (1, 62, 3.0),
 (1, 63, 3.0),
 (1, 64, 3.0),
 (1, 65, 3.0),
 (1, 66, 2.0),
 (1, 

## Train the ML Model

In [None]:
from surprise import SVD

# Instantiate Surprise's SVD algorithm; no arguments are passed, so the
# library's default hyperparameters are used.
svd = SVD()

In [None]:
# Train on the full trainset. fit() hands back the fitted algorithm object,
# which is why the cell output echoes the SVD instance.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f8bb1ee9d00>

In [None]:
# Estimate the rating that user 15 would give to movie 1956. No true rating
# is supplied, so the returned Prediction carries r_ui=None.
svd.predict(uid=15, iid=1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.025320289965138, details={'was_impossible': False})

## Validation

In [None]:
from surprise import model_selection

# 5-fold cross-validation reporting RMSE and MAE per fold, together with the
# fit and test time of each fold.
model_selection.cross_validate(
    algo=svd,
    data=dataset,
    measures=["RMSE", "MAE"],
    cv=5,
)

{'test_rmse': array([0.89637039, 0.89429017, 0.89755575, 0.9026853 , 0.89824265]),
 'test_mae': array([0.68938106, 0.68584934, 0.69188243, 0.69206747, 0.69254613]),
 'fit_time': (0.9750285148620605,
  0.9724233150482178,
  0.9717409610748291,
  1.0328271389007568,
  1.0220370292663574),
 'test_time': (0.7242555618286133,
  0.15490317344665527,
  0.13984966278076172,
  0.13442587852478027,
  0.15059423446655273)}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=23f0fd89-95fc-4011-a8fd-9b3c217d2af6' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>