# Collaborative-Based Filtering

## Load the data

In [2]:
import pandas as pd

# Read the ratings file and keep just the user / movie / rating columns, in that order;
# this frame is what gets handed to Dataset.load_from_df further down.
ratings_df = pd.read_csv('ratings.csv').loc[:, ['userId', 'movieId', 'rating']]
ratings_df.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


## Install scikit-surprise 

In [3]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2685473 sha256=9dab8358a1502f8cdb40e65497763af5fb76b78d6cbc1f3b4b82a32cadeef180
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed s

## Create the dataset

In [14]:
from surprise import Dataset, Reader

# The reader carries the (min, max) bounds of the rating scale; the dataset wraps the
# three-column ratings frame using that reader.
# NOTE(review): the lower bound is 0 -- confirm it matches the smallest rating in ratings.csv.
reader = Reader(rating_scale=(0, 5.0))
dataset = Dataset.load_from_df(df=ratings_df, reader=reader)

## Build the trainset

In [15]:
from itertools import islice

# Build the training set from the whole dataset (no hold-out split at this point).
trainset = dataset.build_full_trainset()

# Preview the first five (user, item, rating) triples without materialising every rating.
# The ids shown are Surprise's internal ids, not the raw userId / movieId from the CSV.
list(islice(trainset.all_ratings(), 5))

[(0, 0, 2.5), (0, 1, 3.0), (0, 2, 3.0), (0, 3, 2.0), (0, 4, 4.0)]

## Train the model

In [16]:
from surprise import SVD

# Fix the seed so that re-running the notebook reproduces the same fitted factors and
# therefore the same predictions; an unseeded SVD gives different estimates on each fit.
model = SVD(random_state=42)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f373d2cc3d0>

## Predict with the model

Predict score: user 1 for movie id 2150. Known score 3

In [22]:
# Estimated rating of user 1 for movie 2150 (the `est` field of the returned prediction).
model.predict(uid=1, iid=2150).est

2.8410306868306296

In [31]:
# Estimated rating of user 1 for movie 2150, rounded to one decimal place.
round(model.predict(uid=1, iid=2150).est, ndigits=1)

3.1

Predict score: user 345 for movie id 900. Unknown score

In [32]:
# Estimated rating of user 345 for movie 900, rounded to one decimal place.
round(model.predict(uid=345, iid=900).est, ndigits=1)

4.2

## Model validation

In [29]:
from surprise import SVD
from surprise.model_selection import KFold, cross_validate

# cross_validate re-fits the algorithm it is given on every fold, so evaluate a dedicated
# SVD instance instead of `model`; that leaves `model` trained on the full trainset.
# Seeding both the fold splitter and the algorithm makes the reported scores repeatable.
cross_validate(
    SVD(random_state=42),
    dataset,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8957  0.9010  0.8956  0.8928  0.9016  0.8974  0.0034  
MAE (testset)     0.6903  0.6949  0.6895  0.6866  0.6941  0.6911  0.0031  
Fit time          1.11    1.22    1.13    1.26    1.20    1.19    0.06    
Test time         0.10    0.13    0.24    0.13    0.25    0.17    0.06    


{'test_rmse': array([0.89570614, 0.90095644, 0.8956364 , 0.89284264, 0.90161727]),
 'test_mae': array([0.69028421, 0.69491828, 0.68953691, 0.68659585, 0.69408937]),
 'fit_time': (1.1104698181152344,
  1.219717025756836,
  1.133833408355713,
  1.2617647647857666,
  1.2008039951324463),
 'test_time': (0.10205483436584473,
  0.129957914352417,
  0.24006319046020508,
  0.13140296936035156,
  0.24802231788635254)}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a1547541-bc41-4190-913e-d8bf46bc6b4a' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>