In [1]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505183 sha256=efebf939e98737050c28c940f2f6883938e269733f7b8048be3b3420c9240237
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Install

In [13]:
import pandas as pd
import numpy as np
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader
from scipy.sparse import csr_matrix

df = pd.read_csv("ratings.csv")

df.info()

df.dropna(inplace=True)

sparse_matrix = csr_matrix((df['rating'], (df['userId'], df['movieId'])))

reader = Reader(rating_scale=(0.5, 5.0))

data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()

model = SVD()
model.fit(trainset)

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8633  0.8820  0.8706  0.8827  0.8692  0.8736  0.0076  
MAE (testset)     0.6642  0.6752  0.6665  0.6793  0.6692  0.6709  0.0056  
Fit time          1.47    1.50    1.49    1.51    2.34    1.66    0.34    
Test time         0.10    0.34    0.10    0.19    0.42    0.23    0.13    


{'test_rmse': array([0.86328655, 0.88200263, 0.8706162 , 0.8827396 , 0.86923476]),
 'test_mae': array([0.66424068, 0.67518364, 0.66646396, 0.67932298, 0.66924134]),
 'fit_time': (1.4695203304290771,
  1.4992871284484863,
  1.487313985824585,
  1.5073788166046143,
  2.343101978302002),
 'test_time': (0.1042182445526123,
  0.33635783195495605,
  0.10196161270141602,
  0.186293363571167,
  0.418149471282959)}

In [14]:
def get_top_n_recommendations(model, user_id, df, n=5):
    all_items = df['movieId'].unique()
    user_items = df[df['userId'] == user_id]['movieId'].unique()
    items_to_predict = list(set(all_items) - set(user_items))
    predictions = [model.predict(user_id, item).est for item in items_to_predict]
    top_n_items = [x for _, x in sorted(zip(predictions, items_to_predict), reverse=True)][:n]
    return top_n_items


user_id = int(input("Enter user id:"))
n = int(input("Enter no. of movies to be recommended:"))
print(get_top_n_recommendations(model, user_id = user_id, df=df, n=n))

Enter user id:1
Enter no. of movies to be recommended:5
[58559, 44555, 27156, 8368, 7153]
