In [1]:
import pandas as pd
import numpy as np

# Locations of the two TMDB tables inside the Kaggle input directory.
credits_path = "/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv"
movies_path = "/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv"

df1 = pd.read_csv(credits_path)
df2 = pd.read_csv(movies_path)

# Relabel the four credits columns positionally so the key column is called
# 'id', which is the column the merge below joins on.
# NOTE(review): 'tittle' looks like a deliberate misspelling, presumably to
# avoid clashing with a title column in the movies table - confirm before
# renaming it.
df1.columns = ['id', 'tittle', 'cast', 'crew']

# Attach the cast/crew columns to the movie metadata, keyed on 'id'.
df2 = pd.merge(df2, df1, on='id')

Since the dataset we used before does not have a userId column (which is necessary for collaborative filtering), let's load another dataset. We'll use the Surprise library to implement SVD.

In [6]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Load the explicit user/movie ratings used for collaborative filtering.
ratings = pd.read_csv("/kaggle/input/the-movies-dataset/ratings_small.csv")

# Surprise's Reader defaults to a 1-5 rating scale and clips every estimate to
# that range. These ratings use half-star steps, so take the scale from the
# data itself instead of relying on the default; otherwise any rating below 1
# could never be predicted.
reader = Reader(
    rating_scale=(float(ratings['rating'].min()), float(ratings['rating'].max()))
)

# Surprise expects exactly three columns, in the order: user, item, rating.
data = Dataset.load_from_df(ratings[['userId','movieId','rating']],reader)

# Evaluate SVD with 5-fold cross-validation; the returned dict (displayed as
# the cell output) holds per-fold RMSE/MAE plus fit and test times.
svd = SVD()
cross_validate(svd , data , measures = ['RMSE',"MAE"] , cv = 5)

{'test_rmse': array([0.89447656, 0.89282944, 0.90203287, 0.88671461, 0.90159312]),
 'test_mae': array([0.69105273, 0.69010595, 0.69525866, 0.67988446, 0.6939775 ]),
 'fit_time': (1.460784912109375,
  1.4955329895019531,
  1.5053229331970215,
  1.5392072200775146,
  1.5094432830810547),
 'test_time': (0.19269847869873047,
  0.5823655128479004,
  0.18411588668823242,
  0.18601107597351074,
  0.18418335914611816)}

We get a mean Root Mean Square Error of approximately 0.90, which is more than good enough for our case. Let us now train on the full dataset and arrive at predictions.

In [7]:
# Cross-validation above only scored the model; now build a trainset from the
# whole ratings dataset (no held-out fold) and fit the SVD model on it.
trainset = data.build_full_trainset()
# fit() is the cell's last expression, so the fitted SVD object is displayed.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7d09c8d45a20>

Let us pick the user with userId 1 and check the ratings they have given.

In [8]:
# Show every rating row that belongs to the user whose userId is 1.
ratings.loc[ratings['userId'].eq(1)]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [9]:
# Estimate the rating user 1 would give movie 302. r_ui is only echoed back in
# the returned Prediction as the reference rating; the estimate is in `est`.
svd.predict(uid=1, iid=302, r_ui=3)

Prediction(uid=1, iid=302, r_ui=3, est=2.572203496168211, details={'was_impossible': False})