# **Install and Import Modules**

In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163751 sha256=143a69e885e536dce6a9e339e241311194426deda2a9b4318e238ae7977972d2
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [13]:
from surprise import Dataset, Reader
from surprise.prediction_algorithms.matrix_factorization import SVD
from surprise import accuracy
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# **Load and Preprocess the Data**

In [7]:
ratings_df = pd.read_csv("ratings.csv")
movies_df = pd.read_csv("movies.csv")

df = pd.merge(ratings_df, movies_df[['movieId', 'genres']], on = 'movieId', how = 'left')

df

Unnamed: 0,userId,movieId,rating,timestamp,genres
0,1,1,4.0,964982703,Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Comedy|Romance
2,1,6,4.0,964982224,Action|Crime|Thriller
3,1,47,5.0,964983815,Mystery|Thriller
4,1,50,5.0,964982931,Crime|Mystery|Thriller
...,...,...,...,...,...
100831,610,166534,4.0,1493848402,Drama|Horror|Thriller
100832,610,168248,5.0,1493850091,Action|Crime|Thriller
100833,610,168250,5.0,1494273047,Horror
100834,610,168252,5.0,1493846352,Action|Sci-Fi


In [10]:
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()
mlb = MultiLabelBinarizer()

df['userId'] = user_encoder.fit_transform(df['userId'])
df['movieId'] = movie_encoder.fit_transform(df['movieId'])

df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('genres').str.split('|')), columns = mlb.classes_, index = df.index ))

In [12]:
df.drop(columns = "(no genres listed)", inplace = True)


Unnamed: 0,userId,movieId,rating,timestamp,Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,4.0,964982703,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,0,2,4.0,964981247,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
2,0,5,4.0,964982224,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
3,0,43,5.0,964983815,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
4,0,46,5.0,964982931,0,0,0,0,0,1,...,0,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100831,609,9416,4.0,1493848402,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
100832,609,9443,5.0,1493850091,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
100833,609,9444,5.0,1494273047,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
100834,609,9445,5.0,1493846352,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


# **Build the Model with Collabrative filtering**

In [14]:
train_df, test_df = train_test_split(df, test_size = 0.2)
train_df

Unnamed: 0,userId,movieId,rating,timestamp,Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
84963,551,812,3.5,1112151115,0,0,1,1,1,0,...,0,0,0,1,0,1,0,0,0,0
83821,533,6388,4.0,1459787998,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
84973,551,920,4.0,1111472942,1,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
66913,431,3003,4.0,1315244163,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
21690,139,6501,3.0,1186077792,1,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63438,413,2629,2.0,961514247,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
80502,508,3331,3.0,1436031667,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4907,30,862,3.0,850467408,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
15239,97,8888,2.5,1532457800,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
reader = Reader(rating_scale = (0.5, 5))
data = Dataset.load_from_df(train_df[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()


<surprise.trainset.Trainset at 0x7882c905ff70>

In [16]:
model_svd = SVD()
model_svd.fit(trainset)

predictions_svd = model_svd.test(trainset.build_anti_testset())
accuracy.rmse(predictions_svd)

RMSE: 0.4777


0.4777352046039378

# **Make Recommendations**

In [24]:
def get_top_n_recommendations(user_id, n=5):
  user_movies = df[df['userId'] == user_id]['movieId'].unique()
  all_movies = df['movieId'].unique()
  movies_to_predict = list(set(all_movies) - set(user_movies))

  user_movie_pairs = [(user_id, movie_id, 0) for movie_id in movies_to_predict]
  predictions_cf = model_svd.test(user_movie_pairs)

  top_n_recommendations = sorted(predictions_cf, key = lambda x: x.est)[:n]

  for pred in top_n_recommendations:
    predicted_rating = pred.est
    print(predicted_rating)


  top_n_movie_ids = [int(pred.iid) for pred in top_n_recommendations]

  top_n_movies = movie_encoder.inverse_transform(top_n_movie_ids)

  return top_n_movies

In [25]:

user_id = 221
recommendations = get_top_n_recommendations(user_id)
top_n_movies_titles = movies_df[movies_df['movieId'].isin(recommendations)]['title'].tolist()
print(f"Top 5 Recommendations for User {user_id}:")
for i, title in enumerate(top_n_movies_titles, 1):
  print(f"{i}.{title}")

1.5463819581287739
1.8023725684558956
1.8625275464904463
1.8697898880000998
1.8820046033566906
Top 5 Recommendations for User 221:
1.Stuart Saves His Family (1995)
2.Richie Rich (1994)
3.Honey, I Blew Up the Kid (1992)
4.Superman IV: The Quest for Peace (1987)
5.Battlefield Earth (2000)
