In [1]:
import numpy as np # supports numerical operations
import pandas as pd # used for data manipulation and handling tabular data(csv,excel etc.)
import joblib # used for saving and loading ml model effectively
import ipywidgets as widgets # allows to create interactive widgets
from IPython.display import display # used to display widgets and outputs


In [2]:
!pip install scikit-surprise # python library specifically for recommendation systems (collaborative filtering)

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.4/154.4 kB 4.8 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... done
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505176 sha256=89910cd3c26c13cb2dbdfbb6ea68e6972c7f17d78c11b58cb55f50d084fb0c55
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4

In [3]:
from surprise import Dataset, Reader, SVD, KNNBasic # 'Dataset': Helps load data in a format suitable for surprise models. 'Reader': Defines the rating scale for your dataset
# SVD : Singular Value Decomposition 'matrix factorization algorithm'. 'KNNBasic' : A user/item based nearest-neighbor collaborative filtering model
from surprise.model_selection import train_test_split, cross_validate, GridSearchCV # 'train_test_split': Splits the dataset into training and testing sets.
# 'cross_validate': Runs cross-validation to evaluate model performance. 'GridSearchCV': Performs hyperparameter tuning to find the best model configuration.
from surprise.accuracy import rmse # 'rmse' : Measures how well the predicted ratings match actual ratings

In [4]:
# Read the ratings table and the movie-title table from their delimited text files
ratings = pd.read_csv(
    "u.data",
    sep="\t",
    names=["userId", "movieId", "rating", "timestamp"],
    usecols=[0, 1, 2],  # keep the first three columns only; the timestamp column is not loaded
)
movies = pd.read_csv(
    "u.item",
    sep="|",
    encoding="ISO-8859-1",  # some titles may contain special (non-ASCII) characters
    names=["movieId", "title"],
    usecols=[0, 1],  # only the id and title columns are read
)

In [5]:
# Merge ratings with movie titles.
# `ratings` is reassigned from itself, so a 'title' column left behind by a previous run of this
# cell is dropped first; without that, re-running the cell would merge the titles a second time
# and produce 'title_x' / 'title_y' columns instead of a single 'title'.
ratings = ratings.drop(columns="title", errors="ignore").merge(movies, on="movieId")


In [6]:
# Wrap the ratings in the dataset object that Surprise algorithms consume
reader = Reader(rating_scale=(1, 5))  # ratings are declared to lie on a 1-5 scale
data = Dataset.load_from_df(
    # user id, item id and rating only: the model works on ids, the title is just metadata
    ratings[["userId", "movieId", "rating"]],
    reader,
)

In [7]:
# Hyperparameter tuning for SVD
# Seed NumPy's global RNG first: neither the CV splitter nor SVD is given a random_state here, and
# the Surprise FAQ recommends seeding np.random for reproducible experiments. Without a seed the
# fold assignment and factor initialisation (and therefore the "best" parameters) change per run.
np.random.seed(42)
param_grid = {
    'n_factors': [50, 100],     # number of latent factors for the matrix factorization
    'lr_all': [0.002, 0.005],   # learning rate applied to all parameters
    'reg_all': [0.02, 0.1],     # regularization term applied to all parameters
}
# rmse / mae are the evaluation metrics; cv=3 evaluates every combination with 3-fold cross-validation
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)  # evaluates one SVD configuration per combination in param_grid and records the best per measure

In [8]:
# SVD Model
# gs.best_estimator is indexed by measure name; 'rmse' picks the SVD configuration that scored
# best on RMSE during the grid search. The estimator is fitted on the training split in a later cell.
best_svd = gs.best_estimator['rmse'] # best-performing SVD configuration according to RMSE


In [9]:
# Hold out 20% of the ratings for evaluation, then fit the tuned SVD on the remaining 80%
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is the same on every run
)
best_svd.fit(trainset)  # trains on the training split only


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7eff61e16090>

In [10]:
# Score the fitted SVD on the held-out ratings
predictions = best_svd.test(testset)  # one prediction per (user, item, rating) in the test split
rmse(predictions, verbose=True)       # prints the RMSE and returns it as the cell's value

RMSE: 0.9315


0.931497768244026

In [11]:
# Save the trained model
# joblib.dump writes the whole fitted estimator object to disk and returns the list of files written.
# NOTE: pickle-based files can execute code when loaded, so only load this file from a trusted source.
joblib.dump(best_svd, 'movie_recommender.pkl') # .pkl - pickle format stores the Python object (entire model)

['movie_recommender.pkl']

In [12]:
# Item-based nearest-neighbour model, used to look up movies similar to a given movie
sim_options = dict(
    name='cosine',       # cosine similarity
    user_based=False,    # compare items with items rather than users with users
)
knn = KNNBasic(sim_options=sim_options)  # basic KNN collaborative-filtering algorithm
knn.fit(trainset)                        # computes the similarity matrix on the training split
joblib.dump(knn, 'knn_movie_recommender.pkl')  # persist the fitted KNN model next to the SVD model

Computing the cosine similarity matrix...
Done computing similarity matrix.


['knn_movie_recommender.pkl']

In [13]:
# Function to recommend movies
def get_movie_recommendations(user_id, model, movies_df, n=5):
  """Return the titles of the `n` movies with the highest predicted rating for `user_id`.

  Movies the user has already rated in the model's training set are skipped, so the
  result only contains movies that are new to the user as far as the model knows.
  Ratings that are not part of `model.trainset` (e.g. a held-out test split) are not
  visible here and therefore cannot be excluded.

  Args:
    user_id: raw user id, as used in the ratings data.
    model: fitted estimator exposing `predict(user_id, movie_id)` whose result has
      `.iid` and `.est` attributes. If it also exposes a `trainset` (with
      `to_inner_uid`, `ur` and `to_raw_iid`), that is used to find already-rated movies.
    movies_df: DataFrame with at least the columns 'movieId' and 'title'.
    n: maximum number of titles to return.

  Returns:
    List of titles, best predicted rating first (at most `n` entries).
  """
  # One id -> title lookup built up front instead of scanning the DataFrame once per result.
  # For a repeated movieId the first row wins.
  title_by_id = movies_df.drop_duplicates('movieId').set_index('movieId')['title']

  # Collect the raw ids of the movies this user rated in the training data.
  seen_movie_ids = set()
  trainset = getattr(model, 'trainset', None)
  if trainset is not None:
    try:
      inner_uid = trainset.to_inner_uid(user_id)
    except ValueError:
      inner_uid = None  # user is unknown to the model: nothing to exclude
    if inner_uid is not None:
      seen_movie_ids = {trainset.to_raw_iid(inner_iid) for inner_iid, _ in trainset.ur[inner_uid]}

  predictions = [
      model.predict(user_id, movie_id)
      for movie_id in title_by_id.index
      if movie_id not in seen_movie_ids
  ]
  predictions.sort(key=lambda pred: pred.est, reverse=True)  # highest predicted rating first
  return [title_by_id.loc[pred.iid] for pred in predictions[:n]]


In [14]:
# Get similar movies using KNN
def get_similar_movies(movie_id,knn_model,movies_df,n=5):
  """Return the titles of the `n` nearest neighbours of `movie_id` according to `knn_model`.

  Args:
    movie_id: raw movie id, as used in the ratings data.
    knn_model: fitted item-based model exposing `trainset` (with `to_inner_iid` /
      `to_raw_iid`) and `get_neighbors(inner_id, k)`.
    movies_df: DataFrame with at least the columns 'movieId' and 'title'.
    n: number of neighbours to request.

  Returns:
    List of titles in the order the model returns the neighbours.

  Raises:
    ValueError: propagated from `trainset.to_inner_iid` when `movie_id` is not part of
      the model's training set (per the Surprise Trainset documentation).
    KeyError: if a neighbour's movie id has no row in `movies_df`.
  """
  trainset = knn_model.trainset
  inner_id = trainset.to_inner_iid(movie_id)  # raw id -> Surprise's internal index
  neighbor_inner_ids = knn_model.get_neighbors(inner_id, k=n)  # internal indices of similar movies

  # Titles must be looked up by movieId. The raw id is a label, not a row position, so
  # positional indexing (iloc) would return the wrong row or run past the end of the frame.
  title_by_id = movies_df.drop_duplicates('movieId').set_index('movieId')['title']
  return [title_by_id.loc[trainset.to_raw_iid(inner)] for inner in neighbor_inner_ids]


In [15]:
# Controls and output area for the per-user recommendations (SVD)
user_id_widget = widgets.IntText(description='User ID:', value=1)  # numeric input for the user id
n_movies_widget = widgets.IntSlider(  # how many movies to recommend (1-10)
    description='Num Movies:',
    value=5,
    min=1,
    max=10,
    step=1,
)
predict_button = widgets.Button(description='Get recommendations')  # triggers the recommendation
output = widgets.Output()  # area in which the recommendations are printed

# Controls and output area for the similar-movie search (KNN)
movie_id_widget = widgets.IntText(description='Movie ID:', value=1)
sim_movies_button = widgets.Button(description='Find Similar Movies')
output_knn = widgets.Output()

# Click handler: recommend movies for the selected user with the SVD model
def on_predict_click(b):
  """Print SVD-based recommendations for the chosen user into `output`.

  `b` is the clicked button passed in by ipywidgets; it is not used.
  """
  with output:
    output.clear_output()  # drop whatever the previous click printed
    chosen_user = user_id_widget.value
    how_many = n_movies_widget.value
    recommended_movies = get_movie_recommendations(chosen_user, best_svd, movies, how_many)
    print('Recommended Movies for User', chosen_user, ':', recommended_movies)

# Function to find similar movies using KNN
def on_sim_movies_click(b):
    """Print the movies most similar to the chosen movie ID into `output_knn`.

    `b` is the clicked button passed in by ipywidgets; it is not used.
    An ID the KNN model does not know is reported with a short message instead of a traceback.
    """
    with output_knn:
        output_knn.clear_output()  # drop whatever the previous click printed
        movie_id = movie_id_widget.value
        try:
            similar_movies = get_similar_movies(movie_id, knn, movies, n=5)
        except ValueError:
            # get_similar_movies calls trainset.to_inner_iid, which raises ValueError for an
            # id that is not part of the model's training set.
            print(f'Movie ID {movie_id} is not known to the model.')
            return
        print(f'Movies similar to {movie_id}:', similar_movies)

# Connect each button to its click handler
for _button, _handler in (
    (predict_button, on_predict_click),
    (sim_movies_button, on_sim_movies_click),
):
    _button.on_click(_handler)

# Render both tools: first the SVD-based recommender, then the KNN-based similar-movie search
display(user_id_widget, n_movies_widget, predict_button, output)
display(widgets.VBox(children=[movie_id_widget, sim_movies_button, output_knn]))


IntText(value=1, description='User ID:')

IntSlider(value=5, description='Num Movies:', max=10, min=1)

Button(description='Get recommendations', style=ButtonStyle())

Output()

VBox(children=(IntText(value=1, description='Movie ID:'), Button(description='Find Similar Movies', style=Butt…