<a href="https://colab.research.google.com/github/Shiyasalloor/Machine_Learning_And_Parellel_Computing/blob/main/Question_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import euclidean
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score
from math import sqrt
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the MovieLens dataset
!wget -nc https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip -n ml-latest-small.zip

--2025-03-25 08:07:47--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2025-03-25 08:07:48 (3.24 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]

Archive:  ml-latest-small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  


In [3]:
ratings = pd.read_csv("ml-latest-small/ratings.csv")
movies = pd.read_csv("ml-latest-small/movies.csv")

In [4]:
# Merge datasets
data = pd.merge(ratings, movies, on='movieId')
print(data.head())

   userId  movieId  rating  timestamp                        title  \
0       1        1     4.0  964982703             Toy Story (1995)   
1       1        3     4.0  964981247      Grumpier Old Men (1995)   
2       1        6     4.0  964982224                  Heat (1995)   
3       1       47     5.0  964983815  Seven (a.k.a. Se7en) (1995)   
4       1       50     5.0  964982931   Usual Suspects, The (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                               Comedy|Romance  
2                        Action|Crime|Thriller  
3                             Mystery|Thriller  
4                       Crime|Mystery|Thriller  


In [5]:
# Create User-Item Matrix
user_item_matrix = data.pivot_table(index='userId', columns='title', values='rating')

In [6]:
# Fill NaN with 0 for similarity calculations
user_item_matrix_filled = user_item_matrix.fillna(0)

In [7]:
# Compute User Similarity (Cosine)
user_similarity = cosine_similarity(user_item_matrix_filled)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

In [8]:
# Compute Item Similarity (Cosine)
item_similarity = cosine_similarity(user_item_matrix_filled.T)
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

In [9]:
def get_user_based_recommendations(user_id, num_recommendations=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:6]
    user_ratings = user_item_matrix.loc[similar_users].mean()
    user_ratings = user_ratings[user_ratings > 0]
    recommended_movies = user_ratings.sort_values(ascending=False).head(num_recommendations)
    return recommended_movies

In [10]:
def get_item_based_recommendations(movie_title, num_recommendations=5):
    similar_items = item_similarity_df[movie_title].sort_values(ascending=False)[1:num_recommendations+1]
    return similar_items

In [11]:
# Hybrid Recommender System
def hybrid_recommendation(user_id, movie_title, num_recommendations=5):
    user_recs = get_user_based_recommendations(user_id, num_recommendations)
    item_recs = get_item_based_recommendations(movie_title, num_recommendations)
    hybrid_recs = pd.concat([user_recs, item_recs]).sort_values(ascending=False).head(num_recommendations)
    return hybrid_recs

In [12]:
# Evaluation Metrics
def evaluate_recommendations(y_true, y_pred):
    precision = precision_score(y_true, y_pred, average='micro')
    recall = recall_score(y_true, y_pred, average='micro')
    f1 = f1_score(y_true, y_pred, average='micro')
    return precision, recall, f1


In [13]:
# Example Recommendations
print("User-based recommendations:")
print(get_user_based_recommendations(user_id=1))

User-based recommendations:
title
Flash Gordon (1980)           5.0
General, The (1926)           5.0
Alien Nation (1988)           5.0
Godfather, The (1972)         5.0
Waiting for Guffman (1996)    5.0
dtype: float64


In [14]:
print("Item-based recommendations:")
print(get_item_based_recommendations(movie_title='Toy Story (1995)'))


Item-based recommendations:
title
Toy Story 2 (1999)                           0.572601
Jurassic Park (1993)                         0.565637
Independence Day (a.k.a. ID4) (1996)         0.564262
Star Wars: Episode IV - A New Hope (1977)    0.557388
Forrest Gump (1994)                          0.547096
Name: Toy Story (1995), dtype: float64


In [15]:
print("Hybrid recommendations:")
print(hybrid_recommendation(user_id=1, movie_title='Toy Story (1995)'))

Hybrid recommendations:
title
Flash Gordon (1980)           5.0
General, The (1926)           5.0
Alien Nation (1988)           5.0
Godfather, The (1972)         5.0
Waiting for Guffman (1996)    5.0
dtype: float64


In [16]:
# Example Evaluation
y_true = [1, 1, 0, 1, 0]  # Ground truth (actual liked movies)
y_pred = [1, 0, 1, 1, 1]  # Predicted recommendations
precision, recall, f1 = evaluate_recommendations(y_true, y_pred)
print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

Precision: 0.4, Recall: 0.4, F1-score: 0.4
