In [8]:
%%capture
! pip install tensorflow streamlit

# Introduction

Item-based recommender systems predict a user's preference for an item based on the similarity between items. They analyze past interactions to recommend items similar to those the user liked before. These systems use similarity metrics like cosine similarity or Pearson correlation and are common in e-commerce and streaming services to provide personalized suggestions and enhance user experience.

# Setup

In [9]:
import tensorflow as tf
import streamlit as st
import pandas as pd
import numpy as np
import pickle

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Loading Data Sets

In [11]:
# Read the movie catalogue and the user ratings from the working directory.
movies, ratings = (
    pd.read_csv(csv_name) for csv_name in ('movies.csv', 'ratings.csv')
)

In [12]:
# Preview the first five catalogue rows.
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [13]:
# Distinct movie titles in the catalogue, followed by how many there are.
titles = movies.loc[:, 'title'].unique()
len(titles)

9737

In [14]:
# Preview the first five rating rows.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


# Merging movies and ratings

In [15]:
# Inner join on movieId: one row per (movie, user rating) pair, so movies
# without any rating do not appear in the result.
movies_ratings = pd.merge(
    left=movies,
    right=ratings,
    how='inner',
    on='movieId',
)
movies_ratings.head(n=5)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [16]:
# User x movie rating matrix: rows are userId, columns are movieId.
# pivot_table averages duplicate (user, movie) entries (its default aggfunc);
# movies a user has not rated are filled with 0.
utility_matrix = (
    movies_ratings
    .pivot_table(
        index='userId',
        columns='movieId',
        values='rating',
        aggfunc='mean',
    )
    .fillna(0)
)
utility_matrix.head(n=5)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Rescale every movie column of the utility matrix to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
utility_matrix_scaled = scaler.fit_transform(utility_matrix)

In [18]:
# Transpose so that each row is one movie's vector of user ratings; the result
# is a square movie x movie cosine-similarity matrix.
item_item_similarity = cosine_similarity(utility_matrix_scaled.transpose())

In [19]:
# Label both axes of the similarity matrix with the movieIds that appear as
# columns of the utility matrix.
similarity_df = pd.DataFrame(
    data=item_item_similarity,
    index=utility_matrix.columns,
    columns=utility_matrix.columns,
)
similarity_df.head(n=5)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.410562,0.296917,0.035573,0.308762,0.376316,0.277491,0.131629,0.232586,0.395573,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.410562,1.0,0.282438,0.106415,0.287795,0.297009,0.228576,0.172498,0.044835,0.417693,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.296917,0.282438,1.0,0.092406,0.417802,0.284257,0.402831,0.313434,0.30484,0.242954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.035573,0.106415,0.092406,1.0,0.188376,0.089685,0.275035,0.158022,0.0,0.095598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.308762,0.287795,0.417802,0.188376,1.0,0.298969,0.474002,0.283523,0.335058,0.218061,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Persist the movieId-labelled similarity DataFrame to 'similarity_df.pkl' in
# the working directory so it can be reloaded without recomputing it.
# Only unpickle this file from a trusted location: pickle.load can execute
# arbitrary code.
with open('similarity_df.pkl', 'wb') as f:
    pickle.dump(similarity_df, f)

In [21]:
def recommender(movie, similarity_mat, movies_data, k):
    """Return the titles of the ``k`` rows most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``movies_data['title']``. If the title
        occurs more than once, the first occurrence is used.
    similarity_mat : 2-D array-like
        Similarity scores indexed by integer position. Row/column ``i`` must
        describe the movie stored in row ``i`` of ``movies_data``.
    movies_data : pandas.DataFrame
        Catalogue with a ``'title'`` column.
    k : int
        Number of titles to return.

    Returns
    -------
    list
        Up to ``k`` titles ordered by decreasing similarity. The query movie
        itself is not filtered out, so it normally comes first.

    Raises
    ------
    IndexError
        If ``movie`` is not present in ``movies_data['title']``.

    Notes
    -----
    NOTE(review): this function only works if ``similarity_mat`` and
    ``movies_data`` share the same row order. A similarity matrix built from
    rated movies only may have fewer rows than the catalogue -- confirm the
    alignment at the call site.
    """
    # Work with row *positions* throughout. `.index[0]` would yield an index
    # label, which only coincides with the position for a default RangeIndex.
    matches = movies_data['title'].eq(movie).to_numpy().nonzero()[0]
    if matches.size == 0:
        raise IndexError(
            f"movie title {movie!r} not found in movies_data['title']"
        )
    position = int(matches[0])

    # Stable sort: entries with equal similarity keep their original order.
    ranked = sorted(
        enumerate(similarity_mat[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    title_column = movies_data['title']
    return [title_column.iloc[row] for row, _score in ranked[:k]]

In [22]:
# Smoke test: ask for the 10 titles most similar to one movie.
# NOTE(review): similarity_df is indexed by the movieIds present in
# utility_matrix, while recommender looks rows up by position in `movies`;
# confirm that the two orderings line up.
movie = 'Jurassic Park (1993)'
recommended_movies = recommender(movie, similarity_df.values, movies, 10)

# Print each recommended title followed by a blank line.
# A dedicated loop variable keeps `movie` bound to the query title instead of
# being overwritten by the last recommendation.
for recommended_title in recommended_movies:
    print(f"Recommended Movie: {recommended_title}")
    print()

Recommended Movie: Jurassic Park (1993)

Recommended Movie: Terminator 2: Judgment Day (1991)

Recommended Movie: Forrest Gump (1994)

Recommended Movie: Braveheart (1995)

Recommended Movie: Fugitive, The (1993)

Recommended Movie: Speed (1994)

Recommended Movie: Batman (1989)

Recommended Movie: Independence Day (a.k.a. ID4) (1996)

Recommended Movie: Apollo 13 (1995)

Recommended Movie: True Lies (1994)



 # Get top-K similar movies based on similarity_matrix

In [23]:
def recommend_similar_movies(movie_id, similarity_matrix, K=5):
    """Return the ids of the ``K`` movies most similar to ``movie_id``.

    Parameters
    ----------
    movie_id : hashable
        Label of the query movie; must be a row label of ``similarity_matrix``.
    similarity_matrix : pandas.DataFrame
        Square similarity table whose index and columns are movie ids.
    K : int, default 5
        Maximum number of neighbours to return.

    Returns
    -------
    list
        Up to ``K`` movie ids ordered by decreasing similarity, never
        containing ``movie_id`` itself.

    Raises
    ------
    KeyError
        If ``movie_id`` is not a row label of ``similarity_matrix``.
    """
    scores = similarity_matrix.loc[movie_id]

    # Remove the query movie *before* ranking. Taking the top K+1 and removing
    # it afterwards fails when other movies tie with the self-similarity: the
    # query can then fall outside the top K+1 and list.remove raises ValueError.
    candidates = scores.drop(labels=movie_id, errors='ignore')

    return candidates.nlargest(K).index.tolist()

# Demo: fetch the five nearest neighbours of one movieId and show them.
movie_id = 3114
top_k_similar_movies = recommend_similar_movies(movie_id, similarity_df, K=5)
print(
    f"Top-5 recommended movies similar to movie {movie_id}:",
    top_k_similar_movies,
    sep="\n",
)

Top-5 recommended movies similar to movie 3114:
[2355, 1, 4306, 4886, 3175]


In [24]:
# Average rating per movie, returned as a flat (movieId, rating) table.
(
    ratings
    .groupby('movieId')['rating']
    .mean()
    .reset_index()
)

Unnamed: 0,movieId,rating
0,1,3.920930
1,2,3.431818
2,3,3.259615
3,4,2.357143
4,5,3.071429
...,...,...
9719,193581,4.000000
9720,193583,3.500000
9721,193585,3.500000
9722,193587,3.500000
