In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from ipywidgets import*

In [2]:
# Mount Google Drive into the Colab filesystem so that files under
# /content/drive can be opened with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Folder that holds the input CSV files; change it here when the data
# lives somewhere else instead of editing every read_csv call.
DATA_DIR='/content/drive/MyDrive/movie'

# movies: one row per movie; ratings: one row per (user, movie) rating.
movies=pd.read_csv(f'{DATA_DIR}/movies.csv')
ratings=pd.read_csv(f'{DATA_DIR}/ratings.csv')

In [4]:
# Preview the first rows of the movies table.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [6]:
# Column dtypes and non-null counts of the movies table.
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10329 entries, 0 to 10328
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  10329 non-null  int64 
 1   title    10329 non-null  object
 2   genres   10329 non-null  object
dtypes: int64(1), object(2)
memory usage: 242.2+ KB


In [7]:
# (rows, columns) of the movies table.
movies.shape

(10329, 3)

In [8]:
# Column dtypes and non-null counts of the ratings table.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105339 entries, 0 to 105338
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     105339 non-null  int64  
 1   movieId    105339 non-null  int64  
 2   rating     105339 non-null  float64
 3   timestamp  105339 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 MB


In [9]:
# (rows, columns) of the ratings table.
ratings.shape

(105339, 4)

In [10]:
# Summary statistics of the numeric ratings columns.
ratings.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,105339.0,105339.0,105339.0,105339.0
mean,364.924539,13381.312477,3.51685,1130424000.0
std,197.486905,26170.456869,1.044872,180266000.0
min,1.0,1.0,0.5,828565000.0
25%,192.0,1073.0,3.0,971100800.0
50%,383.0,2497.0,3.5,1115154000.0
75%,557.0,5991.0,4.0,1275496000.0
max,668.0,149532.0,5.0,1452405000.0


In [11]:
# Preview only: split each pipe-delimited genre string into a list.
# Nothing is assigned here, so `movies` is left unchanged.
movies['genres'].str.split("|")

Unnamed: 0,genres
0,"[Adventure, Animation, Children, Comedy, Fantasy]"
1,"[Adventure, Children, Fantasy]"
2,"[Comedy, Romance]"
3,"[Comedy, Drama, Romance]"
4,[Comedy]
...,...
10324,"[Animation, Children, Comedy]"
10325,[Comedy]
10326,[Comedy]
10327,[Drama]


In [12]:
# Turn each pipe-delimited genre string into a list of genre names.
# Only string values are split, so running this cell a second time leaves
# the already-split lists untouched instead of corrupting the column.
movies['genres']=movies['genres'].map(lambda value:value.split("|") if isinstance(value,str) else value)

In [13]:
# One row per (movie, genre): expand each genre list into separate rows.
# The original row index is repeated for every genre of the same movie.
movies2=movies.explode('genres')

In [14]:
# Preview the exploded table.
movies2.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure
0,1,Toy Story (1995),Animation
0,1,Toy Story (1995),Children
0,1,Toy Story (1995),Comedy
0,1,Toy Story (1995),Fantasy


In [15]:
# Discard the placeholder rows of movies that carry no genre label.
movies2=movies2.loc[movies2['genres'].ne('(no genres listed)')]

In [16]:
# Attach title and genre to every rating. Because movies2 has one row per
# (movie, genre), each rating is repeated once per genre of its movie.
merged_data=ratings.merge(movies2,how='inner',on=['movieId'])
merged_data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime
1,1,16,4.0,1217897793,Casino (1995),Drama
2,1,24,1.5,1217895807,Powder (1995),Drama
3,1,24,1.5,1217895807,Powder (1995),Sci-Fi
4,1,32,4.0,1217896246,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery
...,...,...,...,...,...,...
281892,668,143385,4.0,1446388585,Bridge of Spies (2015),Drama
281893,668,143385,4.0,1446388585,Bridge of Spies (2015),Thriller
281894,668,144976,2.5,1448656898,Bone Tomahawk (2015),Horror
281895,668,144976,2.5,1448656898,Bone Tomahawk (2015),Western


In [17]:
# Mean rating and number of ratings for every (genre, title) pair.
popularity=(
    merged_data
    .groupby(['genres','title'])['rating']
    .agg(['mean','size'])
    .reset_index()
)
popularity.columns=["Genres","Title","Average Ratings","Number of Ratings"]
popularity

Unnamed: 0,Genres,Title,Average Ratings,Number of Ratings
0,Action,'71 (2014),3.500000,1
1,Action,'Hellboy': The Seeds of Creation (2004),3.000000,1
2,Action,10 to Midnight (1983),2.500000,1
3,Action,12 Rounds (2009),2.875000,4
4,Action,13 Assassins (Jûsan-nin no shikaku) (2010),3.500000,5
...,...,...,...,...
23093,Western,Wyatt Earp (1994),3.200000,30
23094,Western,Young Guns (1988),3.375000,36
23095,Western,Young Guns II (1990),3.083333,12
23096,Western,Young Ones (2014),2.000000,1


In [18]:
def TopNPopularMovies(genre,threshhold,topN,data=None):
    """Return the best-rated titles of one genre.

    Parameters
    ----------
    genre : str
        Genre label to filter on (matched exactly against the 'genres' column).
    threshhold : int
        Minimum number of ratings a title needs in order to be listed.
    topN : int
        Maximum number of titles to return. Zero or negative values give
        an empty table.
    data : pandas.DataFrame, optional
        Table with 'genres', 'title' and 'rating' columns. Defaults to the
        notebook-level ``merged_data`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns 'Sno.', 'Movie Title', 'Average Movie Rating' and
        'Number of Reviews', ordered by average rating (highest first) and
        indexed 0..n-1.
    """
    if data is None:
        data=merged_data
    # A negative count would make head() return "everything but the last n".
    limit=max(int(topN),0)

    # Filter to the requested genre first so only its rows are aggregated.
    genre_ratings=data.loc[data['genres']==genre,['title','rating']]
    stats=(
        genre_ratings
        .groupby('title')['rating']
        .agg(Average_Ratings='mean',Number_of_Ratings='size')
        .reset_index()
    )

    # Stable sort: titles with equal averages keep their alphabetical order.
    topNrecommendations=(
        stats[stats['Number_of_Ratings']>=threshhold]
        .sort_values(by='Average_Ratings',ascending=False,kind='stable')
        .head(limit)
        .rename(columns={
            'title':'Movie Title',
            'Average_Ratings':'Average Movie Rating',
            'Number_of_Ratings':'Number of Reviews',
        })
        .reset_index(drop=True)
    )
    # rename/reset_index return a new frame, so this assignment does not
    # write into a slice of `stats`.
    topNrecommendations['Sno.']=range(1,len(topNrecommendations)+1)
    return topNrecommendations[['Sno.','Movie Title','Average Movie Rating','Number of Reviews']]

In [None]:
# Example: the 10 best-rated Action titles that have at least 50 ratings.
TopNPopularMovies("Action",50,10)

Unnamed: 0,Sno.,Movie Title,Average Movie Rating,Number of Reviews
0,1,Princess Mononoke (Mononoke-hime) (1997),4.384615,52
1,2,North by Northwest (1959),4.273973,73
2,3,"Matrix, The (1999)",4.264368,261
3,4,Star Wars: Episode V - The Empire Strikes Back...,4.22807,228
4,5,Seven Samurai (Shichinin no samurai) (1954),4.217742,62
5,6,Raiders of the Lost Ark (Indiana Jones and the...,4.212054,224
6,7,Inception (2010),4.18932,103
7,8,Star Wars: Episode IV - A New Hope (1977),4.188645,273
8,9,Fight Club (1999),4.188406,207
9,10,Blade Runner (1982),4.169872,156


In [19]:


# Content-based recommendations



In [20]:
# Collapse the exploded table back to one row per title, with that
# title's genres joined into a single space-separated string.
movies3=(
    movies2
    .groupby('title')['genres']
    .agg(lambda genre_names:" ".join(genre_names))
    .reset_index()
)

In [21]:
# Preview the one-row-per-title genre strings.
movies3.head()

Unnamed: 0,title,genres
0,'71 (2014),Action Drama Thriller War
1,'Hellboy': The Seeds of Creation (2004),Action Adventure Comedy Documentary Fantasy
2,'Round Midnight (1986),Drama Musical
3,'Til There Was You (1997),Drama Romance
4,"'burbs, The (1989)",Comedy


In [22]:
# TF-IDF vectorizer over word uni-, bi- and tri-grams with English stop words removed.
# NOTE(review): with scikit-learn's default token pattern punctuation acts as a
# separator, so hyphenated genres such as "Sci-Fi" are presumably split into two
# tokens — confirm this is intended.
tf=TfidfVectorizer(analyzer='word',ngram_range=(1,3),stop_words='english')
tf

In [23]:
# Fit the vectorizer on the genre strings and transform them in one step;
# the result has one row per row of movies3.
tf_matrix=tf.fit_transform(movies3['genres'])


In [24]:
# Pairwise cosine similarity between all titles. With a single argument the
# matrix is compared against itself. The result is a square array with one
# row and one column per title, so memory grows quadratically with the
# number of titles.
cosine_sim=cosine_similarity(tf_matrix)

In [25]:
# Inspect the similarity matrix (the diagonal is each title against itself).
cosine_sim

array([[1.        , 0.02677945, 0.02931913, ..., 0.10229517, 0.        ,
        0.        ],
       [0.02677945, 1.        , 0.        , ..., 0.03626651, 0.02411583,
        0.02863994],
       [0.02931913, 0.        , 1.        , ..., 0.        , 0.        ,
        0.35526663],
       ...,
       [0.10229517, 0.03626651, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.02411583, 0.        , ..., 0.        , 1.        ,
        0.07090711],
       [0.        , 0.02863994, 0.35526663, ..., 0.        , 0.07090711,
        1.        ]])

In [26]:
# Lookup table from title to the row index of movies3, which is also the
# row/column used for that title in cosine_sim.
indices=pd.Series(movies3.index,index=movies3['title'])
indices

Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
'71 (2014),0
'Hellboy': The Seeds of Creation (2004),1
'Round Midnight (1986),2
'Til There Was You (1997),3
"'burbs, The (1989)",4
...,...
loudQUIETloud: A Film About the Pixies (2006),10315
xXx (2002),10316
xXx: State of the Union (2005),10317
¡Three Amigos! (1986),10318


In [27]:
# Row index of an example title, used for the step-by-step walkthrough.
index=indices["'71 (2014)"]

In [28]:
# Pair every similarity score in the example title's row with its column position.
scores=list(enumerate(cosine_sim[index]))

In [29]:
# Convert each score to a plain Python float; the positions are kept as they are.
scores=[(index, float(value)) for index, value in scores]

In [30]:
# Keep the five (position, score) pairs with the highest scores.
# The example title is compared with itself too, so it can appear in this list.
matched=sorted(scores,key=lambda x:x[1],reverse=True)[:5]

In [31]:
# Keep only the row positions and drop the scores.
matched=[position for position,_score in matched]

In [32]:
# Look up the matched titles by row position.
movies3.iloc[matched]

Unnamed: 0,title,genres
0,'71 (2014),Action Drama Thriller War
596,Army of Shadows (L'armée des ombres) (1969),Action Drama Thriller War
858,Battle Royale 2: Requiem (Batoru rowaiaru II: ...,Action Drama Thriller War
3839,Green Zone (2010),Action Drama Thriller War
4407,"Hurt Locker, The (2008)",Action Drama Thriller War


In [33]:
def recommendation_genre(movie_df,similarity_matrix,movie_title,topN):
    """Recommend the titles most similar to ``movie_title``.

    Parameters
    ----------
    movie_df : pandas.DataFrame
        Table with a 'title' column whose row order matches the rows and
        columns of ``similarity_matrix``.
    similarity_matrix : array-like
        Square matrix of pairwise similarity scores; row i holds the scores
        of the i-th row of ``movie_df`` against every row.
    movie_title : str
        Title to find neighbours for. It is matched exactly first and, if
        that fails, again with surrounding whitespace removed.
    topN : int
        Maximum number of recommendations. Zero or negative values give an
        empty table.

    Returns
    -------
    pandas.DataFrame
        Columns 'Sno.' and 'Movie Title', most similar first. The table is
        empty when the title is not present in ``movie_df``.
    """
    result_columns=['Sno.','Movie Title']
    limit=max(int(topN),0)

    # Resolve the title against the frame that was passed in, by position,
    # so the function does not depend on any notebook-level table.
    is_query=(movie_df['title']==movie_title).to_numpy()
    if not is_query.any() and isinstance(movie_title,str):
        is_query=(movie_df['title']==movie_title.strip()).to_numpy()
    if limit==0 or not is_query.any():
        return pd.DataFrame(columns=result_columns)
    position=int(np.flatnonzero(is_query)[0])

    # Highest score first; the stable sort keeps tied titles in row order.
    scores=np.asarray(similarity_matrix[position],dtype=float)
    ranked=np.argsort(-scores,kind='stable')
    # Remove the query title explicitly. It is not guaranteed to be ranked
    # first, because other titles can tie with it on similarity.
    ranked=ranked[~is_query[ranked]][:limit]

    matching_df=(
        movie_df.iloc[ranked]
        .rename(columns={'title':'Movie Title'})
        .reset_index(drop=True)
    )
    matching_df['Sno.']=range(1,len(matching_df)+1)
    return matching_df[result_columns]

In [34]:

# Interactive widgets


In [35]:
# Popularity-based tab.
# The genre names are sorted so the dropdown order is the same on every run
# (iterating a set gives an arbitrary order).
genres=Dropdown(options=sorted(set(movies2['genres'].dropna())),description="Genre",style={"description_width":"initial"})
num_reviews=IntText(value=0,description="Minimum Reviews",style={"description_width":"initial"})
# Start with a non-zero count so the first click returns rows.
num_recommendations_1=IntText(value=10,description="Number of Recommendation",style={"description_width":"initial"})

b1=Button(description="RECOMMEND ME",style={"description_width":"initial"})
h1=HBox([num_reviews,num_recommendations_1])
popularity_tab=VBox([genres,h1,b1])


# Content-based tab.

title=Textarea(description="Movie Title",style={"description_width":"initial"})
num_recommendations_2=IntText(value=10,description="Number of Recommendation",style={"description_width":"initial"})

h2=HBox([title,num_recommendations_2])
b2=Button(description="RECOMMEND ME",style={"description_width":"initial"})
content_tab=VBox([h2,b2])


# Combine both panels into one tab widget.

tabs=[popularity_tab,content_tab]
wid=Tab(children=tabs)

names=['Popularity Based Recommendations','Content Based Recommendations']
# Plain loop: set_title is called for its side effect only, and the loop
# variable no longer reuses the name of the `title` text widget.
for tab_index,tab_name in enumerate(names):
    wid.set_title(tab_index,tab_name)

display(wid)

Tab(children=(VBox(children=(Dropdown(description='Genre', options=('Musical', 'Film-Noir', 'Documentary', 'Sc…

In [41]:
# Event handling

# Latest recommendation table; also kept as a global so it can be inspected
# from another cell after a click.
output=None

# Shared output area: every click clears it and renders the new table here,
# so the result is visible without running another cell.
results_view=Output()

def b1_clicked(b):
    """Handle a click on the popularity tab: compute and show the top titles."""
    global output
    output=TopNPopularMovies(genre=genres.value,threshhold=num_reviews.value,topN=num_recommendations_1.value)
    with results_view:
        results_view.clear_output(wait=True)
        display(output)
b1.on_click(b1_clicked)

def b2_clicked(b):
    """Handle a click on the content tab: compute and show similar titles."""
    global output
    with results_view:
        results_view.clear_output(wait=True)
        try:
            result=recommendation_genre(movie_df=movies3,similarity_matrix=cosine_sim,movie_title=title.value,topN=num_recommendations_2.value)
        except KeyError:
            # The title lookup failed; report it instead of raising inside
            # the widget callback.
            print(f"No movie titled {title.value!r} was found.")
            return
        output=result
        display(output)
b2.on_click(b2_clicked)

display(results_view)

In [45]:
# Show the tab widget again; the click handlers are registered on its buttons by now.
display(wid)

Tab(children=(VBox(children=(Dropdown(description='Genre', index=11, options=('Musical', 'Film-Noir', 'Documen…

In [51]:
# Inspect the table stored by the most recent button click.
# `output` only exists after a button has been clicked at least once.
output

Unnamed: 0,Sno.,Movie Title,Average Movie Rating,Number of Reviews
0,1,Citizen Kane (1941),4.396104,77
1,2,Rear Window (1954),4.331081,74
2,3,"Usual Suspects, The (1995)",4.328947,228
3,4,Chinatown (1974),4.323529,68
4,5,"Third Man, The (1949)",4.291667,36
5,6,North by Northwest (1959),4.273973,73
6,7,"Big Sleep, The (1946)",4.267857,28
