In [33]:
import pandas as pd

In [34]:
# Load the movie catalogue (the frame shown below has movieId, title and genres columns).
# NOTE(review): '/content/...' is an absolute path — adjust it when running outside that environment.
movies=pd.read_csv('/content/movie.csv')


In [35]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [36]:
movies.isnull().sum()

Unnamed: 0,0
movieId,0
title,0
genres,0


In [37]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27278 entries, 0 to 27277
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  27278 non-null  int64 
 1   title    27278 non-null  object
 2   genres   27278 non-null  object
dtypes: int64(1), object(2)
memory usage: 639.5+ KB


In [38]:
import re
def clean_title(title):
  """Replace every character that is not a letter or digit with a space.

  Letters and digits are matched Unicode-aware, so accented or non-Latin
  characters (e.g. the "ü" in "für") are kept instead of being blanked out.
  For pure-ASCII titles the result is unchanged from the ASCII-only pattern.

  Args:
    title: Raw movie title string, e.g. "Toy Story (1995)".

  Returns:
    A string of the same length in which punctuation, whitespace and
    underscores have each been replaced by a single space.
  """
  # \W matches anything that is not a Unicode word character; "_" counts as a
  # word character, so it is listed explicitly to keep it being replaced.
  return re.sub(r"[\W_]"," ",title)

In [39]:
# Store a punctuation-free copy of every title; the TF-IDF index is built from this column.
movies["clean_title"]=movies["title"].map(clean_title)
movies.head()

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995


In [40]:
from sklearn.feature_extraction.text import TfidfVectorizer
# ngram_range=(1,2): index single words and adjacent word pairs of the cleaned titles.
vectorizer=TfidfVectorizer(ngram_range=(1,2))
# Learn the vocabulary and build the TF-IDF matrix: one row per entry of movies["clean_title"], in that order.
tfidf=vectorizer.fit_transform(movies["clean_title"])

In [41]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def search(title,top_n=5):
  """Return the movies whose cleaned titles best match a free-text query.

  The query is cleaned with clean_title, vectorised with the fitted
  `vectorizer`, and compared against every row of `tfidf` by cosine
  similarity.

  Args:
    title: Free-text title typed by the user.
    top_n: Maximum number of rows to return (default 5).

  Returns:
    A slice of `movies` with at most top_n rows, ordered from the most
    similar title to the least similar, so row 0 is the best match.
  """
  query_vec=vectorizer.transform([clean_title(title)])
  similarity=cosine_similarity(query_vec,tfidf).flatten()

  # Never ask for more rows than exist: argpartition raises when kth is out of bounds.
  top_n=min(top_n,similarity.size)
  if top_n<=0:
    return movies.iloc[[]]

  # argpartition only separates the top_n scores from the rest; it does not
  # order them, so the shortlist is sorted explicitly afterwards.
  candidates=np.argpartition(similarity,-top_n)[-top_n:]
  ranked=candidates[np.argsort(-similarity[candidates],kind="stable")]
  return movies.iloc[ranked]

In [42]:
import ipywidgets as widgets
from IPython.display import display

# Text box for the title query, pre-filled with an example title.
movie_input=widgets.Text(description="Movie Title :",value="Toy Story",disabled=False)
# Output area that the search callback renders its results into.
movie_list=widgets.Output()

def on_type(data):
  """Observer callback: show title-search results for the text just typed.

  Args:
    data: Change notification from the text widget; data["new"] holds the
      current text.

  The results area is cleared on every change. A search is only run once
  the text is longer than five characters.
  """
  with movie_list:
    movie_list.clear_output()
    typed=data["new"]
    if len(typed)<=5:
      return
    matches=search(typed)
    display(matches)

# Call on_type whenever the text box's value changes, then show the box together with its results area.
movie_input.observe(on_type,names='value')
display(movie_input,movie_list)




Text(value='Toy Story', description='Movie Title :')

Output()

In [43]:
# Load the ratings table (the frame shown below has userId, movieId, rating and timestamp columns).
# NOTE(review): "/content/..." is an absolute path — adjust it when running outside that environment.
ratings=pd.read_csv("/content/rating.csv")
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00
20000259,138493,69526,4.5,2009-12-03 18:31:48
20000260,138493,69644,3.0,2009-12-07 18:10:57
20000261,138493,70286,5.0,2009-11-13 15:42:24


In [44]:
ratings.dtypes

Unnamed: 0,0
userId,int64
movieId,int64
rating,float64
timestamp,object


In [45]:
ratings.isnull().sum()

Unnamed: 0,0
userId,0
movieId,0
rating,0
timestamp,0


In [46]:
# Worked example for the following cells: movieId 1 is "Toy Story (1995)" in the catalogue shown earlier.
movie_id=1

In [47]:
# Users who rated the example movie above 4: the "similar" audience.
similar_users=ratings.loc[(ratings["rating"]>4)&(ratings["movieId"]==movie_id),"userId"].unique()

In [48]:
similar_users

array([     6,     11,     14, ..., 138473, 138474, 138486])

In [49]:
# Every movie those similar users rated above 4 (one entry per rating).
similar_user_recs=ratings.loc[(ratings["rating"]>4)&ratings["userId"].isin(similar_users),"movieId"]

In [50]:
similar_user_recs

Unnamed: 0,movieId
517,1
519,7
520,17
521,52
522,62
...,...
19999505,53000
19999506,53953
19999507,54771
19999509,55282


In [51]:
# Turn the raw list into "share of similar users who liked each movie",
# then keep only movies liked by more than 10% of them.
# NOTE: this cell overwrites its own input, so re-running it without first
# re-running the cell that builds similar_user_recs gives wrong numbers.
similar_user_recs=similar_user_recs.value_counts().div(len(similar_users))
similar_user_recs=similar_user_recs.loc[similar_user_recs.gt(0.1)]

In [52]:
similar_user_recs

Unnamed: 0_level_0,count
movieId,Unnamed: 1_level_1
1,1.000000
318,0.426600
260,0.395222
296,0.359822
356,0.340574
...,...
778,0.101869
2502,0.100631
733,0.100631
2804,0.100384


In [53]:
# All ratings above 4, from any user, for the shortlisted movies.
all_users=ratings.loc[(ratings["rating"]>4)&ratings["movieId"].isin(similar_user_recs.index)]

In [54]:
# Share of the distinct users in all_users who liked each shortlisted movie.
all_users_recs=all_users["movieId"].value_counts()/all_users["userId"].nunique()

In [55]:
all_users_recs

Unnamed: 0_level_0,count
movieId,Unnamed: 1_level_1
318,0.318561
296,0.279770
593,0.229719
527,0.220469
356,0.218255
...,...
1580,0.047004
745,0.044837
551,0.044578
2804,0.042803


In [56]:
# Put both shares side by side, aligned on movieId.
rec_percentages=pd.concat([similar_user_recs,all_users_recs],axis=1).set_axis(["similar","all"],axis="columns")

In [57]:
rec_percentages

Unnamed: 0_level_0,similar,all
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.000000,0.126878
318,0.426600,0.318561
260,0.395222,0.216944
296,0.359822,0.279770
356,0.340574,0.218255
...,...,...
778,0.101869,0.071778
2502,0.100631,0.059913
733,0.100631,0.052768
2804,0.100384,0.042803


In [58]:
# score > 1 means the movie is liked more often by the similar users than by
# the general audience; rank the shortlist by that ratio, highest first.
rec_percentages=(
    rec_percentages
    .assign(score=rec_percentages["similar"]/rec_percentages["all"])
    .sort_values("score",ascending=False)
)
rec_percentages

Unnamed: 0_level_0,similar,all,score
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.000000,0.126878,7.881607
3114,0.269031,0.055484,4.848761
2355,0.116351,0.027781,4.188078
4886,0.189256,0.057903,3.268505
6377,0.189751,0.060918,3.114850
...,...,...,...
296,0.359822,0.279770,1.286134
858,0.256220,0.200352,1.278850
2858,0.222057,0.174455,1.272863
4973,0.127243,0.102221,1.244783


In [59]:
# Attach title and genres to the ten best-scoring movie ids.
rec_percentages.iloc[:10].merge(movies,left_index=True,right_on="movieId")

Unnamed: 0,similar,all,score,movieId,title,genres,clean_title
0,1.0,0.126878,7.881607,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
3027,0.269031,0.055484,4.848761,3114,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 2 1999
2270,0.116351,0.027781,4.188078,2355,"Bug's Life, A (1998)",Adventure|Animation|Children|Comedy,Bug s Life A 1998
4790,0.189256,0.057903,3.268505,4886,"Monsters, Inc. (2001)",Adventure|Animation|Children|Comedy|Fantasy,Monsters Inc 2001
6271,0.189751,0.060918,3.11485,6377,Finding Nemo (2003),Adventure|Animation|Children|Comedy,Finding Nemo 2003
582,0.212774,0.071731,2.96628,588,Aladdin (1992),Adventure|Animation|Children|Comedy|Musical,Aladdin 1992
8278,0.160045,0.054511,2.936018,8961,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy,Incredibles The 2004
589,0.18839,0.067389,2.795573,595,Beauty and the Beast (1991),Animation|Children|Fantasy|Musical|Romance|IMAX,Beauty and the Beast 1991
360,0.231279,0.08464,2.732495,364,"Lion King, The (1994)",Adventure|Animation|Children|Drama|Musical|IMAX,Lion King The 1994
1052,0.151133,0.057463,2.630074,1073,Willy Wonka & the Chocolate Factory (1971),Children|Comedy|Fantasy|Musical,Willy Wonka the Chocolate Factory 1971


In [60]:
def find_similar_movies(movie_id,min_rating=4,min_share=0.10,top_n=10):
  """Recommend movies liked disproportionately by fans of a given movie.

  Users who rated `movie_id` above `min_rating` form the "similar" audience.
  Each movie's score is the share of that audience who liked it divided by
  the share of the general audience (all users who liked at least one
  shortlisted movie) who liked it.

  Args:
    movie_id: movieId of the movie to find recommendations for.
    min_rating: A rating counts as "liked" only when strictly greater than
      this value (default 4).
    min_share: Keep only movies liked by more than this fraction of the
      similar users (default 0.10).
    top_n: Number of highest-scoring movies to return (default 10).

  Returns:
    DataFrame with columns score, title and genres, at most top_n rows,
    ordered from highest to lowest score. Empty when no user rated
    `movie_id` above `min_rating`.
  """
  # Filter the ratings table once; every step below only looks at liked ratings.
  liked=ratings[ratings["rating"]>min_rating]

  similar_users=liked.loc[liked["movieId"]==movie_id,"userId"].unique()
  if len(similar_users)==0:
    # Nobody liked this movie, so there is no audience to base recommendations on.
    return pd.DataFrame(columns=["score","title","genres"])

  # Share of the similar users who liked each movie, thresholded to a shortlist.
  similar_user_recs=liked.loc[liked["userId"].isin(similar_users),"movieId"]
  similar_user_recs=similar_user_recs.value_counts()/len(similar_users)
  similar_user_recs=similar_user_recs[similar_user_recs>min_share]

  # Share of the general audience who liked each shortlisted movie.
  all_users=liked[liked["movieId"].isin(similar_user_recs.index)]
  all_users_recs=all_users["movieId"].value_counts()/len(all_users["userId"].unique())

  rec_percentages=pd.concat([similar_user_recs,all_users_recs],axis=1)
  rec_percentages.columns=["similar","all"]

  rec_percentages["score"]=rec_percentages["similar"]/rec_percentages["all"]

  rec_percentages=rec_percentages.sort_values("score",ascending=False)
  return rec_percentages.head(top_n).merge(movies,left_index=True,right_on="movieId")[["score","title","genres"]]

In [61]:
# Text box for the title to get recommendations for, pre-filled with an example title.
movie_name_input=widgets.Text(description="Movie Title :",value="Toy Story",disabled=False)

# Output area that the recommendation callback renders into.
recommendation_list=widgets.Output()

def on_type(data):
  """Observer callback: show recommendations for the title just typed.

  Args:
    data: Change notification from the text widget; data["new"] holds the
      current text.

  The results area is cleared on every change. Once the text is longer than
  five characters, the first row returned by search() is taken as the
  intended movie and its recommendations are displayed.

  Note: this definition rebinds the name on_type used by the earlier
  title-search callback; the observer registered earlier keeps its own
  reference to that earlier function.
  """
  with recommendation_list:
    recommendation_list.clear_output()
    query=data["new"]
    if len(query)<=5:
      return
    best_match=search(query).iloc[0]
    display(find_similar_movies(best_match["movieId"]))

# Call on_type whenever the text box's value changes.
movie_name_input.observe(on_type,names="value")

# Show the text box together with its recommendations area.
display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title :')

Output()