In [None]:
#video-Link: https://youtu.be/eyEabQRBMQA?feature=shared

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [3]:
# Load the movie catalogue from 'movies.csv' (path is relative to the working directory).
# The displayed frame below shows the columns movieId, title and genres.
movies=pd.read_csv('movies.csv')

In [4]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


In [5]:
import re

def clean_title(title):
    """Return `title` with every character removed that is not an ASCII
    letter, a digit, or a space.

    Punctuation such as the parentheses around the release year is dropped,
    e.g. "Toy Story (1995)" becomes "Toy Story 1995". Non-ASCII letters are
    removed as well, because the character class only lists a-z, A-Z and 0-9.
    """
    disallowed = re.compile("[^a-zA-Z0-9 ]")
    return disallowed.sub("", title)

In [6]:
# Add a 'clean_title' column holding clean_title() applied to every title.
# Note: this mutates `movies` in place, so the frame displayed earlier is stale after this cell.
movies['clean_title']=movies['title'].apply(clean_title)

In [7]:
movies

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over single words and two-word phrases (ngram_range=(1,2)) of the cleaned titles.
vectorizer=TfidfVectorizer(ngram_range=(1,2))
# Fit on the cleaned titles and keep the resulting matrix (one row per entry of
# movies['clean_title']); search() compares queries against it.
tfid=vectorizer.fit_transform(movies['clean_title'])

In [19]:
from sklearn.metrics.pairwise import cosine_similarity

def search(title, top_n=5):
    """Return the rows of `movies` whose titles best match `title`.

    The query is cleaned with `clean_title`, vectorised with the fitted
    `vectorizer`, and compared against `tfid` by cosine similarity.

    Parameters
    ----------
    title : str
        Free-text movie title to look up.
    top_n : int, default 5
        Maximum number of matches to return. It is clamped to the number of
        indexed titles; a value <= 0 yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to `top_n` rows of `movies`, ordered from highest to lowest
        similarity, so `results.iloc[0]` is the best match.
    """
    title = clean_title(title)
    query = vectorizer.transform([title])
    similarity = cosine_similarity(query, tfid).flatten()

    # Never ask for more rows than exist: argpartition raises when kth is out of bounds.
    top_n = min(top_n, len(similarity))
    if top_n <= 0:
        return movies.iloc[:0]

    # argpartition only guarantees that the last `top_n` positions hold the
    # largest scores; their relative order is unspecified. Rank just those
    # candidates explicitly so the best match really comes first.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates], kind="stable")[::-1]]
    return movies.iloc[ranked]

In [20]:
search("India")

Unnamed: 0,movieId,title,genres,clean_title
43290,165693,I for India (2005),Documentary,I for India 2005
34742,146156,Mr India (1987),Action|Adventure|Sci-Fi,Mr India 1987
13868,71760,India Song (1975),Drama|Fantasy|Romance,India Song 1975
10461,41650,Mother India (1957),Drama|Musical,Mother India 1957
28724,132340,The Dead 2: India (2013),Horror,The Dead 2 India 2013


In [66]:
import ipywidgets as widgets
from IPython.display import display

In [26]:
# Load the user ratings from 'ratings.csv' (path is relative to the working directory).
# The displayed frame below shows the columns userId, movieId, rating and timestamp.
ratings=pd.read_csv('ratings.csv')

In [27]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510
...,...,...,...,...
25000090,162541,50872,4.5,1240953372
25000091,162541,55768,2.5,1240951998
25000092,162541,56176,2.0,1240950697
25000093,162541,58559,4.0,1240953434


In [29]:
ratings.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [64]:
def find_similar_movies(movie_id):
    """Recommend movies favoured by the users who rated `movie_id` above 4.

    Steps:
      1. Collect the users who gave `movie_id` a rating > 4.
      2. Count which movies those users rated > 4, as a fraction of that
         user group, and keep movies whose fraction exceeds 0.1.
      3. For the kept movies, take all ratings > 4 from every user and divide
         each movie's count by the number of distinct users in that slice.
      4. score = fraction from step 2 / fraction from step 3.

    Parameters
    ----------
    movie_id : int
        Value compared against the `movieId` column of `ratings`.

    Returns
    -------
    pandas.DataFrame
        At most 10 rows with columns 'score', 'title' and 'genres', taken
        from the highest scores and joined to `movies` on `movieId`.
    """
    liked = ratings['rating'] > 4

    # Users who rated this movie above 4.
    fans = ratings.loc[liked & (ratings['movieId'] == movie_id), 'userId'].unique()

    # Every movie those same users rated above 4.
    fan_likes = ratings.loc[liked & ratings['userId'].isin(fans), 'movieId']

    # Fraction of the fan group liking each movie; keep those above 10%.
    fan_share = fan_likes.value_counts() / len(fans)
    fan_share = fan_share[fan_share > .1]

    # Same movies, but counted over the >4 ratings of all users.
    shortlist_likes = ratings[liked & ratings['movieId'].isin(fan_share.index)]
    distinct_likers = len(shortlist_likes['userId'].unique())
    overall_share = shortlist_likes['movieId'].value_counts() / distinct_likers

    # Put both fractions side by side and derive the score as their ratio.
    scores = pd.concat([fan_share, overall_share], axis=1)
    scores.columns = ['similar', 'all']
    scores['score'] = scores['similar'] / scores['all']

    # Keep the ten highest scores and attach title/genres from the catalogue.
    top = scores.sort_values('score', ascending=False).head(10)
    joined = top.merge(movies, left_index=True, right_on='movieId')
    return joined[['score', 'title', 'genres']]

In [65]:
# Text box in which the user types a movie title.
movie_widget = widgets.Text(value="Default", description="Movie Title:", disabled=False)

# Output area that holds the most recently displayed recommendation table.
recommendation_list = widgets.Output()


def on_type(data):
    """Refresh the recommendations whenever the text box value changes.

    `data` is the change payload delivered by `observe`; only its "new" entry
    (the current text) is read. The output area is cleared on every call, and
    recommendations are shown only when the text is longer than 5 characters.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data["new"]
        if len(typed) <= 5:
            # Too short to search on; leave the output area empty.
            return
        # Use the first row returned by search() as the movie to recommend from.
        first_hit = search(typed).iloc[0]
        display(find_similar_movies(first_hit['movieId']))


# Call on_type for changes to the widget's 'value' trait, then render both widgets.
movie_widget.observe(on_type, names='value')
display(movie_widget, recommendation_list)

Text(value='Default', description='Movie Title:')

Output()