# Movie Recommendation System 

### Importing the Libraries

In [62]:
import gradio as gr
import numpy as np
import pandas as pd

### Importing the Movies Dataset

In [63]:
# Load the movie catalogue (path is relative to the working directory); the
# preview below shows its movieId, title, and genres columns.
movies = pd.read_csv("movies.csv")

In [64]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Cleaning the Data (Removing Punctuation with the Regular Expression Library)

In [65]:
import re

def clean_title(title):
    """Return ``title`` with everything except ASCII letters, digits and spaces removed.

    Punctuation such as parentheses, commas and apostrophes is dropped, so
    ``"Toy Story (1995)"`` becomes ``"Toy Story 1995"``. Spaces are kept as-is.
    """
    return re.sub("[^a-zA-Z0-9 ]", "", title)



In [66]:
# Store a punctuation-free copy of every title in a new "clean_title" column;
# the original "title" column is left untouched.
movies["clean_title"] = movies["title"].apply(clean_title)

In [67]:
movies

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [68]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Build TF-IDF features from single words and adjacent word pairs (ngram_range=(1, 2)).
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Fit on the cleaned titles. `search` uses row positions of this matrix as
# positions into `movies`, so the two must stay in the same order.
tfidf = vectorizer.fit_transform(movies["clean_title"])

## Creating a Search Engine for Our Recommendation System

In [69]:
from sklearn.metrics.pairwise import cosine_similarity

def search(title):
    """Return the movies whose titles best match ``title``, best match first.

    The query is normalised with ``clean_title``, projected into the TF-IDF
    space fitted on ``movies["clean_title"]``, and compared with every movie
    title by cosine similarity.

    Parameters
    ----------
    title : str
        Free-text movie title.

    Returns
    -------
    pandas.DataFrame
        Up to five rows of ``movies`` ordered from most to least similar, so
        ``results.iloc[0]`` is the closest match.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Clamp to the catalogue size: argpartition raises if asked for more
    # elements than the array holds.
    top_n = min(5, similarity.size)

    # argpartition only guarantees that the last `top_n` slots hold the largest
    # scores; their order inside that slice is undefined. Rank them explicitly
    # so the first returned row really is the best match.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates], kind="stable")[::-1]]

    return movies.iloc[ranked]

### Creating an Interface for the Search Bar

In [83]:
# Search-bar demo: with live=True the interface reruns `search` whenever the
# textbox content changes and shows the returned rows as a table.
gg = gr.Interface(
    fn=search,
    inputs=gr.components.Textbox(
        lines=1,
        placeholder="Enter the Name of the Movie.....",
    ),
    outputs=gr.components.Dataframe(
        headers=['movieId', 'title', 'genres', 'clean_title'],
    ),
    examples=['Toy Story', 'Avengers', 'The Accountant', 'Thor', 'The Dictator'],
    title='Search Bar 😺😺😺',
    live=True,
)
gg.launch()

Running on local URL:  http://127.0.0.1:7888

To create a public link, set `share=True` in `launch()`.




### Importing the Ratings Dataset

In [72]:
# Load the per-user ratings (userId, movieId, rating, timestamp); in the run
# recorded below the table has roughly 25 million rows.
ratings = pd.read_csv("ratings.csv")

In [73]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510
...,...,...,...,...
25000090,162541,50872,4.5,1240953372
25000091,162541,55768,2.5,1240951998
25000092,162541,56176,2.0,1240950697
25000093,162541,58559,4.0,1240953434


In [74]:
ratings.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

### Creating a function for finding similar movies liked by other users.

In [75]:
def find_similar_movies(movie_id, min_rating=4, min_share=0.10, top_n=10):
    """Recommend movies favoured by the users who liked ``movie_id``.

    "Liked" means a rating strictly greater than ``min_rating``. Users who
    liked ``movie_id`` are the *similar users*. Each movie they liked gets two
    shares:

    * ``similar`` - fraction of similar users who liked it;
    * ``all`` - fraction of all users (who liked any candidate) who liked it.

    ``score = similar / all`` is high for movies that are disproportionately
    popular among the similar users rather than simply popular overall.

    Parameters
    ----------
    movie_id : int
        ``movieId`` of the movie to start from.
    min_rating : float, optional
        Ratings must be strictly above this value to count as a like.
    min_share : float, optional
        A candidate is kept only if more than this fraction of the similar
        users liked it.
    top_n : int, optional
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``score``, ``title`` and ``genres``, sorted by ``score`` in
        descending order. The queried movie itself is normally included,
        because every similar user liked it.
    """
    # The "liked" mask spans the whole ratings table; compute it once and
    # reuse it for all three filters below.
    liked = ratings["rating"] > min_rating

    similar_users = ratings[(ratings["movieId"] == movie_id) & liked]["userId"].unique()

    # Share of similar users who liked each movie, trimmed to frequent ones.
    similar_user_recs = ratings[ratings["userId"].isin(similar_users) & liked]["movieId"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Share of the general audience that liked each of those candidates.
    all_users = ratings[ratings["movieId"].isin(similar_user_recs.index) & liked]
    all_user_recs = all_users["movieId"].value_counts() / all_users["userId"].nunique()

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    # The frame is indexed by movieId, so join it to the catalogue on that key.
    top = rec_percentages.head(top_n)
    return top.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]

In [76]:
find_similar_movies(162606)

Unnamed: 0,score,title,genres
41860,232.470046,The Accountant (2016),Crime|Drama|Thriller
40872,72.991914,Jason Bourne (2016),Action
47952,69.550465,The Hitman's Bodyguard (2017),Action|Comedy
41190,65.329174,War Dogs (2016),Comedy
39656,63.909291,Snowden (2016),Drama|Thriller
21755,57.751993,"Equalizer, The (2014)",Action|Crime|Thriller
16116,49.236283,"Mechanic, The (2011)",Action|Drama|Thriller
27490,48.362563,Focus (2015),Comedy|Crime|Drama|Romance
19020,47.541535,Jack Reacher (2012),Action|Crime|Thriller
41117,45.504775,Sully (2016),Drama


In [77]:
def on_type(title):
    """Return recommendations for the text typed into the search box.

    Input of five characters or fewer is ignored and ``None`` is returned.
    Otherwise the first row returned by ``search`` is taken as the chosen
    movie and its ``movieId`` is passed to ``find_similar_movies``.
    """
    if len(title) <= 5:
        return None

    matches = search(title)
    chosen_id = matches.iloc[0]["movieId"]
    return find_similar_movies(chosen_id)

## Creating an Interface for the Movie Recommendation System

In [81]:
# Recommendation demo: with live=True the interface reruns `on_type` whenever
# the textbox content changes and shows the returned table.
recom = gr.Interface(
    fn=on_type,
    inputs=gr.components.Textbox(
        lines=1,
        placeholder="Enter the Name of the Movie.....",
    ),
    outputs=gr.components.Dataframe(
        headers=['score', 'title', 'genres'],
    ),
    examples=['Toy Story', 'John Wick', 'The Accountant', 'Evil Dead', 'The Dictator'],
    title='Movie Recommendation System 😺😺😺',
    live=True,
)
# share=True asks Gradio for a public link in addition to the local URL.
recom.launch(share=True)

Running on local URL:  http://127.0.0.1:7886

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


