<a href="https://colab.research.google.com/github/Krish-002/Movie-Recommendation-Engine/blob/main/Movie_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the movie catalogue from movies.csv in the working directory.
movies = pd.read_csv("movies.csv")

In [None]:
# Preview the loaded movies table.
movies


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


In [None]:
import re
# Strip every character that is not an ASCII letter, a digit or a space from a movie title.
def clean_title(title):
  """Return `title` with all characters other than a-z, A-Z, 0-9 and space removed."""
  special_chars = re.compile("[^a-zA-Z0-9 ]")
  return special_chars.sub("", title)



In [None]:
# Store the cleaned form of every title in a new "cleaned_title" column.
movies["cleaned_title"] = movies["title"].map(clean_title)

In [None]:
# Preview the table again to check the cleaned_title column.
movies


Unnamed: 0,movieId,title,genres,cleaned_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF vectorizer over n-grams of length 1 to 3 (ngram_range=(1,3)).
vectorizer = TfidfVectorizer(ngram_range=(1,3))

# Learn the vocabulary from the cleaned titles and keep the resulting TF-IDF matrix;
# search() compares query vectors against it.
tfidf = vectorizer.fit_transform(movies["cleaned_title"])

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, top_n=5):
  """Return the movies whose cleaned titles are most similar to `title`.

  The query is cleaned with clean_title(), vectorized with the fitted
  `vectorizer`, and compared against every row of `tfidf` by cosine similarity.

  Parameters
  ----------
  title : str
      Free-text movie title to look up.
  top_n : int, default 5
      Maximum number of rows to return.

  Returns
  -------
  pandas.DataFrame
      Up to `top_n` rows of `movies`, ordered from most to least similar.
  """
  query_vec = vectorizer.transform([clean_title(title)])
  similarity = cosine_similarity(query_vec, tfidf).flatten()

  # Never ask for more rows than exist; argpartition raises if kth is out of bounds.
  k = min(top_n, similarity.shape[0])
  if k <= 0:
    return movies.iloc[[]]

  # argpartition only guarantees that the last k positions hold the k largest
  # scores; it does not order them, so sort that small slice explicitly.
  top = np.argpartition(similarity, -k)[-k:]
  top = top[np.argsort(similarity[top])[::-1]]

  return movies.iloc[top]


In [None]:
import ipywidgets as widgets
from IPython.display import display

# Text box the user types a movie title into.
# NOTE(review): the name `input` shadows Python's builtin input() from here on.
input = widgets.Text(value="", description="Movie Title: ", disabled=False)

# Output area that the search results are rendered into.
list_of_movies = widgets.Output()


def on_type(data):
    """Refresh the result list whenever the text box value changes.

    `data` is the change dictionary passed by the widget; its "new" entry is
    the current text. The previous results are always cleared, and a search
    is only run once the text is longer than 5 characters.
    """
    with list_of_movies:
        list_of_movies.clear_output()
        query = data["new"]
        if len(query) <= 5:
            return
        display(search(query))


# Call on_type on every change of the text box's value.
input.observe(on_type, names='value')

display(input, list_of_movies)

Text(value='', description='Movie Title: ')

Output()

In [None]:
# Load the user ratings from ratings.csv in the working directory.
rating = pd.read_csv('ratings.csv')

In [None]:
# Preview the loaded ratings table.
rating

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1.147880e+09
1,1,306,3.5,1.147869e+09
2,1,307,5.0,1.147869e+09
3,1,665,5.0,1.147879e+09
4,1,899,3.5,1.147869e+09
...,...,...,...,...
494376,3397,784,2.0,1.058077e+09
494377,3397,785,3.0,1.058076e+09
494378,3397,788,3.5,1.058077e+09
494379,3397,832,2.5,1.058079e+09


In [None]:
# Inspect the dtype of each ratings column.
rating.dtypes

userId         int64
movieId        int64
rating       float64
timestamp    float64
dtype: object

In [None]:
def find_similar_movies(movie_id):
  """Look up `movie_id` in the `movies` table.

  Currently this only returns the row(s) of `movies` whose "movieId" equals
  `movie_id`; no similarity computation is performed yet.
  """
  is_requested_movie = movies["movieId"] == movie_id
  return movies.loc[is_requested_movie]

In [None]:
# Example input so this cell runs on a fresh kernel: movieId 1 is
# "Toy Story (1995)" in the movies preview earlier in the notebook.
movie_id = 1

# Unique users who rated the input movie strictly above 4.
# The ratings frame is named `rating` (singular); a single .loc selection
# replaces the chained rating[...]["userId"] indexing.
liked_input_movie = (rating["movieId"] == movie_id) & (rating["rating"] > 4)
users_that_like_the_input_movie = rating.loc[liked_input_movie, "userId"].unique()