<a href="https://colab.research.google.com/github/Skmohanty11/Movie_Recommendation_System/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#IMPORTING MODULES
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
#LOADING DATASETS
ratings_data = pd.read_csv('Dataset.csv')
titles_data = pd.read_csv('Movie_Id_Titles.csv')

In [3]:
#MERGING DATASETS
movie_data = pd.merge(ratings_data, titles_data, on='item_id')

In [4]:
#STRUCTURE OF DATASET
movie_data.head()

Unnamed: 0,user_id,item_id,rating,timestamp,title
0,0,50,5,881250949,Star Wars (1977)
1,290,50,5,880473582,Star Wars (1977)
2,79,50,4,891271545,Star Wars (1977)
3,2,50,5,888552084,Star Wars (1977)
4,8,50,5,879362124,Star Wars (1977)


In [5]:
#SHAPE OF DATASET
movie_data.shape

(100003, 5)

In [6]:
#INFORMATION OF DATASET
movie_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100003 entries, 0 to 100002
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   user_id    100003 non-null  int64 
 1   item_id    100003 non-null  int64 
 2   rating     100003 non-null  int64 
 3   timestamp  100003 non-null  int64 
 4   title      100003 non-null  object
dtypes: int64(4), object(1)
memory usage: 4.6+ MB


In [7]:
#CALCULATING AVERAGE RATING FOR EACH MOVIE
ratings = pd.DataFrame(movie_data.groupby('title')['rating'].mean())
ratings['num_of_ratings'] = movie_data.groupby('title')['rating'].count()

In [8]:
#CHECKING AVERAGE RATING DATAFRAME
ratings.head()

Unnamed: 0_level_0,rating,num_of_ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
'Til There Was You (1997),2.333333,9
1-900 (1994),2.6,5
101 Dalmatians (1996),2.908257,109
12 Angry Men (1957),4.344,125
187 (1997),3.02439,41


In [9]:
#CREATING PIVOT TABLE
user_ratings = movie_data.pivot_table(index='user_id', columns='title', values='rating')
user_ratings.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,,,,
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [10]:
#SPARSE MATRIX
user_ratings = user_ratings.fillna(0)

In [11]:
#CALCULATE SIMILARITIES
movies_similarity = cosine_similarity(user_ratings)

MANUAL TESTING

In [12]:
#FUNCTION WHICH RECOMMENDS MOVIES BASED ON A MOVIE NAME THE USER ENTERS
def recommend_movies(movie):
    movie_name = movie.split('(')[0].strip()
    movie_name = movie_name.lower()
    index = titles_data['title'].str.lower().str.contains(movie_name).idxmax()
    similarity = list(enumerate(movies_similarity[index]))
    sort = sorted(similarity, key=lambda x: x[1], reverse=True)
    top_movies = [titles_data['title'][i[0]] for i in sort[1:6]]
    return top_movies

In [15]:
#TEST CASE
title_name = input('Enter Movie Title Name : ')
recommend_movies(title_name)

Enter Movie Title Name : The Dark knight


['Madness of King George, The (1994)',
 'Mary Reilly (1996)',
 'Circle of Friends (1995)',
 'Alien (1979)',
 'Tales from the Hood (1995)']