# Movie Recommendation System

### Importing Libraries

In [79]:
import pandas as pd
from math import sqrt
import numpy as np
import random
import matplotlib.pyplot as plt

### Reading the dataset

In [80]:
movies = pd.read_csv(r"movies.dat", sep='::', engine='python', encoding='latin1',names = ['MovieID','Title','Genre'])
ratings = pd.read_csv(r"ratings.dat.zip",sep='::', engine='python',encoding='latin1')
ratings.columns =['UserID', 'MovieID', 'Rating', 'Timestamp']
users = pd.read_csv(r"users.dat",sep='::',engine='python',encoding='latin1')
users.columns =['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

In [81]:
movies.head()

Unnamed: 0,MovieID,Title,Genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [82]:
users.head()

Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,2,M,56,16,70072
1,3,M,25,15,55117
2,4,M,45,7,2460
3,5,M,25,20,55455
4,6,F,50,9,55117


In [83]:
ratings.head()

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,661,3,978302109
1,1,914,3,978301968
2,1,3408,4,978300275
3,1,2355,5,978824291
4,1,1197,3,978302268


## Data Preprocessing and Data Cleaning:

### Splitting the genres and years from the title.

In [84]:
movies['Year'] = movies.Title.str.extract('(\(\d\d\d\d\))',expand=False)
movies['Year'] = movies.Year.str.extract('(\d\d\d\d)',expand=False)
movies['Title'] = movies.Title.str.replace('(\(\d\d\d\d\))', '')
movies['Title'] = movies['Title'].apply(lambda x: x.strip())
movies['Genre'] = movies.Genre.str.split('|')
movies.head()

Unnamed: 0,MovieID,Title,Genre,Year
0,1,Toy Story (1995),"[Animation, Children's, Comedy]",1995
1,2,Jumanji (1995),"[Adventure, Children's, Fantasy]",1995
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",1995
3,4,Waiting to Exhale (1995),"[Comedy, Drama]",1995
4,5,Father of the Bride Part II (1995),[Comedy],1995


### Encoding the genres

In [85]:
movies_genres = movies.copy()
for i, row in movies.iterrows():
    for genre in row['Genre']:
        movies_genres.at[i, genre] = 1
movies_genres = movies_genres.fillna(0)
movies_genres.head()

Unnamed: 0,MovieID,Title,Genre,Year,Animation,Children's,Comedy,Adventure,Fantasy,Romance,...,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
0,1,Toy Story (1995),"[Animation, Children's, Comedy]",1995,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji (1995),"[Adventure, Children's, Fantasy]",1995,0.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",1995,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Waiting to Exhale (1995),"[Comedy, Drama]",1995,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II (1995),[Comedy],1995,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Content-Based Recommendation System

In [86]:
user = random.choice(ratings['UserID'])
print("User ID: ", user)

User ID:  3323


In [87]:
user_input = ratings[ratings['UserID'] == user].reset_index(drop=True)
user_input

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,3323,1256,4,968979010
1,3323,2997,3,971546335
2,3323,1259,4,968978986
3,3323,2064,5,968979119
4,3323,1265,4,968979010
...,...,...,...,...
121,3323,1093,3,968978193
122,3323,562,5,968978942
123,3323,1242,4,969059983
124,3323,1244,3,967920079


In [88]:
input_movies = movies[movies['MovieID'].isin(user_input['MovieID'].tolist())]
user_input = pd.merge(input_movies, user_input)
user_input = user_input.drop('Genre',axis= 1).drop('Year', axis=1).drop('UserID',axis= 1)
user_input

Unnamed: 0,MovieID,Title,Rating,Timestamp
0,39,Clueless (1995),4,968979119
1,111,Taxi Driver (1976),5,969059983
2,223,Clerks (1994),4,968978133
3,235,Ed Wood (1994),5,968979078
4,260,Star Wars: Episode IV - A New Hope (1977),4,967912821
...,...,...,...,...
121,3835,"Crush, The (1993)",4,968985680
122,3836,Kelly's Heroes (1970),5,967920117
123,3846,Easy Money (1983),5,968985645
124,3868,"Naked Gun: From the Files of Police Squad!, Th...",4,968985606


### User's Movies Matrix

In [90]:
user_movies = movies_genres[movies_genres['MovieID'].isin(user_input['MovieID'].tolist())]
user_movies

Unnamed: 0,MovieID,Title,Genre,Year,Animation,Children's,Comedy,Adventure,Fantasy,Romance,...,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
38,39,Clueless (1995),"[Comedy, Romance]",1995,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
109,111,Taxi Driver (1976),"[Drama, Thriller]",1976,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
220,223,Clerks (1994),[Comedy],1994,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
232,235,Ed Wood (1994),"[Comedy, Drama]",1994,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
257,260,Star Wars: Episode IV - A New Hope (1977),"[Action, Adventure, Fantasy, Sci-Fi]",1977,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3765,3835,"Crush, The (1993)",[Thriller],1993,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3766,3836,Kelly's Heroes (1970),"[Action, Comedy, War]",1970,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3776,3846,Easy Money (1983),[Comedy],1983,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3798,3868,"Naked Gun: From the Files of Police Squad!, Th...",[Comedy],1988,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
user_movies = user_movies.reset_index(drop=True)
user_genres = user_movies.drop('MovieID', axis=1).drop('Title',axis= 1).drop('Genre',axis= 1).drop('Year',axis= 1)
user_genres.head()

Unnamed: 0,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


### Ratings By Genres

In [92]:
user_profile = user_genres.transpose().dot(user_input['Rating'])
user_profile

Animation        0.0
Children's       9.0
Comedy         327.0
Adventure       29.0
Fantasy          8.0
Romance         61.0
Drama          183.0
Action          53.0
Crime           40.0
Thriller        48.0
Horror          30.0
Sci-Fi          26.0
Documentary      5.0
War             28.0
Musical         16.0
Mystery          3.0
Film-Noir        0.0
Western         15.0
dtype: float64

### Movies Matrix

In [93]:
genre_table = movies_genres.set_index(movies_genres['MovieID'])
genre_table = genre_table.drop('MovieID',axis= 1).drop('Title',axis= 1).drop('Genre', axis=1).drop('Year',axis=1)
genre_table.head()

Unnamed: 0_level_0,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
MovieID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Recommendation Matrix

In [94]:
recommendations_df = ((genre_table*user_profile).sum(axis=1))/(user_profile.sum())
recommendations_df.head()

MovieID
1    0.381385
2    0.052213
3    0.440409
4    0.578888
5    0.371169
dtype: float64

In [95]:
recommendations_df = recommendations_df.sort_values(ascending=False)
recommendations_df.head()

MovieID
3266    0.684449
1473    0.684449
1918    0.684449
2002    0.684449
2001    0.684449
dtype: float64

In [96]:
df = movies.loc[~movies['MovieID'].isin(user_input.MovieID)].reset_index(drop=True)
df.loc[df['MovieID'].isin(recommendations_df.head(20).keys())].reset_index(drop=True)

Unnamed: 0,MovieID,Title,Genre,Year
0,195,Something to Talk About (1995),"[Comedy, Drama, Romance]",1995
1,351,"Corrina, Corrina (1994)","[Comedy, Drama, Romance]",1994
2,359,I Like It Like That (1994),"[Comedy, Drama, Romance]",1994
3,496,What Happened Was... (1994),"[Comedy, Drama, Romance]",1994
4,838,Emma (1996),"[Comedy, Drama, Romance]",1996
5,1211,Wings of Desire (Der Himmel über Berlin) (1987),"[Comedy, Drama, Romance]",1987
6,1473,Best Men (1997),"[Action, Comedy, Crime, Drama]",1997
7,1888,Hope Floats (1998),"[Comedy, Drama, Romance]",1998
8,1895,Can't Hardly Wait (1998),"[Comedy, Drama, Romance]",1998
9,1918,Lethal Weapon 4 (1998),"[Action, Comedy, Crime, Drama]",1998
