<a href="https://colab.research.google.com/github/AparajitaM05/AparajitaM05/blob/main/movie_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the movie catalogue (columns: movieId, title, genres) and report its
# (rows, columns) shape as the cell output.
data = pd.read_csv('movies.csv')
data.shape

(9125, 3)

In [3]:
# Preview the first five rows of the movie catalogue.
data.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Load the ratings table (columns: userId, movieId, rating, timestamp).
rating = pd.read_csv('ratings.csv')

In [5]:
# (rows, columns) of the ratings table.
rating.shape

(100836, 4)

In [6]:
# Preview the first five ratings.
rating.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [7]:
# Attach every rating to its movie. The inner join on movieId keeps only
# movies that have at least one rating.
# NOTE: this rebinds `data`; re-run the movie-loading cell before re-running
# this one, otherwise the already merged frame is merged with `rating` again.
data = data.merge(rating, how='inner', on='movieId')

In [8]:
# Preview the merged frame: one row per (movie, user rating).
data.head(5)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [11]:
# Keep only title, genres and rating; the two id columns and the timestamp
# are not used by the cells that follow.
data = data.drop(columns=['movieId', 'userId', 'timestamp'])

In [12]:
# Preview the frame after dropping the id and timestamp columns.
data.head(5)

Unnamed: 0,title,genres,rating
0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.0
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.0
2,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.5
3,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2.5
4,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.5


In [15]:
# Collapse the per-user rows into one row per (title, genres) pair holding the
# mean of the remaining column(s), then turn both grouping keys back into
# ordinary columns.
data = pd.pivot_table(data, index=['title', 'genres'], aggfunc='mean').reset_index()
data.head(5)

Unnamed: 0,title,genres,rating
0,'Hellboy': The Seeds of Creation (2004),Action|Adventure|Comedy|Documentary|Fantasy,4.0
1,'Round Midnight (1986),Drama|Musical,3.5
2,'Salem's Lot (2004),Drama|Horror|Mystery|Thriller,5.0
3,'Til There Was You (1997),Drama|Romance,4.0
4,"'burbs, The (1989)",Comedy,3.176471


In [18]:
# The release year is the last space-separated token of the title,
# e.g. "(2004)". Take that token as a new 'year' column; the brackets are
# removed in a later cell.
data['year'] = data['title'].str.split(' ').str[-1]
data.head(5)

Unnamed: 0,title,genres,rating,year
0,'Hellboy': The Seeds of Creation (2004),Action|Adventure|Comedy|Documentary|Fantasy,4.0,(2004)
1,'Round Midnight (1986),Drama|Musical,3.5,(1986)
2,'Salem's Lot (2004),Drama|Horror|Mystery|Thriller,5.0,(2004)
3,'Til There Was You (1997),Drama|Romance,4.0,(1997)
4,"'burbs, The (1989)",Comedy,3.176471,(1989)


In [19]:
# Remove the brackets around the year token: first any ')' characters, then
# any '(' characters, from both ends of each value.
data['year'] = data['year'].str.strip(')').str.strip('(')

In [20]:
# Check that the year column no longer carries brackets.
data.head(5)

Unnamed: 0,title,genres,rating,year
0,'Hellboy': The Seeds of Creation (2004),Action|Adventure|Comedy|Documentary|Fantasy,4.0,2004
1,'Round Midnight (1986),Drama|Musical,3.5,1986
2,'Salem's Lot (2004),Drama|Horror|Mystery|Thriller,5.0,2004
3,'Til There Was You (1997),Drama|Romance,4.0,1997
4,"'burbs, The (1989)",Comedy,3.176471,1989


In [21]:
# Map the leftover non-numeric year tokens to a usable year, then cast the
# column to int. These are hand-picked, per-token fixes; any other non-numeric
# token that is not listed here makes astype(int) raise.
data['year'] = data['year'].replace({
    '2007-': '2007',
    'Despot': '2016',
    'Things': '2016',
    '1975-1979': '1979',
    'Road': '2002',
    '': '1996',
}).astype(int)
data.head()

Unnamed: 0,title,genres,rating,year
0,'Hellboy': The Seeds of Creation (2004),Action|Adventure|Comedy|Documentary|Fantasy,4.0,2004
1,'Round Midnight (1986),Drama|Musical,3.5,1986
2,'Salem's Lot (2004),Drama|Horror|Mystery|Thriller,5.0,2004
3,'Til There Was You (1997),Drama|Romance,4.0,1997
4,"'burbs, The (1989)",Comedy,3.176471,1989


In [22]:
# lets convert the year column into Integer data type
# NOTE: the previous cell already ends with the same astype(int) call, so this
# cast leaves the column unchanged; it is safe to re-run.
data['year'] = data['year'].astype(int)

In [23]:
# Remove the trailing "(year)" marker from every title.
#
# Only a parenthesised year at the very end of the title is removed, e.g.
# "(1995)", "(2007-)" or "(1975-1979)", plus the whitespace around it.
# Dropping the last space-separated token unconditionally would cut the last
# word off titles that carry no year, and would leave the year in place on
# titles that end with a space. Matching the year explicitly also makes this
# cell safe to re-run: a title that has already been cleaned no longer matches.
# \u2013 is an en dash, accepted alongside '-' inside year ranges.
data['title'] = data['title'].str.replace(
    r'\s*\(\d{4}(?:[-\u2013]\d{0,4})?\)\s*$', '', regex=True
)

In [24]:
# Inspect the genres column: each value is a '|'-separated list of genre names.
data['genres']

0       Action|Adventure|Comedy|Documentary|Fantasy
1                                     Drama|Musical
2                     Drama|Horror|Mystery|Thriller
3                                     Drama|Romance
4                                            Comedy
                           ...                     
7067                         Action|Sci-Fi|Thriller
7068                          Action|Crime|Thriller
7069                          Action|Crime|Thriller
7070                                 Comedy|Western
7071                                 Comedy|Musical
Name: genres, Length: 7072, dtype: object

In [26]:
from ipywidgets import interact

In [27]:
# lets create an Interactive Function to get the List of Best Movies from Each Genre

@interact
def genre(Genre = ['Action', 'Adventure', 'Animation','Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
       'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance','Sci-Fi', 'Thriller', 'War', 'Western'], year = 2000):
    """Return the ten best-rated movies of one genre released in or after ``year``.

    Parameters
    ----------
    Genre : str
        Genre name to filter on. The list default is the set of choices that
        ``interact`` offers as a dropdown.
    year : int
        Earliest release year to include (inclusive).

    Returns
    -------
    pandas.DataFrame
        Up to ten rows with columns ``title``, ``rating`` and ``year``, sorted
        by ``rating`` in descending order and re-indexed from 0.
    """
    # One row per (movie, single genre): split the '|' list, then explode it.
    per_genre = data.assign(genres=data['genres'].str.split('|')).explode('genres')

    wanted = (per_genre['genres'] == Genre) & (per_genre['year'] >= year)
    ranked = per_genre.loc[wanted, ['title', 'rating', 'year']].sort_values(
        by='rating', ascending=False
    )
    return ranked.reset_index(drop=True).head(10)

interactive(children=(Dropdown(description='Genre', options=('Action', 'Adventure', 'Animation', 'Children', '…

In [29]:
from mlxtend.preprocessing import TransactionEncoder

In [35]:
# One-hot encode the genre lists: one row per movie, one True/False column per
# genre label found in the data (column names come from the fitted encoder).
te = TransactionEncoder()
genres = pd.DataFrame(
    te.fit_transform(data['genres'].str.split('|')),
    columns=te.columns_,
)

genres.head(2)

Unnamed: 0,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,False,True,True,False,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False


In [36]:
# Turn the True/False genre flags into 1/0 integers, then place the movie
# title in front as the first column (aligned on the shared row index).
genres = genres.astype(int)
genres.insert(loc=0, column='title', value=data['title'])
genres.head(2)

Unnamed: 0,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,'Hellboy': The Seeds of Creation,0,1,1,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,'Round Midnight,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0


In [37]:
# Flip the table so that genres are rows and movies are columns, then label
# every column with its movie title. The titles come from the first row, which
# stays in the frame for now.
genres = genres.T
genres.columns = genres.iloc[0].tolist()

In [38]:
# Preview the transposed table; the first row still holds the titles.
genres.head(2)

Unnamed: 0,'Hellboy': The Seeds of Creation,'Round Midnight,'Salem's Lot,'Til There Was You,"'burbs, The",'night Mother,(500) Days of Summer,*batteries not included,...And Justice for All,1-900 (06),...,Zoom,Zootopia,Zulu,Zulu.1,[REC],eXistenZ,xXx,xXx: State of the Union,¡Three Amigos!,À nous la liberté (Freedom for Us)
title,'Hellboy': The Seeds of Creation,'Round Midnight,'Salem's Lot,'Til There Was You,"'burbs, The",'night Mother,(500) Days of Summer,*batteries not included,...And Justice for All,1-900 (06),...,Zoom,Zootopia,Zulu,Zulu,[REC],eXistenZ,xXx,xXx: State of the Union,¡Three Amigos!,À nous la liberté (Freedom for Us)
(no genres listed),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
# Remove the helper 'title' row (its values are now the column labels) and
# cast the remaining 0/1 genre flags to int.
# The row is dropped by label rather than by position, and a missing label is
# ignored, so re-running this cell cannot delete a real genre row.
genres = genres.drop(index='title', errors='ignore').astype(int)

In [40]:
# Preview the final genre-by-movie 0/1 matrix.
genres.head(3)

Unnamed: 0,'Hellboy': The Seeds of Creation,'Round Midnight,'Salem's Lot,'Til There Was You,"'burbs, The",'night Mother,(500) Days of Summer,*batteries not included,...And Justice for All,1-900 (06),...,Zoom,Zootopia,Zulu,Zulu.1,[REC],eXistenZ,xXx,xXx: State of the Union,¡Three Amigos!,À nous la liberté (Freedom for Us)
(no genres listed),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Action,1,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,1,1,1,0,0
Adventure,1,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0


In [41]:
@interact
def movie_recommendation(movie = list(genres.columns)):
    """Return the five movies whose genre profile correlates best with ``movie``.

    Parameters
    ----------
    movie : str
        Title of the reference movie; must be a column label of ``genres``.
        The list default is the set of choices offered by ``interact``.

    Returns
    -------
    pandas.Series
        Up to five correlation scores indexed by movie title, highest first.
        The reference title itself is never part of the result.
    """
    target = genres[movie]
    if isinstance(target, pd.DataFrame):
        # Several movies share this title, so the lookup returned one column
        # per movie. Use the first one so corrwith compares against a single
        # genre vector.
        target = target.iloc[:, 0]

    similar_movie = genres.corrwith(target)

    # Drop the reference movie by label instead of assuming it sorts first:
    # any movie with an identical genre set also scores 1.0, so the reference
    # is not guaranteed to be the top row. Other movies carrying exactly the
    # same title are removed as well.
    similar_movie = similar_movie.drop(labels=movie, errors='ignore')
    return similar_movie.nlargest(5)

interactive(children=(Dropdown(description='movie', options=("'Hellboy': The Seeds of Creation", "'Round Midni…