In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import os
import matplotlib.pyplot as plt

movies = pd.read_csv('movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [2]:
ratings = pd.read_csv('ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [3]:
movies.shape
ratings.shape

(25000095, 4)

In [4]:
movies.isnull().sum()

movieId    0
title      0
genres     0
dtype: int64

In [5]:
ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [6]:
movie_list=[]
year_list=[]
for movie in movies['title']:
    pos1=movie.find('(')
    pos2=movie.find(')')
    year=movie[pos1+1:pos2]
    movie_list.append(movie[0:pos1-1])
    year_list.append(year)
    
del movies['title']   
movies.insert(1,'Name',movie_list)
movies.insert(2,'Year',year_list)
movies.head()

Unnamed: 0,movieId,Name,Year,genres
0,1,Toy Story,1995,Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji,1995,Adventure|Children|Fantasy
2,3,Grumpier Old Men,1995,Comedy|Romance
3,4,Waiting to Exhale,1995,Comedy|Drama|Romance
4,5,Father of the Bride Part II,1995,Comedy


In [7]:
#Creating a Matrix for the movie with respect to the genres
movies = pd.concat([movies, movies['genres'].str.get_dummies(sep='|')], axis=1)
movies.head()

Unnamed: 0,movieId,Name,Year,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story,1995,Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji,1995,Adventure|Children|Fantasy,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men,1995,Comedy|Romance,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale,1995,Comedy|Drama|Romance,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II,1995,Comedy,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [8]:
#used to generate the genre matrix(movies) to calculate cosine similarity
from sklearn.metrics.pairwise import cosine_similarity
movies_genre_matrix = movies.loc[:, "Action":].copy()
movies_genre_matrix.head()

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
cs = cosine_similarity(movies_genre_matrix, movies_genre_matrix)
cs.shape

(62423, 62423)

In [10]:
cs

array([[1.        , 0.77459667, 0.31622777, ..., 0.31622777, 0.        ,
        0.25819889],
       [0.77459667, 1.        , 0.        , ..., 0.        , 0.        ,
        0.33333333],
       [0.31622777, 0.        , 1.        , ..., 0.5       , 0.        ,
        0.        ],
       ...,
       [0.31622777, 0.        , 0.5       , ..., 1.        , 0.        ,
        0.40824829],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.25819889, 0.33333333, 0.        , ..., 0.40824829, 0.        ,
        1.        ]])

In [11]:
name=movies.Name.str.lower()
movies_index = pd.Series(movies.index, index=name).drop_duplicates()
movies_index.head()

Name
toy story                      0
jumanji                        1
grumpier old men               2
waiting to exhale              3
father of the bride part ii    4
dtype: int64

In [12]:
def movie_recommendation(movie_name, sim=cs):
    try:
        index = movies_index[movie_name]

        print('Users who watched',movie_name,'also watched:\n')
        cs_score = list(enumerate(cs[index]))
        cs_score = sorted(cs_score, key=lambda x: x[1], reverse=True)
        cs_score = cs_score[0:11]
        movie= [movie[0] for movie in cs_score]

        result = movies[['movieId','Name','Year','genres']].iloc[movie].drop(index)
        return result
    except:
        print("Sorry...Movie not found")

In [13]:
movie=input("Enter the movie name :")
movie=movie.lower()
movie_recommendation(movie)

Enter the movie name :JUMANJI
Users who watched jumanji also watched:



Unnamed: 0,movieId,Name,Year,genres
59,60,"Indian in the Cupboard, The",1995,Adventure|Children|Fantasy
124,126,"NeverEnding Story III, The",1994,Adventure|Children|Fantasy
986,1009,Escape to Witch Mountain,1975,Adventure|Children|Fantasy
1954,2043,Darby O'Gill and the Little People,1959,Adventure|Children|Fantasy
2003,2093,Return to Oz,1985,Adventure|Children|Fantasy
2071,2161,"NeverEnding Story, The",1984,Adventure|Children|Fantasy
2072,2162,"NeverEnding Story II: The Next Chapter, The",1990,Adventure|Children|Fantasy
2308,2399,Santa Claus: The Movie,1985,Adventure|Children|Fantasy
4790,4896,Harry Potter and the Sorcerer's Stone,a.k.a. Harry Potter and the Philosopher's Stone,Adventure|Children|Fantasy
9557,31447,Magic in the Water,1995,Adventure|Children|Fantasy
