# Movie Recommendation System using Collaborative Filtering System

In [1]:
# importing libraries
import numpy as np 
import pandas as pd 

In [2]:
# loading Datasets
movies = pd.read_csv('movies_Dataset.csv')
ratings = pd.read_csv('ratings_Dataset.csv')

In [3]:
# display movies table
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# display ratings table
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
# counting unique user id in ratings
ratings['userId'].value_counts().shape

(610,)

In [6]:

x = ratings['userId'].value_counts()>500
y = x[x].index
y.shape

(43,)

In [7]:
ratings = ratings[ratings['userId'].isin(y)]
ratings.shape

(40054, 4)

In [8]:
movie_details = movies.merge(ratings).drop(['timestamp'], axis = 1)
movie_details.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,18,3.5
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,64,4.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,68,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,89,3.0


In [9]:
rating_number = movie_details.groupby('title')['rating'].count().reset_index()
rating_number.rename(columns={'rating':'total rating'}, inplace=True)
rating_number.head()

Unnamed: 0,title,total rating
0,'71 (2014),1
1,"'burbs, The (1989)",11
2,'night Mother (1986),1
3,(500) Days of Summer (2009),16
4,*batteries not included (1987),5


In [10]:
dataFrame = movie_details.merge(rating_number)
dataFrame.head()

Unnamed: 0,movieId,title,genres,userId,rating,total rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,18,3.5,36
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,4.0,36
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,64,4.0,36
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,68,2.5,36
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,89,3.0,36


In [11]:
dataFrame.drop(columns=['total rating'], inplace=True)
dataFrame.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,18,3.5
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,64,4.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,68,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,89,3.0


In [12]:
dataFrame['rating'] = dataFrame['rating'].astype(int)
dataFrame.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,18,3
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,4
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,64,4
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,68,2
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,89,3


In [13]:
# arranging table with index title, columns userid and values rating to display movies rating of different users
movies_pivot = dataFrame.pivot_table(columns='userId', index='title', values='rating').fillna(0)
movies_pivot.head(50)

userId,18,19,28,64,68,89,91,105,111,140,...,534,555,561,590,599,600,603,606,608,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
"'burbs, The (1989)",0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,0.0,5.0,4.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
'night Mother (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(500) Days of Summer (2009),4.0,0.0,0.0,0.0,4.0,2.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0
*batteries not included (1987),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0
...And Justice for All (1979),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00 Schneider - Jagd auf Nihil Baxter (1994),4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 (1979),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Cent Pistol (2015),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Cloverfield Lane (2016),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [14]:
# importing csr_matrix from scipy
from scipy.sparse import csr_matrix
movies_sparse = csr_matrix(movies_pivot)

In [15]:
# importing NearestNeighbors from sklearn
from sklearn.neighbors import NearestNeighbors
movies_model = NearestNeighbors(n_neighbors=10, algorithm='brute', metric='cosine')

In [16]:
movies_model.fit(movies_sparse)

NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=10)

In [17]:
dataFrame.drop(columns = ['genres', 'userId', 'rating'], inplace=True)
dataFrame.drop_duplicates(inplace=True)
dataFrame.to_csv('movieLists.csv',index=False)

In [18]:
distances,suggestions = movies_model.kneighbors(movies_pivot.iloc[540,:].values.reshape(1,-1))
distances

array([[0.        , 0.19647516, 0.23415076, 0.23536673, 0.23639107,
        0.23752609, 0.26928801, 0.27493843, 0.28292374, 0.28380882]])

In [19]:
suggestions

array([[ 540, 4398, 4325, 7461, 5478, 2346, 1838, 5903, 1503, 5085]],
      dtype=int64)

In [20]:
movie_DataFrame = dataFrame.copy()
arr = []
for i in movie_DataFrame['title']:
    arr.append(i.split(' (')[0])
movie_DataFrame['title'] = arr

In [21]:
for i in range(len(suggestions)):
    print(movies_pivot.index[suggestions[i]])

Index(['Atlantis: The Lost Empire (2001)', 'Lilo & Stitch (2002)',
       'Lemony Snicket's A Series of Unfortunate Events (2004)',
       'The Scorpion King (2002)', 'One, The (2001)', 'Enchanted (2007)',
       'Daredevil (2003)', 'Prince of Egypt, The (1998)',
       'Chronicles of Riddick, The (2004)', 'Mulan (1998)'],
      dtype='object', name='title')


In [22]:
def recommend_movies(movie_name):
    movie_id = movie_DataFrame[movie_DataFrame['title'] == movie_name].drop_duplicates('title')['movieId'].values[0]
    durations,suggestions = movies_model.kneighbors(movies_pivot.iloc[movie_id,:].values.reshape(1,-1))
    
    
    for i in range(len(suggestions)):
        return (movies_pivot.index[suggestions[i]])

In [23]:
recommendation_user = recommend_movies("Ace Ventura: When Nature Calls")
for i in recommendation_user:
    print(i)

Asterix at the Olympic Games (Astérix aux jeux olympiques) (2008)
Freaks of Nature (2015)
Free Willy 2: The Adventure Home (1995)
Mars Needs Moms (2011)
Mirror Mirror (2012)
Asterix & Obelix vs. Caesar (Astérix et Obélix contre César) (1999)
Asterix & Obelix: Mission Cleopatra (Astérix & Obélix: Mission Cléopâtre) (2002)
The Girl with All the Gifts (2016)
Asterix in America (a.k.a Asterix Conquers America) (Astérix et les Indiens) (1994)
Legend of the Guardians: The Owls of Ga'Hoole (2010)


In [24]:
recommendation_user = recommend_movies("Hackers")
for i in recommendation_user:
    print(i)

Aces: Iron Eagle III (1992)
The Hunger (1983)
Fly II, The (1989)
Toxic Avenger Part III: The Last Temptation of Toxie, The (1989)
Carnosaur 3: Primal Species (1996)
Friday the 13th Part 3: 3D (1982)
Out-of-Towners, The (1999)
Man with the Golden Arm, The (1955)
Amos & Andrew (1993)
Toxic Avenger, Part II, The (1989)
