## Movie Recommendation using Collaborative Filtering

In [2]:
## Import necessary libraries 
import os
import pandas as pd

In [10]:
# Read the movie metadata CSV into a DataFrame and show its (rows, columns)
data = pd.read_csv(
    filepath_or_buffer='data/movies_metadata.csv',
    low_memory=False,
)
data.shape

(45466, 24)

In [11]:
# Preview the first five rows of the dataset
data.head(n=5)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [12]:
# Count the missing entries in each column
data.isna().sum()

adult                        0
belongs_to_collection    40972
budget                       0
genres                       0
homepage                 37684
id                           0
imdb_id                     17
original_language           11
original_title               0
overview                   954
popularity                   5
poster_path                386
production_companies         3
production_countries         3
release_date                87
revenue                      6
runtime                    263
spoken_languages             6
status                      87
tagline                  25054
title                        6
video                        6
vote_average                 6
vote_count                   6
dtype: int64

In [14]:
# Mean of the vote_average column over every movie in the dataset
meanvote = data.loc[:, 'vote_average'].mean()
meanvote

5.618207215134185

In [15]:
# Minimum-votes cutoff: the 90th percentile of vote_count
minvote = data.loc[:, 'vote_count'].quantile(q=0.90)
minvote

160.0

In [16]:
# Keep only the movies whose vote_count reaches the cutoff.
# Filter first and copy afterwards: this duplicates just the selected rows
# rather than the whole frame, and still yields an independent DataFrame, so
# columns added to filter_movies later do not touch `data`.
filter_movies = data.loc[data['vote_count'] >= minvote].copy()
filter_movies.shape

(4555, 24)

In [21]:
# Compute the weighted average using the formula 
def weighted_rating(x, min_vote=minvote, mean_vote=meanvote):
    """Return the IMDB-style weighted rating for a movie record.

    The score blends the record's own average vote with a prior mean vote:

        v / (v + m) * R  +  m / (v + m) * C

    where v = x['vote_count'], R = x['vote_average'], m = min_vote and
    C = mean_vote.

    Parameters
    ----------
    x : object indexable by 'vote_count' and 'vote_average'
        For example one DataFrame row, as passed by ``apply(..., axis=1)``.
    min_vote : number, defaults to the module-level ``minvote``
        Weight given to the prior mean.
    mean_vote : number, defaults to the module-level ``meanvote``
        Prior mean that the score is pulled towards.

    Returns
    -------
    The weighted rating, in the same numeric form as the inputs.
    """
    count, rating = x['vote_count'], x['vote_average']
    total = count + min_vote

    # Shares of the final score contributed by the record's own rating and
    # by the prior mean; the two shares sum to one.
    own_share = count / total
    prior_share = min_vote / total

    return own_share * rating + prior_share * mean_vote

In [22]:
# Score every qualifying movie. weighted_rating only looks columns up by name
# and combines them arithmetically, so handing it the whole frame computes all
# rows in one vectorized pass instead of one Python call per row via
# apply(axis=1). The resulting 'score' column holds the same values.
filter_movies['score'] = weighted_rating(filter_movies)

In [23]:
# Rank the movies from the highest to the lowest weighted score
filter_movies.sort_values(by='score', ascending=False)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,score
314,False,,25000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,278,tt0111161,en,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,...,2.834147e+07,142.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.5,8358.0,8.445869
834,False,"{'id': 230, 'name': 'The Godfather Collection'...",6000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",http://www.thegodfather.com/,238,tt0068646,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",...,2.450664e+08,175.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An offer you can't refuse.,The Godfather,False,8.5,6024.0,8.425439
10309,False,,13200000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,19404,tt0112870,hi,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...",...,1.000000e+08,190.0,"[{'iso_639_1': 'hi', 'name': 'हिन्दी'}]",Released,Come... Fall In Love,Dilwale Dulhania Le Jayenge,False,9.1,661.0,8.421453
12481,False,"{'id': 263, 'name': 'The Dark Knight Collectio...",185000000,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",http://thedarkknight.warnerbros.com/dvdsite/,155,tt0468569,en,The Dark Knight,Batman raises the stakes in his war on crime. ...,...,1.004558e+09,152.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Why So Serious?,The Dark Knight,False,8.3,12269.0,8.265477
2843,False,,63000000,"[{'id': 18, 'name': 'Drama'}]",http://www.foxmovies.com/movies/fight-club,550,tt0137523,en,Fight Club,A ticking-time-bomb insomniac and a slippery s...,...,1.008538e+08,139.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Mischief. Mayhem. Soap.,Fight Club,False,8.3,9678.0,8.256385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9710,False,"{'id': 43072, 'name': 'The Mask Collection', '...",84000000,"[{'id': 14, 'name': 'Fantasy'}, {'id': 35, 'na...",,10214,tt0362165,en,Son of the Mask,"Tim Avery, an aspiring cartoonist, finds himse...",...,0.000000e+00,94.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Who's next?,Son of the Mask,False,3.6,346.0,4.238168
12911,False,,25000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",http://www.disastermovie.net/,13805,tt1213644,en,Disaster Movie,"In DISASTER MOVIE, the filmmaking team behind ...",...,1.410928e+07,87.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Your favorite movies are going to be destroyed.,Disaster Movie,False,3.1,250.0,4.082715
3471,False,,44000000,"[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...",,5491,tt0185183,en,Battlefield Earth,"In the year 3000, man is no match for the Psyc...",...,2.140000e+07,118.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Take Back The Planet,Battlefield Earth,False,3.0,259.0,3.999793
11557,False,,20000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,9760,tt0799949,en,Epic Movie,"When Edward, Peter, Lucy and Susan each follow...",...,8.686556e+07,86.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,We know it's big. We measured.,Epic Movie,False,3.2,334.0,3.983225
