In [1]:
# Popularity Based Recommendation System
# Content Based Recommendation System
# Collaborative Based Recommendation System
import pandas as pd
import numpy as np
import difflib
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Silence pandas' SettingWithCopyWarning for the rest of the session
# (a value of None switches the chained-assignment check off).
pd.set_option("mode.chained_assignment", None)

In [2]:
# Read the movie metadata CSV (looked up relative to the working directory)
# into `df`; the trailing bare name makes the notebook render the frame.
df = pd.read_csv(
    filepath_or_buffer="movies.csv",
)
df

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,4798,220000,Action Crime Thriller,,9367,united states\u2013mexico barrier legs arms pa...,es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,...,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,"[{'name': 'Robert Rodriguez', 'gender': 0, 'de...",Robert Rodriguez
4799,4799,9000,Comedy Romance,,72766,,en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,...,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,Edward Burns Kerry Bish\u00e9 Marsha Dietlein ...,"[{'name': 'Edward Burns', 'gender': 2, 'depart...",Edward Burns
4800,4800,0,Comedy Drama Romance TV Movie,http://www.hallmarkchannel.com/signedsealeddel...,231617,date love at first sight narration investigati...,en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,...,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,"Signed, Sealed, Delivered",7.0,6,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,"[{'name': 'Carla Hetland', 'gender': 0, 'depar...",Scott Smith
4801,4801,0,,http://shanghaicalling.com/,126186,,en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,...,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,Daniel Henney Eliza Coupe Bill Paxton Alan Ruc...,"[{'name': 'Daniel Hsia', 'gender': 2, 'departm...",Daniel Hsia


In [3]:
# Text columns whose contents describe a movie; they are merged into a single
# document per movie and vectorized below.
features = ["genres", "keywords", "tagline", "cast", "director"]

# Replace missing values with empty strings on a fresh frame.
# Calling `fillna(..., inplace=True)` on a column pulled out of a slice of `df`
# is chained assignment: it is not guaranteed to update `df_selected`, and under
# pandas Copy-on-Write it has no effect at all, leaving NaN in the text below.
df_selected = df[features].fillna('')

# Combine all 5 columns into one space-separated string per movie.
combined_features = (
    df_selected["genres"] + ' '
    + df_selected["keywords"] + ' '
    + df_selected["tagline"] + ' '
    + df_selected["cast"] + ' '
    + df_selected["director"]
)

print(combined_features)

# Vectorize text: one TF-IDF row per movie, in the same row order as `df`.
vectorizer = TfidfVectorizer()
f_vector = vectorizer.fit_transform(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [4]:
# Pairwise cosine similarity between the TF-IDF rows: entry [a, b] is the
# similarity between row a and row b of `f_vector`.
similarity = cosine_similarity(X=f_vector)
print(similarity)

[[1.         0.07219487 0.037733   ... 0.         0.         0.        ]
 [0.07219487 1.         0.03281499 ... 0.03575545 0.         0.        ]
 [0.037733   0.03281499 1.         ... 0.         0.05389661 0.        ]
 ...
 [0.         0.03575545 0.         ... 1.         0.         0.02651502]
 [0.         0.         0.05389661 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02651502 0.         1.        ]]


In [5]:
# Ask for a movie title, resolve it to the closest title in `df`, and print the
# titles of the movies whose feature vectors are most similar to it.

# How many suggestions to print.
n_suggestions = 29

# Input (surrounding whitespace is dropped so it cannot skew the fuzzy match)
movie_name = input("Enter movie name:").strip()

# List of all movie titles to match against
titles = df["title"].tolist()

# Closest titles, best first; difflib returns an empty list when no title is
# similar enough to the input.
find_close_match = difflib.get_close_matches(movie_name, titles)
print(find_close_match)

if not find_close_match:
    # Nothing to recommend from: report it instead of failing on [0].
    print("No movie found matching:", movie_name)
else:
    close_match = find_close_match[0]
    print(close_match)

    # Row position of the matched title. `similarity` is built from the rows of
    # `df` in order, so the position (not a stored id column) is the right key.
    movie_index = np.flatnonzero((df["title"] == close_match).to_numpy())[0]
    print(movie_index)

    # (row position, similarity score) pairs for the matched movie
    similarity_score = list(enumerate(similarity[movie_index]))

    # Highest similarity first; the matched movie itself normally ranks at the top.
    sorted_similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    # Print the titles of the top matches only, looked up by row position.
    print("Movies suggested are:")
    title_column = df["title"]
    top_matches = sorted_similarity_score[:n_suggestions]
    for rank, (index_m, _score) in enumerate(top_matches, start=1):
        print(rank, '.', title_column.iloc[index_m])

Enter movie name: Harry Porter


['Party Monster', 'Harry Brown']
Party Monster
3639
Movies suggested are:
1 . Party Monster
2 . Shattered Glass
3 . Home Alone 2: Lost in New York
4 . Barry Munday
5 . #Horror
6 . Kids
7 . Neighbors 2: Sorority Rising
8 . Trees Lounge
9 . The Game
10 . Boys Don't Cry
11 . Scooby-Doo 2: Monsters Unleashed
12 . Valley of the Heart's Delight
13 . Antibirth
14 . How to Be Single
15 . Saved!
16 . The Brown Bunny
17 . Carrie
18 . The Ring Two
19 . The 5th Wave
20 . Home Alone
21 . Mommie Dearest
22 . My Week with Marilyn
23 . The Equalizer
24 . Mean Creek
25 . What Lies Beneath
26 . Vampires
27 . Anne of Green Gables
28 . The Interview
29 . The Big Bounce
