In [1]:
# Importing libraries
import pandas as pd 
import os 
import numpy as np
import requests


#Load .env
from dotenv import load_dotenv

In [2]:
# Loading access keys 
load_dotenv()

API_KEY = os.getenv("API_KEY")

accept = os.getenv("accept")
Authorization = os.getenv("Authorization")

In [3]:
# Loading dataset from api
path1 = "../Resources/tmdb_movies_list"

metadata = pd.read_csv(path1)

In [4]:
metadata

Unnamed: 0,id,imdb_id,Title,Genre,Plot,Popularity,movie_rating_avg,Poster
0,863,tt0120363,Toy Story 2,"[16, 35, 10751]","Andy heads off to Cowboy Camp, leaving his toy...",111.148,7.591,https://m.media-amazon.com/images/M/MV5BMWM5ZD...
1,10193,tt0435761,Toy Story 3,"[16, 10751, 35]","Woody, Buzz, and the rest of Andy's toys haven...",76.340,7.795,https://m.media-amazon.com/images/M/MV5BMTgxOT...
2,9487,tt0120623,A Bug's Life,"[12, 16, 35, 14, 10751]","On behalf of ""oppressed bugs everywhere,"" an i...",70.368,6.969,https://m.media-amazon.com/images/M/MV5BNThmZG...
3,8587,tt0110357,The Lion King,"[10751, 16, 18]",A young lion prince is cast out of his pride b...,102.851,8.257,https://m.media-amazon.com/images/M/MV5BYTYxNG...
4,585,tt0198781,"Monsters, Inc.","[16, 35, 10751]",Lovable Sulley and his wisecracking sidekick M...,128.295,7.842,https://m.media-amazon.com/images/M/MV5BMTY1NT...
...,...,...,...,...,...,...,...,...
441,11861,tt0113347,How to Make an American Quilt,18,Soon-to-be-wed graduate student Finn Dodd deve...,19.315,6.400,https://m.media-amazon.com/images/M/MV5BNGYwZm...
442,8391,tt0114916,When Night Is Falling,18,A prudish woman working on tenure as a literac...,12.404,6.239,https://m.media-amazon.com/images/M/MV5BNGUxMD...
443,11448,tt0113819,Mighty Aphrodite,35,"When Lenny and his wife, Amanda, adopt a baby,...",15.684,6.722,https://m.media-amazon.com/images/M/MV5BMTZmNj...
444,49133,tt0110299,Lamerica,18,"Fiore, an Italian conman, arrives in post Comm...",8.535,7.200,https://m.media-amazon.com/images/M/MV5BNjlmZm...


# Creating function to get recommendations 

In [5]:
#Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

def matrix_creation(element):
    
    #Replace NaN with an empty string
    metadata[element] = metadata[element].fillna('')

    #Construct the required TF-IDF matrix by fitting and transforming the data
    tfidf_matrix = tfidf.fit_transform(metadata[element])
    
    return tfidf_matrix 

In [6]:
tfidf_matrix = tfidf.fit_transform(metadata["Plot"])

In [7]:
matrix_creation("Plot")

<446x5043 sparse matrix of type '<class 'numpy.float64'>'
	with 10692 stored elements in Compressed Sparse Row format>

In [8]:
indices = pd.Series(metadata.index, index = metadata["Title"])

indices.head()

Title
Toy Story 2       0
Toy Story 3       1
A Bug's Life      2
The Lion King     3
Monsters, Inc.    4
dtype: int64

In [9]:
# Importing linear_kernel from sklearn
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import ndcg_score

# Creating cosine similarity, to compare tfid matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix )

In [10]:
def get_recommendations(title, element, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = indices[title]
    
    # Create tfidf_matrix
    tfidf_matrix = matrix_creation(element)
    
    # Compute the cosine similarity matrix
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    # Set up dataframe
    test = pd.DataFrame({"movie_id":metadata["id"].iloc[movie_indices], "Title":metadata['Title'].iloc[movie_indices], "sim_score": sim_scores, "Genres":metadata['Genre'].iloc[movie_indices], "Plot":metadata['Plot'].iloc[movie_indices]}).reset_index(drop=True)
    
    return tfidf_matrix.toarray()

# Testing recommendation function

In [11]:
get_recommendations('Toy Story', "Plot")


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

# Re-writing recommendation function to also display accuracy

In [12]:
def get_accuracy(title, element, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = indices[title]
    
    
    tfidf_matrix = matrix_creation(element)
    
    # Compute the cosine similarity matrix
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    test = pd.DataFrame({"movie_id":metadata["id"].iloc[movie_indices], "Title":metadata['Title'].iloc[movie_indices], "sim_score": sim_scores, "Genres":metadata['Genre'].iloc[movie_indices], "Plot":metadata['Plot'].iloc[movie_indices]}).reset_index(drop=True)
    
    #Set up api request 
    movie_ID = metadata.loc[metadata["Title"] == title]["id"].values[0]
    print("This is movie_ID:" + str(movie_ID))
    
    url = "https://api.themoviedb.org/3/movie/" + str(movie_ID) + "/recommendations?language=en-US&page=1"
    
    headers = {
        "accept": accept,
        "Authorization": Authorization
    }
    
    #API request for recommendation
    response = requests.get(url, headers=headers).json()


    #Set up list to store api recommendation
    movie_titles = []
    for j in range(0,10):
        movie_titles.append(response["results"][j]['title'])
    
    test1 = pd.DataFrame({"movie_id":metadata["id"].iloc[movie_indices], 
                          "Title":metadata['Title'].iloc[movie_indices],
                          "Genres":metadata['Genre'].iloc[movie_indices],
                          "API_Rec_Titles":movie_titles})
    test_3 = [i for i in list(test1["Title"]) if i in list(test1["API_Rec_Titles"])]
    test_4 = len(test_3)/10
 
    relevance_score = len([i for i in list(test1["Title"]) if i in test1["API_Rec_Titles"]])
    
    # Add relevance score into dataframe
#     test1["Relevance Score"] = relevance_score
#     print(test1)
    print("Precision at 10: " + str(test_4))
    return test1

# Recommendation using Plot

#### Calculation of overall precision @k for plot column 

0.2 + 0.4 + 0.1 + 0.0 + 0.1 + 0.0 + 0.1 + 0.1 +0.7 + 0.0 = 1.7/10

Overall precision at k(10) = 0.17 precision

In [13]:
 get_accuracy('Toy Story', "Plot")

This is movie_ID:862
Precision at 10: 0.2


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
1,10193,Toy Story 3,"[16, 10751, 35]",Toy Story 2
0,863,Toy Story 2,"[16, 35, 10751]",Toy Story 3
188,350,The Devil Wears Prada,"[18, 35]",A Bug's Life
172,7303,Maid in Manhattan,"[35, 18, 10749]",The Lion King
262,278,The Shawshank Redemption,"[18, 80]","Monsters, Inc."
419,5,Four Rooms,35,The Incredibles
395,393559,My Life as a Zucchini,"[16, 35, 18, 10751, 10749]",Up
238,18093,Northanger Abbey,"[10749, 18, 10770]",Finding Nemo
165,8467,Dumb and Dumber,[35],Se7en
285,11674,101 Dalmatians,"[10751, 35]",WALL·E


In [14]:
 get_accuracy('Jumanji', "Plot")

This is movie_ID:8844
Precision at 10: 0.4


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
10,353486,Jumanji: Welcome to the Jungle,"[12, 35, 14]",Jumanji: Welcome to the Jungle
339,257344,Pixels,"[28, 35, 878]",Night at the Museum
91,36557,Casino Royale,"[12, 28, 53]",Zathura: A Space Adventure
414,9091,Sudden Death,28,Mrs. Doubtfire
15,512200,Jumanji: The Next Level,"[12, 35, 14]",Jurassic Park
17,879,Hook,"[12, 14, 35, 10751]",Jumanji: The Next Level
12,6795,Zathura: A Space Adventure,"[878, 12, 10751]",Toy Story
249,1272,Sunshine,"[18, 878, 53]",Hook
176,1091,The Thing,"[27, 9648, 878]",Good Will Hunting
373,44113,Stone,"[18, 53]",Home Alone


In [15]:
 get_accuracy('Grumpier Old Men', "Plot")

This is movie_ID:15602
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
20,11520,Grumpy Old Men,[35],Grumpy Old Men
52,1538,Collateral,"[18, 80, 53]",Toy Story
114,15789,A Goofy Movie,"[10749, 16, 10751, 35, 12]",Splash
163,1624,Liar Liar,[35],The Exorcist
139,1598,Cape Fear,"[80, 53]",It
57,6620,Sabrina,"[35, 10749, 18]",Marriage Italian Style
213,726209,Leave the World Behind,"[18, 9648, 53, 878]",The Devil's Advocate
322,25468,My Dinner with Andre,[18],Anaconda
360,101,Léon: The Professional,"[80, 18, 28]",Some Like It Hot
39,11846,Father of the Bride,"[35, 10749]",I.Q.


In [16]:
 get_accuracy('Waiting to Exhale', "Plot")

This is movie_ID:31357
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
27,239,Some Like It Hot,"[35, 10749, 80]",John Wick
18,489,Good Will Hunting,[18],Venom
104,8005,Robin Hood: Men in Tights,[35],Mean Girls
312,723745,Fight!,[16],The Bodyguard
294,20048,Confessions of a Shopaholic,"[35, 10749]",Downsizing
157,241,Natural Born Killers,"[80, 53, 18]",The Virgin Suicides
333,504608,Rocketman,"[10402, 18]",A Beautiful Mind
321,1541,Thelma & Louise,"[18, 53, 80, 12]",Bad Boys
110,10897,The Little Rascals,"[10749, 35, 10751]",Fatherhood
61,6068,Six Days Seven Nights,"[35, 28, 12, 10749]",To Wong Foo


In [17]:
 get_accuracy('Father of the Bride Part II', "Plot")

This is movie_ID:11862
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
39,11846,Father of the Bride,"[35, 10749]",Father of the Bride
327,4133,Blow,"[80, 18]",Cheaper by the Dozen
398,418879,The Current War,"[18, 36]",A Few Good Men
239,45269,The King's Speech,"[18, 36]",Mad Money
31,10625,Mean Girls,[35],First Knight
376,8874,My Best Friend's Wedding,"[35, 10749]",Inception
5,9806,The Incredibles,"[28, 12, 16, 10751]",The Social Network
288,275,Fargo,"[80, 18, 53]",Suicide Squad
412,11860,Sabrina,35,Big Hero 6
414,9091,Sudden Death,28,Home Alone


In [18]:
 get_accuracy('Heat', "Plot")

This is movie_ID:949
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
60,75656,Now You See Me,"[53, 80]",Casino
255,337674,Elle,"[18, 53]",Scarface
187,102899,Ant-Man,"[878, 28, 12]",Donnie Brasco
414,9091,Sudden Death,28,Se7en
67,22582,Tom and Jerry: The Movie,"[10751, 16, 35]",Taxi Driver
245,782,Gattaca,"[53, 878, 9648, 10749]",Collateral
196,2636,The Specialist,"[28, 53]",GoodFellas
221,795,City of Angels,"[10749, 18, 14]",L.A. Confidential
10,353486,Jumanji: Welcome to the Jungle,"[12, 35, 14]",Kill Bill: Vol. 2
339,257344,Pixels,"[28, 35, 878]",The Untouchables


In [19]:
 get_accuracy('Tom and Huck', "Plot")

This is movie_ID:45325
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
68,5769,Tom Jones,"[12, 35, 10749]",Tom and Jerry: The Movie
65,4477,The Devil's Own,"[80, 53, 18]",Tom Jones
40,11007,Cheaper by the Dozen,"[35, 10751]",Insidious
441,11861,How to Make an American Quilt,18,The Parent Trap
155,6,Judgment Night,"[28, 80, 53]",District 9
369,10895,Pinocchio,"[16, 10751]",A Dog's Purpose
223,118,Charlie and the Chocolate Factory,"[12, 35, 10751, 14]",Back to the Future
357,451,Leaving Las Vegas,"[18, 10749]",The Accountant
327,4133,Blow,"[80, 18]",Tarzan
349,22881,The Blind Side,[18],Oliver & Company


In [20]:
 get_accuracy('Sudden Death', "Plot")

This is movie_ID:9091
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
10,353486,Jumanji: Welcome to the Jungle,"[12, 35, 14]",Timecop
339,257344,Pixels,"[28, 35, 878]",Hard Target
130,8844,Jumanji,"[12, 14, 10751]",Last Action Hero
15,512200,Jumanji: The Next Level,"[12, 35, 14]",The Quest
91,36557,Casino Royale,"[12, 28, 53]",Double Team
138,949,Heat,"[28, 80, 18]",Double Impact
381,670,Oldboy,"[18, 53, 9648, 28]",Universal Soldier
81,9405,Double Team,"[28, 878]",Street Fighter
268,10395,Wolf,"[14, 18, 27, 10749]",Maximum Risk
151,11036,The Notebook,"[10749, 18]",Avatar


In [21]:
 get_accuracy('GoldenEye', "Plot")

This is movie_ID:710
Precision at 10: 0.7


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
90,36669,Die Another Day,"[12, 28, 53]",Tomorrow Never Dies
93,646,Dr. No,"[12, 28, 53]",Goldfinger
94,206647,Spectre,"[28, 12, 53]",The World Is Not Enough
92,709,Licence to Kill,"[12, 28, 53]",Die Another Day
89,36643,The World Is Not Enough,"[12, 28, 53]",Casino Royale
91,36557,Casino Royale,"[12, 28, 53]",Licence to Kill
87,714,Tomorrow Never Dies,"[12, 28, 53]",Dr. No
125,607,Men in Black,"[28, 12, 35, 878]",Spectre
298,112949,Safe Haven,"[10749, 53]",Rocky
440,577,To Die For,18,Toy Story


In [22]:
 get_accuracy('The Shawshank Redemption', "Plot")

This is movie_ID:278
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
203,9972,Lock Up,"[28, 80]",The Godfather
186,8077,Alien³,"[878, 28, 27]",Schindler's List
1,10193,Toy Story 3,"[16, 10751, 35]",The Dark Knight
136,293863,The Age of Adaline,"[10749, 14, 18]",Pulp Fiction
367,10144,The Little Mermaid,"[16, 10751, 14]",The Godfather Part II
123,4347,Atonement,"[18, 10749]",Fight Club
105,36593,Naked Gun 33⅓: The Final Insult,"[35, 80]",Forrest Gump
287,6075,Carlito's Way,"[80, 18, 10749, 53]",The Green Mile
329,341013,Atomic Blonde,"[28, 53]",The Silence of the Lambs
188,350,The Devil Wears Prada,"[18, 35]",Spirited Away


# Combinning genre and plot together into one column

In [23]:
metadata.head()

Unnamed: 0,id,imdb_id,Title,Genre,Plot,Popularity,movie_rating_avg,Poster
0,863,tt0120363,Toy Story 2,"[16, 35, 10751]","Andy heads off to Cowboy Camp, leaving his toy...",111.148,7.591,https://m.media-amazon.com/images/M/MV5BMWM5ZD...
1,10193,tt0435761,Toy Story 3,"[16, 10751, 35]","Woody, Buzz, and the rest of Andy's toys haven...",76.34,7.795,https://m.media-amazon.com/images/M/MV5BMTgxOT...
2,9487,tt0120623,A Bug's Life,"[12, 16, 35, 14, 10751]","On behalf of ""oppressed bugs everywhere,"" an i...",70.368,6.969,https://m.media-amazon.com/images/M/MV5BNThmZG...
3,8587,tt0110357,The Lion King,"[10751, 16, 18]",A young lion prince is cast out of his pride b...,102.851,8.257,https://m.media-amazon.com/images/M/MV5BYTYxNG...
4,585,tt0198781,"Monsters, Inc.","[16, 35, 10751]",Lovable Sulley and his wisecracking sidekick M...,128.295,7.842,https://m.media-amazon.com/images/M/MV5BMTY1NT...


In [24]:
metadata['genre_plot'] = metadata['Genre'].apply(lambda x: x.strip("[]"))


In [25]:
metadata["genre_plot"] = metadata["genre_plot"] + " " + metadata["Plot"]

In [26]:
metadata.head()

Unnamed: 0,id,imdb_id,Title,Genre,Plot,Popularity,movie_rating_avg,Poster,genre_plot
0,863,tt0120363,Toy Story 2,"[16, 35, 10751]","Andy heads off to Cowboy Camp, leaving his toy...",111.148,7.591,https://m.media-amazon.com/images/M/MV5BMWM5ZD...,"16, 35, 10751 Andy heads off to Cowboy Camp, l..."
1,10193,tt0435761,Toy Story 3,"[16, 10751, 35]","Woody, Buzz, and the rest of Andy's toys haven...",76.34,7.795,https://m.media-amazon.com/images/M/MV5BMTgxOT...,"16, 10751, 35 Woody, Buzz, and the rest of And..."
2,9487,tt0120623,A Bug's Life,"[12, 16, 35, 14, 10751]","On behalf of ""oppressed bugs everywhere,"" an i...",70.368,6.969,https://m.media-amazon.com/images/M/MV5BNThmZG...,"12, 16, 35, 14, 10751 On behalf of ""oppressed ..."
3,8587,tt0110357,The Lion King,"[10751, 16, 18]",A young lion prince is cast out of his pride b...,102.851,8.257,https://m.media-amazon.com/images/M/MV5BYTYxNG...,"10751, 16, 18 A young lion prince is cast out ..."
4,585,tt0198781,"Monsters, Inc.","[16, 35, 10751]",Lovable Sulley and his wisecracking sidekick M...,128.295,7.842,https://m.media-amazon.com/images/M/MV5BMTY1NT...,"16, 35, 10751 Lovable Sulley and his wisecrack..."


# Recommendation model using genre_plot

#### Calculation of overall precision @k for genre_plot column 

0.2 + 0.4 + 0.1 + 0.0 + 0.1 + 0.0 + 0.1 + 0.1 +0.7 + 0.0 = 1.7/10

Overall precision at k(10) = 0.17 precision

In [27]:
get_accuracy('Toy Story', "genre_plot")

This is movie_ID:862
Precision at 10: 0.2


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
1,10193,Toy Story 3,"[16, 10751, 35]",Toy Story 2
0,863,Toy Story 2,"[16, 35, 10751]",Toy Story 3
188,350,The Devil Wears Prada,"[18, 35]",A Bug's Life
172,7303,Maid in Manhattan,"[35, 18, 10749]",The Lion King
395,393559,My Life as a Zucchini,"[16, 35, 18, 10751, 10749]","Monsters, Inc."
262,278,The Shawshank Redemption,"[18, 80]",The Incredibles
366,10674,Mulan,"[16, 10751, 12]",Up
419,5,Four Rooms,35,Finding Nemo
223,118,Charlie and the Chocolate Factory,"[12, 35, 10751, 14]",Se7en
113,3170,Bambi,"[16, 18, 10751]",WALL·E


In [28]:
get_accuracy('Jumanji', "genre_plot")

This is movie_ID:8844
Precision at 10: 0.4


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
10,353486,Jumanji: Welcome to the Jungle,"[12, 35, 14]",Jumanji: Welcome to the Jungle
339,257344,Pixels,"[28, 35, 878]",Night at the Museum
15,512200,Jumanji: The Next Level,"[12, 35, 14]",Zathura: A Space Adventure
91,36557,Casino Royale,"[12, 28, 53]",Mrs. Doubtfire
414,9091,Sudden Death,28,Jurassic Park
17,879,Hook,"[12, 14, 35, 10751]",Jumanji: The Next Level
12,6795,Zathura: A Space Adventure,"[878, 12, 10751]",Toy Story
249,1272,Sunshine,"[18, 878, 53]",Hook
182,129,Spirited Away,"[16, 10751, 14]",Good Will Hunting
176,1091,The Thing,"[27, 9648, 878]",Home Alone


In [29]:
get_accuracy('Grumpier Old Men', "genre_plot")

This is movie_ID:15602
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
20,11520,Grumpy Old Men,[35],Grumpy Old Men
52,1538,Collateral,"[18, 80, 53]",Toy Story
114,15789,A Goofy Movie,"[10749, 16, 10751, 35, 12]",Splash
163,1624,Liar Liar,[35],The Exorcist
139,1598,Cape Fear,"[80, 53]",It
57,6620,Sabrina,"[35, 10749, 18]",Marriage Italian Style
39,11846,Father of the Bride,"[35, 10749]",The Devil's Advocate
360,101,Léon: The Professional,"[80, 18, 28]",Anaconda
322,25468,My Dinner with Andre,[18],Some Like It Hot
213,726209,Leave the World Behind,"[18, 9648, 53, 878]",I.Q.


In [30]:
get_accuracy('Waiting to Exhale', "genre_plot")

This is movie_ID:31357
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
27,239,Some Like It Hot,"[35, 10749, 80]",John Wick
18,489,Good Will Hunting,[18],Venom
104,8005,Robin Hood: Men in Tights,[35],Mean Girls
294,20048,Confessions of a Shopaholic,"[35, 10749]",The Bodyguard
110,10897,The Little Rascals,"[10749, 35, 10751]",Downsizing
312,723745,Fight!,[16],The Virgin Suicides
157,241,Natural Born Killers,"[80, 53, 18]",A Beautiful Mind
333,504608,Rocketman,"[10402, 18]",Bad Boys
321,1541,Thelma & Louise,"[18, 53, 80, 12]",Fatherhood
61,6068,Six Days Seven Nights,"[35, 28, 12, 10749]",To Wong Foo


In [31]:
get_accuracy('Father of the Bride Part II', "genre_plot")

This is movie_ID:11862
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
39,11846,Father of the Bride,"[35, 10749]",Father of the Bride
327,4133,Blow,"[80, 18]",Cheaper by the Dozen
398,418879,The Current War,"[18, 36]",A Few Good Men
239,45269,The King's Speech,"[18, 36]",Mad Money
31,10625,Mean Girls,[35],First Knight
376,8874,My Best Friend's Wedding,"[35, 10749]",Inception
412,11860,Sabrina,35,The Social Network
5,9806,The Incredibles,"[28, 12, 16, 10751]",Suicide Squad
288,275,Fargo,"[80, 18, 53]",Big Hero 6
414,9091,Sudden Death,28,Home Alone


In [32]:
get_accuracy('Heat', "genre_plot")

This is movie_ID:949
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
60,75656,Now You See Me,"[53, 80]",Casino
255,337674,Elle,"[18, 53]",Scarface
187,102899,Ant-Man,"[878, 28, 12]",Donnie Brasco
414,9091,Sudden Death,28,Se7en
67,22582,Tom and Jerry: The Movie,"[10751, 16, 35]",Taxi Driver
196,2636,The Specialist,"[28, 53]",Collateral
245,782,Gattaca,"[53, 878, 9648, 10749]",GoodFellas
221,795,City of Angels,"[10749, 18, 14]",L.A. Confidential
339,257344,Pixels,"[28, 35, 878]",Kill Bill: Vol. 2
193,287947,Shazam!,"[28, 35, 14]",The Untouchables


In [33]:
get_accuracy('Tom and Huck', "genre_plot")

This is movie_ID:45325
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
68,5769,Tom Jones,"[12, 35, 10749]",Tom and Jerry: The Movie
65,4477,The Devil's Own,"[80, 53, 18]",Tom Jones
40,11007,Cheaper by the Dozen,"[35, 10751]",Insidious
441,11861,How to Make an American Quilt,18,The Parent Trap
155,6,Judgment Night,"[28, 80, 53]",District 9
369,10895,Pinocchio,"[16, 10751]",A Dog's Purpose
223,118,Charlie and the Chocolate Factory,"[12, 35, 10751, 14]",Back to the Future
357,451,Leaving Las Vegas,"[18, 10749]",The Accountant
327,4133,Blow,"[80, 18]",Tarzan
349,22881,The Blind Side,[18],Oliver & Company


In [34]:
get_accuracy('Sudden Death', "genre_plot")

This is movie_ID:9091
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
339,257344,Pixels,"[28, 35, 878]",Timecop
10,353486,Jumanji: Welcome to the Jungle,"[12, 35, 14]",Hard Target
130,8844,Jumanji,"[12, 14, 10751]",Last Action Hero
91,36557,Casino Royale,"[12, 28, 53]",The Quest
15,512200,Jumanji: The Next Level,"[12, 35, 14]",Double Team
138,949,Heat,"[28, 80, 18]",Double Impact
381,670,Oldboy,"[18, 53, 9648, 28]",Universal Soldier
81,9405,Double Team,"[28, 878]",Street Fighter
418,1408,Cutthroat Island,28,Maximum Risk
268,10395,Wolf,"[14, 18, 27, 10749]",Avatar


In [35]:
get_accuracy('GoldenEye', "genre_plot")

This is movie_ID:710
Precision at 10: 0.7


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
90,36669,Die Another Day,"[12, 28, 53]",Tomorrow Never Dies
93,646,Dr. No,"[12, 28, 53]",Goldfinger
94,206647,Spectre,"[28, 12, 53]",The World Is Not Enough
92,709,Licence to Kill,"[12, 28, 53]",Die Another Day
89,36643,The World Is Not Enough,"[12, 28, 53]",Casino Royale
91,36557,Casino Royale,"[12, 28, 53]",Licence to Kill
87,714,Tomorrow Never Dies,"[12, 28, 53]",Dr. No
125,607,Men in Black,"[28, 12, 35, 878]",Spectre
341,13051,Max Payne,"[80, 28, 18, 53]",Rocky
298,112949,Safe Haven,"[10749, 53]",Toy Story


In [36]:
get_accuracy('The Shawshank Redemption', "genre_plot")

This is movie_ID:278
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
203,9972,Lock Up,"[28, 80]",The Godfather
186,8077,Alien³,"[878, 28, 27]",Schindler's List
136,293863,The Age of Adaline,"[10749, 14, 18]",The Dark Knight
1,10193,Toy Story 3,"[16, 10751, 35]",Pulp Fiction
287,6075,Carlito's Way,"[80, 18, 10749, 53]",The Godfather Part II
105,36593,Naked Gun 33⅓: The Final Insult,"[35, 80]",Fight Club
123,4347,Atonement,"[18, 10749]",Forrest Gump
367,10144,The Little Mermaid,"[16, 10751, 14]",The Green Mile
188,350,The Devil Wears Prada,"[18, 35]",The Silence of the Lambs
202,819,Sleepers,"[80, 18, 53]",Spirited Away


# Recommendation using Genre

#### Calculation of overall precision @k for Genre column 

0.0 + 0.2 + 0.0 + 0.1 + 0.0 + 0.2 + 0.1 + 0.1 +0.7 + 0.0 = 1.4/10

Overall precision at k(10) = 0.14 precision

In [37]:
get_accuracy('Toy Story', "Genre")

This is movie_ID:862
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
16,862,Toy Story,"[16, 12, 10751, 35]",Toy Story 2
316,46195,Rio,"[16, 12, 35, 10751]",Toy Story 3
317,4978,An American Tail,"[35, 12, 18, 10751, 16]",A Bug's Life
408,150540,Inside Out,"[16, 10751, 12, 18, 35]",The Lion King
112,25913,Balto II: Wolf Quest,"[10751, 16, 12]","Monsters, Inc."
116,34942,Balto III: Wings of Change,"[10751, 12, 16]",The Incredibles
118,12144,The Land Before Time,"[10751, 16, 12]",Up
280,21032,Balto,"[10751, 16, 12]",Finding Nemo
366,10674,Mulan,"[16, 10751, 12]",Se7en
47,177572,Big Hero 6,"[12, 10751, 16, 28, 35]",WALL·E


In [38]:
get_accuracy('Jumanji', "Genre")

This is movie_ID:8844
Precision at 10: 0.2


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
17,879,Hook,"[12, 14, 35, 10751]",Jumanji: Welcome to the Jungle
223,118,Charlie and the Chocolate Factory,"[12, 35, 10751, 14]",Night at the Museum
281,10137,Stuart Little,"[10751, 14, 35, 12]",Zathura: A Space Adventure
72,381289,A Dog's Purpose,"[12, 35, 14, 10751, 18]",Mrs. Doubtfire
278,9447,Babe: Pig in the City,"[12, 35, 18, 10751, 14]",Jurassic Park
11,1593,Night at the Museum,"[28, 12, 35, 10751, 14]",Jumanji: The Next Level
79,9593,Last Action Hero,"[12, 14, 28, 35, 10751]",Toy Story
330,2493,The Princess Bride,"[12, 10751, 14, 35, 10749]",Hook
2,9487,A Bug's Life,"[12, 16, 35, 14, 10751]",Good Will Hunting
115,8839,Casper,"[14, 35, 10751]",Home Alone


In [39]:
get_accuracy('Grumpier Old Men', "Genre")

This is movie_ID:15602
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
34,1443,The Virgin Suicides,"[18, 10749]",Grumpy Old Men
35,453,A Beautiful Mind,"[18, 10749]",Toy Story
64,2288,Closer,"[18, 10749]",Splash
123,4347,Atonement,"[18, 10749]",The Exorcist
143,4348,Pride & Prejudice,"[18, 10749]",It
144,38684,Jane Eyre,"[18, 10749]",Marriage Italian Style
145,2977,Becoming Jane,"[10749, 18]",The Devil's Advocate
151,11036,The Notebook,"[10749, 18]",Anaconda
237,4584,Sense and Sensibility,"[18, 10749]",Some Like It Hot
240,153,Lost in Translation,"[18, 10749]",I.Q.


In [40]:
get_accuracy('Waiting to Exhale', "Genre")

This is movie_ID:31357
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
31,10625,Mean Girls,[35],John Wick
104,8005,Robin Hood: Men in Tights,[35],Venom
106,30197,The Producers,[35],Mean Girls
108,3034,Young Frankenstein,[35],The Bodyguard
163,1624,Liar Liar,[35],Downsizing
165,8467,Dumb and Dumber,[35],The Virgin Suicides
167,2123,"Me, Myself & Irene",[35],A Beautiful Mind
191,11566,Dave,[35],Bad Boys
218,1621,Trading Places,[35],Fatherhood
283,52449,Bad Teacher,[35],To Wong Foo


In [41]:
get_accuracy('Father of the Bride Part II', "Genre")

This is movie_ID:11862
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
31,10625,Mean Girls,[35],Father of the Bride
104,8005,Robin Hood: Men in Tights,[35],Cheaper by the Dozen
106,30197,The Producers,[35],A Few Good Men
108,3034,Young Frankenstein,[35],Mad Money
163,1624,Liar Liar,[35],First Knight
165,8467,Dumb and Dumber,[35],Inception
167,2123,"Me, Myself & Irene",[35],The Social Network
191,11566,Dave,[35],Suicide Squad
218,1621,Trading Places,[35],Big Hero 6
283,52449,Bad Teacher,[35],Home Alone


In [42]:
get_accuracy('Heat', "Genre")

This is movie_ID:949
Precision at 10: 0.2


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
138,949,Heat,"[28, 80, 18]",Casino
360,101,Léon: The Professional,"[80, 18, 28]",Scarface
203,9972,Lock Up,"[28, 80]",Donnie Brasco
82,9594,Double Impact,"[53, 28, 80, 18]",Se7en
174,9869,Patriot Games,"[18, 28, 53, 80]",Taxi Driver
244,155,The Dark Knight,"[18, 28, 80, 53]",Collateral
341,13051,Max Payne,"[80, 28, 18, 53]",GoodFellas
51,103,Taxi Driver,"[80, 18]",L.A. Confidential
53,769,GoodFellas,"[18, 80]",Kill Bill: Vol. 2
142,311,Once Upon a Time in America,"[18, 80]",The Untouchables


In [43]:
get_accuracy('Tom and Huck', "Genre")

This is movie_ID:45325
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
445,26441,The Big Green,10751,Tom and Jerry: The Movie
19,771,Home Alone,"[35, 10751]",Tom Jones
40,11007,Cheaper by the Dozen,"[35, 10751]",Insidious
70,9820,The Parent Trap,"[35, 10751]",The Parent Trap
285,11674,101 Dalmatians,"[10751, 35]",District 9
295,54004,Passport to Paris,"[35, 10751]",A Dog's Purpose
13,788,Mrs. Doubtfire,"[35, 18, 10751]",Back to the Future
201,9279,Jingle All the Way,"[10751, 35, 12]",The Accountant
7,12,Finding Nemo,"[16, 10751]",Tarzan
119,13700,Home on the Range,"[16, 10751]",Oliver & Company


In [44]:
get_accuracy('Sudden Death', "Genre")

This is movie_ID:9091
Precision at 10: 0.1


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
418,1408,Cutthroat Island,28,Timecop
421,11517,Money Train,28,Hard Target
424,9691,Assassins,28,Last Action Hero
438,11443,Dead Presidents,28,The Quest
189,480530,Creed II,"[18, 28]",Double Team
403,96721,Rush,"[18, 28]",Double Impact
29,245891,John Wick,"[28, 53]",Universal Soldier
85,10861,Maximum Risk,"[28, 53]",Street Fighter
135,1573,Die Hard 2,"[28, 53]",Maximum Risk
156,1991,Death Proof,"[28, 53]",Avatar


In [45]:
get_accuracy('GoldenEye', "Genre")

This is movie_ID:710
Precision at 10: 0.7


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
88,658,Goldfinger,"[12, 28, 53]",Tomorrow Never Dies
89,36643,The World Is Not Enough,"[12, 28, 53]",Goldfinger
90,36669,Die Another Day,"[12, 28, 53]",The World Is Not Enough
91,36557,Casino Royale,"[12, 28, 53]",Die Another Day
92,709,Licence to Kill,"[12, 28, 53]",Casino Royale
93,646,Dr. No,"[12, 28, 53]",Licence to Kill
94,206647,Spectre,"[28, 12, 53]",Dr. No
171,10538,Passenger 57,"[28, 12, 53]",Spectre
219,9802,The Rock,"[28, 12, 53]",Rocky
402,710,GoldenEye,"[12, 28, 53]",Toy Story


In [46]:
get_accuracy('The Shawshank Redemption', "Genre")

This is movie_ID:278
Precision at 10: 0.0


Unnamed: 0,movie_id,Title,Genres,API_Rec_Titles
53,769,GoodFellas,"[18, 80]",The Godfather
142,311,Once Upon a Time in America,"[18, 80]",Schindler's List
262,278,The Shawshank Redemption,"[18, 80]",The Dark Knight
265,9344,Kids,"[18, 80]",Pulp Fiction
266,1646,Freedom Writers,"[80, 18]",The Godfather Part II
327,4133,Blow,"[80, 18]",Fight Club
355,359156,Don't Be Bad,"[80, 18]",Forrest Gump
122,106646,The Wolf of Wall Street,"[80, 18, 35]",The Green Mile
242,8321,In Bruges,"[35, 18, 80]",The Silence of the Lambs
420,9273,Ace Ventura: When Nature Calls,80,Spirited Away


# Testing model

In [47]:
from sklearn.model_selection import train_test_split
X = metadata.Plot
X_tfidf = tfidf.fit_transform(X)

In [52]:
# create target
y = metadata.Popularity

# split the dataset for training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y, test_size=0.5, random_state=42
)

In [54]:
X_train

<223x5043 sparse matrix of type '<class 'numpy.float64'>'
	with 5363 stored elements in Compressed Sparse Row format>

In [55]:
y_train

97     101.734
274     50.853
68      10.112
23      66.297
37      23.316
        ...   
106     17.747
270     37.167
348     17.793
435     43.844
102     30.280
Name: Popularity, Length: 223, dtype: float64

In [53]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report

# Training classifier model 
clf = SGDClassifier()
clf.fit(X_train, y_train)

# # model validation
# y_pred = clf.predict(X_test)

# print(classification_report(y_test, y_pred))

ValueError: Unknown label type: (array([  2.575,   3.442,   6.273,   6.809,   7.189,   7.656,   7.826,
         8.535,   8.973,   8.992,   9.227,  10.112,  10.268,  10.692,
        10.869,  11.533,  11.622,  11.74 ,  11.854,  11.871,  11.914,
        12.214,  12.347,  12.404,  12.406,  12.492,  12.659,  13.107,
        13.277,  13.388,  13.803,  14.363,  14.608,  14.67 ,  14.836,
        15.155,  15.193,  15.454,  15.684,  16.118,  16.131,  16.208,
        16.218,  16.608,  16.834,  17.072,  17.22 ,  17.591,  17.747,
        17.793,  18.225,  18.291,  18.633,  19.055,  19.098,  19.452,
        19.755,  19.858,  20.015,  20.071,  20.095,  20.239,  20.24 ,
        20.373,  20.425,  20.87 ,  20.924,  20.988,  21.055,  21.403,
        21.625,  21.782,  21.978,  22.248,  22.463,  22.833,  22.994,
        23.063,  23.116,  23.217,  23.271,  23.316,  23.493,  24.55 ,
        24.637,  24.956,  24.986,  25.145,  25.567,  25.679,  25.872,
        26.649,  26.953,  27.61 ,  27.931,  27.946,  27.988,  28.118,
        28.889,  29.001,  29.029,  29.305,  29.48 ,  29.628,  29.65 ,
        29.678,  29.726,  29.802,  29.828,  29.834,  29.91 ,  30.132,
        30.28 ,  30.444,  30.482,  30.904,  31.129,  31.353,  31.491,
        31.696,  31.76 ,  32.199,  32.301,  32.696,  32.831,  32.863,
        32.971,  33.307,  33.488,  34.518,  35.443,  35.662,  35.814,
        36.341,  36.359,  36.942,  37.043,  37.167,  37.198,  37.261,
        37.526,  37.737,  39.58 ,  40.022,  40.227,  40.383,  40.822,
        40.867,  41.591,  42.558,  42.694,  43.682,  43.844,  44.139,
        44.394,  45.335,  45.427,  45.898,  46.327,  46.347,  46.918,
        47.096,  48.435,  48.987,  49.327,  49.418,  49.437,  49.979,
        50.853,  51.992,  52.335,  53.114,  53.334,  55.037,  55.608,
        56.393,  58.078,  59.352,  59.663,  60.482,  61.254,  62.233,
        63.568,  63.579,  64.624,  66.297,  67.555,  67.7  ,  68.411,
        68.421,  68.663,  70.233,  71.272,  72.145,  75.727,  76.074,
        76.34 ,  78.63 ,  79.338,  79.403,  91.005,  93.041,  95.507,
        96.432,  96.494,  97.661, 101.365, 101.734, 102.264, 102.703,
       102.994, 103.208, 103.279, 109.346, 113.019, 113.118, 113.832,
       120.367, 121.009, 121.949, 128.295, 151.701, 159.312]),)