# Create a Movie Reccommendation Model 


In [1]:
import pandas as pd 
import os 
import numpy as np

In [2]:
# Read in files 
path1 = "../Resources/movie_metadata"
path2 = "../Resources/user_ratings"

In [3]:
metadata = pd.read_csv(path1)
user_ratings = pd.read_csv(path2)

In [4]:
metadata.head()

Unnamed: 0,MovieID,ImdbID,Title,Rated,Genre_imdb,Plot,Poster
0,1,tt0114709,Toy Story (1995),G,"Animation, Adventure, Comedy",A cowboy doll is profoundly threatened and jea...,https://m.media-amazon.com/images/M/MV5BMDU2ZW...
1,2,tt0113497,Jumanji (1995),PG,"Adventure, Comedy, Family",When two kids find and play a magical board ga...,https://m.media-amazon.com/images/M/MV5BZTk2Zm...
2,3,tt0113228,Grumpier Old Men (1995),PG-13,"Comedy, Romance",John and Max resolve to save their beloved bai...,https://m.media-amazon.com/images/M/MV5BMjQxM2...
3,4,tt0114885,Waiting to Exhale (1995),R,"Comedy, Drama, Romance","Based on Terry McMillan's novel, this film fol...",https://m.media-amazon.com/images/M/MV5BYzcyMD...
4,5,tt0113041,Father of the Bride Part II (1995),PG,"Comedy, Family, Romance",George Banks must deal not only with his daugh...,https://m.media-amazon.com/images/M/MV5BOTEyNz...


In [5]:
user_ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [6]:
#Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

def matrix_creation(element):
    
    #Replace NaN with an empty string
    metadata[element] = metadata[element].fillna('')

    #Construct the required TF-IDF matrix by fitting and transforming the data
    tfidf_matrix = tfidf.fit_transform(metadata[element])
    
    return tfidf_matrix


In [7]:
matrix_creation("Plot")

<200x1846 sparse matrix of type '<class 'numpy.float64'>'
	with 2782 stored elements in Compressed Sparse Row format>

In [8]:
tfidf_matrix = tfidf.fit_transform(metadata["Plot"])

In [9]:
# Create an serie of just title and index 

indices = pd.Series(metadata.index, index = metadata["Title"])

indices.head()

Title
Toy Story (1995)                      0
Jumanji (1995)                        1
Grumpier Old Men (1995)               2
Waiting to Exhale (1995)              3
Father of the Bride Part II (1995)    4
dtype: int64

In [10]:
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import ndcg_score

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [11]:
def get_recommendations(title, element, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = indices[title]
    
    tfidf_matrix = matrix_creation(element)
    
    # Compute the cosine similarity matrix
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    test = pd.DataFrame({"Title":metadata['Title'].iloc[movie_indices], "sim_score": sim_scores, "Genres":metadata['Genre_imdb'].iloc[movie_indices], "Plot":metadata['Plot'].iloc[movie_indices]}).reset_index(drop=False)
    
    # Calculate relevance score 
    movie_title_genre = list(metadata.loc[metadata["Title"] == title]["Genre_imdb"])[0].split(",")
    movie_title_genre_list = [i.strip() for i in movie_title_genre]

    relevance_score = []
    
    # Loop through each row in the reccomendation and calculate relevance score 
    for i in range(0,len(test)):
        
        genre_list = [i.strip() for i in test["Genres"][i].split(",")]
#         print(genre_list)
        relevance_score.append(len([i.strip() for i in genre_list if i.strip() in movie_title_genre_list]))
    
    # Add relevance score into dataframe
    test["Relevance Score"] = relevance_score
    
    
    # Calculate nDCG scores 
    ideal_score = test["Relevance Score"].sort_values(ascending=False)
    
    
    ideal_score = np.asarray([ideal_score])
    relevance_score = np.asarray([relevance_score])
    
    test_ndcg = ndcg_score(ideal_score, relevance_score)
    ideal_ndcg = ndcg_score(ideal_score, ideal_score)
    
    score_ndcg = test_ndcg/ideal_ndcg
    print("Relevance nDCG:" + str(test_ndcg))
    print("Ideal nDCG:" + str(ideal_ndcg))
    print("nDCG score:" + str(score_ndcg))
    
    
#     print(relevance_list)
    # Return the top 10 most similar movies
    return test

In [12]:
# Reccommendation based on genre alone
get_recommendations('Toy Story (1995)', "Genre_imdb")


Relevance nDCG:0.9765640703216261
Ideal nDCG:1.0
nDCG score:0.9765640703216261


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,12,Balto (1995),"(12, 0.9213163608368214)","Animation, Adventure, Drama",An outcast Husky risks his life with other sle...,2
1,44,Pocahontas (1995),"(44, 0.9213163608368214)","Animation, Adventure, Drama",An English soldier and the daughter of an Algo...,2
2,7,Tom and Huck (1995),"(7, 0.5243685970643143)","Adventure, Comedy, Drama",Two best friends witness a murder and embark o...,2
3,18,Ace Ventura: When Nature Calls (1995),"(18, 0.46169969788809245)","Adventure, Comedy, Crime","Ace Ventura, Pet Detective, returns from a spi...",2
4,180,Bushwhacked (1995),"(180, 0.46169969788809245)","Adventure, Comedy, Crime",Framed for murder and on the run from authorit...,2
5,14,Cutthroat Island (1995),"(14, 0.4442098870340997)","Action, Adventure, Comedy",A female pirate and her companion race against...,2
6,95,Muppet Treasure Island (1996),"(95, 0.4442098870340997)","Action, Adventure, Comedy","The Muppets set sail in search of treasure, wi...",2
7,77,White Squall (1996),"(77, 0.42728755949695035)","Adventure, Drama",Teenage boys discover discipline and camarader...,1
8,187,"Cure, The (1995)","(187, 0.42728755949695035)","Adventure, Drama","Erik, a loner, finds a friend in Dexter, an el...",1
9,1,Jumanji (1995),"(1, 0.3926475198283904)","Adventure, Comedy, Family",When two kids find and play a magical board ga...,2


In [13]:
get_recommendations("Balto (1995)", "Genre_imdb")

Relevance nDCG:1.0
Ideal nDCG:1.0
nDCG score:1.0


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,44,Pocahontas (1995),"(44, 1.0)","Animation, Adventure, Drama",An English soldier and the daughter of an Algo...,3
1,0,Toy Story (1995),"(0, 0.9213163608368214)","Animation, Adventure, Comedy",A cowboy doll is profoundly threatened and jea...,2
2,77,White Squall (1996),"(77, 0.5477307089075028)","Adventure, Drama",Teenage boys discover discipline and camarader...,2
3,187,"Cure, The (1995)","(187, 0.5477307089075028)","Adventure, Drama","Erik, a loner, finds a friend in Dexter, an el...",2
4,7,Tom and Huck (1995),"(7, 0.474956865904372)","Adventure, Comedy, Drama",Two best friends witness a murder and embark o...,2
5,128,Beyond Rangoon (1995),"(128, 0.41658302159445093)","Action, Adventure, Drama",Laura is trying to pick up the pieces of her l...,2
6,140,First Knight (1995),"(140, 0.41658302159445093)","Action, Adventure, Drama","Lancelot falls in love with Guinevere, who is ...",2
7,124,Rob Roy (1995),"(124, 0.3690247252956399)","Adventure, Biography, Drama","In 1713 Scotland, Rob Roy MacGregor is wronged...",2
8,119,"Amazing Panda Adventure, The (1995)","(119, 0.3653627192161314)","Adventure, Drama, Family",A young American boy visiting China helps his ...,2
9,141,Free Willy 2: The Adventure Home (1995),"(141, 0.3653627192161314)","Adventure, Drama, Family",Teenager Jesse becomes reunited with Willy two...,2


In [14]:
get_recommendations("Grumpier Old Men (1995)", "Genre_imdb")

Relevance nDCG:1.0
Ideal nDCG:1.0
nDCG score:1.0


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,35,Clueless (1995),"(35, 0.9999999999999999)","Comedy, Romance","Shallow, rich and socially successful Cher is ...",2
1,103,If Lucy Fell (1996),"(103, 0.9999999999999999)","Comedy, Romance",Two NYC roommates have a pact to jump off Broo...,2
2,111,Pie in the Sky (1996),"(111, 0.9999999999999999)","Comedy, Romance",A whimsical saga of a young man whose two life...,2
3,114,Man of the Year (1995),"(114, 0.9999999999999999)","Comedy, Romance",Mockumentary look at Playgirl Magazine centerf...,2
4,152,Mallrats (1995),"(152, 0.9999999999999999)","Comedy, Romance","Both dumped by their girlfriends, two best fri...",2
5,157,Nine Months (1995),"(157, 0.9999999999999999)","Comedy, Romance",When he finds out his longtime girlfriend is p...,2
6,3,Waiting to Exhale (1995),"(3, 0.9005667163400763)","Comedy, Drama, Romance","Based on Terry McMillan's novel, this film fol...",2
7,6,Sabrina (1995),"(6, 0.9005667163400763)","Comedy, Drama, Romance",An ugly duckling having undergone a remarkable...,2
8,10,"American President, The (1995)","(10, 0.9005667163400763)","Comedy, Drama, Romance",A widowed U.S. President running for reelectio...,2
9,26,Now and Then (1995),"(26, 0.9005667163400763)","Comedy, Drama, Romance",Four 12-year-old girls grow up together during...,2


In [15]:
get_recommendations("Grumpier Old Men (1995)", "Plot")

Relevance nDCG:0.8494724404118942
Ideal nDCG:1.0000000000000002
nDCG score:0.849472440411894


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,63,Fair Game (1995),"(63, 0.12508614881494196)","Action, Romance, Thriller",Max Kirkpatrick is a cop who protects Kate McQ...,1
1,138,Die Hard: With a Vengeance (1995),"(138, 0.12328996781006432)","Action, Adventure, Thriller",John McClane and a Harlem store owner are targ...,0
2,107,Chungking Express (Chung Hing sam lam) (1994),"(107, 0.0986517631500813)","Comedy, Crime, Drama",Two melancholy Hong Kong policemen fall in lov...,1
3,164,Smoke (1995),"(164, 0.08197148647421218)","Comedy, Drama",A Brooklyn smoke shop is the center of neighbo...,1
4,106,Boomerang (1992),"(106, 0.08031721945709384)","Comedy, Drama, Romance",A successful executive and womanizer finds his...,2
5,95,Muppet Treasure Island (1996),"(95, 0.06532093460833328)","Action, Adventure, Comedy","The Muppets set sail in search of treasure, wi...",1
6,52,"Postman, The (Postino, Il) (1994)","(52, 0.06012315958628552)","Biography, Comedy, Drama",A simple Italian postman learns to love poetry...,1
7,14,Cutthroat Island (1995),"(14, 0.053752580899324894)","Action, Adventure, Comedy",A female pirate and her companion race against...,1
8,188,Castle Freak (1995),"(188, 0.05370901148803527)","Drama, Horror, Mystery, Thriller",A man struggles to save his family from the st...,0
9,80,Nick of Time (1995),"(80, 0.05045847636800924)","Action, Crime, Drama","An unimpressive, everyday man is forced into a...",0


In [16]:
# Recommendation based on plot alone
get_recommendations('Toy Story (1995)', "Plot")


Relevance nDCG:0.569920991215813
Ideal nDCG:1.0
nDCG score:0.569920991215813


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,98,Taxi Driver (1976),"(98, 0.09573656083681767)","Crime, Drama",A mentally unstable veteran works as a nightti...,0
1,119,"Amazing Panda Adventure, The (1995)","(119, 0.09536841495442028)","Adventure, Drama, Family",A young American boy visiting China helps his ...,1
2,88,City Hall (1996),"(88, 0.07589463421787788)","Crime, Drama, Mystery",The accidental shooting of a boy in New York l...,0
3,120,"Basketball Diaries, The (1995)","(120, 0.0754287164278157)","Biography, Crime, Drama",A teenager finds his dreams of becoming a bask...,0
4,47,Mighty Aphrodite (1995),"(47, 0.07480595957787413)","Comedy, Fantasy, Romance","When he discovers his adopted son is a genius,...",1
5,107,Chungking Express (Chung Hing sam lam) (1994),"(107, 0.06685034375371283)","Comedy, Crime, Drama",Two melancholy Hong Kong policemen fall in lov...,1
6,78,Dunston Checks In (1996),"(78, 0.06111872785166418)","Adventure, Comedy, Family",A young boy befriends a larcenous orangutan in...,2
7,125,"Addiction, The (1995)","(125, 0.05529440056041926)","Drama, Horror",A New York philosophy grad student turns into ...,0
8,13,Nixon (1995),"(13, 0.046434918636816126)","Biography, Drama, History",A biographical story of former U.S. President ...,0
9,17,Four Rooms (1995),"(17, 0.042932810426309635)",Comedy,Four interlocking tales that take place in a f...,1


In [17]:
# Reccommendation based on rated type
get_recommendations('Toy Story (1995)', "Rated")


Relevance nDCG:0.8903266581268174
Ideal nDCG:1.0000000000000002
nDCG score:0.8903266581268172


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,1,Jumanji (1995),"(1, 0.0)","Adventure, Comedy, Family",When two kids find and play a magical board ga...,2
1,2,Grumpier Old Men (1995),"(2, 0.0)","Comedy, Romance",John and Max resolve to save their beloved bai...,1
2,3,Waiting to Exhale (1995),"(3, 0.0)","Comedy, Drama, Romance","Based on Terry McMillan's novel, this film fol...",1
3,4,Father of the Bride Part II (1995),"(4, 0.0)","Comedy, Family, Romance",George Banks must deal not only with his daugh...,1
4,5,Heat (1995),"(5, 0.0)","Action, Crime, Drama",A group of high-end professional thieves start...,0
5,6,Sabrina (1995),"(6, 0.0)","Comedy, Drama, Romance",An ugly duckling having undergone a remarkable...,1
6,7,Tom and Huck (1995),"(7, 0.0)","Adventure, Comedy, Drama",Two best friends witness a murder and embark o...,2
7,8,Sudden Death (1995),"(8, 0.0)","Action, Crime, Thriller",A former fireman takes on a group of terrorist...,0
8,9,GoldenEye (1995),"(9, 0.0)","Action, Adventure, Thriller",Years after a friend and fellow 00 agent is ki...,1
9,10,"American President, The (1995)","(10, 0.0)","Comedy, Drama, Romance",A widowed U.S. President running for reelectio...,1


In [18]:
metadata["plot + genre"] = metadata["Genre_imdb"] + ' ' + metadata["Plot"]

metadata.head()

Unnamed: 0,MovieID,ImdbID,Title,Rated,Genre_imdb,Plot,Poster,plot + genre
0,1,tt0114709,Toy Story (1995),G,"Animation, Adventure, Comedy",A cowboy doll is profoundly threatened and jea...,https://m.media-amazon.com/images/M/MV5BMDU2ZW...,"Animation, Adventure, Comedy A cowboy doll is ..."
1,2,tt0113497,Jumanji (1995),PG,"Adventure, Comedy, Family",When two kids find and play a magical board ga...,https://m.media-amazon.com/images/M/MV5BZTk2Zm...,"Adventure, Comedy, Family When two kids find a..."
2,3,tt0113228,Grumpier Old Men (1995),PG-13,"Comedy, Romance",John and Max resolve to save their beloved bai...,https://m.media-amazon.com/images/M/MV5BMjQxM2...,"Comedy, Romance John and Max resolve to save t..."
3,4,tt0114885,Waiting to Exhale (1995),R,"Comedy, Drama, Romance","Based on Terry McMillan's novel, this film fol...",https://m.media-amazon.com/images/M/MV5BYzcyMD...,"Comedy, Drama, Romance Based on Terry McMillan..."
4,5,tt0113041,Father of the Bride Part II (1995),PG,"Comedy, Family, Romance",George Banks must deal not only with his daugh...,https://m.media-amazon.com/images/M/MV5BOTEyNz...,"Comedy, Family, Romance George Banks must deal..."


In [19]:
get_recommendations('Toy Story (1995)','plot + genre')

Relevance nDCG:0.857219016476455
Ideal nDCG:1.0000000000000002
nDCG score:0.8572190164764548


Unnamed: 0,index,Title,sim_score,Genres,Plot,Relevance Score
0,119,"Amazing Panda Adventure, The (1995)","(119, 0.11013986953198283)","Adventure, Drama, Family",A young American boy visiting China helps his ...,1
1,78,Dunston Checks In (1996),"(78, 0.09900552455609749)","Adventure, Comedy, Family",A young boy befriends a larcenous orangutan in...,2
2,12,Balto (1995),"(12, 0.08578522109002469)","Animation, Adventure, Drama",An outcast Husky risks his life with other sle...,2
3,47,Mighty Aphrodite (1995),"(47, 0.07999742483931899)","Comedy, Fantasy, Romance","When he discovers his adopted son is a genius,...",1
4,133,Congo (1995),"(133, 0.07847372716370395)","Action, Adventure, Mystery",When an expedition to the African Congo ends i...,1
5,44,Pocahontas (1995),"(44, 0.07809441979785128)","Animation, Adventure, Drama",An English soldier and the daughter of an Algo...,2
6,107,Chungking Express (Chung Hing sam lam) (1994),"(107, 0.07277938971871874)","Comedy, Crime, Drama",Two melancholy Hong Kong policemen fall in lov...,1
7,88,City Hall (1996),"(88, 0.07219153132028869)","Crime, Drama, Mystery",The accidental shooting of a boy in New York l...,0
8,120,"Basketball Diaries, The (1995)","(120, 0.07132701426112702)","Biography, Crime, Drama",A teenager finds his dreams of becoming a bask...,0
9,109,"NeverEnding Story III, The (1994)","(109, 0.07002001036307756)","Adventure, Comedy, Family",A young boy must restore order when a group of...,2


In [20]:
# Calculate nDCG score 
from sklearn.metrics import ndcg_score

In [21]:
# Create a ideal relevance score list 
ideal_score = get_recommendations('Toy Story (1995)','plot + genre')["Relevance Score"].sort_values(ascending=False)

# Get the predicted relevance score 

relevance_score = get_recommendations('Toy Story (1995)','plot + genre')["Relevance Score"]

Relevance nDCG:0.857219016476455
Ideal nDCG:1.0000000000000002
nDCG score:0.8572190164764548
Relevance nDCG:0.857219016476455
Ideal nDCG:1.0000000000000002
nDCG score:0.8572190164764548


In [22]:
ideal_score = np.asarray([ideal_score])
relevance_score = np.asarray([relevance_score])

In [23]:
ideal_score

array([[2, 2, 2, 2, 1, 1, 1, 1, 0, 0]])

In [24]:
relevance_score

array([[1, 2, 2, 1, 1, 2, 1, 0, 0, 2]])

In [25]:
true_relevance = np.asarray([[10, 0, 0, 1, 5]])
scores = np.asarray([[.1, .2, .3, 4, 70]])
ndcg_score(true_relevance, scores, k=3)

0.4123818817534531

In [26]:
# Calculate nDCG score 
test_ndcg = ndcg_score(ideal_score, relevance_score)

In [27]:
# Calculate Ideal Score 
ideal_ndcg = ndcg_score(ideal_score, ideal_score)

In [28]:
print(test_ndcg/ideal_ndcg)
print(test_ndcg)
print(ideal_ndcg)

0.8572190164764548
0.857219016476455
1.0000000000000002


# Using MLP CLASSIFIER

In [29]:
from sklearn.neighbors import NearestNeighbors


In [30]:
samples = matrix_creation("plot + genre")

neigh = NearestNeighbors(n_neighbors=10, radius=0.5)

neigh.fit(samples)




In [31]:
test_array = neigh.kneighbors(samples[0,:], return_distance=True)

test_array
# movie_ids_list = []
# movie_title_list = []
# movie_genre = []
# distance_list = []
# for i in test_array:
       
#     for j in i:
#         movie_ids_list.append(j)
#         movie_title_list.append(metadata["Title"][j])
#         movie_genre.append(metadata["Genre_imdb"][j])


(array([[0.        , 1.33406157, 1.34238182, 1.35219435, 1.3564679 ,
         1.35759071, 1.35787008, 1.3617787 , 1.36221031, 1.36284481]]),
 array([[  0, 119,  78,  12,  47, 133,  44, 107,  88, 120]]))

In [32]:
movie_indices = [i[0] for i in test_array]
movie_indices[1]

movie_index = []
distance_list = []
for i in range(0,len(movie_indices[1])):
    movie_index.append(movie_indices[1][i])
    distance_list.append(movie_indices[0][i])

In [33]:
test = pd.DataFrame({"Title":metadata['Title'].iloc[movie_index], "Genres":metadata['Genre_imdb'].iloc[movie_index], "Plot":metadata['Plot'].iloc[movie_index]}).reset_index(drop=False)


In [34]:
test.head()

Unnamed: 0,index,Title,Genres,Plot
0,0,Toy Story (1995),"Animation, Adventure, Comedy",A cowboy doll is profoundly threatened and jea...
1,119,"Amazing Panda Adventure, The (1995)","Adventure, Drama, Family",A young American boy visiting China helps his ...
2,78,Dunston Checks In (1996),"Adventure, Comedy, Family",A young boy befriends a larcenous orangutan in...
3,12,Balto (1995),"Animation, Adventure, Drama",An outcast Husky risks his life with other sle...
4,47,Mighty Aphrodite (1995),"Comedy, Fantasy, Romance","When he discovers his adopted son is a genius,..."


In [39]:
def NN_get_reccommendation(title,clusters,):
    
    #Define model 
    neigh = NearestNeighbors(n_neighbors=clusters, radius=0.5)
    
    #Fit model to samples
    neigh.fit(samples)
    
    #Find the title index 
    idx = metadata.loc[metadata["Title"] == title].index
    
    #Find nearest neighbors of the title 
    test_array = neigh.kneighbors(samples[idx,:], return_distance=True)

    #Create list to store movie index and distance 
    movie_index = []
    distance_list = []
    
    # Loop through movie index 
    for i in range(0,len(movie_indices[1])):
        movie_index.append(movie_indices[1][i])
        distance_list.append(movie_indices[0][i])

    

In [40]:
metadata.loc[metadata["Title"] == "Toy Story (1995)"].index

Index([0], dtype='int64')

In [41]:
from sklearn.neural_network import MLPClassifier

In [42]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(6, 4), random_state=1)