In [1]:
import pandas as pd
import numpy as np

import pickle, gzip, pickletools

In [2]:
# Load the movie metadata table. Later cells look movies up through its
# `movieId` and `title` columns.
movies = pd.read_csv("movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


In [3]:
# NOTE: despite the ".h5" extension, this file is read here as a gzip-compressed
# pickle, not as HDF5.
# SECURITY: unpickling can execute arbitrary code, so only load a model file
# that comes from a trusted source.
with gzip.open("svd_model_200.h5", 'rb') as f:
    p = pickle.Unpickler(f)
    model = p.load()

In [4]:
model

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x17d61884f40>

In [5]:
# Preview the model's `qi` matrix as a table. `get_vector` below indexes its
# rows by the trainset's inner item id, so each row is one movie's latent vector.
pd.DataFrame(model.qi)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0.092911,-0.064177,-0.191643,-0.143336,0.027168,0.227176,-0.156938,-0.126235,0.079970,0.158107,...,0.239507,-0.144732,0.182517,-0.250486,-0.080231,0.295197,-0.178140,-0.043730,0.141406,0.228242
1,0.155719,0.006572,-0.173806,-0.467705,-0.133925,0.009027,-0.237722,-0.072540,0.173588,0.418212,...,-0.003281,-0.072620,-0.095393,0.005335,0.128289,-0.211230,-0.029933,-0.110155,0.161082,-0.165910
2,0.067867,0.131585,-0.181855,-0.392417,0.012519,-0.008601,-0.323412,-0.029370,0.169489,0.536339,...,0.056961,-0.070830,-0.079441,0.031269,0.104580,-0.201259,-0.080997,-0.068325,0.208108,-0.214221
3,0.086705,-0.200623,0.127798,0.201543,-0.274319,0.247829,0.321979,0.157721,0.093206,0.170906,...,0.257186,-0.097709,-0.034014,-0.176692,-0.267322,0.130085,0.046935,0.098134,0.227614,-0.113436
4,0.027162,-0.053153,0.248671,0.014023,-0.156786,0.019172,0.106355,-0.079683,-0.054304,-0.078932,...,0.089938,0.025405,0.116515,0.179420,-0.136900,-0.059179,-0.133654,0.209362,-0.167199,-0.310286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10321,-0.133377,0.123795,-0.094611,-0.209561,0.238845,0.047246,-0.049117,0.008699,-0.048034,-0.014302,...,-0.053197,-0.043657,0.157117,-0.021411,0.121109,-0.043007,0.111063,-0.035564,-0.068555,0.128844
10322,0.033434,-0.034879,-0.033346,0.104356,0.040119,-0.019659,-0.191097,-0.002892,0.068158,-0.041645,...,-0.139047,0.035186,-0.029097,0.023629,0.058487,0.021235,0.069953,-0.182462,0.000644,-0.037520
10323,-0.153988,-0.071133,0.038423,-0.011384,0.052243,-0.013778,-0.069157,-0.108385,-0.064169,0.221172,...,-0.202419,-0.087412,0.105667,0.204611,0.071265,-0.029253,0.129750,0.099976,0.043406,0.036265
10324,-0.020823,-0.291100,0.133457,-0.095066,0.036794,-0.247820,0.030986,0.069741,-0.037520,-0.258988,...,-0.065199,-0.067630,0.085485,0.049197,0.316060,-0.151204,0.154312,-0.139390,0.041974,0.331698


In [6]:
model.qi.shape

(10326, 200)

In [7]:
from scipy.spatial.distance import cosine as cosine_distance

In [8]:
def get_vector(raw_id: int, trained_model=None) -> np.ndarray:
    """Return the latent feature vector of one movie.

    Parameters
    ----------
    raw_id : int
        The movie's raw id, i.e. the key used in the trainset's raw-to-inner
        item mapping (the `movieId` values of the `movies` table in this
        notebook).
    trained_model : optional
        A fitted model exposing `trainset._raw2inner_id_items` and `qi`.
        When omitted, the notebook-level `model` is looked up at call time, so
        re-loading `model` does not leave this function bound to a stale object.

    Returns
    -------
    np.ndarray
        The row of `trained_model.qi` that belongs to the movie.

    Raises
    ------
    KeyError
        If `raw_id` is not present in the model's trainset.
    """
    if trained_model is None:
        # Late binding: resolve the notebook global when called, not when defined.
        trained_model = model
    # `_raw2inner_id_items` is a private mapping of the trainset; it is kept
    # here so that an unknown id still raises KeyError as before.
    movie_row_idx = trained_model.trainset._raw2inner_id_items[raw_id]
    return trained_model.qi[movie_row_idx]

In [9]:
def get_recs(liked_movie_title: str, model=None, top_n: int = 25):
    """Return the movies whose latent vectors are closest to a given movie.

    The cosine distance between the liked movie's vector and the vector of
    every other movie in the model's trainset is computed in one vectorised
    step; the movies with the smallest distance are returned.

    Parameters
    ----------
    liked_movie_title : str
        Exact title as it appears in the `title` column of the notebook-level
        `movies` table, e.g. "Toy Story (1995)".
    model : optional
        A fitted model exposing `trainset._raw2inner_id_items` and `qi`.
        When omitted, the notebook-level `model` is looked up at call time.
    top_n : int, default 25
        Maximum number of recommendations to return (expected to be >= 0).

    Returns
    -------
    pandas.DataFrame or None
        A frame with the columns "vector cosine distance" and "Movie Title",
        sorted by ascending distance (most similar first). Returns None, after
        printing a short message, when the title is not found exactly once in
        `movies` or the movie is not part of the model's trainset.
    """
    if model is None:
        # The parameter shadows the notebook global, so fetch it explicitly.
        # Doing this at call time avoids freezing whichever model happened to
        # exist when this cell was executed.
        model = globals()["model"]

    try:
        # `.item()` raises ValueError unless exactly one row matches the title.
        liked_raw_id = movies.loc[movies["title"] == liked_movie_title, "movieId"].item()
        raw2inner = model.trainset._raw2inner_id_items
        # KeyError here means the movie is not in the model's trainset.
        liked_vector = np.asarray(model.qi[raw2inner[liked_raw_id]], dtype=float)
    except (KeyError, ValueError):
        # Only the expected lookup failures are handled; anything else
        # (interrupts, genuine bugs) propagates instead of being hidden.
        print("Not enough info about movie")
        return None

    # Exclude the liked movie by id rather than by "distance != 0", which is a
    # fragile float comparison and would also drop truly identical vectors.
    other_raw_ids = [raw_id for raw_id in raw2inner if raw_id != liked_raw_id]
    inner_rows = [raw2inner[raw_id] for raw_id in other_raw_ids]
    candidate_vectors = np.asarray(model.qi, dtype=float)[inner_rows]

    # Cosine distance = 1 - (u . v) / (|u| * |v|), for all candidates at once.
    norms = np.linalg.norm(candidate_vectors, axis=1) * np.linalg.norm(liked_vector)
    with np.errstate(divide="ignore", invalid="ignore"):
        distances = 1.0 - (candidate_vectors @ liked_vector) / norms

    # One id -> title lookup table instead of filtering `movies` once per item.
    title_by_id = movies.drop_duplicates(subset="movieId").set_index("movieId")["title"]
    recs = pd.DataFrame(
        {
            "vector cosine distance": distances,
            "Movie Title": title_by_id.reindex(other_raw_ids).to_numpy(),
        }
    )
    # Items without a title in `movies` cannot be shown as a recommendation.
    recs = recs.dropna(subset=["Movie Title"])
    # Ascending distance == descending similarity; title breaks exact ties.
    recs = recs.sort_values(["vector cosine distance", "Movie Title"])
    return recs.head(top_n).reset_index(drop=True)


In [10]:
get_recs("Harry Potter and the Goblet of Fire (2005)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.025581,Harry Potter and the Order of the Phoenix (2007)
1,0.029506,Harry Potter and the Prisoner of Azkaban (2004)
2,0.036699,Harry Potter and the Half-Blood Prince (2009)
3,0.047256,Harry Potter and the Chamber of Secrets (2002)
4,0.047568,Harry Potter and the Deathly Hallows: Part 2 (...
5,0.048301,Harry Potter and the Deathly Hallows: Part 1 (...
6,0.061164,Harry Potter and the Sorcerer's Stone (a.k.a. ...
7,0.610381,Fantastic Beasts and Where to Find Them (2016)
8,0.63276,Fantastic Beasts and Where to Find Them 2 (2018)
9,0.686948,"Chronicles of Narnia: The Lion, the Witch and ..."


In [11]:
get_recs("Notebook, The (2004)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.557568,P.S. I Love You (2007)
1,0.577432,"Walk to Remember, A (2002)"
2,0.608776,Sweet Home Alabama (2002)
3,0.611584,"Time Traveler's Wife, The (2009)"
4,0.622701,"Vow, The (2012)"
5,0.626631,August Rush (2007)
6,0.642285,Titanic (1997)
7,0.654184,Dear John (2010)
8,0.655111,Pretty Woman (1990)
9,0.655921,Me Before You (2016)


In [12]:
get_recs("Scream - Because I Will Kill You! (1999)")

Not enough info about movie


In [13]:
get_recs("Little Women (1994)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.540658,Sense and Sensibility (1995)
1,0.572484,Emma (1996)
2,0.645985,While You Were Sleeping (1995)
3,0.651684,Yentl (1983)
4,0.658214,Roman Holiday (1953)
5,0.663841,The Second Best Exotic Marigold Hotel (2015)
6,0.664567,Meet Me in St. Louis (1944)
7,0.66986,Much Ado About Nothing (1993)
8,0.675515,Jane Eyre (1996)
9,0.684101,"Remains of the Day, The (1993)"


In [14]:
get_recs("Angels & Demons (2009)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.278463,"Da Vinci Code, The (2006)"
1,0.502948,National Treasure: Book of Secrets (2007)
2,0.544117,National Treasure (2004)
3,0.564982,Inferno (2016)
4,0.576858,21 (2008)
5,0.583356,Sherlock Holmes: A Game of Shadows (2011)
6,0.586345,Sherlock Holmes (2009)
7,0.635759,"Mummy: Tomb of the Dragon Emperor, The (2008)"
8,0.657886,Now You See Me (2013)
9,0.660367,"Dilemma, The (2011)"


In [15]:
get_recs("Inglourious Basterds (2009)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.248675,Django Unchained (2012)
1,0.425897,The Hateful Eight (2015)
2,0.426318,Kill Bill: Vol. 1 (2003)
3,0.432731,Kill Bill: Vol. 2 (2004)
4,0.502008,Inglorious Bastards (Quel maledetto treno blin...
5,0.542213,Pulp Fiction (1994)
6,0.54877,Reservoir Dogs (1992)
7,0.560535,"Departed, The (2006)"
8,0.5661,Death Proof (2007)
9,0.576728,Grindhouse (2007)


In [16]:
get_recs("Avengers: Age of Ultron (2015)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.113676,"Avengers, The (2012)"
1,0.167925,Captain America: Civil War (2016)
2,0.199439,Captain America: The First Avenger (2011)
3,0.202669,Iron Man 2 (2010)
4,0.205441,Captain America: The Winter Soldier (2014)
5,0.214377,Iron Man 3 (2013)
6,0.221537,Thor: The Dark World (2013)
7,0.230426,Thor (2011)
8,0.258326,Iron Man (2008)
9,0.298608,Avengers: Infinity War - Part I (2018)


In [17]:
get_recs("Get Smart (2008)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.461971,Date Night (2010)
1,0.56656,Bewitched (2005)
2,0.587587,Johnny English (2003)
3,0.59732,Tower Heist (2011)
4,0.597857,Night at the Museum (2006)
5,0.611228,Starsky & Hutch (2004)
6,0.611413,Johnny English Reborn (2011)
7,0.613163,"Incredible Burt Wonderstone, The (2013)"
8,0.632855,Old Dogs (2009)
9,0.633576,"Longest Yard, The (2005)"


In [18]:
get_recs("WALL·E (2008)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.233832,Up (2009)
1,0.385226,Ratatouille (2007)
2,0.411262,Finding Nemo (2003)
3,0.44164,"Monsters, Inc. (2001)"
4,0.450007,"Incredibles, The (2004)"
5,0.507757,Toy Story 3 (2010)
6,0.509448,Inside Out (2015)
7,0.509724,How to Train Your Dragon (2010)
8,0.52354,Toy Story (1995)
9,0.556391,Big Hero 6 (2014)


In [19]:
get_recs("Wind River (2017)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.636145,"Three Billboards Outside Ebbing, Missouri (2017)"
1,0.665552,Hostiles (2017)
2,0.68436,"Place Beyond the Pines, The (2012)"
3,0.691917,"Orphanage, The (Orfanato, El) (2007)"
4,0.695469,Sicario (2015)
5,0.701606,Sicario: Day of the Soldado (2018)
6,0.706141,"Passion of the Christ, The (2004)"
7,0.710486,True Grit (2010)
8,0.716491,"Conspirator, The (2010)"
9,0.718688,Green Room (2015)


In [20]:
get_recs("Zodiac (2007)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.569029,Sicario (2015)
1,0.579726,Prisoners (2013)
2,0.592197,Michael Clayton (2007)
3,0.597214,Gone Baby Gone (2007)
4,0.60198,Contagion (2011)
5,0.602176,Collateral (2004)
6,0.603128,No Country for Old Men (2007)
7,0.646726,"Aviator, The (2004)"
8,0.654045,Assassination of Jesse James by the Coward Rob...
9,0.659574,All the President's Men (1976)


In [21]:
get_recs("The Machinist (2004)")

Unnamed: 0,vector cosine distance,Movie Title
0,0.522964,Requiem for a Dream (2000)
1,0.540017,Memento (2000)
2,0.553194,Shutter Island (2010)
3,0.553744,Following (1998)
4,0.562321,Donnie Darko (2001)
5,0.59749,21 Grams (2003)
6,0.611442,Jacob's Ladder (1990)
7,0.624966,American History X (1998)
8,0.640816,Stay (2005)
9,0.641969,"Jacket, The (2005)"
