# Hybrid Recommendation System

Using the two best-performing recommenders (the content-based cosine-similarity recommender and the SVD collaborative filter), I created a hybrid approach with code adapted from Banik, Rounak. 2018. *Hands-On Recommendation Systems with Python: Start building powerful and personalized, recommendation engines with Python*. Packt Publishing.

In [1]:

import numpy as np
import pandas as pd

In [12]:

#Import or compute the cosine_sim matrix
#The CSV is read without index_col, so the index columns that were saved with it come
#back as ordinary 'Unnamed: 0.1' / 'Unnamed: 0' columns (visible in the preview below).
#The similarity columns are labelled with strings ('0', '1', ...), which is why hybrid()
#looks a movie up with cosine_sim[str(int(idx))].
#NOTE(review): absolute local path - consider a configurable data directory
cosine_sim = pd.read_csv('/Users/connorranson/Downloads/cosine_sim.csv')
cosine_sim.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,9209,9210,9211,9212,9213,9214,9215,9216,9217,9218
0,0,1.0,0.030359,0.035245,0.03253,0.030359,0.0,0.033806,0.038778,0.0,...,0.0,0.0,0.028571,0.0,0.027789,0.040996,0.0,0.0,0.0,0.0
1,1,0.030359,1.0,0.0,0.0,0.0,0.0,0.0,0.082409,0.034565,...,0.0,0.0,0.060718,0.0,0.0,0.0,0.0,0.039193,0.031265,0.0
2,2,0.035245,0.0,1.0,0.080257,0.03745,0.0,0.083406,0.0,0.0,...,0.0,0.0,0.0,0.0,0.03428,0.0,0.047836,0.045502,0.0,0.0
3,3,0.03253,0.0,0.080257,1.0,0.034565,0.023168,0.07698,0.044151,0.0,...,0.0,0.029348,0.03253,0.046676,0.031639,0.046676,0.044151,0.083992,0.033501,0.0
4,4,0.030359,0.0,0.03745,0.034565,1.0,0.0,0.035921,0.0,0.0,...,0.0,0.0,0.0,0.0,0.029527,0.0,0.0,0.0,0.0,0.0


In [3]:

#Load the lookup table that hybrid() indexes by movie title to get a cosine_sim index.
#The CSV is read with header=None; column 0 becomes the index and column 1 is kept as
#the values, so the result is a Pandas Series built in a single chained expression.
cosine_sim_map = (
    pd.read_csv('/Users/connorranson/Downloads/cosine_sim_map.csv', header=None)
    .set_index(0)[1]
)

In [6]:
#Build the SVD based Collaborative filter
from surprise import SVD,Reader,Dataset
from surprise.model_selection import KFold

#Reader() uses Surprise's default rating scale
#NOTE(review): confirm the default scale matches the range of ratings in ratings.csv
reader = Reader()
ratings = pd.read_csv('/Users/connorranson/Downloads/ratings.csv')
data = Dataset.load_from_df(ratings[['userId','movieId','rating']],reader)

#The splitter is kept so the name `kf` stays defined, but it plays no part in fitting.
#The previous bare `kf.split(data)` call only created a lazy generator that was thrown
#away, so no folds were ever produced; it has been removed.
kf = KFold(n_splits=5)

#Fix the seed so the learned factors (and therefore the 'est' values returned by
#hybrid) are reproducible across Restart & Run All
svd = SVD(random_state=42)

#Train on every available rating (no hold-out set)
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11ce8f4d0>

In [7]:
#Load the movie ID table once, then derive two views of it:
#  id_to_title - indexed by the 'id' column
#  title_to_id - indexed by the 'title' column
id_map = pd.read_csv('/Users/connorranson/Downloads/movie_ids.csv')
id_to_title, title_to_id = (id_map.set_index(key) for key in ('id', 'title'))

In [8]:
#Import or compute relevant metadata of the movies
#hybrid() selects rows of smd by position (iloc) using row numbers taken from cosine_sim
#and reads the 'title', 'vote_count', 'vote_average', 'year' and 'id' columns.
#presumably the row order of this file matches the cosine_sim matrix - TODO confirm
smd = pd.read_csv('/Users/connorranson/Downloads/metadata_small.csv')

In [9]:

def hybrid(userId, title, n_candidates=25, n_recommendations=10):
    """Recommend movies similar to `title`, ranked by the SVD rating predicted for `userId`.

    The function first picks the `n_candidates` movies with the highest cosine
    similarity to `title`, then re-orders those candidates by the rating the
    SVD model predicts for the given user.

    Relies on these notebook-level objects: `cosine_sim_map`, `cosine_sim`,
    `smd`, `id_to_title` and `svd`.

    Parameters
    ----------
    userId : user identifier passed as the first argument to `svd.predict`.
    title : movie title used to look up the movie's index in `cosine_sim_map`.
    n_candidates : number of most-similar movies to consider (default 25).
    n_recommendations : number of rows to return (default 10).

    Returns
    -------
    pandas.DataFrame with columns 'title', 'vote_count', 'vote_average',
    'year', 'id' and 'est' (the predicted rating), sorted by 'est' descending.

    Raises
    ------
    KeyError if `title` is missing from `cosine_sim_map`.
    """
    #Extract the cosine_sim index of the movie
    idx = int(cosine_sim_map[title])

    #Pair every row position with its similarity to the requested movie.
    #The requested movie is dropped by index rather than by assuming it sorts first:
    #another movie with an equal top score could otherwise push it into the results.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[str(idx)])
        if position != idx
    ]

    #Sort the (index, score) tuples in decreasing order of similarity and keep the best ones
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)[:n_candidates]

    #Store the cosine_sim indices of the selected movies in a list
    movie_indices = [position for position, _ in sim_scores]

    #Extract the metadata of the aforementioned movies.
    #.copy() makes this an independent frame so adding the 'est' column below
    #neither touches smd nor triggers a SettingWithCopyWarning.
    movies = smd.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'year', 'id']].copy()

    #Compute the predicted ratings using the SVD filter; each 'id' is translated to the
    #'movieId' stored in id_to_title, which is the item identifier given to svd.predict
    movies['est'] = movies['id'].apply(lambda x: svd.predict(userId, id_to_title.loc[x]['movieId']).est)

    #Sort the movies in decreasing order of predicted rating and return the top ones
    return movies.sort_values('est', ascending=False).head(n_recommendations)

In [10]:

#Top 10 hybrid recommendations for user 1, seeded with the movie 'Avatar'
hybrid(1, 'Avatar')

Unnamed: 0,title,vote_count,vote_average,year,id,est
8658,X-Men: Days of Future Past,6155.0,7.5,2014,127585,4.780257
1011,The Terminator,4208.0,7.4,1984,218,4.668587
8865,Star Wars: The Force Awakens,7993.0,7.5,2015,140607,4.568208
8401,Star Trek Into Darkness,4479.0,7.4,2013,54138,4.487362
974,Aliens,3282.0,7.7,1986,679,4.45827
522,Terminator 2: Judgment Day,4274.0,7.7,1991,280,4.414727
922,The Abyss,822.0,7.1,1989,2756,4.350248
2014,Fantastic Planet,140.0,7.6,1973,16306,4.262152
3060,Sinbad and the Eye of the Tiger,39.0,6.3,1977,11940,4.253666
7265,Dragonball Evolution,475.0,2.9,2009,14164,4.228442


In [11]:

#Same seed movie for user 2: the candidate pool is identical, only the predicted ratings differ
hybrid(2, 'Avatar')

Unnamed: 0,title,vote_count,vote_average,year,id,est
974,Aliens,3282.0,7.7,1986,679,4.308547
522,Terminator 2: Judgment Day,4274.0,7.7,1991,280,4.06185
1011,The Terminator,4208.0,7.4,1984,218,3.977694
2834,Predator,2129.0,7.3,1987,106,3.965701
8401,Star Trek Into Darkness,4479.0,7.4,2013,54138,3.892212
8865,Star Wars: The Force Awakens,7993.0,7.5,2015,140607,3.847479
8658,X-Men: Days of Future Past,6155.0,7.5,2014,127585,3.792748
922,The Abyss,822.0,7.1,1989,2756,3.719707
2014,Fantastic Planet,140.0,7.6,1973,16306,3.64827
7265,Dragonball Evolution,475.0,2.9,2009,14164,3.573363
