In [1]:
#!pip install scikit-surprise
#%run -i utilities.py
# Execute the helper script inside this notebook's namespace (-i), so the names
# it defines become available to the cells below.
%run -i CFKnnMeansModel_Class.py

# Model 1: Collaborative Filtering (KNN with Means)

## Load the model from the saved file

In [2]:
import pickle

knn_filename = 'Model_KNN_Means.sav'

# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open(knn_filename, 'rb') as knn_file:
    knn_model = pickle.load(knn_file)


# Model 2: Content Based, Cosine Similarity

## Directly read Femke's notebook

> Important: make sure the notebook defines the following names:
> `cosine_sim` and `cosine_sim_df`

In [3]:
# Execute the content-based filtering notebook; `cosine_sim`, which is passed to
# HybridModel further down, is expected to be defined by it.
%run 2_ContentBasedFiltering.ipynb 

Shape: (9719, 9719)


# Hybrid Model
The hybrid recommender combines content-based and collaborative filtering: it keeps the movies that both models recommend and ranks them by average rating.

In [4]:
# Hybrid model
class HybridModel:
    """Naive hybrid recommender.

    Intersects the recommendations of a content-based model and a
    collaborative-filtering (CF) model and ranks the overlap by average rating.

    Attributes:
        cosine_sim: similarity data handed in by the caller. It is stored on
            the instance but not read by ``recommend_movies``; content-based
            lookups go through the notebook-level ``genre_recommendation``.
        cf_model: object exposing
            ``recommend_similar_items_knnmeans(title, movies_df, n)``.
    """

    def __init__(self, cosine_sim, cf_model):
        """Store the content-based similarity data and the CF model."""
        self.cosine_sim = cosine_sim
        self.cf_model = cf_model

    def recommend_movies(self, user_title_year, movies_df, n_candidates=100, top_n=10):
        """Recommend movies for a reference movie using both models.

        Relies on ``Process_Avg_Rating``, ``genre_recommendation`` and ``pd``
        being available in the notebook namespace.

        Args:
            user_title_year: the reference movie; passed unchanged to both the
                content-based lookup and the CF model.
            movies_df: ratings DataFrame handed to ``Process_Avg_Rating`` and
                to the CF model.
            n_candidates: number of candidates requested from the CF model
                (default 100, the previously hard-coded value).
            top_n: maximum number of rows kept in the hybrid result
                (default 10, the previously hard-coded value).

        Returns:
            A tuple ``(content_based, cf_based, hybrid)`` of DataFrames.
            ``hybrid`` may contain fewer than ``top_n`` rows when the two
            models agree on only a few movies.
        """
        # Per-movie rating summary; it must carry 'title' and 'genres' for the
        # left merge below.
        movies_df_summary = Process_Avg_Rating(movies_df)

        # --------------------------------------
        # Content based
        # --------------------------------------
        similar_movies_cos_sim = genre_recommendation(user_title_year)

        # Attach the rating summary to every content-based recommendation.
        similar_movies_cos_sim_df = pd.merge(
            similar_movies_cos_sim,
            movies_df_summary,
            how='left',
            left_on=['title', 'genres'],
            right_on=['title', 'genres'],
        )

        # --------------------------------------
        # Collaborative filtering based
        # --------------------------------------
        # Query the CF model with the method's own argument. The previous code
        # read the notebook global `user_input`, which fails on a fresh kernel
        # and could query the two models for different movies.
        similar_movies_knn = self.cf_model.recommend_similar_items_knnmeans(
            user_title_year, movies_df, n_candidates
        )

        # Keep only movies recommended by both models, best rated first.
        similar_movies_common = pd.merge(
            similar_movies_knn,
            similar_movies_cos_sim_df,
            how='inner',
            on=['title', 'average rating', 'number of ratings'],
        )
        similar_movies_common = similar_movies_common.sort_values(
            ['average rating'], ascending=[False]
        ).head(top_n)

        return similar_movies_cos_sim_df, similar_movies_knn, similar_movies_common

## Read the DataFrame and ask for input

In [5]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
# Load the preprocessed ratings table (one row per user/movie rating).
df = pd.read_csv(
    "../../Data/ml-latest-small/PreprocessedData_ml_latest_year_small.csv"
)
#df["title"] = df["title"].str.lower()

# Preview the first rows as a sanity check.
df.head()

Unnamed: 0,userId,movieId,rating,title,genres,year
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
1,5,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
2,7,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
3,15,1,2.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
4,17,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995


In [6]:
# Prompt interactively for the favourite movie; the helper receives the
# ratings DataFrame.
user_input = AskForUserInput(df)
# Alternative: skip the prompt and hardcode the movie instead
#user_input="Toy Story"

Enter your Favorite Movie: Toy Story

We have multiple movies with the same name/Part of it, but with different release years:
 movieId              title  year
       1   Toy Story (1995)  1995
    3114 Toy Story 2 (1999)  1999
   78499 Toy Story 3 (2010)  2010
Which one do you have in your mind? (Enter the movieId)1
Your favourite movie: Toy Story (1995)



## Recommend movies


In [7]:
# Combine the content-based similarity data with the loaded KNN model.
hybrid_model = HybridModel(cosine_sim, knn_model)

In [8]:
# Run the recommender once and keep all three result tables for the cells below.
r_content, r_cf, r_hybrid = hybrid_model.recommend_movies(user_input, df)

print("Recommendations, Content based:")
r_content.head(10)

Recommendations, Content based:


Unnamed: 0,title,genres,Similarity Score,average rating,number of ratings
0,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy,1.0,3.86,97
1,"Emperor's New Groove, The (2000)",Adventure|Animation|Children|Comedy|Fantasy,1.0,3.72,37
2,"Monsters, Inc. (2001)",Adventure|Animation|Children|Comedy|Fantasy,1.0,3.87,132
3,Moana (2016),Adventure|Animation|Children|Comedy|Fantasy,1.0,3.45,10
4,Shrek the Third (2007),Adventure|Animation|Children|Comedy|Fantasy,1.0,3.02,21
5,Turbo (2013),Adventure|Animation|Children|Comedy|Fantasy,1.0,2.5,1
6,"Wild, The (2006)",Adventure|Animation|Children|Comedy|Fantasy,1.0,2.5,1
7,"Tale of Despereaux, The (2008)",Adventure|Animation|Children|Comedy|Fantasy,1.0,3.0,1
8,Asterix and the Vikings (Astérix et les Viking...,Adventure|Animation|Children|Comedy|Fantasy,1.0,5.0,1
9,Antz (1998),Adventure|Animation|Children|Comedy|Fantasy,1.0,3.24,45


In [9]:
# Top rows of the collaborative-filtering result.
print("Recommendations based on CF:")
r_cf.head(10)

Recommendations based on CF:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,average rating,number of ratings
movieId,title,year,genres,Unnamed: 4_level_1,Unnamed: 5_level_1
2,Jumanji (1995),1995,Adventure|Children|Fantasy,3.43,110
24,Powder (1995),1995,Drama|Sci-Fi,3.12,28
34,Babe (1995),1995,Children|Drama,3.65,128
47,Seven (a.k.a. Se7en) (1995),1995,Mystery|Thriller,3.98,203
150,Apollo 13 (1995),1995,Adventure|Drama|IMAX,3.85,201
158,Casper (1995),1995,Adventure|Children,2.81,62
165,Die Hard: With a Vengeance (1995),1995,Action|Crime|Thriller,3.56,144
296,Pulp Fiction (1994),1994,Comedy|Crime|Drama|Thriller,4.2,307
317,"Santa Clause, The (1994)",1994,Comedy|Drama|Fantasy,3.2,81
357,Four Weddings and a Funeral (1994),1994,Comedy|Romance,3.52,103


In [10]:
# Movies recommended by both models (the hybrid result).
print("Recommendations, naive hybrid:")
r_hybrid.head(10)

Recommendations, naive hybrid:


Unnamed: 0,title,average rating,number of ratings,genres,Similarity Score
4,Finding Nemo (2003),3.96,141,Adventure|Animation|Children|Comedy,0.875595
2,Shrek (2001),3.87,170,Adventure|Animation|Children|Comedy|Fantasy|Ro...,0.933175
3,"Monsters, Inc. (2001)",3.87,132,Adventure|Animation|Children|Comedy|Fantasy,1.0
1,Toy Story 2 (1999),3.86,97,Adventure|Animation|Children|Comedy|Fantasy,1.0
0,"Bug's Life, A (1998)",3.52,92,Adventure|Animation|Children|Comedy,0.875595


# Save your model

In [11]:
import pickle

filename = 'Model_hybrid.sav'

# Write through a context manager so the file is flushed and closed even if
# pickling fails part-way.
# Pickle stores classes by reference, so HybridModel must be defined or
# importable in the process that later loads this file.
with open(filename, 'wb') as model_file:
    pickle.dump(hybrid_model, model_file)