In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# SECURITY: pickle.load can execute arbitrary code while deserializing;
# only load artifacts that come from a trusted source.
cosine_similarity = _load_pickle('cosine_similarity.pickle')
# NOTE(review): the file name is spelled 'metdata' — kept as-is because it
# has to match the artifact on disk; confirm whether that spelling is intended.
required_metadata = _load_pickle('metdata.pickle')
popularity_data = _load_pickle('popularity_data.pickle')
dataset = _load_pickle('dataset.pickle')
svdpp_data = _load_pickle('svdpp.pickle')

In [3]:
# Lookup passed to the content-based recommender, which indexes it by title
# to find a movie's row in the similarity matrix.
with open('indices.pickle', mode='rb') as indices_file:
    indices = pickle.load(indices_file)

In [4]:
def get_recommendations_for_content_based(title, cosine_sim, indices, metadata, num_of_reco):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : hashable
        Key looked up in ``indices`` to find the movie's row in ``cosine_sim``.
    cosine_sim : indexable
        ``cosine_sim[idx]`` must yield the similarity scores of movie ``idx``
        against every movie, in row order.
    indices : mapping
        Maps a title to its integer row position. Assumes each title maps to
        a single position (duplicate titles are not handled) — TODO confirm.
    metadata : pandas.DataFrame
        Must have a ``'title'`` column whose positional order matches
        ``cosine_sim``.
    num_of_reco : int
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``num_of_reco`` titles, most similar first. The queried movie
        itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    # Row position of the queried movie in the similarity matrix.
    idx = indices[title]

    # (position, score) pairs ordered from most to least similar. sorted() is
    # stable, so movies with equal scores keep their original relative order.
    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the queried movie by its index rather than by assuming it sorts
    # first: another movie with an identical score and a lower index would
    # otherwise be discarded in its place, leaving the query in the results.
    movie_indices = [i for i, _ in sim_scores if i != idx][:max(num_of_reco, 0)]

    return list(metadata['title'].iloc[movie_indices])


In [5]:
def get_recommendations_from_popularity(title, popularity_data, num_of_reco):
    """Return the highest-scoring titles by the director of ``title``.

    Parameters
    ----------
    title : str
        Title to look up in ``popularity_data['title']``; the director of the
        first matching row is used.
    popularity_data : pandas.DataFrame
        Must contain ``'title'``, ``'director'`` and ``'score'`` columns.
    num_of_reco : int
        Maximum number of titles to return.

    Returns
    -------
    list
        Up to ``num_of_reco`` titles by the same director, ordered by
        descending ``'score'``. The queried title is not filtered out, so it
        can appear in the result.

    Raises
    ------
    IndexError
        If no row of ``popularity_data`` has the given ``title``.
    """
    # Director of the first row matching the title.
    director = popularity_data.loc[popularity_data['title'] == title, 'director'].iat[0]

    same_director = popularity_data[popularity_data['director'] == director]
    ranked_titles = same_director.sort_values(by='score', ascending=False)['title'].tolist()

    # Honor the requested count (previously a hard-coded 5 ignored num_of_reco).
    return ranked_titles[:num_of_reco]

In [6]:
def get_recommendations_from_svdpp(uID, dataset, svdpp_data, num_of_reco):
    """Rank the items a user has no row for by the model's predicted score.

    Parameters
    ----------
    uID
        User identifier, matched against ``dataset['userID']`` and passed to
        the model as ``uid``.
    dataset : pandas.DataFrame
        Must contain ``'userID'`` and ``'itemID'`` columns.
    svdpp_data
        Object exposing ``predict(uid=..., iid=...)`` whose result has an
        ``est`` attribute (presumably a fitted SVD++ model — confirm).
    num_of_reco : int
        Number of top-scoring items to return.

    Returns
    -------
    list
        Up to ``num_of_reco`` item ids, highest predicted score first.
    """
    all_items = dataset['itemID'].unique()
    seen_items = dataset.loc[dataset['userID'] == uID, 'itemID']

    # Only score items this user has no row for in the dataset.
    candidates = np.setdiff1d(all_items, seen_items)

    scored = [
        (item, svdpp_data.predict(uid=uID, iid=item).est)
        for item in candidates
    ]

    ranking = pd.DataFrame(scored, columns=['iid', 'predictions'])
    ranking = ranking.sort_values('predictions', ascending=False)
    return ranking.head(num_of_reco)['iid'].tolist()

In [7]:
# Run each recommender once: content-based and popularity-based for a seed
# title, and SVD++ for a single user.
result1 = get_recommendations_for_content_based(
    title='The Dark Knight Rises',
    cosine_sim=cosine_similarity,
    indices=indices,
    metadata=required_metadata,
    num_of_reco=5,
)
result2 = get_recommendations_from_popularity(
    title='The Dark Knight Rises',
    popularity_data=popularity_data,
    num_of_reco=5,
)
result3 = get_recommendations_from_svdpp(
    uID=652,
    dataset=dataset,
    svdpp_data=svdpp_data,
    num_of_reco=10,
)

In [8]:
# Content-based recommendations for 'The Dark Knight Rises'.
result1

['The Dark Knight', 'Batman Begins', 'Shiner', 'Amongst Friends', 'Mitchell']

In [9]:
# Popularity-based recommendations (same director as 'The Dark Knight Rises').
result2

['The Dark Knight', 'Inception', 'Interstellar', 'Memento', 'The Prestige']

In [10]:
# SVD++ recommendations for user 652.
result3

['The Sixth Sense',
 'Beetlejuice',
 'While You Were Sleeping',
 'The Green Mile',
 'Galaxy Quest',
 "Don't Worry, I'm Fine",
 'Dead Man',
 'Bonnie and Clyde',
 'Madagascar',
 'Frankenstein']

In [11]:
def hybrid_model(uID, dataset, svdpp_data, cosine_sim, indices, metadata, num_of_reco2, popularity_data,
                 num_of_reco1=5, top_n=10):
    """Combine SVD++, content-based and popularity signals into one list.

    The function takes the top SVD++ items for the user, expands each one
    into its content-based neighbours, and ranks those neighbours by their
    ``'score'`` in ``popularity_data``.

    Parameters
    ----------
    uID
        User identifier forwarded to ``get_recommendations_from_svdpp``.
    dataset, svdpp_data
        Forwarded unchanged to ``get_recommendations_from_svdpp``.
    cosine_sim, indices, metadata
        Forwarded unchanged to ``get_recommendations_for_content_based``.
    num_of_reco2 : int
        Number of content-based neighbours requested per SVD++ item.
    popularity_data : pandas.DataFrame
        Must contain ``'title'`` and ``'score'`` columns.
    num_of_reco1 : int, optional
        Number of SVD++ items used as seeds (default 5, the value that was
        previously hard-coded).
    top_n : int, optional
        Number of popularity-ranked neighbours appended to the seeds
        (default 10, the value that was previously hard-coded).

    Returns
    -------
    list
        The SVD++ seed items followed by up to ``top_n`` neighbour titles in
        descending ``'score'`` order. Titles are not de-duplicated between
        the two parts.
    """
    movies = get_recommendations_from_svdpp(uID, dataset, svdpp_data, num_of_reco1)
    print(movies)

    # Distinct neighbour titles across all seeds. Missing values are skipped
    # because isin() would match them against missing titles, which the
    # original equality comparison never did.
    related_movies = {
        related
        for seed in movies
        for related in get_recommendations_for_content_based(
            seed, cosine_sim, indices, metadata, num_of_reco2
        )
        if pd.notna(related)
    }

    # One vectorized filter replaces the per-title DataFrame.append loop:
    # append was removed in pandas 2.0 and re-copied the frame every pass.
    matches = popularity_data[popularity_data['title'].isin(related_movies)]
    ranked = (
        matches[['title', 'score']]
        .drop_duplicates()
        .sort_values(by='score', ascending=False)
    )

    top_titles = ranked['title'].tolist()[:top_n]
    print(top_titles)
    return movies + top_titles

In [13]:
# Hybrid recommendations for user 652, requesting 50 content-based
# neighbours per SVD++ seed.
results = hybrid_model(
    uID=652,
    dataset=dataset,
    svdpp_data=svdpp_data,
    cosine_sim=cosine_similarity,
    indices=indices,
    metadata=required_metadata,
    num_of_reco2=50,
    popularity_data=popularity_data,
)
results

['The Sixth Sense', 'Beetlejuice', 'While You Were Sleeping', 'The Green Mile', 'Galaxy Quest']
['The Shawshank Redemption', 'Forrest Gump', 'Toy Story', 'Catch Me If You Can', 'To Kill a Mockingbird', 'Edward Scissorhands', 'Mystic River', 'Toy Story 2', 'American Gangster', 'Dancer in the Dark']


['The Sixth Sense',
 'Beetlejuice',
 'While You Were Sleeping',
 'The Green Mile',
 'Galaxy Quest',
 'The Shawshank Redemption',
 'Forrest Gump',
 'Toy Story',
 'Catch Me If You Can',
 'To Kill a Mockingbird',
 'Edward Scissorhands',
 'Mystic River',
 'Toy Story 2',
 'American Gangster',
 'Dancer in the Dark']