In [1]:
# load necessary libraries
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import pickle

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the movie catalogue and the ratings table from disk.
df_mov = pd.read_csv('../data/movies.csv').drop_duplicates(subset=['title'], keep='first')
df_rat = pd.read_csv('../data/ratings.csv')

# Ratings enriched with movie metadata; ratings whose movie was removed by the
# title de-duplication above are dropped by the inner join.
df = df_rat.merge(df_mov, on='movieId', how='inner')

# One row per rated movie (order of first appearance in the ratings table),
# joined with its catalogue entry. The helper frame has a single column named 0.
m_uniq = pd.DataFrame(df_rat['movieId'].unique())
df_uniq = m_uniq.merge(df_mov, left_on=0, right_on='movieId')

In [3]:
# Reference: Course note 10.2 NMF on Movie Data Challenge
def get_recommender(result_html, model=None, model_path="nmf_model.bin", top_n=5):
    """Recommend movies for a new user from a fitted NMF model.

    Parameters
    ----------
    result_html : rows of ``(title, rating)``
        Anything ``pd.DataFrame(result_html, columns=['title', 'rating'])``
        accepts. Titles that are not in ``df_uniq`` are ignored; if a title
        occurs more than once the last rating wins; ratings that cannot be
        parsed as numbers are treated as "not rated".
    model : object, optional
        Fitted model exposing ``components_`` and ``transform``. When omitted
        the model is unpickled from ``model_path`` on every call, so callers
        that recommend repeatedly should load it once and pass it in.
    model_path : str, optional
        Pickle file to load when ``model`` is not given.
    top_n : int, optional
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``genres``, highest predicted score first.

    Raises
    ------
    ValueError
        If the number of model features differs from the number of rows in
        ``df_uniq``.

    Notes
    -----
    Assumes the model's feature columns are in the same order as the rows of
    ``df_uniq`` -- TODO confirm against the training notebook.
    The module-level ``df_uniq`` is only read, never modified.
    """
    if model is None:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files that come from a trusted source.
        with open(model_path, 'rb') as fh:
            model = pickle.load(fh)

    Q = model.components_

    user_input = (
        pd.DataFrame(result_html, columns=['title', 'rating'])
        .drop_duplicates(subset='title', keep='last')
    )

    # A LEFT join on 'title' keeps exactly one row per catalogue movie, in
    # catalogue order. An outer join would sort the keys and append rows for
    # unknown titles, which misaligns the rating vector with the model features.
    aligned = df_uniq[['title', 'genres']].merge(user_input, on='title', how='left')

    if Q.shape[1] != len(aligned):
        raise ValueError(
            "model expects %d movies but the catalogue has %d"
            % (Q.shape[1], len(aligned))
        )

    # Unrated movies are imputed with 1.0 (presumably the same imputation the
    # model was trained with -- TODO confirm).
    ratings = pd.to_numeric(aligned['rating'], errors='coerce').fillna(1.0)
    R = ratings.to_numpy(dtype=float).reshape(1, -1)

    # Project the user into the latent space and reconstruct a score per movie.
    P = model.transform(R)
    Rhat = np.dot(P, Q)

    # Score a local frame instead of writing a scratch column into df_uniq.
    scored = aligned.assign(recom=np.asarray(Rhat).ravel())
    best = scored.sort_values('recom', ascending=False).head(top_n)
    return best[['title', 'genres']].reset_index(drop=True)

In [4]:
# Reference course note 10.4 User based collaborative filter & Item recommender challenge
def similar_users_recommendation(result_html, n_similar=20, top_n=5):
    """Recommend movies liked by the users most similar to a new user.

    Parameters
    ----------
    result_html : rows of ``(title, rating)``
        Anything ``pd.DataFrame(result_html, columns=['title', 'rating'])``
        accepts. Titles that are not in ``df_uniq`` are ignored; if a title
        occurs more than once the last rating wins; ratings that cannot be
        parsed as numbers are treated as "not rated".
    n_similar : int, optional
        Number of most similar *other* users whose ratings are averaged.
    top_n : int, optional
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``genres`` of the best-rated movies the new user
        has not rated, highest average rating first.
    """
    user_input = (
        pd.DataFrame(result_html, columns=['title', 'rating'])
        .drop_duplicates(subset='title', keep='last')
        .assign(rating=lambda d: pd.to_numeric(d['rating'], errors='coerce'))
    )

    # User x movie matrix; pivot_table orders the columns by movieId.
    R = df.pivot_table(values='rating', index='userId', columns='movieId')

    # Translate the rated titles to movieIds and align them to R's columns BY
    # LABEL. Positional assignment is wrong here because neither df_uniq nor a
    # merge result is ordered by movieId.
    rated = pd.merge(df_uniq[['movieId', 'title']], user_input, on='title', how='inner')
    query = rated.set_index('movieId')['rating'].reindex(R.columns).astype(float)
    not_rated = query.isna().to_numpy()

    # Append the new user as row 'n_user', then impute gaps with movie means.
    R.loc['n_user'] = query.to_numpy()
    R = R.fillna(R.mean())

    # Similarity of the new user to every existing user. Only this one row is
    # needed, and leaving the new user out of the candidates means it can never
    # be picked as its own neighbour, even when another user ties at 1.0.
    others = R.drop(index='n_user')
    similarity = pd.Series(
        cosine_similarity(R.loc[['n_user']], others)[0],
        index=others.index,
    )
    neighbours = similarity.sort_values(ascending=False).head(n_similar).index

    # Average rating per movie among the neighbours, selected by userId label.
    avg_rating = others.loc[neighbours].mean()

    # Keep only movies the new user has not rated, best first.
    best = avg_rating[not_rated].sort_values(ascending=False).head(top_n)
    best = best.rename_axis('movieId').rename('avg_rating').reset_index()

    rec_movies = pd.merge(best, df_mov, on='movieId', how='inner')
    return rec_movies[['title', 'genres']]