In [1]:
import keras
import warnings
import numpy as np
import pandas as pd
from math import sqrt
from sklearn import metrics
from keras.optimizers import Adam
from sklearn.cluster import KMeans
from keras.models import load_model
from scipy.sparse.linalg import svds
from models import matrix_factorization_utilities
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Activation, Flatten ,Embedding, Reshape, concatenate
warnings.filterwarnings('ignore')

In [33]:
 def load_data():
    # Reading the ratings data
    ratings = pd.read_csv('Dataset/ratings.csv')
    #Just taking the required columns
    ratings = ratings[['userId', 'movieId','rating']]
    #reading the movies dataset
    movies = pd.read_csv('Dataset/movies.csv')
    
    
    links = pd.read_csv('Dataset/movies.dat',sep='\t', encoding='latin-1')
    for i in links.columns:
        if i=='id'or i == 'title'or i=='year' or i == 'rtPictureURL':
            continue
        else:
            links.drop(i, inplace=True, axis=1)
        
    links.columns=['movieId','title_old', 'year', 'rtPictureURL']
    links['title']=links.apply(lambda x:'%s (%s)' % (x['title_old'],x['year']),axis=1)
    links.drop('title_old', inplace=True, axis=1)
    links.drop('year', inplace=True, axis=1)
    links=links[['movieId','title','rtPictureURL']]
    movie_list=pd.merge(movies,links, on='movieId')
    movie_list.drop('title_y', inplace=True, axis=1)
    movie_list.columns=['movieId','title','genres','rtPictureURL']
    
    ratings.userId = ratings.userId.astype('category').cat.codes.values
    ratings.movieId = ratings.movieId.astype('category').cat.codes.values
    
    #take 80% as the training set and 20% as the test set
    train, test= train_test_split(ratings,test_size=0.2)
    return train, test, ratings,movie_list

In [34]:
global train
global test
global ratings
global movie_list
train, test , ratings,movie_list =load_data()

In [60]:
def matrix_factorisation_predictions(usr):
    result_list=[]
    
    model_1 = load_model('models/matrix_factorisation_model_with_n_latent_factors.h5', compile = True)
    
    y_true = test.rating
    result_list.append('MAE for testing: {}'.format(round(mean_absolute_error(y_true, model_1.predict([test.userId, test.movieId])),4)))
    rmse = np.sqrt(mean_squared_error(y_true,model_1.predict([test.userId, test.movieId]))) 
    result_list.append("Root Mean Square Error: {} ".format(round(rmse,4)))
    errors=mean_absolute_error(y_true, model_1.predict([test.userId, test.movieId]))
    mape = 100 * (errors / y_true)
    accuracy = 100 - np.mean(mape)
    result_list.append('Accuracy: {} '.format( round(accuracy, 2), '%.'))
    
    mlp_user_embedding_weights = (next(iter(filter(lambda x: x.name == 'User-Embedding', model_1.layers))).get_weights())

    # get the latent embedding for your desired user
    user_latent_matrix = mlp_user_embedding_weights[0]

    desired_user_id = usr
    one_user_vector = user_latent_matrix[desired_user_id,:]
    one_user_vector = np.reshape(one_user_vector, (1,1))

    result_list.append('Performing kmeans to find the nearest users...')
    # get similar users
    kmeans = KMeans(n_clusters=20, random_state=0, verbose=0).fit(user_latent_matrix)
    desired_user_label = kmeans.predict(one_user_vector)
    user_label = kmeans.labels_
    neighbors = []
    for user_id, user_label in enumerate(user_label):
        if user_label == desired_user_label:
            neighbors.append(user_id)
    result_list.append('Found {0} neighbor users.'.format(len(neighbors)))
    
    movies = []
    for user_id in neighbors:
        movies += list(ratings[ratings['userId'] == int(user_id)]['movieId'])
    movies = list(set(movies))
    result_list.append('Found {0} neighbor movies from these users.'.format(len(movies)))

    users = np.full(len(movies), desired_user_id, dtype='int32')
    items = np.array(movies, dtype='int32')

    result_list.append('Ranking most likely tracks using the NeuMF model...')
    # and predict movies for my user
    results = model_1.predict([users,items],batch_size=10, verbose=0) 
    results = results.tolist()
    result_list.append('Ranked the movies!')

    results = pd.DataFrame(results, columns=['pre_rating']).astype("float")
    items = pd.DataFrame(items, columns=['movieId'])
    results = pd.concat([items, results], ignore_index=True, sort=False, axis=1)
    results.columns =['movieId', 'pre_rating'] 
    results_df = pd.DataFrame(np.nan, index=range(len(results)), columns=['pre_rating','movieId'])
    for index, row in results.iterrows():
        results_df.loc[index] = [row['pre_rating'], ratings[ratings['movieId'] == row['movieId']].iloc[0]['movieId']]

    results_df= results_df.sort_values(by=['pre_rating'], ascending=False)
    results_df["movieId"]=results_df["movieId"].astype(int)
    results_df=pd.merge(results_df,movie_list,on="movieId")[:10]
    
    return  results_df,result_list

In [61]:
 mf_pred,result=matrix_factorisation_predictions(38)

In [62]:
result

['MAE for testing: 0.643',
 'Root Mean Square Error: 0.8593 ',
 'Accuracy: 78.19 ',
 'Performing kmeans to find the nearest users...',
 'Found 551 neighbor users.',
 'Found 7429 neighbor movies from these users.',
 'Ranking most likely tracks using the NeuMF model...',
 'Ranked the movies!']

In [63]:
mf_pred

Unnamed: 0,pre_rating,movieId,title,genres,rtPictureURL
0,5.202881,5523,"Adventures of Pluto Nash, The (2002)",Action|Adventure|Comedy|Sci-Fi,http://content8.flixster.com/movie/26/57/26571...
1,5.176004,7976,Ken Park (2002),Drama,http://content8.flixster.com/movie/10/85/14/10...
2,5.107168,647,Courage Under Fire (1996),Action|Crime|Drama|War,http://content8.flixster.com/movie/10/91/78/10...
3,5.103458,5775,"Night the Lights Went Out in Georgia, The (1981)",Drama,http://content7.flixster.com/movie/10/93/08/10...
4,5.097239,6541,"League of Extraordinary Gentlemen, The (a.k.a....",Action|Fantasy|Sci-Fi,http://content6.flixster.com/movie/10/89/32/10...
5,5.080536,3889,Highlander: Endgame (Highlander IV) (2000),Action|Adventure|Fantasy,http://content9.flixster.com/movie/10/94/47/10...
6,5.066854,4676,Troop Beverly Hills (1989),Comedy,http://content7.flixster.com/movie/25/48/25481...
7,5.048721,3108,"Fisher King, The (1991)",Comedy|Drama|Fantasy|Romance,http://content6.flixster.com/movie/26/82/26820...
8,5.013875,130,Angela (1995),Drama,http://content6.flixster.com/movie/10/92/31/10...
9,4.978176,3951,Two Family House (2000),Drama,http://content6.flixster.com/movie/10/92/85/10...


In [64]:
def neural_network_predictions(usr):
    result_list=[]
    model_2 = load_model('models/neural_network_model.h5', compile = True)
    
    y_hat = model_2.predict([test.userId, test.movieId])
    y_true = test.rating
    result_list.append("(MAE)Mean Absolute Error: {}".format(round(mean_absolute_error(y_true, y_hat),4)))
    result_list.append("(RMSE)Root Mean Square Error: {}".format(round(np.sqrt(mean_squared_error(y_true,y_hat)) ,4)))
    errors=mean_absolute_error(y_true,y_hat)
    mape = 100 * (errors / y_true)
    accuracy = 100 - np.mean(mape)
    result_list.append(' Accuracy: {}'.format(round(accuracy, 2), '%.'))
        
    mlp_user_embedding_weights = (next(iter(filter(lambda x: x.name == 'User-Embedding', model_2.layers))).get_weights())
    # get the latent embedding for your desired user
    user_latent_matrix = mlp_user_embedding_weights[0]
    desired_user_id = usr
    one_user_vector = user_latent_matrix[desired_user_id,:]
    one_user_vector = np.reshape(one_user_vector, (1,50))
    result_list.append('Performing kmeans to find the nearest users...')
    # get similar users
    kmeans = KMeans(n_clusters=20, random_state=0, verbose=0).fit(user_latent_matrix)
    desired_user_label = kmeans.predict(one_user_vector)
    user_label = kmeans.labels_
    neighbors = []
    for user_id, user_label in enumerate(user_label):
        if user_label == desired_user_label:
            neighbors.append(user_id)
    result_list.append('Found {0} neighbor users.'.format(len(neighbors)))

    movies = []
    for user_id in neighbors:
        movies += list(ratings[ratings['userId'] == int(user_id)]['movieId'])
    movies = list(set(movies))
    result_list.append('Found {0} neighbor movies from these users.'.format(len(movies)))
    
    users = np.full(len(movies), desired_user_id, dtype='int32')
    items = np.array(movies, dtype='int32')
    result_list.append('Ranking most likely tracks using the NeuMF model...')
    # and predict movies for my user
    results = model_2.predict([users,items],batch_size=10, verbose=0) 
    results = results.tolist()
    result_list.append('Ranked the movies!')
    
    results = pd.DataFrame(results, columns=['pre_rating']).astype("float")
    items = pd.DataFrame(items, columns=['movieId'])
    results = pd.concat([items, results], ignore_index=True, sort=False, axis=1)
    results.columns =['movieId', 'pre_rating'] 
    results_df = pd.DataFrame(np.nan, index=range(len(results)), columns=['pre_rating','movieId'])
    for index, row in results.iterrows():
        results_df.loc[index] = [row['pre_rating'], ratings[ratings['movieId'] == row['movieId']].iloc[0]['movieId']]
    
    results_df= results_df.sort_values(by=['pre_rating'], ascending=False)
    results_df["movieId"]=results_df["movieId"].astype(int)
    results_df=pd.merge(results_df,movie_list,on="movieId")[:10]
    
    return results_df , result_list

In [65]:
nn_pred, result=neural_network_predictions(38)

In [66]:
result

['(MAE)Mean Absolute Error: 0.6197',
 '(RMSE)Root Mean Square Error: 0.8411',
 ' Accuracy: 78.98',
 'Performing kmeans to find the nearest users...',
 'Found 652 neighbor users.',
 'Found 7397 neighbor movies from these users.',
 'Ranking most likely tracks using the NeuMF model...',
 'Ranked the movies!']

In [67]:
nn_pred

Unnamed: 0,pre_rating,movieId,title,genres,rtPictureURL
0,4.670856,4472,Bad Dreams (1988),Horror|Thriller,http://content6.flixster.com/movie/32/99/96/32...
1,4.629296,2471,Crocodile Dundee II (1988),Action|Adventure|Comedy,http://content8.flixster.com/movie/25/68/25681...
2,4.617923,312,Stuart Saves His Family (1995),Comedy,http://content9.flixster.com/movie/10/84/58/10...
3,4.602695,824,Kaspar Hauser (1993),Drama|Mystery,http://images.rottentomatoescdn.com/images/def...
4,4.586013,6977,New Jack City (1991),Action|Crime|Drama,http://content7.flixster.com/movie/11/14/58/11...
5,4.531926,8272,Cromwell (1970),Drama,http://content8.flixster.com/movie/25/17/25179...
6,4.501283,3526,Parenthood (1989),Comedy|Drama,http://content6.flixster.com/movie/65/01/65011...
7,4.497003,517,Rising Sun (1993),Action|Drama|Mystery,http://content6.flixster.com/movie/11/12/51/11...
8,4.488319,49,When Night Is Falling (1995),Drama|Romance,http://content6.flixster.com/movie/10/89/77/10...
9,4.468804,4545,Short Circuit (1986),Comedy|Sci-Fi,http://content6.flixster.com/movie/26/51/26510...


In [68]:
def neural_collaborative_filtering_predictions(usr):
    result_list=[]
    model_3 = load_model('models/neural_collaborative_filtering.h5', compile = True)
    y_true = test.rating
    result_list.append('MAE for testing: {}'.format(round(mean_absolute_error(y_true, model_3.predict([test.userId, test.movieId])),4)))
    rmse = np.sqrt(mean_squared_error(y_true,model_3.predict([test.userId, test.movieId]))) 
    result_list.append("Root Mean Square Error: {} ".format(round(rmse,4)))
    errors=mean_absolute_error(y_true, model_3.predict([test.userId, test.movieId]))
    mape = 100 * (errors / y_true)
    accuracy = 100 - np.mean(mape)
    result_list.append('Accuracy: {} '.format( round(accuracy, 2), '%.'))
        
    mlp_user_embedding_weights = (next(iter(filter(lambda x: x.name == 'User-Embedding-MLP', model_3.layers))).get_weights())
    # get the latent embedding for your desired user
    user_latent_matrix = mlp_user_embedding_weights[0]
    desired_user_id = usr
    one_user_vector = user_latent_matrix[desired_user_id,:]
    one_user_vector = np.reshape(one_user_vector, (1,50))
    result_list.append('\nPerforming kmeans to find the nearest users...')
    result_list.append('For user id: {0} '.format(desired_user_id))
    # get similar users
    kmeans = KMeans(n_clusters=20, random_state=0, verbose=0).fit(user_latent_matrix)
    desired_user_label = kmeans.predict(one_user_vector)
    user_label = kmeans.labels_
    neighbors = []
    for user_id, user_label in enumerate(user_label):
        if user_label == desired_user_label:
            neighbors.append(user_id)
    result_list.append('Found {0} neighbor users.'.format(len(neighbors)))
    
    movies = []
    for user_id in neighbors:
        movies += list(ratings[ratings['userId'] == int(user_id)]['movieId'])
    movies = list(set(movies))
    result_list.append('Found {0} neighbor movies from these users.'.format(len(movies)))

    users = np.full(len(movies), desired_user_id, dtype='int32')
    items = np.array(movies, dtype='int32')

    result_list.append('Ranking most likely tracks using the NeuMF model...')
    # and predict movies for my user
    results = model_3.predict([users,items],batch_size=10, verbose=0) 
    results = results.tolist()
    result_list.append('Ranked the movies!')

    results = pd.DataFrame(results, columns=['pre_rating']).astype("float")
    items = pd.DataFrame(items, columns=['movieId'])
    results = pd.concat([items, results], ignore_index=True, sort=False, axis=1)
    results.columns =['movieId', 'pre_rating'] 
    results_df = pd.DataFrame(np.nan, index=range(len(results)), columns=['pre_rating','movieId'])
    for index, row in results.iterrows():
        results_df.loc[index] = [row['pre_rating'], ratings[ratings['movieId'] == row['movieId']].iloc[0]['movieId']]

    results_df= results_df.sort_values(by=['pre_rating'], ascending=False)
    results_df["movieId"]=results_df["movieId"].astype(int)
    results_df=pd.merge(results_df,movie_list,on="movieId")[:10]
    return results_df , result_list


In [69]:
ncf_pred ,result= neural_collaborative_filtering_predictions(38)

In [70]:
result

['MAE for testing: 0.609',
 'Root Mean Square Error: 0.8527 ',
 'Accuracy: 79.34 ',
 '\nPerforming kmeans to find the nearest users...',
 'For user id: 38 ',
 'Found 398 neighbor users.',
 'Found 6193 neighbor movies from these users.',
 'Ranking most likely tracks using the NeuMF model...',
 'Ranked the movies!']

In [71]:
ncf_pred

Unnamed: 0,pre_rating,movieId,title,genres,rtPictureURL
0,4.84434,312,Stuart Saves His Family (1995),Comedy,http://content9.flixster.com/movie/10/84/58/10...
1,4.821636,824,Kaspar Hauser (1993),Drama|Mystery,http://images.rottentomatoescdn.com/images/def...
2,4.7322,517,Rising Sun (1993),Action|Drama|Mystery,http://content6.flixster.com/movie/11/12/51/11...
3,4.666759,616,"Aristocats, The (1970)",Animation|Children,http://content7.flixster.com/movie/25/07/25079...
4,4.662996,2178,Frenzy (1972),Thriller,http://content6.flixster.com/movie/27/01/27010...
5,4.655131,647,Courage Under Fire (1996),Action|Crime|Drama|War,http://content8.flixster.com/movie/10/91/78/10...
6,4.634731,1615,"Edge, The (1997)",Adventure|Drama,http://content8.flixster.com/movie/26/94/26941...
7,4.625141,722,"Haunted World of Edward D. Wood Jr., The (1996)",Documentary,http://content7.flixster.com/movie/10/88/97/10...
8,4.623024,2921,High Plains Drifter (1973),Western,http://content9.flixster.com/movie/26/80/26802...
9,4.620769,2775,Head On (1998),Drama,http://content8.flixster.com/movie/10/88/57/10...
