<a href="https://colab.research.google.com/github/JNishimura/Deep-Learning-Recommenders/blob/main/NeuMF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q scann
!pip install ipywidgets
import os
import pprint
import tempfile
import scipy as sp

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

import math
import heapq
import matplotlib.pyplot as plt
def load_rating_file_as_list(filename):
    """Read a tab-separated rating file into a list of ``[user, item]`` pairs.

    Only the first two columns of each line are used; any further columns
    are ignored. Blank lines (for example a trailing empty line at the end
    of the file) are skipped instead of raising ``ValueError``.

    Args:
        filename: Path of the tab-separated rating file.

    Returns:
        A list of ``[user, item]`` integer pairs in file order.
    """
    rating_list = []

    with open(filename, "r") as f:
        for line in f:
            # A whitespace-only line carries no record; skipping it avoids
            # calling int() on an empty field.
            if not line.strip():
                continue
            arr = line.split("\t")
            rating_list.append([int(arr[0]), int(arr[1])])

    return rating_list

def load_negative_file(filename):
    """Read the per-test-case negative item ids from a tab-separated file.

    The first column of each line is ignored; every remaining column is
    parsed as an integer item id. Blank lines are skipped so they do not
    produce spurious empty entries, and empty fields (for example after a
    trailing tab) are ignored.

    Args:
        filename: Path of the tab-separated negatives file.

    Returns:
        A list with one list of integer item ids per non-blank line, in
        file order.
    """
    negative_list = []

    with open(filename, "r") as f:
        for line in f:
            # Skip blank lines: appending an empty list here would shift
            # every later entry relative to the matching test rating.
            if not line.strip():
                continue
            arr = line.split("\t")
            negatives = [int(x) for x in arr[1:] if x.strip()]
            negative_list.append(negatives)

    return negative_list

def load_rating_file_as_matrix(filename):
    """Build a binary user-item interaction matrix from a rating file.

    Each non-blank line is expected to hold at least three tab-separated
    columns: user id, item id and rating. The matrix has shape
    ``(max_user + 1, max_item + 1)`` where the maxima are taken over every
    line, and holds ``1.0`` at ``[user, item]`` for each line whose rating
    is greater than zero.

    Args:
        filename: Path of the tab-separated rating file.

    Returns:
        A ``scipy.sparse.dok_matrix`` of dtype ``float32``.
    """
    # Imported locally so the function does not depend on ``scipy.sparse``
    # having been loaded as a side effect of ``import scipy``.
    from scipy.sparse import dok_matrix

    num_users, num_items = 0, 0
    positives = []

    # Single pass: track the largest ids and remember the positive pairs,
    # rather than reading the file twice.
    with open(filename, "r") as f:
        for line in f:
            if not line.strip():
                continue
            arr = line.split("\t")
            user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
            num_users = max(num_users, user)
            num_items = max(num_items, item)
            if rating > 0:
                positives.append((user, item))

    mat = dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    for user, item in positives:
        mat[user, item] = 1.0
    return mat

# Load the training interactions and the held-out test data.
train = load_rating_file_as_matrix('ml-1m.train.rating')
test_ratings = load_rating_file_as_list('ml-1m.test.rating')
test_negatives = load_negative_file('ml-1m.test.negative')

# The matrix dimensions give the number of user and item ids.
num_users = train.shape[0]
num_items = train.shape[1]
print(
    'Loaded Data. # Users:', num_users,
    '# Items:', num_items,
    '# Train:', train.nnz,
    '# Test:', len(test_ratings),
)


Loaded Data. # Users: 6040 # Items: 3706 # Train: 994169 # Test: 6040


In [None]:
def get_train_instances(train, num_negatives):
    """Expand the interaction matrix into labelled training samples.

    For every stored ``(user, item)`` entry of ``train`` one positive sample
    (label 1) is emitted, followed by ``num_negatives`` negative samples
    (label 0) whose items are drawn uniformly at random from items the user
    has no stored interaction with.

    Note: the rejection loop only stops once an unseen item is drawn, so it
    does not terminate for a user who has interacted with every item.

    Args:
        train: Sparse interaction matrix exposing ``shape``, ``keys()`` and
            ``get((user, item))`` (as ``scipy.sparse.dok_matrix`` does).
        num_negatives: Number of negative samples per positive sample.

    Returns:
        A ``(user_input, item_input, labels)`` tuple of equally long lists.
    """
    user_input, item_input, labels = [], [], []
    # Take the item count from the matrix being sampled rather than from a
    # module-level global, so the function works for any matrix passed in.
    num_items = train.shape[1]
    for (u, i) in train.keys():
        # positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # negative instances
        for _ in range(num_negatives):
            j = np.random.randint(num_items)
            # Redraw until the item is one the user has not interacted with.
            while train.get((u, j)):
                j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

def evaluate(model, testPosRatings, testNegRatings, N):
    """Return the mean hit rate and mean NDCG over all test cases.

    Test case ``k`` pairs ``testPosRatings[k]`` with ``testNegRatings[k]``
    and is scored by ``evaluate_one`` using a top-``N`` ranking.
    """
    per_case = [
        evaluate_one(model, positive, testNegRatings[idx], N)
        for idx, positive in enumerate(testPosRatings)
    ]
    hit_values = [hit for hit, _ in per_case]
    ndcg_values = [ndcg for _, ndcg in per_case]

    return np.array(hit_values).mean(), np.array(ndcg_values).mean()

def evaluate_one(model, posRating, negRatings, N):
    """Score one test case: rank the held-out item against its negatives.

    The positive item is scored together with the negative items, the ``N``
    highest-scoring items are selected, and the hit rate and NDCG of the
    positive item within that top-``N`` list are returned.

    Args:
        model: Object whose ``predict([users, items], batch_size=...)``
            returns one score per (user, item) pair.
        posRating: Sequence whose first two elements are the user id and
            the held-out positive item id.
        negRatings: List of negative item ids for this user. It is not
            modified.
        N: Length of the ranked list to evaluate.

    Returns:
        A ``(hit, ndcg)`` tuple.
    """
    user = posRating[0]
    movie = posRating[1]

    # Build a fresh candidate list instead of appending to (and later
    # popping from) the caller's list, so a failure inside predict() cannot
    # leave the shared negatives list with an extra item.
    candidates = negRatings + [movie]
    user_input = np.full(len(candidates), user)

    predictions = model.predict([user_input, np.array(candidates)], batch_size = 100)

    # associate item with predictions
    items = dict(zip(candidates, predictions))

    rankedList = heapq.nlargest(N, items, items.get)
    ndcg = evaluateNDCG(rankedList, movie)
    hit = hitRate(rankedList, movie)

    return hit, ndcg

def evaluateNDCG(ranked_list, target_item):
    """Return the NDCG contribution of ``target_item`` in ``ranked_list``.

    The value is ``log(2) / log(position + 2)`` for the first zero-based
    position at which the item appears, or 0 if it is absent.
    """
    for position, candidate in enumerate(ranked_list):
        if candidate == target_item:
            return math.log(2) / math.log(position + 2)

    return 0

def hitRate(ranked_list, target_item):
    """Return 1 if ``target_item`` occurs in ``ranked_list``, otherwise 0."""
    found = any(target_item == candidate for candidate in ranked_list)
    return 1 if found else 0

In [None]:
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Reshape, Multiply, Flatten, Lambda, Concatenate, Layer, Dropout, Average
from keras import initializers, regularizers
import sys
from keras.optimizers import Adam
from keras.regularizers import *

In [None]:
def get_MLPGMFmodel(num_users, num_items, latent_dim):
    """Build a two-branch recommender that shares one pair of embeddings.

    Both branches read the same user and item embeddings of size
    ``latent_dim``:

    * GMF branch: element-wise product of the two embeddings followed by a
      single sigmoid unit.
    * MLP branch: concatenation of the two embeddings followed by dense
      layers of 64, 32 and 8 ReLU units and a single sigmoid unit.

    The two branch outputs are blended by a ``Combine`` layer.

    Returns:
        A Keras ``Model`` taking ``[user, item]`` integer inputs.
    """
    user_input = Input(shape=(1,), dtype='int32', name='user')
    movie_input = Input(shape=(1,), dtype='int32', name='item')

    # Embedding tables shared by both branches.
    user_embedding = Embedding(
        input_dim=num_users,
        output_dim=latent_dim,
        name='user_embed',
        embeddings_initializer=initializers.RandomNormal(stddev=0.01),
        embeddings_regularizer=regularizers.l2(0),
        input_length=1,
    )
    movie_embedding = Embedding(
        input_dim=num_items,
        output_dim=latent_dim,
        name='item_embed',
        embeddings_initializer=initializers.RandomNormal(stddev=0.01),
        embeddings_regularizer=regularizers.l2(0),
        input_length=1,
    )
    user_latent = Flatten()(user_embedding(user_input))
    item_latent = Flatten()(movie_embedding(movie_input))

    # GMF branch.
    gmf_vector = Multiply()([user_latent, item_latent])
    gmf_output = Dense(
        1,
        activation='sigmoid',
        kernel_initializer='lecun_uniform',
        name='prediction',
    )(gmf_vector)

    # MLP branch.
    mlp_features = Concatenate()([user_latent, item_latent])
    hidden = Dense(
        64,
        activation='relu',
        name='Layer1',
        kernel_initializer='glorot_uniform',
        kernel_regularizer=l2(),
    )(mlp_features)
    hidden = Dense(32, activation='relu', name='Layer3', kernel_regularizer=l2())(hidden)
    hidden = Dense(8, activation='relu', kernel_regularizer=l2())(hidden)
    mlp_output = Dense(1, activation='sigmoid', name='Layer4')(hidden)

    blended = Combine()([gmf_output, mlp_output])
    return Model(inputs=[user_input, movie_input], outputs=blended)

class Combine(Layer):
    """Blend two inputs with a single trainable mixing weight ``alpha``.

    The output is ``(1 - alpha) * inputs[0] + alpha * inputs[1]``. ``alpha``
    is a trainable variable of shape ``[1]`` initialised from
    ``tf.random.uniform`` with its default arguments; nothing in this layer
    constrains its value during training.
    """

    def __init__(self, **kwargs):
        # Forward the standard layer keyword arguments (name, dtype,
        # trainable, ...) so the layer can be named and rebuilt from a
        # config instead of rejecting them with a TypeError.
        super(Combine, self).__init__(**kwargs)
        random_alpha = tf.random.uniform(shape=[1])
        self.alpha = tf.Variable(initial_value=random_alpha, trainable=True)

    def call(self, inputs):
        """Return the alpha-weighted blend of ``inputs[0]`` and ``inputs[1]``."""
        return (1 - self.alpha) * inputs[0] + self.alpha * inputs[1]

# Build the combined model with 4-dimensional embeddings and compile it
# for binary classification.
modelMLPGMF = get_MLPGMFmodel(num_users, num_items, latent_dim=4)
modelMLPGMF.compile(
    optimizer=Adam(0.01),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

In [None]:
NUM_EPOCHS = 10
# Best metrics seen so far. best_iter is initialised here (it used to be
# best_epoch, which was never read) so the summary print below cannot raise
# NameError when no epoch improves on the initial hit rate.
best_hr = 0
best_ncdg = 0
best_iter = -1


# Baseline metrics of the untrained model.
hit_rate, ncdg = evaluate(modelMLPGMF, test_ratings, test_negatives, N = 10)
print('Initial Model', 'Hit Rate:', hit_rate, 'NCDG:', ncdg)

for epoch in range(1, NUM_EPOCHS + 1):
    # Negatives are re-sampled at the start of every epoch.
    user_input, item_input, labels = get_train_instances(train, num_negatives = 4)

    hist = modelMLPGMF.fit([np.array(user_input), np.array(item_input)],
                      np.array(labels),
                      batch_size = 256, epochs = 1)

    hit_rate, ncdg = evaluate(modelMLPGMF, test_ratings, test_negatives, N = 10)
    print('Epoch', epoch, 'Hit Rate:', hit_rate, 'NCDG:', ncdg)

    # Keep the epoch with the highest hit rate.
    if hit_rate > best_hr:
        best_hr, best_ncdg, best_iter = hit_rate, ncdg, epoch

print("Best Iteration %d:  HR = %.4f, NDCG = %.4f. " %(best_iter, best_hr, best_ncdg))

Initial Model Hit Rate: 0.09387417218543047 NCDG: 0.04282874498204579
Epoch 1 Hit Rate: 0.49817880794701985 NCDG: 0.2772806144187781
Epoch 2 Hit Rate: 0.519205298013245 NCDG: 0.28973695751265494
Epoch 3 Hit Rate: 0.5197019867549669 NCDG: 0.2898943889148783
Epoch 4 Hit Rate: 0.5397350993377483 NCDG: 0.30072658848914346
Epoch 5 Hit Rate: 0.5458609271523179 NCDG: 0.30607305635509846
Epoch 6 Hit Rate: 0.541887417218543 NCDG: 0.3044568061611843
Epoch 7 Hit Rate: 0.5509933774834437 NCDG: 0.3080007059181892
Epoch 8 Hit Rate: 0.5533112582781456 NCDG: 0.31164745819869927
Epoch 9 Hit Rate: 0.5498344370860927 NCDG: 0.30848053304988116
Epoch 10 Hit Rate: 0.5528145695364238 NCDG: 0.3089514144966369
Best Iteration 8:  HR = 0.5533, NDCG = 0.3116. 


In [None]:
Initial Model Hit Rate: 0.09884105960264901 NCDG: 0.044829000697016945
19418/19418 [==============================] - 55s 3ms/step - loss: 0.3574 - binary_accuracy: 0.8426
Epoch 1 Hit Rate: 0.5894039735099338 NCDG: 0.32918984589722095
19418/19418 [==============================] - 57s 3ms/step - loss: 0.2873 - binary_accuracy: 0.8737
Epoch 2 Hit Rate: 0.6132450331125828 NCDG: 0.3495621870250538
19418/19418 [==============================] - 63s 3ms/step - loss: 0.2819 - binary_accuracy: 0.8762
Epoch 3 Hit Rate: 0.6195364238410596 NCDG: 0.3555492930337562
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2801 - binary_accuracy: 0.8770
Epoch 4 Hit Rate: 0.616225165562914 NCDG: 0.3547719312439317
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2797 - binary_accuracy: 0.8771
Epoch 5 Hit Rate: 0.6188741721854305 NCDG: 0.3533865042684087
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2790 - binary_accuracy: 0.8774
Epoch 6 Hit Rate: 0.6205298013245033 NCDG: 0.3552725920313917
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2793 - binary_accuracy: 0.8774
Epoch 7 Hit Rate: 0.6152317880794702 NCDG: 0.35143490842186614
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2793 - binary_accuracy: 0.8776
Epoch 8 Hit Rate: 0.6274834437086093 NCDG: 0.35935637150230465
19418/19418 [==============================] - 54s 3ms/step - loss: 0.2795 - binary_accuracy: 0.8776
Epoch 9 Hit Rate: 0.623841059602649 NCDG: 0.36075202520517824
19418/19418 [==============================] - 56s 3ms/step - loss: 0.2799 - binary_accuracy: 0.8774
Epoch 10 Hit Rate: 0.6241721854304636 NCDG: 0.35797406302255125
Best Iteration 8:  HR = 0.6275, NDCG = 0.3594. 
Initial Model Hit Rate: 0.6241721854304636 NCDG: 0.35797406302255125
19418/19418 [==============================] - 57s 3ms/step - loss: 0.2803 - binary_accuracy: 0.8774
Epoch 1 Hit Rate: 0.6024834437086093 NCDG: 0.34306248220169855
19418/19418 [==============================] - 57s 3ms/step - loss: 0.2809 - binary_accuracy: 0.8772
Epoch 2 Hit Rate: 0.6124172185430463 NCDG: 0.3495815820908394
19418/19418 [==============================] - 58s 3ms/step - loss: 0.2814 - binary_accuracy: 0.8772
Epoch 3 Hit Rate: 0.6339403973509934 NCDG: 0.36179081879004027
19418/19418 [==============================] - 58s 3ms/step - loss: 0.2818 - binary_accuracy: 0.8771
Epoch 4 Hit Rate: 0.6192052980132451 NCDG: 0.35434720631476707
19418/19418 [==============================] - 54s 3ms/step - loss: 0.2821 - binary_accuracy: 0.8771
Epoch 5 Hit Rate: 0.6271523178807947 NCDG: 0.35813844001850637
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2827 - binary_accuracy: 0.8769
Epoch 6 Hit Rate: 0.627317880794702 NCDG: 0.35848418361642953
19418/19418 [==============================] - 57s 3ms/step - loss: 0.2830 - binary_accuracy: 0.8768
Epoch 7 Hit Rate: 0.625 NCDG: 0.3578477042948891
19418/19418 [==============================] - 58s 3ms/step - loss: 0.2835 - binary_accuracy: 0.8767
Epoch 8 Hit Rate: 0.6279801324503311 NCDG: 0.3582184543427913
19418/19418 [==============================] - 55s 3ms/step - loss: 0.2837 - binary_accuracy: 0.8765
Epoch 9 Hit Rate: 0.6302980132450331 NCDG: 0.36178773947134196
19418/19418 [==============================] - 57s 3ms/step - loss: 0.2841 - binary_accuracy: 0.8767
Epoch 10 Hit Rate: 0.6278145695364239 NCDG: 0.35903504455957974
Best Iteration 3:  HR = 0.6339, NDCG = 0.3618. 

16
Initial Model Hit Rate: 0.08973509933774834 NCDG: 0.041006180572674446
19418/19418 [==============================] - 75s 4ms/step - loss: 0.3808 - binary_accuracy: 0.8348
Epoch 1 Hit Rate: 0.5412251655629139 NCDG: 0.3000172075421776
19418/19418 [==============================] - 74s 4ms/step - loss: 0.2967 - binary_accuracy: 0.8692
Epoch 2 Hit Rate: 0.5940397350993377 NCDG: 0.33441136397987165
19418/19418 [==============================] - 69s 4ms/step - loss: 0.2819 - binary_accuracy: 0.8767
Epoch 3 Hit Rate: 0.616887417218543 NCDG: 0.3517137410070091
19418/19418 [==============================] - 70s 4ms/step - loss: 0.2765 - binary_accuracy: 0.8792
Epoch 4 Hit Rate: 0.6210264900662251 NCDG: 0.35694308135189423
19418/19418 [==============================] - 70s 4ms/step - loss: 0.2729 - binary_accuracy: 0.8812
Epoch 5 Hit Rate: 0.6334437086092716 NCDG: 0.3614142027065812
19418/19418 [==============================] - 73s 4ms/step - loss: 0.2691 - binary_accuracy: 0.8831
Epoch 6 Hit Rate: 0.652317880794702 NCDG: 0.3759829957264442
19418/19418 [==============================] - 71s 4ms/step - loss: 0.2656 - binary_accuracy: 0.8851
Epoch 7 Hit Rate: 0.6460264900662251 NCDG: 0.375211989055551
19418/19418 [==============================] - 70s 4ms/step - loss: 0.2640 - binary_accuracy: 0.8859
Epoch 8 Hit Rate: 0.6296357615894039 NCDG: 0.3619163686329125
19418/19418 [==============================] - 71s 4ms/step - loss: 0.2627 - binary_accuracy: 0.8867
Epoch 9 Hit Rate: 0.6639072847682119 NCDG: 0.3836137413879617
19418/19418 [==============================] - 68s 3ms/step - loss: 0.2615 - binary_accuracy: 0.8874
Epoch 10 Hit Rate: 0.6738410596026491 NCDG: 0.39059073595343996
Best Iteration 10:  HR = 0.6738, NDCG = 0.3906. 
Initial Model Hit Rate: 0.6738410596026491 NCDG: 0.39059073595343996
19418/19418 [==============================] - 69s 4ms/step - loss: 0.2608 - binary_accuracy: 0.8880
Epoch 1 Hit Rate: 0.6602649006622516 NCDG: 0.38586664445681246
19418/19418 [==============================] - 68s 4ms/step - loss: 0.2608 - binary_accuracy: 0.8881
Epoch 2 Hit Rate: 0.66158940397351 NCDG: 0.3852333595327239
19418/19418 [==============================] - 69s 4ms/step - loss: 0.2611 - binary_accuracy: 0.8879
Epoch 3 Hit Rate: 0.6690397350993378 NCDG: 0.39179113137438737
19418/19418 [==============================] - 68s 3ms/step - loss: 0.2616 - binary_accuracy: 0.8879

32
Initial Model Hit Rate: 0.09983443708609271 NCDG: 0.046672917419764544
19418/19418 [==============================] - 94s 5ms/step - loss: 0.3748 - binary_accuracy: 0.8382
Epoch 1 Hit Rate: 0.5682119205298013 NCDG: 0.3163363479507807
19418/19418 [==============================] - 91s 5ms/step - loss: 0.2856 - binary_accuracy: 0.8752
Epoch 2 Hit Rate: 0.6205298013245033 NCDG: 0.355729848730191
19418/19418 [==============================] - 94s 5ms/step - loss: 0.2725 - binary_accuracy: 0.8817
Epoch 3 Hit Rate: 0.6390728476821192 NCDG: 0.37318865242025384
19418/19418 [==============================] - 90s 5ms/step - loss: 0.2654 - binary_accuracy: 0.8855
Epoch 4 Hit Rate: 0.6566225165562913 NCDG: 0.382269699624981
19418/19418 [==============================] - 90s 5ms/step - loss: 0.2605 - binary_accuracy: 0.8880
Epoch 5 Hit Rate: 0.6577814569536424 NCDG: 0.3810512532217693
19418/19418 [==============================] - 89s 5ms/step - loss: 0.2568 - binary_accuracy: 0.8901
Epoch 6 Hit Rate: 0.6561258278145695 NCDG: 0.3820141123293288
19418/19418 [==============================] - 89s 5ms/step - loss: 0.2536 - binary_accuracy: 0.8917
Epoch 7 Hit Rate: 0.6677152317880795 NCDG: 0.3916654060012053
19418/19418 [==============================] - 91s 5ms/step - loss: 0.2518 - binary_accuracy: 0.8929
Epoch 8 Hit Rate: 0.6589403973509934 NCDG: 0.3878571543814116
19418/19418 [==============================] - 90s 5ms/step - loss: 0.2509 - binary_accuracy: 0.8935
Epoch 9 Hit Rate: 0.6690397350993378 NCDG: 0.3951155773396269
19418/19418 [==============================] - 90s 5ms/step - loss: 0.2502 - binary_accuracy: 0.8941
Epoch 10 Hit Rate: 0.6711920529801324 NCDG: 0.3963004695911284
Best Iteration 10:  HR = 0.6712, NDCG = 0.3963. 

4
Initial Model Hit Rate: 0.09387417218543047 NCDG: 0.04282874498204579
19418/19418 [==============================] - 51s 3ms/step - loss: 0.3726 - binary_accuracy: 0.8348
Epoch 1 Hit Rate: 0.49817880794701985 NCDG: 0.2772806144187781
19418/19418 [==============================] - 50s 3ms/step - loss: 0.3233 - binary_accuracy: 0.8528
Epoch 2 Hit Rate: 0.519205298013245 NCDG: 0.28973695751265494
19418/19418 [==============================] - 51s 3ms/step - loss: 0.3203 - binary_accuracy: 0.8538
Epoch 3 Hit Rate: 0.5197019867549669 NCDG: 0.2898943889148783
19418/19418 [==============================] - 50s 3ms/step - loss: 0.3131 - binary_accuracy: 0.8577
Epoch 4 Hit Rate: 0.5397350993377483 NCDG: 0.30072658848914346
19418/19418 [==============================] - 49s 3ms/step - loss: 0.3097 - binary_accuracy: 0.8596
Epoch 5 Hit Rate: 0.5458609271523179 NCDG: 0.30607305635509846
19418/19418 [==============================] - 50s 3ms/step - loss: 0.3088 - binary_accuracy: 0.8601
Epoch 6 Hit Rate: 0.541887417218543 NCDG: 0.3044568061611843
19418/19418 [==============================] - 50s 3ms/step - loss: 0.3085 - binary_accuracy: 0.8603
Epoch 7 Hit Rate: 0.5509933774834437 NCDG: 0.3080007059181892
19418/19418 [==============================] - 49s 3ms/step - loss: 0.3082 - binary_accuracy: 0.8607
Epoch 8 Hit Rate: 0.5533112582781456 NCDG: 0.31164745819869927
19418/19418 [==============================] - 50s 3ms/step - loss: 0.3085 - binary_accuracy: 0.8607
Epoch 9 Hit Rate: 0.5498344370860927 NCDG: 0.30848053304988116
19418/19418 [==============================] - 52s 3ms/step - loss: 0.3086 - binary_accuracy: 0.8606
Epoch 10 Hit Rate: 0.5528145695364238 NCDG: 0.3089514144966369
Best Iteration 8:  HR = 0.5533, NDCG = 0.3116. 