<a href="https://colab.research.google.com/github/JNishimura/Deep-Learning-Recommenders/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q scann
!pip install ipywidgets
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
import scipy as sp
import math
import heapq
import matplotlib.pyplot as plt

def load_rating_file_as_list(filename):
    """Read (user, item) pairs from a tab-separated rating file.

    Only the first two fields of each line are used; both are parsed as
    integers. Any further fields on the line are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``[user, item]`` lists, one per non-blank line, in file order.

    Raises:
        ValueError: If one of the first two fields is not an integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    rating_list = []

    with open(filename, "r") as f:
        for line in f:
            # A blank line (typically a trailing newline at the end of the
            # file) carries no rating and would otherwise crash int().
            if not line.strip():
                continue
            arr = line.split("\t")
            rating_list.append([int(arr[0]), int(arr[1])])

    return rating_list

def load_negative_file(filename):
    """Read per-row negative item ids from a tab-separated file.

    The first field of every line is ignored; every remaining non-empty field
    is parsed as an integer item id.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each entry being the list of
        integer ids found after the first field, in file order.

    Raises:
        ValueError: If a non-empty field after the first is not an integer.
    """
    negative_list = []

    with open(filename, "r") as f:
        for line in f:
            # Skipping blank lines avoids appending a phantom empty entry,
            # which would shift the row-to-row alignment callers rely on.
            if not line.strip():
                continue
            fields = line.split("\t")[1:]
            # Empty fields (e.g. from a trailing tab) hold no id.
            negative_list.append([int(x) for x in fields if x.strip()])

    return negative_list

def _iter_rating_rows(filename):
    """Yield ``(user, item, rating)`` for each non-blank line of a tab-separated file."""
    with open(filename, "r") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) hold no rating.
            if not line.strip():
                continue
            arr = line.split("\t")
            yield int(arr[0]), int(arr[1]), float(arr[2])


def load_rating_file_as_matrix(filename):
    """Build a binary user-by-item interaction matrix from a rating file.

    The file is read twice: once to find the largest user and item ids (which
    fix the matrix shape) and once to fill in the entries. This keeps memory
    use bounded by the matrix itself.

    Args:
        filename: Path of a tab-separated file whose first three fields are
            user id (int), item id (int) and rating (float).

    Returns:
        A ``scipy.sparse.dok_matrix`` of dtype float32 and shape
        ``(max_user + 1, max_item + 1)`` holding 1.0 wherever a row with a
        rating greater than zero was seen.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that the
    # `scipy.sparse` submodule has been loaded.
    from scipy.sparse import dok_matrix

    num_users, num_items = 0, 0
    for user, item, _ in _iter_rating_rows(filename):
        num_users = max(num_users, user)
        num_items = max(num_items, item)

    # Ids are used directly as indices, hence the +1 on each dimension.
    mat = dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    for user, item, rating in _iter_rating_rows(filename):
        if rating > 0:
            mat[user, item] = 1.0
    return mat

# Load the training interactions and the held-out evaluation data.
# The paths are relative to the notebook's working directory.
train = load_rating_file_as_matrix('ml-1m.train.rating')
test_ratings = load_rating_file_as_list('ml-1m.test.rating')
test_negatives = load_negative_file('ml-1m.test.negative')

# The matrix shape gives the sizes of the user and item id spaces.
num_users = train.shape[0]
num_items = train.shape[1]
print(
    'Loaded Data. # Users:', num_users,
    '# Items:', num_items,
    '# Train:', train.nnz,
    '# Test:', len(test_ratings),
)

Loaded Data. # Users: 6040 # Items: 3706 # Train: 994169 # Test: 6040


In [None]:
def get_train_instances(train, num_negatives):
    """Expand the interaction matrix into labelled training examples.

    Every stored ``(user, item)`` entry of ``train`` becomes one positive
    example (label 1), followed by ``num_negatives`` negative examples
    (label 0) for the same user. Negative items are drawn uniformly with
    ``np.random.randint`` and redrawn until the pair is not stored in ``train``.

    Note: the redraw loop never terminates for a user who has an entry for
    every item, since no valid negative exists.

    Args:
        train: Sparse matrix exposing ``shape``, ``keys()`` and ``get((u, i))``
            (e.g. a ``scipy.sparse.dok_matrix``).
        num_negatives: Number of negatives sampled per positive.

    Returns:
        Three parallel lists ``(user_input, item_input, labels)``.
    """
    user_input, item_input, labels = [], [], []
    # Take the item count from the matrix that was passed in, not from a
    # notebook-level global, so sampling always matches `train`.
    num_items = train.shape[1]
    for (u, i) in train.keys():
        # positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # negative instances
        for _ in range(num_negatives):
            j = np.random.randint(num_items)
            while train.get((u, j)):
                j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

def evaluate(model, testPosRatings, testNegRatings, N):
    """Average the top-N hit rate and NDCG over all test rows.

    Row ``k`` of ``testPosRatings`` is scored together with row ``k`` of
    ``testNegRatings`` by ``evaluate_one``.

    Args:
        model: Model handed through to ``evaluate_one``.
        testPosRatings: Sequence of held-out positive ratings.
        testNegRatings: Sequence of negative item lists, indexed like
            ``testPosRatings``.
        N: Length of the ranked list used for both metrics.

    Returns:
        A tuple ``(mean_hit_rate, mean_ndcg)``.
    """
    per_row = [
        evaluate_one(model, testPosRatings[idx], testNegRatings[idx], N)
        for idx in range(len(testPosRatings))
    ]
    hits = [hit for hit, _ in per_row]
    ndcgs = [ndcg for _, ndcg in per_row]

    return np.array(hits).mean(), np.array(ndcgs).mean()

def evaluate_one(model, posRating, negRatings, N):
    """Score one held-out positive against its negatives and rank the top N.

    The positive item is appended to the negatives to form the candidate
    list, the model scores every candidate for the user, and the N
    highest-scoring items are passed to ``hitRate`` and ``evaluateNDCG``.

    Args:
        model: Object whose ``predict([users, items], batch_size=...)`` is
            assumed to return one score per candidate row (e.g. an array of
            shape ``(n, 1)``) - confirm for models other than the one built
            in this notebook.
        posRating: Sequence whose first two elements are the user id and the
            held-out item id.
        negRatings: Item ids to rank the held-out item against. The list is
            not modified.
        N: Number of top-ranked items to keep.

    Returns:
        A tuple ``(hit, ndcg)`` for this test row.
    """
    user, movie = posRating[0], posRating[1]

    # Work on a copy: mutating the caller's list would leave it corrupted
    # (with the positive item still appended) if predict() raised.
    candidates = list(negRatings)
    candidates.append(movie)

    user_input = np.full(len(candidates), user)

    predictions = model.predict([user_input, np.array(candidates)], batch_size = 100)

    # Flatten to plain floats so the ranking compares scalars rather than
    # 1-element arrays.
    scores = np.asarray(predictions).reshape(-1)

    # associate item with predictions
    items = {item: float(score) for item, score in zip(candidates, scores)}

    rankedList = heapq.nlargest(N, items, items.get)
    ndcg = evaluateNDCG(rankedList, movie)
    hit = hitRate(rankedList, movie)

    return hit, ndcg

def evaluateNDCG(ranked_list, target_item):
    """Return the NDCG contribution of ``target_item`` within ``ranked_list``.

    Args:
        ranked_list: Items ordered from best to worst.
        target_item: The item to look for.

    Returns:
        ``log(2) / log(position + 2)`` for the first zero-based position at
        which the target appears (1.0 at the top), or 0 if it is absent.
    """
    for position, candidate in enumerate(ranked_list):
        if candidate == target_item:
            return math.log(2) / math.log(position + 2)

    return 0

def hitRate(ranked_list, target_item):
    """Return 1 if ``target_item`` occurs in ``ranked_list``, otherwise 0.

    Args:
        ranked_list: Iterable of ranked items.
        target_item: The item to look for.
    """
    found = any(target_item == candidate for candidate in ranked_list)
    return 1 if found else 0

In [None]:
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Reshape, Multiply, Flatten, Lambda, Concatenate, Layer, Dropout, Average
from keras import initializers, regularizers
import sys
from keras.optimizers import Adam
from keras.regularizers import *

In [None]:
def get_MLPmodel(num_users, num_items, latent_dim):
    """Build the MLP recommender: two embeddings feeding a stack of dense layers.

    User and item ids are embedded separately, flattened and concatenated,
    then passed through ReLU dense layers of 64, 32 and 16 units and a final
    single-unit sigmoid layer.

    Args:
        num_users: Size of the user id vocabulary (embedding ``input_dim``).
        num_items: Size of the item id vocabulary (embedding ``input_dim``).
        latent_dim: Width of each embedding vector.

    Returns:
        An uncompiled Keras ``Model`` taking ``[user, item]`` integer inputs of
        shape ``(batch, 1)`` and producing one sigmoid output per row.
    """
    user_input = Input(shape=(1,), dtype='int32', name='user')
    item_input = Input(shape=(1,), dtype='int32', name='item')

    # `input_length` is intentionally not passed: it is deprecated in recent
    # Keras releases, and the sequence length is already fixed to 1 by the
    # Input shapes above.
    user_embedding = Embedding(
        input_dim=num_users,
        output_dim=latent_dim,
        name='user_embed',
        embeddings_initializer=initializers.RandomNormal(stddev=0.01),
        embeddings_regularizer=regularizers.l2(0),
    )
    item_embedding = Embedding(
        input_dim=num_items,
        output_dim=latent_dim,
        name='item_embed',
        embeddings_initializer=initializers.RandomNormal(stddev=0.01),
        embeddings_regularizer=regularizers.l2(0),
    )

    # Each embedding output is (batch, 1, latent_dim); flatten to (batch, latent_dim).
    user_latent = Flatten()(user_embedding(user_input))
    item_latent = Flatten()(item_embedding(item_input))
    inputs = Concatenate()([user_latent, item_latent])

    # `regularizers.l2()` is referenced through the module, like the embedding
    # regularizers above, instead of relying on a star import.
    layer = Dense(64, activation='relu', name='Layer1',
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizers.l2())(inputs)
    layer = Dense(32, activation='relu', name='Layer3')(layer)
    layer = Dense(16, activation='relu')(layer)
    output = Dense(1, activation='sigmoid', name='Layer4')(layer)
    return Model(inputs=[user_input, item_input], outputs=output)


# Build the MLP with 8-dimensional embeddings, then compile it for binary
# (interaction / no interaction) classification.
modelMLP = get_MLPmodel(num_users, num_items, latent_dim=8)
modelMLP.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

In [None]:
# Train for NUM_EPOCHS epochs, re-sampling negatives before each one, and
# remember the epoch with the highest top-10 hit rate on the test set.
NUM_EPOCHS = 20
best_hr = 0
best_ncdg = 0
best_epoch = -1
# NOTE(review): model_path is defined but nothing in this cell saves the model.
model_path = "OP_model.h5"

# Baseline metrics of the untrained model.
hit_rate, ncdg = evaluate(modelMLP, test_ratings, test_negatives, N = 10)
print('Initial Model', 'Hit Rate:', hit_rate, 'NCDG:', ncdg)

for epoch in range(1, NUM_EPOCHS + 1):
    # Fresh negatives every epoch, 4 per positive interaction.
    user_input, item_input, labels = get_train_instances(train, num_negatives = 4)

    hist = modelMLP.fit([np.array(user_input), np.array(item_input)],
                      np.array(labels),
                      batch_size = 256, epochs = 1)

    hit_rate, ncdg = evaluate(modelMLP, test_ratings, test_negatives, N = 10)
    print('Epoch', epoch, 'Hit Rate:', hit_rate, 'NCDG:', ncdg)

    if hit_rate > best_hr:
        # Update the variable initialised above, so the summary below is
        # always defined even if the hit rate never improves.
        best_hr, best_ncdg, best_epoch = hit_rate, ncdg, epoch

# Kept as an alias in case later cells refer to the old name.
best_iter = best_epoch

print("Best Iteration %d:  HR = %.4f, NDCG = %.4f. " %(best_epoch, best_hr, best_ncdg))


Initial Model Hit Rate: 0.09602649006622517 NCDG: 0.043228602061250045
Epoch 1 Hit Rate: 0.44420529801324504 NCDG: 0.24608411104959674
Epoch 2 Hit Rate: 0.44917218543046356 NCDG: 0.2474341167960106
Epoch 3 Hit Rate: 0.48956953642384105 NCDG: 0.2695412242546553
Epoch 4 Hit Rate: 0.5004966887417218 NCDG: 0.27710885600975393
Epoch 5 Hit Rate: 0.503476821192053 NCDG: 0.2788999692017012
Epoch 6 Hit Rate: 0.5130794701986755 NCDG: 0.2875578325216388
Epoch 7 Hit Rate: 0.5155629139072848 NCDG: 0.2881733422422159
Epoch 8 Hit Rate: 0.5309602649006623 NCDG: 0.29595239331362183
Epoch 9 Hit Rate: 0.5390728476821192 NCDG: 0.299698985889525
Epoch 10 Hit Rate: 0.543046357615894 NCDG: 0.30186444561938136
Epoch 11 Hit Rate: 0.5526490066225166 NCDG: 0.30578529940035637
Epoch 12 Hit Rate: 0.5420529801324503 NCDG: 0.29805844125319353
Epoch 13 Hit Rate: 0.5526490066225166 NCDG: 0.30581229583569747
Epoch 14 Hit Rate: 0.5556291390728477 NCDG: 0.30773876901427794
Epoch 15 Hit Rate: 0.5544701986754967 NCDG: 0.30