In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Report the TensorFlow version this notebook was executed with.
print('TF version: {}'.format(tf.__version__))

TF version: 2.2.0


# Read Data

In [9]:
# Root folder of the shared thesis files.
# Override per machine by setting the THESIS_SHARED_FILES environment variable
# (e.g. 'C:/Users/robin.opdam/Google Drive/Thesis (Msc)/Thesis_shared_files/')
# instead of editing this cell; when unset or empty, the original default is used.
# os.path.join(..., '') guarantees a trailing separator, which is required because
# the load cell builds the file location by plain concatenation
# (path + data_path + file_name).
path = os.path.join(
    os.environ.get('THESIS_SHARED_FILES')
    or '/Users/Robin/Google Drive/Thesis (Msc)/Thesis_shared_files/',
    '',
)

## Amazon Fashion

In [10]:
# Active dataset: Amazon Fashion, smallest user subset.
# Other available files: 'Amazon_full', 'Amazon_05_users'.
data_path, file_name = 'data/Amazon/', 'Amazon_01_users'

## MovieLens

In [11]:
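# MovieLens alternative (disabled): uncomment data_path and exactly one
# file_name line below to load MovieLens instead of Amazon Fashion.
# data_path = 'data/ML/'
# file_name = 'ML_full'
# file_name = 'ML_05_users'
# file_name = 'ML_01_users'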

In [12]:
# Load the pickled interactions and replace user_id / item_id with the integer
# codes of their categorical encoding.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = pd.read_pickle(path + data_path + file_name).assign(
    user_id=lambda frame: frame.user_id.astype('category').cat.codes,
    item_id=lambda frame: frame.item_id.astype('category').cat.codes,
)
df.head()

Unnamed: 0,user,item,datetime,rating,item_id,user_id
4983863,A39ZLL8ILVT2J8,B00FXSELCM,2014-03-24,3.0,104506,73226
7294092,A39ZLL8ILVT2J8,B00VDPQ884,2016-06-29,5.0,175639,73226
4809981,A39ZLL8ILVT2J8,B00EWC0W3W,2016-08-14,5.0,99224,73226
9337932,A39ZLL8ILVT2J8,B01EZKMD64,2016-10-03,5.0,238824,73226
8832820,A39ZLL8ILVT2J8,B01ABS4646,2016-12-22,5.0,222085,73226


---
# Data Prep

## Dataset Params

In [13]:
# Split settings; validation and test share the same values.
test_perc = val_perc = 0.1
n_last_items_test = n_last_items_val = 1

# Distinct id counts over the full frame (dropna=False matches len(unique())).
total_users = df.user_id.nunique(dropna=False)
total_items = df.item_id.nunique(dropna=False)

## Create Split

In [14]:
from Data_prep import train_val_test_split

# Split the full frame; `datasets` keeps the complete result while the three
# parts are unpacked from the same object.
train_set, val_set, test_set = datasets = train_val_test_split(
    df,
    val_perc,
    test_perc,
    n_last_items_val,
    n_last_items_test,
)

---
# Neural Collaborative Filtering (NCF)
Using the NCF framework we build Generalized Matrix Factorisation (GMF) and a Multi-Layer Perceptron (MLP), and combine the two in Neural Matrix Factorisation (NeuMF).
- paper: http://papers.www2017.com.au.s3-website-ap-southeast-2.amazonaws.com/proceedings/p173.pdf
- blog: https://medium.com/@victorkohler/collaborative-filtering-using-deep-neural-networks-in-tensorflow-96e5d41a39a1
- code: https://github.com/Leavingseason/NeuralCF/blob/master/GMF.py

## Params

In [18]:
# Configuration for the GMF model.
GMF_params = {
    'learning_rate': 0.0001,
    'batch_size': 64,
    'nolf': 16,
    'regs': [0, 0],
    'epochs': 1,  # alternative value noted by the author: 20
    # Half of the distinct training users, rounded down.
    'sample_size': len(train_set.user_id.unique()) // 2,
    'num_neg': 5,
    'ckpt_dir': '../NeuMF_storage/GMF_ckpts/ckpts',
    'optimizer': 'rmsprop',
}

In [19]:
# Configuration for the MLP model.
MLP_params = {
    'learning_rate': 0.01,
    'batch_size': 256,
    'layers': [32, 16, 8],
    'reg_layers': [0, 0, 0],
    'epochs': 1,  # alternative value noted by the author: 20
    # Half of the distinct training users, rounded down.
    'sample_size': len(train_set.user_id.unique()) // 2,
    'num_neg': 4,
    'ckpt_dir': '../NeuMF_storage/MLP_ckpts/ckpts',
    'optimizer': 'Adam',
}

In [20]:
# Configuration for the combined NeuMF model.
NeuMF_params = {
    'learning_rate': 0.001,
    'batch_size': 256,
    'layers': [32, 16, 8],
    'reg_layers': [0, 0, 0],
    'reg_mf': [0, 0],
    'nolf': 16,
    'epochs': 1,  # alternative value noted by the author: 20
    'sample_size': 10000,  # alternative: int(0.5*len(train_set.user_id.unique()))
    'num_neg': 4,
    # NeuMF gets its own checkpoint directory. It previously pointed at the MLP
    # checkpoint path (copy-paste from MLP_params), so both models shared one
    # checkpoint location.
    'ckpt_dir': '../NeuMF_storage/NeuMF_ckpts/ckpts',
    'optimizer': 'Adam',
}

In [35]:
# Import the project's NCF class and instantiate it with the dataset sizes and
# the three parameter dictionaries defined above.
from NCF import NCF
# NOTE(review): this rebinds the name `NCF` from the imported class to the
# instance. Later cells call `NCF.<method>` on this instance, so the name must
# stay as is; the import in this same cell restores the class on every re-run.
NCF = NCF(total_users, total_items, GMF_params, MLP_params, NeuMF_params)

# Build the three models in this order: GMF, MLP, then NeuMF.
NCF.build_GMF_model()
NCF.build_MLP_model()
NCF.build_NeuMF_model()

In [22]:
# Draw training samples for GMF first, then for MLP.
GMF_samples = NCF.create_samples(data=train_set, name='GMF')
# NeuMF does not get its own samples: it shares the very same object as MLP.
MLP_samples = NeuMF_samples = NCF.create_samples(data=train_set, name='MLP')

Creating Samples for GMF


100% |########################################################################|


Creating Samples for MLP


100% |########################################################################|


In [25]:
# Fit the GMF model on its samples. The store_path is the same location that
# is later passed to use_pretrain_model as GMF_weights_path.
NCF.train_model(
    'GMF',
    GMF_samples,
    store_path='../NeuMF_storage/GMF_weights/GMF_weights',
)


Fitting GMF with parameters:
                                              0
learning_rate                            0.0001
batch_size                                   64
nolf                                         16
regs                                     [0, 0]
epochs                                        1
sample_size                               10000
num_neg                                       5
ckpt_dir       ../NeuMF_storage/GMF_ckpts/ckpts
optimizer                               rmsprop
Epoch: 0


In [26]:
# Fit the MLP model on its samples. The store_path is the same location that
# is later passed to use_pretrain_model as MLP_weights_path.
NCF.train_model(
    'MLP',
    MLP_samples,
    store_path='../NeuMF_storage/MLP_weights/MLP_weights',
)


Fitting MLP with parameters:
                                              0
learning_rate                              0.01
batch_size                                  256
layers                              [32, 16, 8]
reg_layers                            [0, 0, 0]
epochs                                        1
sample_size                               10000
num_neg                                       4
ckpt_dir       ../NeuMF_storage/MLP_ckpts/ckpts
optimizer                                  Adam
Epoch: 0


In [36]:
# Hand the stored GMF and MLP weight locations to the NCF instance
# (presumably to initialise NeuMF from the pre-trained models; confirm in NCF.py).
NCF.use_pretrain_model(
    GMF_weights_path='../NeuMF_storage/GMF_weights/GMF_weights',
    MLP_weights_path='../NeuMF_storage/MLP_weights/MLP_weights',
)

In [37]:
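# Disabled: NeuMF training is commented out, so the output shown below this
# cell is left over from an earlier run. Uncomment to train NeuMF.
# NCF.train_model('NeuMF', NeuMF_samples) 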


Fitting NeuMF with parameters:
                                              0
learning_rate                             0.001
batch_size                                  256
layers                              [32, 16, 8]
reg_layers                            [0, 0, 0]
reg_mf                                   [0, 0]
nolf                                         16
epochs                                        1
sample_size                               10000
num_neg                                       4
ckpt_dir       ../NeuMF_storage/MLP_ckpts/ckpts
optimizer                                  Adam
Epoch: 0
