In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os

%load_ext autoreload
%autoreload 2

In [2]:
# Report the TensorFlow release the kernel is running, for reproducibility.
print('TF version: {}'.format(tf.__version__))

TF version: 2.2.0


# Read Data

In [3]:
# Root folder of the shared thesis files.
# Set the THESIS_DATA_DIR environment variable to point the notebook at a copy
# on another machine; when it is unset (or empty) the original macOS location
# is used, so existing runs behave exactly as before.
# Previous Windows location:
# path = 'C:/Users/robin.opdam/Google Drive/Thesis (Msc)/Thesis_shared_files/'
path = os.path.join(
    os.environ.get('THESIS_DATA_DIR')
    or '/Users/Robin/Google Drive/Thesis (Msc)/Thesis_shared_files/',
    '',  # joining with '' guarantees a trailing separator for later string concatenation
)

## Amazon Fashion

In [62]:
# Amazon Fashion subset to load, relative to `path`.
# Names that appeared here before as alternatives:
# 'Amazon_full', 'Amazon_05_users', 'Amazon_01_users'
data_path = 'data/Amazon/'
file_name = 'am_80k_users'

## MovieLens

In [63]:
# data_path = 'data/ML/'
# file_name = 'ML_full' # file_name = 'ML_05_users'
# file_name = 'ML_01_users'

In [64]:
# Load the pickled interaction table and replace both id columns with their
# integer category codes.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
df = pd.read_pickle(path + data_path + file_name).assign(
    user_id=lambda frame: frame.user_id.astype('category').cat.codes,
    item_id=lambda frame: frame.item_id.astype('category').cat.codes,
)
df.head()

Unnamed: 0,user,item,datetime,rating,user_id,item_id
6904244,A2EQZT4NOBKME3,B00SFLJZ52,2015-06-11,3.0,29949,137482
4472551,A2EQZT4NOBKME3,B00D4TJRWG,2015-06-11,5.0,29949,73746
10630561,A2EQZT4NOBKME3,B00OBT081W,2016-01-15,5.0,29949,126216
5801430,A2EQZT4NOBKME3,B00KD9AGAC,2016-08-24,5.0,29949,108075
5505899,A1QKA075BTCNIH,B00ISY7VNO,2015-01-13,5.0,15581,100488


---
# Data Prep

## Dataset Params

In [65]:
# Split settings handed to train_val_test_split; validation and test use the
# same values.
val_perc = 0.1
test_perc = val_perc
n_last_items_val = 1
n_last_items_test = n_last_items_val

# Number of distinct item / user ids in the loaded frame
# (dropna=False keeps the count equal to len(unique())).
total_items = df.item_id.nunique(dropna=False)
total_users = df.user_id.nunique(dropna=False)

## Create Split

In [66]:
from Data_prep import train_val_test_split

# Build the three datasets from the full frame using the parameters set above.
# `datasets` keeps the returned object; it is unpacked into three names below.
datasets = train_val_test_split(
    df,
    val_perc,
    test_perc,
    n_last_items_val,
    n_last_items_test,
)
train_set, val_set, test_set = datasets

---
# Neural Collaborative Filtering (NCF)
Using the NCF framework we build Generalized Matrix Factorisation (GMF) and a Multi-Layer Perceptron (MLP), and combine the two in Neural Matrix Factorisation (NeuMF).
- paper: http://papers.www2017.com.au.s3-website-ap-southeast-2.amazonaws.com/proceedings/p173.pdf
- blog: https://medium.com/@victorkohler/collaborative-filtering-using-deep-neural-networks-in-tensorflow-96e5d41a39a1
- code: https://github.com/Leavingseason/NeuralCF/blob/master

## Set Parameters

In [67]:
# Settings passed to NCF for the GMF model.
# 'nolf' is presumably the number of latent factors - TODO confirm in NCF.py.
GMF_params = dict(
    learning_rate=0.0001,
    batch_size=64,
    nolf=16,
    regs=[0, 0],
    epochs=20,
    # half of the distinct users in the training set, truncated to an int
    sample_size=int(0.5 * train_set.user_id.nunique(dropna=False)),
    num_neg=5,
    ckpt_dir='../NeuMF_storage/GMF_ckpts/ckpts',
    optimizer='rmsprop',
)

In [68]:
# Settings passed to NCF for the MLP model.
MLP_params = dict(
    learning_rate=0.01,
    batch_size=256,
    layers=[32, 16, 8],
    reg_layers=[0, 0, 0],
    epochs=20,
    # half of the distinct users in the training set, truncated to an int
    sample_size=int(0.5 * train_set.user_id.nunique(dropna=False)),
    num_neg=4,
    ckpt_dir='../NeuMF_storage/MLP_ckpts/ckpts',
    optimizer='Adam',
)

In [19]:
# Settings passed to NCF for the combined NeuMF model.
# 'epochs' and 'sample_size' are currently set to small values; the
# commented-out values are the ones the GMF/MLP configurations use.
NeuMF_params = {
    'learning_rate': 0.001,
    'batch_size': 256,
    'layers': [32,16,8],
    'reg_layers': [0,0,0],
    'reg_mf': [0,0],
    'nolf': 16,
    'epochs': 1,#20,
    'sample_size': 10000, #int(0.5*len(train_set.user_id.unique())),
    'num_neg': 4,
    # Own checkpoint location: this previously pointed at the MLP checkpoint
    # path, so NeuMF checkpoints would have been written over the MLP ones.
    'ckpt_dir': '../NeuMF_storage/NeuMF_ckpts/ckpts',
    'optimizer':'Adam'
}

## Init

In [None]:
# Import the class under an alias so that binding the instance to `NCF`
# (the name the later cells use) no longer hides the class itself.
from NCF import NCF as NCFModel

# One object holds all three models; it receives the user/item counts and the
# three parameter dictionaries defined above.
NCF = NCFModel(total_users, total_items, GMF_params, MLP_params, NeuMF_params)

# Build the three networks in the original order.
NCF.build_GMF_model()
NCF.build_MLP_model()
NCF.build_NeuMF_model()

## Create Samples

In [21]:
# Generate training samples from the training set, once per model name.
GMF_samples = NCF.create_samples(data=train_set, name='GMF')
MLP_samples = NCF.create_samples(data=train_set, name='MLP')

# NeuMF does not get its own samples: it shares the MLP sample object (no copy).
# A separate create_samples call for NeuMF was previously left commented out.
NeuMF_samples = MLP_samples

  0% |                                                                        |

Creating Samples for GMF


100% |########################################################################|
  0% |                                                                        |

Creating Samples for MLP


100% |########################################################################|


## Training

In [None]:
# Train GMF on its samples; the weights path given here is the one the
# pretrain-loading cell reads back as GMF_weights_path.
NCF.train_model(
    'GMF',
    GMF_samples,
    store_path='../NeuMF_storage/GMF_weights_try/GMF_weights',
)

In [None]:
# Train MLP on its samples; the weights path given here is the one the
# pretrain-loading cell reads back as MLP_weights_path.
NCF.train_model(
    'MLP',
    MLP_samples,
    store_path='../NeuMF_storage/MLP_weights_try/MLP_weights',
)

In [None]:
# NCF.train_model('NeuMF', NeuMF_samples) 

#### Load weights for NeuMF

In [38]:
# Hand the stored GMF and MLP weight files to the NCF object for use in NeuMF.
# Both paths match the store_path values used by the training cells.
NCF.use_pretrain_model(
    GMF_weights_path='../NeuMF_storage/GMF_weights_try/GMF_weights',
    MLP_weights_path='../NeuMF_storage/MLP_weights_try/MLP_weights',
)

---
## Evaluation

In [None]:
# Evaluation constants.
# NOTE(review): neither name is referenced by the evaluation cells visible
# below, which pass literal values instead - confirm whether they are still needed.
rank_at, sample_len = 20, 100

## Full set scores

In [46]:
# Predictions from the NeuMF model for the full-set evaluation, given the
# train and test sets.
ranked_df_full = NCF.get_predictions(
    'NeuMF',
    train_set,
    test_set,
)

100% |########################################################################|


In [47]:
# get_metrics has to be imported before this first use; otherwise running the
# notebook top to bottom on a fresh kernel fails here with a NameError.
from Evaluation import get_metrics

# NOTE(review): the recorded output of this cell shows 0 hits at every cutoff,
# while the sampled evaluation further down records non-zero hits - verify the
# full-set prediction path.
get_metrics(ranked_df_full, 5, 20)

Obtaining metrics time: 0.13


Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,0,0.0,0.0
1,5,0,0.0,0.0
2,10,0,0.0,0.0
3,15,0,0.0,0.0
4,20,0,0.0,0.0


## Sample scores

In [41]:
# Predictions from the NeuMF model for the sampled evaluation, given the
# train and test sets.
ranked_df = NCF.sample_prediction(
    'NeuMF',
    train_set,
    test_set,
)

100% |########################################################################|


In [42]:
from Evaluation import get_metrics

# Metrics table for the sampled predictions; the trailing expression is what
# the cell displays. Same literal arguments (5, 20) as the full-set cell.
get_metrics(
    ranked_df,
    5,
    20,
)

Obtaining metrics time: 0.14


Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,1,0.008475,0.008475
1,5,3,0.025424,0.005085
2,10,9,0.076271,0.007627
3,15,15,0.127119,0.008475
4,20,19,0.161017,0.008051
