In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
import pdb
from sklearn.metrics import roc_auc_score
np.random.seed(2020)
torch.manual_seed(2020)
import pdb

from dataset import load_data

from matrix_factorization import NCF, NCF_VITA, NCF_IPS, NCF_SNIPS

from utils import gini_index, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU
def mse_func(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Computed as ``np.mean((x - y) ** 2)``. Both arguments must support
    element-wise subtraction (e.g. NumPy arrays).
    """
    return np.mean((x - y) ** 2)


def acc_func(x, y):
    """Return the fraction of positions at which ``x`` equals ``y``.

    The number of element-wise matches (``np.sum(x == y)``) is divided by
    ``len(x)``.
    """
    return np.sum(x == y) / len(x)

# Which dataset to run the experiments on; the branches below handle "coat" and "yahoo".
dataset_name = "yahoo"

if dataset_name == "coat":
    # load_data("coat") yields a train/test pair that is converted to
    # (x, y) samples; user/item counts are taken from the train matrix shape.
    train_mat, test_mat = load_data("coat")
    x_train, y_train = rating_mat_to_sample(train_mat)
    x_test, y_test = rating_mat_to_sample(test_mat)
    num_user = train_mat.shape[0]
    num_item = train_mat.shape[1]

elif dataset_name == "yahoo":
    # load_data("yahoo") already returns (x, y) samples; only the training
    # split is shuffled.
    x_train, y_train, x_test, y_test = load_data("yahoo")
    x_train, y_train = shuffle(x_train, y_train)
    # Sizes are max(column) + 1 so the largest value seen in column 0 / 1 of
    # x_train is still a valid index.
    # NOTE(review): only x_train is inspected — confirm x_test never
    # contains a larger user/item id.
    num_user = x_train[:, 0].max() + 1
    num_item = x_train[:, 1].max() + 1

else:
    # Fail here with a clear message instead of a NameError on num_user below.
    raise ValueError(
        "Unknown dataset_name {!r}; expected 'coat' or 'yahoo'".format(dataset_name)
    )

print("# user: {}, # item: {}".format(num_user, num_item))
# binarize
y_train = binarize(y_train)
y_test = binarize(y_test)

===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000
# user: 15401, # item: 1001


In [2]:
"NCF VITA"
# Fit NCF-VITA on the training split and report its metrics on the test split.
ncf_vita = NCF_VITA(num_user, num_item)
ncf_vita.fit(x_train, y_train, lr=0.01,
    alpha=1e-3, gamma=1e-3, lamb=1e-4, tol=1e-6,
    batch_size=2048, verbose=1)

test_pred = ncf_vita.predict(x_test)
mse_ncf_vita = mse_func(y_test, test_pred)
auc_ncf_vita = roc_auc_score(y_test, test_pred)
print("***"*5 + "[NCF-VITA]" + "***"*5)
# Label the lines with this model's own tag so they cannot be confused with
# the naive NCF cell, and reuse the metric computed above.
print("[NCF-VITA] test mse:", mse_ncf_vita)
print("[NCF-VITA] test auc:", auc_ncf_vita)
# The recorded output shows "Num User" / "Gini index" / "Global utility" lines
# between the banners, so these helpers appear to print their own results.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-VITA]" + "***"*5)

[NCF-VITA] epoch:0, xent:102.41808754205704
[NCF-VITA] epoch:10, xent:71.46804741024971
[NCF-VITA] epoch:20, xent:70.42273560166359
[NCF-VITA] epoch:28, xent:70.08186256885529
***************[NCF-VITA]***************
[NCF] test mse: 0.2819975356917161
[NCF] test auc: 0.6989449468018082
Num User: 5400
Gini index: 0.5428619551766933
Global utility: 0.26937037037037037
***************[NCF-VITA]***************


In [3]:
"NCF naive"
# Naive NCF: train on the training split, then score the test split.
ncf = NCF(num_user, num_item)
ncf.fit(
    x_train,
    y_train,
    lr=0.01,
    lamb=1e-4,
    tol=1e-6,
    batch_size=2048,
    verbose=1,
)

test_pred = ncf.predict(x_test)
auc_ncf = roc_auc_score(y_test, test_pred)
mse_ncf = mse_func(y_test, test_pred)

print("{0}[NCF]{0}".format("***" * 5))
print("[NCF] test mse:", mse_ncf)
print("[NCF] test auc:", auc_ncf)
# User-level statistics: gini_index returns two values, kept as gi and gu.
user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)
gi, gu = gini_index(user_wise_ctr)
print("{0}[NCF]{0}".format("***" * 5))

[NCF] epoch:0, xent:102.8900898694992
[NCF] epoch:10, xent:85.17771834135056
***************[NCF]***************
[NCF] test mse: 0.3313150855314741
[NCF] test auc: 0.6772202864552993
Num User: 5400
Gini index: 0.56529110291104
Global utility: 0.26096296296296295
***************[NCF]***************


In [4]:
"NCF IPS"
# NCF-IPS: train with an extra label sample (y_ips), then score the test split.
ncf_ips = NCF_IPS(num_user, num_item)

# Shuffle the test indices and keep the labels of the first 5% as y_ips.
# NOTE(review): y_ips is drawn from y_test, which is also the evaluation
# set below — confirm this overlap is intended by the experimental protocol.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(y_test))]]

ncf_ips.fit(
    x_train,
    y_train,
    y_ips=y_ips,
    lr=0.01,
    batch_size=2048,
    lamb=1e-4,
    tol=1e-6,
    verbose=1,
)

test_pred = ncf_ips.predict(x_test)
auc_ncfips = roc_auc_score(y_test, test_pred)
mse_ncfips = mse_func(y_test, test_pred)

print("{0}[NCF-IPS]{0}".format("***" * 5))
print("[NCF-IPS] test mse:", mse_ncfips)
print("[NCF-IPS] test auc:", auc_ncfips)
# User-level statistics: gini_index returns two values, kept as gi and gu.
user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)
gi, gu = gini_index(user_wise_ctr)
print("{0}[NCF-IPS]{0}".format("***" * 5))


[NCF-IPS] epoch:0, xent:4103.263013839722
[NCF-IPS] epoch:10, xent:2927.3574962615967
[NCF-IPS] epoch:20, xent:2844.8160438537598
[NCF-IPS] epoch:30, xent:2778.3027992248535
[NCF-IPS] epoch:40, xent:2741.9010314941406
[NCF-IPS] epoch:50, xent:2718.723768234253
[NCF-IPS] epoch:60, xent:2698.268035888672
[NCF-IPS] epoch:70, xent:2682.119041442871
[NCF-IPS] epoch:80, xent:2664.8209190368652
[NCF-IPS] epoch:87, xent:2654.0771675109863
***************[NCF-IPS]***************
[NCF-IPS] test mse: 0.17769356075206216
[NCF-IPS] test auc: 0.6707938915130047
Num User: 5400
Gini index: 0.5526796131000107
Global utility: 0.25725925925925924
***************[NCF-IPS]***************


In [5]:
"NCF SNIPS"
# NCF-SNIPS: train with an extra label sample (y_ips), then score the test split.
ncf_snips = NCF_SNIPS(num_user, num_item)

# Shuffle the test indices again and keep the labels of the first 5% as y_ips.
# NOTE(review): y_ips is drawn from y_test, which is also the evaluation
# set below — confirm this overlap is intended by the experimental protocol.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

ncf_snips.fit(x_train, y_train,
    y_ips=y_ips,
    lr=0.01,
    batch_size=2048,
    lamb=1e-4, tol=1e-6, verbose=1)

test_pred = ncf_snips.predict(x_test)
# Store the SNIPS metrics under their own names so they do not overwrite
# mse_ncfips / auc_ncfips, which hold the NCF-IPS results.
mse_ncfsnips = mse_func(y_test, test_pred)
auc_ncfsnips = roc_auc_score(y_test, test_pred)
print("***"*5 + "[NCF-SNIPS]" + "***"*5)
print("[NCF-SNIPS] test mse:", mse_ncfsnips)
print("[NCF-SNIPS] test auc:", auc_ncfsnips)
# User-level statistics: gini_index returns two values, kept as gi and gu.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-SNIPS]" + "***"*5)

[NCF-SNIPS] epoch:0, xent:84.56735223531723
[NCF-SNIPS] epoch:10, xent:61.797143638134
[NCF-SNIPS] epoch:20, xent:61.27409288287163
***************[NCF-SNIPS]***************
[NCF-SNIPS] test mse: 0.16991930315856799
[NCF-SNIPS] test auc: 0.6879031329287465
Num User: 5400
Gini index: 0.5420429543185992
Global utility: 0.2665925925925926
***************[NCF-SNIPS]***************
