In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
import pdb
from sklearn.metrics import roc_auc_score
np.random.seed(2020)
torch.manual_seed(2020)
import pdb

from dataset import load_data

from matrix_factorization import NCF, NCF_CVIB, NCF_IPS, NCF_SNIPS, NCF_DR

from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU
def mse_func(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Both arguments are converted with ``np.asarray`` so that plain Python
    sequences are accepted as well as ndarrays; for ndarray inputs the result
    is the same as ``np.mean((x - y) ** 2)``.
    """
    return np.mean((np.asarray(x) - np.asarray(y)) ** 2)


def acc_func(x, y):
    """Return the fraction of positions where ``x`` and ``y`` are equal.

    The comparison is element-wise (inputs are converted with ``np.asarray``,
    so two lists are compared entry by entry rather than as whole objects).
    The count of matches is divided by ``len(x)``.
    """
    return np.sum(np.asarray(x) == np.asarray(y)) / len(x)

# Data set to run the experiments on; the branches below handle "coat" and "yahoo".
dataset_name = "yahoo"

if dataset_name == "coat":
    # load_data("coat") returns a train and a test rating matrix, which are
    # converted to (x, y) samples by rating_mat_to_sample.
    train_mat, test_mat = load_data("coat")
    x_train, y_train = rating_mat_to_sample(train_mat)
    x_test, y_test = rating_mat_to_sample(test_mat)
    num_user = train_mat.shape[0]
    num_item = train_mat.shape[1]

elif dataset_name == "yahoo":
    # load_data("yahoo") already returns (x, y) samples; only the training
    # split is shuffled.
    x_train, y_train, x_test, y_test = load_data("yahoo")
    x_train, y_train = shuffle(x_train, y_train)
    # Sizes are derived from the largest index in columns 0 and 1 of x_train
    # (presumably user id and item id -- confirm against dataset.load_data).
    num_user = x_train[:,0].max() + 1
    num_item = x_train[:,1].max() + 1

else:
    # Fail early with a clear message instead of a NameError on num_user below.
    raise ValueError(
        "Unsupported dataset_name: {!r} (expected 'coat' or 'yahoo')".format(dataset_name))

print("# user: {}, # item: {}".format(num_user, num_item))
# binarize
y_train = binarize(y_train)
y_test = binarize(y_test)

===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000
# user: 15401, # item: 1001


In [2]:
"NCF CVIB"
ncf_cvib = NCF_CVIB(num_user, num_item)
ncf_cvib.fit(x_train, y_train, lr=0.01, 
    alpha=1.0, gamma=1e-1, lamb=1e-4, tol=1e-6, 
    batch_size = 2048, verbose=1)

test_pred = ncf_cvib.predict(x_test)
mse_ncf = mse_func(y_test, test_pred)
auc_ncf = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(ncf_cvib, x_test, y_test)

print("***"*5 + "[NCF-CVIB]" + "***"*5)
print("[NCF-CVIB] test mse:", mse_ncf)
print("[NCF-CVIB] test auc:", auc_ncf)
print("ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
    np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-CVIB]" + "***"*5)

[NCF-CVIB] epoch:0, xent:115.93364042043686
[NCF-CVIB] epoch:10, xent:97.04958033561707
[NCF-CVIB] epoch:20, xent:96.58860766887665
[NCF-CVIB] epoch:30, xent:95.87860035896301
[NCF-CVIB] epoch:40, xent:95.3306735754013
[NCF-CVIB] epoch:48, xent:95.07919818162918
***************[NCF-CVIB]***************
[NCF-CVIB] test mse: 0.4427629712806506
[NCF-CVIB] test auc: 0.6966606110800491
ndcg@5:0.675872, ndcg@10:0.784883
Num User: 5400
Gini index: 0.5437158465789664
Global utility: 0.2705555555555556
***************[NCF-CVIB]***************


In [3]:
"NCF naive"
ncf = NCF(num_user, num_item)
ncf.fit(x_train, y_train, lr=0.01, lamb=1e-4, tol=1e-6, 
    batch_size = 2048, verbose=1)
test_pred = ncf.predict(x_test)
mse_ncf = mse_func(y_test, test_pred)
auc_ncf = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(ncf, x_test, y_test)

print("***"*5 + "[NCF]" + "***"*5)
print("[NCF] test mse:", mse_func(y_test, test_pred))
print("[NCF] test auc:", auc_ncf)
print("ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
    np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF]" + "***"*5)

[NCF] epoch:0, xent:102.9376904964447
[NCF] epoch:10, xent:85.20729166269302
[NCF] epoch:20, xent:84.76616615056992
[NCF] epoch:30, xent:84.42630195617676
[NCF] epoch:40, xent:84.05371761322021
[NCF] epoch:50, xent:83.66002357006073
***************[NCF]***************
[NCF] test mse: 0.33389237573647923
[NCF] test auc: 0.677314377423672
ndcg@5:0.654634, ndcg@10:0.774168
Num User: 5400
Gini index: 0.5624442706605475
Global utility: 0.2604814814814815
***************[NCF]***************


In [4]:
"NCF IPS"
ncf_ips = NCF_IPS(num_user, num_item)

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

ncf_ips.fit(x_train, y_train, 
    y_ips=y_ips,
    lr=0.01,
    batch_size=2048,
    lamb=1e-4,tol=1e-6, verbose=1)

test_pred = ncf_ips.predict(x_test)
mse_ncfips = mse_func(y_test, test_pred)
auc_ncfips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(ncf_ips, x_test, y_test)

print("***"*5 + "[NCF-IPS]" + "***"*5)
print("[NCF-IPS] test mse:", mse_ncfips)
print("[NCF-IPS] test auc:", auc_ncfips)
print("ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
    np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-IPS]" + "***"*5)


[NCF-IPS] epoch:0, xent:4072.9857234954834
[NCF-IPS] epoch:10, xent:2899.3560104370117
[NCF-IPS] epoch:20, xent:2819.6494884490967
[NCF-IPS] epoch:30, xent:2734.750234603882
[NCF-IPS] epoch:40, xent:2678.3535194396973
[NCF-IPS] epoch:50, xent:2645.0100326538086
[NCF-IPS] epoch:60, xent:2620.6374502182007
[NCF-IPS] epoch:70, xent:2602.413637161255
[NCF-IPS] epoch:80, xent:2587.4010343551636
[NCF-IPS] epoch:90, xent:2577.7195386886597
[NCF-IPS] epoch:100, xent:2570.600706100464
[NCF-IPS] epoch:110, xent:2562.5005655288696
[NCF-IPS] epoch:120, xent:2557.655520439148
[NCF-IPS] epoch:122, xent:2558.3437633514404
***************[NCF-IPS]***************
[NCF-IPS] test mse: 0.17781794054197667
[NCF-IPS] test auc: 0.6733292253853745
ndcg@5:0.655470, ndcg@10:0.771176
Num User: 5400
Gini index: 0.5495443277923712
Global utility: 0.2634074074074074
***************[NCF-IPS]***************


In [5]:
"NCF SNIPS"
ncf_snips = NCF_SNIPS(num_user, num_item)

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

ncf_snips.fit(x_train, y_train, 
    y_ips=y_ips,
    lr=0.01,
    batch_size=2048,
    lamb=1e-4,tol=1e-6, verbose=1)

test_pred = ncf_snips.predict(x_test)
mse_ncfips = mse_func(y_test, test_pred)
auc_ncfips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(ncf_snips, x_test, y_test)

print("***"*5 + "[NCF-SNIPS]" + "***"*5)
print("[NCF-SNIPS] test mse:", mse_ncfips)
print("[NCF-SNIPS] test auc:", auc_ncfips)
print("ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
    np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-SNIPS]" + "***"*5)

[NCF-SNIPS] epoch:0, xent:81.95224589109421
[NCF-SNIPS] epoch:10, xent:59.18703892827034
[NCF-SNIPS] epoch:20, xent:58.71371906995773
[NCF-SNIPS] epoch:30, xent:58.45278751850128
[NCF-SNIPS] epoch:40, xent:58.34858602285385
[NCF-SNIPS] epoch:49, xent:58.22364658117294
***************[NCF-SNIPS]***************
[NCF-SNIPS] test mse: 0.16845158876808575
[NCF-SNIPS] test auc: 0.6907556703391617
ndcg@5:0.671587, ndcg@10:0.781287
Num User: 5400
Gini index: 0.5426085484962895
Global utility: 0.26951851851851855
***************[NCF-SNIPS]***************


In [6]:
"NCF DR"
ncf_dr = NCF_DR(num_user, num_item)

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

ncf_dr.fit(x_train, y_train, y_ips=y_ips, batch_size=2048, 
    lr=0.01, lamb=1e-4, verbose=1)
test_pred = ncf_dr.predict(x_test)
mse_mfdr = mse_func(y_test, test_pred)
auc_mfdr = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(ncf_dr, x_test, y_test)

print("***"*5 + "[NCF-DR]" + "***"*5)
print("[NCF-DR] test mse:", mse_mfdr)
print("[NCF-DR] test auc:", auc_mfdr)
print("ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
    np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[NCF-DR]" + "***"*5)

  xent_loss = F.binary_cross_entropy(pred, sub_y, weight=inv_prop, reduction="sum")


[NCF-DR] epoch:0, xent:8069209.515625
[NCF-DR] epoch:10, xent:5976472.453125
[NCF-DR] epoch:20, xent:5929477.52734375
[NCF-DR] epoch:30, xent:5869332.7578125
[NCF-DR] epoch:40, xent:5792328.41796875
[NCF-DR] epoch:50, xent:5736723.046875
[NCF-DR] epoch:60, xent:5712239.44140625
[NCF-DR] epoch:70, xent:5699553.22265625
[NCF-DR] epoch:78, xent:5690278.1328125
***************[NCF-DR]***************
[NCF-DR] test mse: 0.1727529916573135
[NCF-DR] test auc: 0.6819140780991799
ndcg@5:0.653807, ndcg@10:0.770040
Num User: 5400
Gini index: 0.547887855442828
Global utility: 0.26281481481481483
***************[NCF-DR]***************
