# Import

In [1]:
import sys

# TODO: replace this machine-specific absolute path with a relative one.
# Project root that contains the local `modules` package imported below.
root_dir = '/Users/taichi/Library/CloudStorage/OneDrive-個人用/Documents/project/lab/bachelor/cml_pytorch2'
# Register the root exactly once; reuse `root_dir` so the membership check and
# the appended entry can never drift apart.
if root_dir not in sys.path:
    sys.path.append(root_dir)

import torch
from torch import nn, optim
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

from modules import losses, models, samplers, regularizers, evaluators, trainers, datasets, distributions

# DataSet

In [2]:
# Seed the NumPy and PyTorch RNGs before any data, sampler or model is created
# so that a fresh "Restart Kernel -> Run All" can be repeated.
# NOTE(review): if the project modules draw from the stdlib `random` module,
# it needs to be seeded as well -- TODO confirm.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Dataset wrapper from the project `datasets` module
# (presumably MovieLens 100k -- verify against modules.datasets).
dataset = datasets.ML100k()
metadata = dataset.metadata()
n_user = metadata['n_user']
n_item = metadata['n_item']
train_set, test_set = dataset.implicit_feedback_data()
item_feature_set = dataset.item_feature_data_for_eval()

# device setting: use the first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Convert every array to an int64 tensor and move it to the selected device.
train_set = torch.LongTensor(train_set).to(device)
test_set = torch.LongTensor(test_set).to(device)
item_feature_set = torch.LongTensor(item_feature_set).to(device)

# Evaluator

In [3]:
# Cut-offs k at which every metric below is computed (e.g. Recall@10)
ks = [10, 50, 100]

# Accuracy: metric name -> scoring function. Only Recall is enabled;
# the nDCG / MAP entries are kept commented out.
score_function_dict = dict(
    # nDCG=evaluators.ndcg,
    # MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
accuracy_evaluator = evaluators.AccuracyEvaluator(
    test_set,
    score_function_dict,
    ks,
)

# Coverage
coverage_evaluator = evaluators.CoverageEvaluator(
    test_set,
    ks,
)

# Diversity (built from the item feature tensor, with emb_sim disabled)
diversity_evaluator = evaluators.DiversityEvaluator(
    test_set,
    item_feature_set,
    ks,
    emb_sim=False,
)

# Hubness
hubness_evaluator = evaluators.HubnessEvaluator(
    test_set,
    ks,
)

# Sampler

In [4]:
# One sampler over the training interactions, shared by every trainer below.
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    device=device,
    strict_negative=False,
)

# Model

In [5]:
# Hyperparameters
lr, n_dim = 1e-3, 50            # Adam learning rate, dimension passed to the models
n_batch, n_epoch = 256, 50      # forwarded to trainer.fit
valid_per_epoch = 10            # forwarded to trainer.fit

# distributions handed to the MutualProximityCML variants
dist1, dist2, dist3 = (
    distributions.Empirical(),
    distributions.Gaussian(),
    distributions.Gamma(),
)

# models
model0 = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
model1 = models.MutualProximityCML(n_user, n_item, dist1, n_dim).to(device)
# To match the conditions of the Empirical variant, the sample sizes S are set
# to n_item and n_user for the parametric variants.
sample_sizes = dict(n_item_sample=n_item, n_user_sample=n_user)
model2 = models.MutualProximityCML(n_user, n_item, dist2, n_dim, **sample_sizes).to(device)
model3 = models.MutualProximityCML(n_user, n_item, dist3, n_dim, **sample_sizes).to(device)

# optimizers: one independent Adam instance per model, same learning rate
optimizer0, optimizer1, optimizer2, optimizer3 = [
    optim.Adam(net.parameters(), lr=lr)
    for net in (model0, model1, model2, model3)
]

# loss function (shared by all trainers)
criterion = losses.SumTripletLoss(margin=1).to(device)

# trainers: pair each model with its own optimizer; criterion and sampler are shared
trainer0, trainer1, trainer2, trainer3 = [
    trainers.BaseTrainer(net, opt, criterion, sampler)
    for net, opt in zip(
        (model0, model1, model2, model3),
        (optimizer0, optimizer1, optimizer2, optimizer3),
    )
]

# Exp1

In [6]:
# Fit model0 (plain CollaborativeMetricLearning) via trainer0,
# validating with the accuracy evaluator.
trainer0.fit(
    n_batch,
    n_epoch,
    accuracy_evaluator,
    valid_per_epoch,
)

100%|██████████████████████████████████████████████████████████████████████████████████████| 943/943 [00:10<00:00, 92.94it/s]
epoch1 avg_loss:0.761: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 88.53it/s]
epoch2 avg_loss:0.539: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 88.69it/s]
epoch3 avg_loss:0.443: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 90.19it/s]
epoch4 avg_loss:0.380: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 90.28it/s]
epoch5 avg_loss:0.335: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 88.26it/s]
epoch6 avg_loss:0.300: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 83.92it/s]
epoch7 avg_loss:0.275: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 81.

In [7]:
# Fit model1 (MutualProximityCML with the Empirical distribution) via trainer1,
# validating with the accuracy evaluator.
trainer1.fit(
    n_batch,
    n_epoch,
    accuracy_evaluator,
    valid_per_epoch,
)

100%|██████████████████████████████████████████████████████████████████████████████████████| 943/943 [07:41<00:00,  2.05it/s]
epoch1 avg_loss:0.764: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 66.30it/s]
epoch2 avg_loss:0.540: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 65.42it/s]
epoch3 avg_loss:0.453: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 72.02it/s]
epoch4 avg_loss:0.387: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 79.70it/s]
epoch5 avg_loss:0.340: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 82.93it/s]
epoch6 avg_loss:0.304: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 82.42it/s]
epoch7 avg_loss:0.277: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 81.

In [8]:
# Fit model2 (MutualProximityCML with the Gaussian distribution) via trainer2,
# validating with the accuracy evaluator.
trainer2.fit(
    n_batch,
    n_epoch,
    accuracy_evaluator,
    valid_per_epoch,
)

100%|██████████████████████████████████████████████████████████████████████████████████████| 943/943 [22:18<00:00,  1.42s/it]
epoch1 avg_loss:0.760: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 86.31it/s]
epoch2 avg_loss:0.536: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 83.36it/s]
epoch3 avg_loss:0.443: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 81.74it/s]
epoch4 avg_loss:0.378: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 79.91it/s]
epoch5 avg_loss:0.333: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 81.47it/s]
epoch6 avg_loss:0.300: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 79.67it/s]
epoch7 avg_loss:0.273: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 78.

In [9]:
# Fit model3 (MutualProximityCML with the Gamma distribution) via trainer3,
# validating with the accuracy evaluator.
trainer3.fit(
    n_batch,
    n_epoch,
    accuracy_evaluator,
    valid_per_epoch,
)

100%|██████████████████████████████████████████████████████████████████████████████████████| 943/943 [20:39<00:00,  1.31s/it]
epoch1 avg_loss:0.761: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 70.30it/s]
epoch2 avg_loss:0.535: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:04<00:00, 63.97it/s]
epoch3 avg_loss:0.443: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:03<00:00, 65.05it/s]
epoch4 avg_loss:0.378: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:04<00:00, 63.34it/s]
epoch5 avg_loss:0.333: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:04<00:00, 57.38it/s]
epoch6 avg_loss:0.299: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:04<00:00, 54.71it/s]
epoch7 avg_loss:0.273: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:04<00:00, 62.

In [10]:
# BaseCML: validation scores recorded by trainer0 (model0, plain
# CollaborativeMetricLearning); displayed as the cell output.
trainer0.valid_scores

Unnamed: 0,Recall@10,Recall@50,Recall@100,epoch,losses
0,0.005913,0.035095,0.063494,0,
0,0.200082,0.480086,0.623408,10,0.22234
0,0.240047,0.555836,0.705425,20,0.186681
0,0.24897,0.569593,0.714497,30,0.179332
0,0.252003,0.569671,0.71948,40,0.178035
0,0.250564,0.567501,0.722378,50,0.176543


In [11]:
# Empirical: validation scores recorded by trainer1 (model1, MutualProximityCML
# with distributions.Empirical); displayed as the cell output.
trainer1.valid_scores

Unnamed: 0,Recall@10,Recall@50,Recall@100,epoch,losses
0,0.005534,0.032186,0.06287,0,
0,0.111995,0.325558,0.462693,10,0.224617
0,0.146693,0.399327,0.556174,20,0.185228
0,0.157933,0.413874,0.569478,30,0.178856
0,0.163475,0.423446,0.573823,40,0.178339
0,0.162725,0.426663,0.577015,50,0.175653


In [12]:
# Gaussian: validation scores recorded by trainer2 (model2, MutualProximityCML
# with distributions.Gaussian); displayed as the cell output.
trainer2.valid_scores

Unnamed: 0,Recall@10,Recall@50,Recall@100,epoch,losses
0,0.005824,0.021959,0.052652,0,
0,0.065534,0.293031,0.487851,10,0.222595
0,0.066734,0.313054,0.530022,20,0.185182
0,0.069261,0.315946,0.545187,30,0.179239
0,0.068871,0.31892,0.544328,40,0.176571
0,0.07061,0.325331,0.544655,50,0.175675


In [13]:
# Gamma: validation scores recorded by trainer3 (model3, MutualProximityCML
# with distributions.Gamma); displayed as the cell output.
trainer3.valid_scores

Unnamed: 0,Recall@10,Recall@50,Recall@100,epoch,losses
0,0.005532,0.030319,0.062122,0,
0,0.094215,0.343067,0.501511,10,0.222876
0,0.128705,0.408684,0.593699,20,0.185085
0,0.139092,0.428922,0.602288,30,0.178946
0,0.14137,0.431692,0.607398,40,0.177799
0,0.140332,0.429969,0.60876,50,0.175551
