# negativeサンプリングの結果によってどれだけスコアがブレるかのテスト

In [1]:
import sys

# Make the directory two levels above the notebook importable. The path is
# relative, so it is resolved against the kernel's current working directory.
# Presumably this is the project root that contains the `modules` package
# imported below -- verify against the repository layout.
root_dir = '../../'
if root_dir not in sys.path:
    # Guarded so that re-running this cell does not append duplicates.
    sys.path.append(root_dir)

import torch
from torch import nn, optim
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# Project-local package; only resolvable once `root_dir` is on sys.path.
# NOTE(review): some names imported in this cell (e.g. nn, TruncatedSVD,
# csr_matrix, regularizers, distributions) are not referenced in the cells
# visible here -- confirm whether they are still needed.
from modules import losses, models, samplers, regularizers, evaluators, trainers, datasets, distributions

## ML100K

In [2]:
# Experiment: train CollaborativeMetricLearning on the ML100k dataset once per
# `neg_pair_weight` value and print the validation scores (Recall@k) of each run.

# Seed the global NumPy / PyTorch RNGs so a fresh "Restart & Run All"
# reproduces the printed scores.
# NOTE(review): assumes the negative sampling inside `modules` draws from these
# global RNGs -- confirm, and seed any other RNG it uses (e.g. `random`) too.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Hyper-parameters.
lr = 1e-3         # learning rate handed to Adam
n_dim = 50        # third argument of CollaborativeMetricLearning (presumably the embedding size)
ks = [5, 10, 50]  # cut-offs handed to RecallEvaluator
n_batch = 256     # forwarded as trainer.fit(n_batch=...)
n_epoch = 30      # forwarded as trainer.fit(n_epoch=...)

# The device does not depend on the loop variable, so pick it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = datasets.ML100k()
n_user = dataset.n_user
n_item = dataset.n_item

# NOTE(review): `neg_pair_weight` presumably controls how many negative pairs
# are sampled relative to positive ones -- confirm in modules.datasets.
li_neg_pair_weight = [1, 3, 5, 10, 30]
for neg_pair_weight in li_neg_pair_weight:
    print(f"neg_pair_weight={neg_pair_weight}:")

    # The split is regenerated for every weight because the weight is an
    # argument of the split itself.
    train_set, test_set = dataset.get_train_and_test_set(
        use_negative_sampling=True,
        use_popularity=False,
        neg_pair_weight=neg_pair_weight
    )
    train_set = torch.LongTensor(train_set).to(device)
    test_set = torch.LongTensor(test_set).to(device)

    # Fresh model / optimizer / trainer per weight so runs do not share state.
    model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = losses.SumTripletLoss(margin=1).to(device)
    sampler = samplers.BaseSampler(train_set, n_user, n_item, device=device, strict_negative=False)
    evaluator = evaluators.RecallEvaluator(test_set, ks)
    trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar=True)

    # valid_per_epoch is tied to n_epoch (both were the literal 30) so that
    # changing the epoch count keeps the validation interval in step with it.
    trainer.fit(n_batch=n_batch, n_epoch=n_epoch, valid_evaluator=evaluator, valid_per_epoch=n_epoch)
    print(trainer.valid_scores)

neg_pair_weight=1:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.203380   0.405916   0.966484      0       NaN
0  0.497598   0.703181   0.975131     30  0.207804
neg_pair_weight=3:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.081391   0.164901   0.846370      0       NaN
0  0.441982   0.645181   0.970315     30  0.207904
neg_pair_weight=5:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.060323   0.110908   0.571272      0       NaN
0  0.399732   0.590716   0.951048     30  0.206721
neg_pair_weight=10:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.032324   0.062347   0.313896      0       NaN
0  0.335403   0.506510   0.903303     30  0.206358
neg_pair_weight=30:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.009777   0.021198   0.107548      0       NaN
0  0.238257   0.369374   0.757835     30  0.209189


## ML20M

In [3]:
# Experiment: train CollaborativeMetricLearning on the ML20m dataset once per
# `neg_pair_weight` value and print the validation scores (Recall@k) of each run.

# Seed the global NumPy / PyTorch RNGs so a fresh "Restart & Run All"
# reproduces the printed scores.
# NOTE(review): assumes the negative sampling inside `modules` draws from these
# global RNGs -- confirm, and seed any other RNG it uses (e.g. `random`) too.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Hyper-parameters.
lr = 1e-3         # learning rate handed to Adam
n_dim = 50        # third argument of CollaborativeMetricLearning (presumably the embedding size)
ks = [5, 10, 50]  # cut-offs handed to RecallEvaluator
n_batch = 256     # forwarded as trainer.fit(n_batch=...)
n_epoch = 30      # forwarded as trainer.fit(n_epoch=...)

# The device does not depend on the loop variable, so pick it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = datasets.ML20m()
n_user = dataset.n_user
n_item = dataset.n_item

# NOTE(review): `neg_pair_weight` presumably controls how many negative pairs
# are sampled relative to positive ones -- confirm in modules.datasets.
li_neg_pair_weight = [1, 3, 5, 10, 30]
for neg_pair_weight in li_neg_pair_weight:
    print(f"neg_pair_weight={neg_pair_weight}:")

    # The split is regenerated for every weight because the weight is an
    # argument of the split itself.
    train_set, test_set = dataset.get_train_and_test_set(
        use_negative_sampling=True,
        use_popularity=False,
        neg_pair_weight=neg_pair_weight
    )
    train_set = torch.LongTensor(train_set).to(device)
    test_set = torch.LongTensor(test_set).to(device)

    # Fresh model / optimizer / trainer per weight so runs do not share state.
    model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = losses.SumTripletLoss(margin=1).to(device)
    sampler = samplers.BaseSampler(train_set, n_user, n_item, device=device, strict_negative=False)
    evaluator = evaluators.RecallEvaluator(test_set, ks)
    trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar=True)

    # valid_per_epoch is tied to n_epoch (both were the literal 30) so that
    # changing the epoch count keeps the validation interval in step with it.
    trainer.fit(n_batch=n_batch, n_epoch=n_epoch, valid_evaluator=evaluator, valid_per_epoch=n_epoch)
    print(trainer.valid_scores)

neg_pair_weight=1:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.172305   0.344365   0.937497      0       NaN
0  0.495429   0.700267   0.956247     30  0.218682
neg_pair_weight=3:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.074002   0.147180   0.728287      0       NaN
0  0.402953   0.644992   0.945526     30  0.219841
neg_pair_weight=5:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.047140   0.094489   0.471046      0       NaN
0  0.324417   0.582919   0.928568     30  0.224704
neg_pair_weight=10:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.025133   0.050418   0.251547      0       NaN
0  0.208284   0.442734   0.905752     30  0.222599
neg_pair_weight=30:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.008807   0.017739   0.088401      0       NaN
0  0.075374   0.184516   0.813949     30  0.222988
