# negativeサンプリングの結果によってどれだけスコアがブレるかのテスト

In [1]:
import sys

# Put the directory two levels above this notebook on the import path.
# The membership check keeps re-running this cell from appending duplicates.
# NOTE(review): presumably this is where the local `modules` package lives - confirm.
root_dir = '../../'
if root_dir not in sys.path:
    sys.path.append(root_dir)

import torch
from torch import nn, optim
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

from modules import losses, models, samplers, regularizers, evaluators, trainers, datasets, distributions

## ML100K

In [2]:
# --- ML100K experiment -------------------------------------------------------
# Trains one CollaborativeMetricLearning model per `neg_pair_weight` value and
# prints the recall scores recorded by the trainer, so that the effect of
# `neg_pair_weight` on the reported validation metrics can be compared.

# Tunable hyper-parameters, gathered in one place instead of inline literals.
lr = 1e-3
n_dim = 50
ks = [5, 10, 50]
no_progressbar = True
margin = 1
n_batch = 256
n_epoch = 30
valid_per_epoch = 15
seed = 0

# The device does not depend on the loop variable, so resolve it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = datasets.ML100k()
n_user = dataset.n_user
n_item = dataset.n_item

li_neg_pair_weight = [1, 3, 5, 10, 30, 50]

# Keep the scores of every run so they can be inspected after the loop;
# `trainer` is rebound on each iteration and would otherwise lose them.
valid_scores_ml100k = {}

for neg_pair_weight in li_neg_pair_weight:
    print(f"neg_pair_weight={neg_pair_weight}:")

    # Re-seed before every run so each configuration starts from the same RNG
    # state and the printed numbers are reproducible on a fresh kernel.
    # NOTE(review): this seeds the global torch and numpy generators; confirm
    # that the project's dataset split and sampler draw from one of them.
    torch.manual_seed(seed)
    np.random.seed(seed)

    train_set, test_set = dataset.get_train_and_test_set(
        neg_pair_weight=neg_pair_weight
    )
    train_set = torch.LongTensor(train_set).to(device)
    test_set = torch.LongTensor(test_set).to(device)

    model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = losses.SumTripletLoss(margin=margin).to(device)
    sampler = samplers.BaseSampler(
        train_set, n_user, n_item, device=device, strict_negative=False
    )
    evaluator = evaluators.RecallEvaluator(test_set, ks)
    trainer = trainers.BaseTrainer(
        model, optimizer, criterion, sampler, no_progressbar=no_progressbar
    )
    trainer.fit(
        n_batch=n_batch,
        n_epoch=n_epoch,
        valid_evaluator=evaluator,
        valid_per_epoch=valid_per_epoch,
    )

    valid_scores_ml100k[neg_pair_weight] = trainer.valid_scores
    print(trainer.valid_scores)

neg_pair_weight=1:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.326650   0.557585   0.942280      0       NaN
0  0.526155   0.725313   0.983823     15  0.222544
0  0.527327   0.727907   0.983856     30  0.206949
neg_pair_weight=3:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.172974   0.336753   0.814728      0       NaN
0  0.481755   0.675808   0.956926     15  0.223607
0  0.489538   0.683390   0.958758     30  0.208655
neg_pair_weight=5:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.113273   0.225106   0.709151      0       NaN
0  0.449775   0.637154   0.936067     15  0.224357
0  0.464125   0.647118   0.939874     30  0.207965
neg_pair_weight=10:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.062386   0.123933   0.524035      0       NaN
0  0.402040   0.568420   0.886768     15  0.223938
0  0.411641   0.584265   0.893062     30  0.209498
neg_pair_weight=30:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.023143   0.042134   0.217623   

## ML20M

In [3]:
# --- ML20M experiment --------------------------------------------------------
# Trains one CollaborativeMetricLearning model per `neg_pair_weight` value and
# prints the recall scores recorded by the trainer, so that the effect of
# `neg_pair_weight` on the reported validation metrics can be compared.

# Tunable hyper-parameters, gathered in one place instead of inline literals.
lr = 1e-3
n_dim = 50
ks = [5, 10, 50]
no_progressbar = True
margin = 1
n_batch = 256
n_epoch = 30
valid_per_epoch = 15
seed = 0

# The device does not depend on the loop variable, so resolve it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = datasets.ML20m()
n_user = dataset.n_user
n_item = dataset.n_item

li_neg_pair_weight = [1, 3, 5, 10, 30, 50]

# Keep the scores of every run so they can be inspected after the loop;
# `trainer` is rebound on each iteration and would otherwise lose them.
valid_scores_ml20m = {}

for neg_pair_weight in li_neg_pair_weight:
    print(f"neg_pair_weight={neg_pair_weight}:")

    # Re-seed before every run so each configuration starts from the same RNG
    # state and the printed numbers are reproducible on a fresh kernel.
    # NOTE(review): this seeds the global torch and numpy generators; confirm
    # that the project's dataset split and sampler draw from one of them.
    torch.manual_seed(seed)
    np.random.seed(seed)

    train_set, test_set = dataset.get_train_and_test_set(
        neg_pair_weight=neg_pair_weight
    )
    train_set = torch.LongTensor(train_set).to(device)
    test_set = torch.LongTensor(test_set).to(device)

    model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = losses.SumTripletLoss(margin=margin).to(device)
    sampler = samplers.BaseSampler(
        train_set, n_user, n_item, device=device, strict_negative=False
    )
    evaluator = evaluators.RecallEvaluator(test_set, ks)
    trainer = trainers.BaseTrainer(
        model, optimizer, criterion, sampler, no_progressbar=no_progressbar
    )
    trainer.fit(
        n_batch=n_batch,
        n_epoch=n_epoch,
        valid_evaluator=evaluator,
        valid_per_epoch=valid_per_epoch,
    )

    valid_scores_ml20m[neg_pair_weight] = trainer.valid_scores
    print(trainer.valid_scores)

neg_pair_weight=1:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.331615   0.554936   0.914798      0       NaN
0  0.523870   0.721732   0.965717     15  0.306756
0  0.526874   0.725509   0.967798     30  0.222006
neg_pair_weight=3:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.172371   0.331497   0.797423      0       NaN
0  0.478042   0.679595   0.949830     15  0.307834
0  0.484607   0.688138   0.954576     30  0.224607
neg_pair_weight=5:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.116243   0.228287   0.700816      0       NaN
0  0.439880   0.645020   0.936003     15  0.306640
0  0.449858   0.657510   0.942602     30  0.224381
neg_pair_weight=10:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.063593   0.126303   0.522167      0       NaN
0  0.364384   0.573804   0.905353     15  0.309464
0  0.380572   0.594413   0.916388     30  0.221438
neg_pair_weight=30:
   Recall@5  Recall@10  Recall@50  epoch    losses
0  0.022135   0.044574   0.221361   