# Import

In [1]:
import sys

# Put the directory two levels up on the import path (only once, so re-running
# this cell does not add duplicates). Presumably this is the repository root
# that holds the `modules` package imported below — the path is relative to the
# kernel's working directory, so confirm the notebook is launched from here.
root_dir = '../../'
if root_dir not in sys.path:
    sys.path.append(root_dir)

import torch
from torch import nn, optim
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

from modules import losses, models, samplers, searches, regularizers, evaluators, trainers, datasets, distributions

# Dataset

In [2]:
# Load the dataset wrapper and read the user/item counts it exposes.
dataset = datasets.ML20m()
n_user, n_item = dataset.n_user, dataset.n_item

# Device selection: first CUDA device when one is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Convert both splits to integer tensors and move them to the chosen device.
train_set, test_set = (
    torch.LongTensor(split).to(device)
    for split in dataset.get_train_and_test_set()
)

# Evaluator

In [3]:
# Cut-off values k at which every top-k metric below is reported
# (e.g. Recall@5, Recall@10, Recall@50).
ks = [5, 10, 50]

# Each evaluator is built from the same test_set tensor and the same cut-offs.
recall = evaluators.RecallEvaluator(test_set, ks)
unpopularity = evaluators.UnpopularityEvaluator(test_set, ks)
f1_score = evaluators.F1ScoreEvaluator(test_set, ks)
# Disabled evaluators, not used by any result cell in this notebook;
# uncomment to construct them.
# coverage = evaluators.CoverageEvaluator(test_set, ks)
# hubness = evaluators.HubnessEvaluator(test_set, ks)

# Sampler

In [4]:
# Sampler built over the training interactions; it is handed to the trainer
# in the model cell. NOTE(review): strict_negative=False presumably lets
# sampled negatives coincide with observed positives — confirm in samplers.
sampler = samplers.BaseSampler(train_set, n_user, n_item, device=device, strict_negative=False)

# Model

In [5]:
# Hyperparameters
seed = 0                # RNG seed so that Restart & Run All gives comparable numbers
lr = 1e-3               # learning rate passed to Adam
n_dim = 10              # third argument of CollaborativeMetricLearning (presumably the embedding size)
n_batch = 256           # first argument of trainer.fit in the training cell
n_epoch = 50            # second argument of trainer.fit in the training cell
no_progressbar = True   # forwarded positionally to BaseTrainer

# Seed the global torch and numpy generators *before* the model is created so
# that parameter initialisation is repeatable. torch.manual_seed also seeds the
# CUDA generators. NOTE(review): if the sampler keeps a private generator or
# uses Python's `random`, it needs its own seeding; GPU kernels may still be
# non-deterministic.
torch.manual_seed(seed)
np.random.seed(seed)

# Model, optimiser and loss live on the same device as the data tensors.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = losses.SumTripletLoss(margin=1).to(device)

# The trainer ties together the model, optimiser, loss and the sampler built earlier.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar)

# Search

In [6]:
# Hyperparameters: the two sample sizes handed to MutualProximity (both 30).
n_item_sample = n_user_sample = 30

# Distribution objects. Only `gamma` is passed on below; `gaussian` is
# constructed but not referenced by any other visible cell.
gaussian = distributions.Gaussian()
gamma = distributions.Gamma()

# The two search strategies compared in the result cells; both wrap `model`.
knn = searches.NearestNeighborhood(model)
mp = searches.MutualProximity(
    model,
    gamma,
    n_item_sample,
    n_user_sample,
)

# Training

In [7]:
# Training only; no validation is run during fitting.
trainer.fit(n_batch, n_epoch)

# Result

## Recall

In [8]:
# Score recall with the plain nearest-neighbour search first, then with the
# Mutual Proximity search, showing the trainer's score table after each run.
for label, search in (('Base CML:', knn), ('CML+MP:', mp)):
    print(label)
    trainer.valid(search, recall)
    display(trainer.valid_scores)

Base CML:


Unnamed: 0,Recall@5,Recall@10,Recall@50
0,0.238145,0.485688,0.899653


CML+MP:


Unnamed: 0,Recall@5,Recall@10,Recall@50
0,0.026507,0.088836,0.777591


## Unpopularity

In [9]:
# Score unpopularity with the plain nearest-neighbour search first, then with
# the Mutual Proximity search, showing the trainer's score table after each run.
for label, search in (('Base CML:', knn), ('CML+MP:', mp)):
    print(label)
    trainer.valid(search, unpopularity)
    display(trainer.valid_scores)

Base CML:


Unnamed: 0,Unpopularity@5,Unpopularity@10,Unpopularity@50
0,0.019,0.042382,0.424729


CML+MP:


Unnamed: 0,Unpopularity@5,Unpopularity@10,Unpopularity@50
0,0.070066,0.131263,0.457849


## F1 score

In [10]:
# Score F1 with the plain nearest-neighbour search first, then with the
# Mutual Proximity search, showing the trainer's score table after each run.
for label, search in (('Base CML:', knn), ('CML+MP:', mp)):
    print(label)
    trainer.valid(search, f1_score)
    display(trainer.valid_scores)

Base CML:


Unnamed: 0,F1-score@5,F1-score@10,F1-score@50
0,0.024753,0.067907,0.488603


CML+MP:


Unnamed: 0,F1-score@5,F1-score@10,F1-score@50
0,0.007192,0.044477,0.514841
