In [1]:
import sys

# Put the repository root (two directories up from the notebook's working
# directory) on the import path so the project-local `modules` package can be
# imported; the guard keeps re-running this cell from adding duplicates.
root_dir = '../../'
if sys.path.count(root_dir) == 0:
    sys.path.append(root_dir)

import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import pandas as pd

# Let pandas display up to 100 columns so that wide tables (such as the score
# tables shown further down) are not column-truncated.
pd.set_option('display.max_columns', 100)

from modules import losses, models, samplers, searches, regularizers, evaluators, trainers, datasets, distributions

In [2]:
# Load the dataset and read its user/item counts and train/test split.
dataset = datasets.ML20mTo100k()
n_user = dataset.n_user
n_item = dataset.n_item
print(f"n_user={n_user}, n_item={n_item}")
train_set, test_set = dataset.get_train_and_test_set()

# device setting: first CUDA GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.tensor with an explicit dtype replaces the legacy LongTensor/FloatTensor
# constructors: the resulting dtypes are unchanged (int64 for the training set,
# float32 for the test set) and the tensors are created directly on `device`.
train_set = torch.tensor(train_set, dtype=torch.long, device=device)
test_set = torch.tensor(test_set, dtype=torch.float32, device=device)

n_user=1355, n_item=3039


In [3]:
# Cut-off values k; the recorded score tables label each column "<metric>@k".
ks = [5, 10, 50]

# Metric label -> scoring function from the project's evaluators module.
# Insertion order is kept, so the metrics appear in this order.
score_function_dict = dict([
    ("Recall", evaluators.recall),
    ("Unpopularity", evaluators.unpopularity),
    ("Unpopularity2", evaluators.unpopularity2),
    ("Unpopularity3", evaluators.unpopularity3),
    ("F1-score", evaluators.f1_score),
    ("F1-score2", evaluators.f1_score2),
    ("F1-score3", evaluators.f1_score3),
])

# Evaluator built from the test set, the metric table and the cut-offs.
userwise = evaluators.UserwiseEvaluator(test_set, score_function_dict, ks)
# coverage = evaluators.CoverageEvaluator(test_set, ks)
# hubness = evaluators.HubnessEvaluator(test_set, ks)

In [4]:
# Sampler built from the training set; it is handed to the trainer below.
# NOTE(review): the exact effect of strict_negative=False is defined in
# samplers.BaseSampler and is not visible from this notebook.
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    strict_negative=False,
    device=device,
)

In [5]:
# Hyperparameters
# Learning rate handed to the Adam optimizer below.
lr = 1e-3
# Third argument of CollaborativeMetricLearning; presumably the embedding dimensionality - TODO confirm.
n_dim = 10
# First / second positional arguments of trainer.fit(); presumably a batch setting and the epoch count - TODO confirm.
n_batch = 256
n_epoch = 50
# Passed last to trainer.fit(); the recorded score tables contain one row every 10 epochs.
valid_per_epoch = 10
# NOTE(review): n_item_sample and n_user_sample are not referenced by any cell visible here.
n_item_sample = 30
n_user_sample = 30
# Forwarded to BaseTrainer as its last positional argument.
no_progressbar = True
# Only consumed by searches.Mymp below.
search_range = 30

# model (moved to the selected device)
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)

# distributions
# NOTE(review): only `gamma` is used below (by MutualProximity); `gaussian` is not referenced in the visible cells.
gaussian = distributions.Gaussian()
gamma = distributions.Gamma()

# search strategies wrapping the model; knn and mp are passed to trainer.fit() in later cells
# NOTE(review): `mymp` is constructed but not used in the visible cells.
knn = searches.NearestNeighborhood(model)
mp = searches.MutualProximity(model, gamma)
mymp = searches.Mymp(model, search_range)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)

# loss function
criterion = losses.SumTripletLoss(margin=1).to(device)

# trainer tying model, optimizer, loss and sampler together
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar)

In [6]:
# Train the model, validating with the nearest-neighbour search (knn) and the
# user-wise evaluator. The recorded score table has rows at epochs 0, 10, ..., 50.
# NOTE(review): the meaning of each positional argument is inferred from the
# variable names and the output table - confirm against BaseTrainer.fit.
trainer.fit(n_batch, n_epoch, knn, userwise, valid_per_epoch)

In [7]:
# Keep a copy of the validation score table from the k-NN run and display it.
# NOTE(review): the .copy() presumably protects this snapshot from the next
# fit() call, which reuses the same trainer - confirm.
re_knn = trainer.valid_scores.copy()
display(re_knn)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.36802,0.999213,10.746269,0.3457,0.39716,0.679837,0.21504,0.686039,0.999222,10.751546,0.691986,0.727483,1.265221,0.622273,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,0,
0,0.39631,0.999072,10.597022,0.319307,0.428438,0.731554,0.217078,0.717097,0.999149,10.670513,0.669501,0.757289,1.321088,0.632219,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,10,0.103175
0,0.376507,0.999138,10.666859,0.329714,0.40435,0.694632,0.210943,0.702399,0.999177,10.704604,0.677848,0.744362,1.294832,0.62948,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,20,0.050311
0,0.349139,0.99916,10.682531,0.333302,0.375853,0.644334,0.196532,0.687023,0.99919,10.71661,0.682389,0.726561,1.266269,0.618889,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,30,0.034995
0,0.347478,0.99915,10.659735,0.331444,0.37391,0.641235,0.195892,0.6877,0.999192,10.715786,0.682293,0.724716,1.266881,0.617853,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,40,0.028069
0,0.345326,0.999133,10.630153,0.326964,0.370536,0.636795,0.190911,0.691697,0.999189,10.709447,0.68179,0.73047,1.27456,0.620698,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,50,0.023707


In [8]:
# Second fit() call on the SAME trainer and model, now validating with the
# Mutual Proximity search (mp). Neither the model nor the trainer is re-created
# between the two runs, and the recorded losses keep decreasing from where the
# k-NN run ended, so this table appears to cover additional training epochs
# rather than a fresh run.
# NOTE(review): confirm this is intended before comparing re_mp with re_knn row by row.
trainer.fit(n_batch, n_epoch, mp, userwise, valid_per_epoch)

In [9]:
# Keep a copy of the validation score table from the Mutual Proximity run and display it.
# NOTE(review): its "epoch" column restarts at 0 although the model had already
# been trained by the preceding k-NN run - verify how the epochs should be read.
re_mp = trainer.valid_scores.copy()
display(re_mp)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.335609,0.999184,10.695463,0.336146,0.361325,0.619487,0.189853,0.690959,0.999206,10.732359,0.68692,0.729221,1.273247,0.622412,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,0,
0,0.339422,0.999174,10.675533,0.333378,0.365455,0.626459,0.19056,0.683149,0.999206,10.73055,0.686108,0.723025,1.259353,0.61738,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,10,0.021698
0,0.344096,0.999168,10.668661,0.332425,0.371626,0.635306,0.1929,0.684994,0.999204,10.727006,0.6868,0.724844,1.262684,0.616942,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,20,0.019944
0,0.344711,0.999163,10.66133,0.331799,0.372632,0.636453,0.193103,0.693973,0.999203,10.725303,0.685711,0.735025,1.279417,0.625141,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,30,0.019257
0,0.355351,0.99916,10.657133,0.331632,0.382862,0.655837,0.19974,0.684194,0.999205,10.729306,0.686992,0.724722,1.261455,0.617409,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,40,0.018336
0,0.353629,0.999159,10.657931,0.331774,0.380428,0.652583,0.199558,0.688069,0.999206,10.731498,0.686805,0.728878,1.268596,0.621031,1.0,0.331123,3.563647,1.0,0.484325,1.523792,1.0,50,0.017994


In [10]:
# Load the dataset and read its user/item counts and train/test split.
dataset = datasets.ML100k()
n_user = dataset.n_user
n_item = dataset.n_item
print(f"n_user={n_user}, n_item={n_item}")
train_set, test_set = dataset.get_train_and_test_set()

# device setting: first CUDA GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.tensor with an explicit dtype replaces the legacy LongTensor/FloatTensor
# constructors: the resulting dtypes are unchanged (int64 for the training set,
# float32 for the test set) and the tensors are created directly on `device`.
train_set = torch.tensor(train_set, dtype=torch.long, device=device)
test_set = torch.tensor(test_set, dtype=torch.float32, device=device)

n_user=940, n_item=1447


In [11]:
# Cut-off values k; the recorded score tables label each column "<metric>@k".
ks = [5, 10, 50]

# Metric label -> scoring function from the project's evaluators module.
# Insertion order is kept, so the metrics appear in this order.
score_function_dict = dict([
    ("Recall", evaluators.recall),
    ("Unpopularity", evaluators.unpopularity),
    ("Unpopularity2", evaluators.unpopularity2),
    ("Unpopularity3", evaluators.unpopularity3),
    ("F1-score", evaluators.f1_score),
    ("F1-score2", evaluators.f1_score2),
    ("F1-score3", evaluators.f1_score3),
])

# Evaluator built from the test set, the metric table and the cut-offs.
userwise = evaluators.UserwiseEvaluator(test_set, score_function_dict, ks)
# coverage = evaluators.CoverageEvaluator(test_set, ks)
# hubness = evaluators.HubnessEvaluator(test_set, ks)

In [12]:
# Sampler built from the training set; it is handed to the trainer below.
# NOTE(review): the exact effect of strict_negative=False is defined in
# samplers.BaseSampler and is not visible from this notebook.
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    strict_negative=False,
    device=device,
)

In [13]:
# Hyperparameters
# Learning rate handed to the Adam optimizer below.
lr = 1e-3
# Third argument of CollaborativeMetricLearning; presumably the embedding dimensionality - TODO confirm.
n_dim = 10
# First / second positional arguments of trainer.fit(); presumably a batch setting and the epoch count - TODO confirm.
n_batch = 256
n_epoch = 50
# Passed last to trainer.fit(); the recorded score tables contain one row every 10 epochs.
valid_per_epoch = 10
# NOTE(review): n_item_sample and n_user_sample are not referenced by any cell visible here.
n_item_sample = 30
n_user_sample = 30
# Forwarded to BaseTrainer as its last positional argument.
no_progressbar = True
# Only consumed by searches.Mymp below.
search_range = 30

# model (moved to the selected device)
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)

# distributions
# NOTE(review): only `gamma` is used below (by MutualProximity); `gaussian` is not referenced in the visible cells.
gaussian = distributions.Gaussian()
gamma = distributions.Gamma()

# search strategies wrapping the model; knn and mp are passed to trainer.fit() in later cells
# NOTE(review): `mymp` is constructed but not used in the visible cells.
knn = searches.NearestNeighborhood(model)
mp = searches.MutualProximity(model, gamma)
mymp = searches.Mymp(model, search_range)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)

# loss function
criterion = losses.SumTripletLoss(margin=1).to(device)

# trainer tying model, optimizer, loss and sampler together
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar)

In [14]:
# Train the model, validating with the nearest-neighbour search (knn) and the
# user-wise evaluator. The recorded score table has rows at epochs 0, 10, ..., 50.
# NOTE(review): the meaning of each positional argument is inferred from the
# variable names and the output table - confirm against BaseTrainer.fit.
trainer.fit(n_batch, n_epoch, knn, userwise, valid_per_epoch)

In [15]:
# Keep a copy of the validation score table from the k-NN run and display it.
# NOTE(review): the .copy() presumably protects this snapshot from the next
# fit() call, which reuses the same trainer - confirm.
re_knn = trainer.valid_scores.copy()
display(re_knn)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.059136,0.970476,6.706725,0.062131,0.092875,0.112843,0.029028,0.123594,0.969954,6.674454,0.12374,0.184008,0.233553,0.090851,0.528628,0.917553,6.293752,0.524182,0.581673,0.930178,0.517972,0,
0,0.21065,0.905855,3.841072,0.014909,0.291044,0.375471,0.017826,0.37773,0.899929,3.763069,0.036359,0.474571,0.6565,0.054536,0.810204,0.88418,4.702413,0.417342,0.820621,1.370287,0.475723,10,0.47686
0,0.332037,0.875368,3.293243,0.012539,0.428609,0.574654,0.019717,0.496653,0.886499,3.530139,0.042017,0.59082,0.845799,0.066711,0.855184,0.885528,4.645226,0.428194,0.850526,1.43424,0.496467,20,0.299577
0,0.383946,0.875575,3.296876,0.014435,0.478784,0.658646,0.025018,0.551501,0.890013,3.606141,0.049472,0.63511,0.932928,0.080672,0.876215,0.88768,4.72743,0.43674,0.864442,1.469046,0.509978,30,0.25485
0,0.398299,0.876287,3.309297,0.015334,0.492206,0.681686,0.027062,0.562952,0.890673,3.628201,0.051093,0.643694,0.950912,0.083988,0.885105,0.887894,4.740555,0.437212,0.869876,1.48251,0.511722,40,0.241435
0,0.401943,0.8769,3.318118,0.01554,0.495523,0.687528,0.027238,0.569944,0.891,3.64257,0.051126,0.649511,0.962115,0.084078,0.887145,0.888098,4.749432,0.437653,0.871232,1.485775,0.51235,50,0.234219


In [16]:
# Second fit() call on the SAME trainer and model, now validating with the
# Mutual Proximity search (mp). Neither the model nor the trainer is re-created
# between the two runs, and the recorded losses keep decreasing from where the
# k-NN run ended, so this table appears to cover additional training epochs
# rather than a fresh run.
# NOTE(review): confirm this is intended before comparing re_mp with re_knn row by row.
trainer.fit(n_batch, n_epoch, mp, userwise, valid_per_epoch)

In [17]:
# Keep a copy of the validation score table from the Mutual Proximity run and display it.
# NOTE(review): its "epoch" column restarts at 0 although the model had already
# been trained by the preceding k-NN run - verify how the epochs should be read.
re_mp = trainer.valid_scores.copy()
display(re_mp)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.326367,0.925154,4.244456,0.034907,0.42576,0.579714,0.054587,0.492523,0.931487,4.55256,0.084471,0.587452,0.862201,0.131458,0.85167,0.900086,5.211312,0.473222,0.851117,1.45019,0.554588,0,
0,0.326351,0.925951,4.271929,0.035557,0.427843,0.581393,0.057251,0.496012,0.931857,4.562648,0.084339,0.591894,0.86915,0.132095,0.855172,0.900206,5.218239,0.473376,0.853567,1.455997,0.555431,10,0.231915
0,0.326774,0.92532,4.262814,0.034927,0.426986,0.580916,0.055705,0.502626,0.930989,4.547815,0.084169,0.597346,0.879304,0.132024,0.855303,0.900215,5.224944,0.473643,0.853731,1.456448,0.55547,20,0.227153
0,0.329577,0.926202,4.274096,0.035406,0.42959,0.585739,0.056286,0.496849,0.932166,4.572584,0.084709,0.593354,0.870803,0.132866,0.855734,0.90029,5.226624,0.473247,0.853754,1.456883,0.555503,30,0.226777
0,0.332552,0.926811,4.296593,0.035528,0.434702,0.591901,0.057186,0.500817,0.932273,4.577252,0.084482,0.597461,0.877774,0.132792,0.8571,0.900466,5.237293,0.474944,0.855109,1.459611,0.557217,40,0.226326
0,0.329159,0.926752,4.295243,0.035541,0.430321,0.585928,0.057074,0.497153,0.932389,4.582216,0.084704,0.594771,0.872101,0.132855,0.856894,0.900643,5.244614,0.47487,0.855043,1.459627,0.557292,50,0.225917


In [2]:
# Load the dataset and read its user/item counts and train/test split.
# NOTE: the recorded output of this cell contains a pandas SettingWithCopyWarning;
# the statement it quotes (df_rating["rating"] = ...) is not in this notebook,
# so it presumably originates in the dataset loading code.
dataset = datasets.ML200mHoge()
n_user = dataset.n_user
n_item = dataset.n_item
print(f"n_user={n_user}, n_item={n_item}")
train_set, test_set = dataset.get_train_and_test_set()

# device setting: first CUDA GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.tensor with an explicit dtype replaces the legacy LongTensor/FloatTensor
# constructors: the resulting dtypes are unchanged (int64 for the training set,
# float32 for the test set) and the tensors are created directly on `device`.
train_set = torch.tensor(train_set, dtype=torch.long, device=device)
test_set = torch.tensor(test_set, dtype=torch.float32, device=device)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rating["rating"] = (df_rating["rating"] >= 4.0).astype(int)


n_user=994, n_item=6470


In [3]:
# Cut-off values k; the recorded score tables label each column "<metric>@k".
ks = [5, 10, 50]

# Metric label -> scoring function from the project's evaluators module.
# Insertion order is kept, so the metrics appear in this order.
score_function_dict = dict([
    ("Recall", evaluators.recall),
    ("Unpopularity", evaluators.unpopularity),
    ("Unpopularity2", evaluators.unpopularity2),
    ("Unpopularity3", evaluators.unpopularity3),
    ("F1-score", evaluators.f1_score),
    ("F1-score2", evaluators.f1_score2),
    ("F1-score3", evaluators.f1_score3),
])

# Evaluator built from the test set, the metric table and the cut-offs.
userwise = evaluators.UserwiseEvaluator(test_set, score_function_dict, ks)
# coverage = evaluators.CoverageEvaluator(test_set, ks)
# hubness = evaluators.HubnessEvaluator(test_set, ks)

In [4]:
# Sampler built from the training set; it is handed to the trainer below.
# NOTE(review): the exact effect of strict_negative=False is defined in
# samplers.BaseSampler and is not visible from this notebook.
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    strict_negative=False,
    device=device,
)

In [5]:
# Hyperparameters
# Learning rate handed to the Adam optimizer below.
lr = 1e-3
# Third argument of CollaborativeMetricLearning; presumably the embedding dimensionality - TODO confirm.
n_dim = 10
# First / second positional arguments of trainer.fit(); presumably a batch setting and the epoch count - TODO confirm.
n_batch = 256
n_epoch = 50
# Passed last to trainer.fit(); the recorded score tables contain one row every 10 epochs.
valid_per_epoch = 10
# NOTE(review): n_item_sample and n_user_sample are not referenced by any cell visible here.
n_item_sample = 30
n_user_sample = 30
# Forwarded to BaseTrainer as its last positional argument.
no_progressbar = True
# Only consumed by searches.Mymp below.
search_range = 30

# model (moved to the selected device)
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim).to(device)

# distributions
# NOTE(review): only `gamma` is used below (by MutualProximity); `gaussian` is not referenced in the visible cells.
gaussian = distributions.Gaussian()
gamma = distributions.Gamma()

# search strategies wrapping the model; knn and mp are passed to trainer.fit() in later cells
# NOTE(review): `mymp` is constructed but not used in the visible cells.
knn = searches.NearestNeighborhood(model)
mp = searches.MutualProximity(model, gamma)
mymp = searches.Mymp(model, search_range)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)

# loss function
criterion = losses.SumTripletLoss(margin=1).to(device)

# trainer tying model, optimizer, loss and sampler together
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler, no_progressbar)

In [6]:
# Train the model, validating with the nearest-neighbour search (knn) and the
# user-wise evaluator. The recorded score table has rows at epochs 0, 10, ..., 50.
# NOTE(review): the meaning of each positional argument is inferred from the
# variable names and the output table - confirm against BaseTrainer.fit.
trainer.fit(n_batch, n_epoch, knn, userwise, valid_per_epoch)

In [7]:
# Keep a copy of the validation score table from the k-NN run and display it.
# NOTE(review): the .copy() presumably protects this snapshot from the next
# fit() call, which reuses the same trainer - confirm.
re_knn = trainer.valid_scores.copy()
display(re_knn)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.062337,0.997172,10.482322,0.064993,0.098471,0.121097,0.031939,0.121669,0.99709,10.455102,0.128963,0.183381,0.234882,0.091705,0.527695,0.936738,9.808585,0.523245,0.576163,0.96645,0.517599,0,
0,0.274164,0.986569,7.123615,0.015212,0.365107,0.506581,0.019117,0.464637,0.986393,7.194426,0.0437,0.563676,0.849019,0.067144,0.816166,0.932546,8.245098,0.428271,0.835365,1.472258,0.486341,10,0.4272
0,0.363283,0.980575,6.260824,0.007799,0.465731,0.661015,0.009343,0.532555,0.983454,6.647232,0.032512,0.631884,0.963305,0.049747,0.841153,0.93239,8.068,0.421412,0.853715,1.512005,0.478548,20,0.25541
0,0.39252,0.980205,6.143147,0.006515,0.496757,0.71237,0.008902,0.558957,0.98335,6.61675,0.033331,0.656866,1.009207,0.051368,0.859716,0.932499,8.097308,0.42468,0.866995,1.543852,0.484918,30,0.197119
0,0.404649,0.98061,6.173445,0.006517,0.508962,0.733864,0.008893,0.57699,0.983687,6.658165,0.034121,0.671677,1.040231,0.053542,0.8689,0.932596,8.142012,0.427977,0.873383,1.559984,0.489894,40,0.172272
0,0.408725,0.98076,6.195148,0.007115,0.513358,0.741344,0.010063,0.583827,0.983822,6.67613,0.034601,0.677272,1.052259,0.054311,0.872609,0.932645,8.16379,0.429611,0.876143,1.566697,0.492284,50,0.160087


In [8]:
# Second fit() call on the SAME trainer and model, now validating with the
# Mutual Proximity search (mp). Neither the model nor the trainer is re-created
# between the two runs, and the recorded losses keep decreasing from where the
# k-NN run ended, so this table appears to cover additional training epochs
# rather than a fresh run.
# NOTE(review): confirm this is intended before comparing re_mp with re_knn row by row.
trainer.fit(n_batch, n_epoch, mp, userwise, valid_per_epoch)

In [9]:
# Keep a copy of the validation score table from the Mutual Proximity run and display it.
# NOTE(review): its "epoch" column restarts at 0 although the model had already
# been trained by the preceding k-NN run - verify how the epochs should be read.
re_mp = trainer.valid_scores.copy()
display(re_mp)

Unnamed: 0,Recall@5,Unpopularity@5,Unpopularity2@5,Unpopularity3@5,F1-score@5,F1-score2@5,F1-score3@5,Recall@10,Unpopularity@10,Unpopularity2@10,Unpopularity3@10,F1-score@10,F1-score2@10,F1-score3@10,Recall@50,Unpopularity@50,Unpopularity2@50,Unpopularity3@50,F1-score@50,F1-score2@50,F1-score3@50,epoch,losses
0,0.300104,0.990801,7.736324,0.025145,0.39832,0.556823,0.031788,0.453149,0.991643,8.079543,0.069538,0.55367,0.833837,0.096582,0.807996,0.934729,8.791622,0.466437,0.826459,1.462612,0.534909,0,
0,0.311972,0.99077,7.726884,0.024926,0.412257,0.578367,0.033384,0.465171,0.991649,8.074367,0.06916,0.56652,0.855683,0.098898,0.810989,0.934781,8.806917,0.467029,0.829215,1.46811,0.536083,10,0.152818
0,0.314612,0.990821,7.756063,0.025394,0.415056,0.583594,0.034553,0.47623,0.991725,8.079209,0.068699,0.578391,0.876181,0.10227,0.812757,0.934819,8.816359,0.467985,0.830921,1.471552,0.537421,20,0.148923
0,0.319079,0.99081,7.712113,0.024298,0.41997,0.591252,0.033133,0.477058,0.991855,8.097246,0.069496,0.579873,0.878199,0.10238,0.819729,0.934789,8.816918,0.467757,0.836129,1.483694,0.53842,30,0.145269
0,0.320737,0.990856,7.723386,0.024606,0.422482,0.59467,0.034295,0.484242,0.991785,8.095611,0.069827,0.586172,0.890697,0.103536,0.820793,0.934814,8.821656,0.467939,0.836619,1.485434,0.539013,40,0.14404
0,0.323648,0.990841,7.723171,0.024269,0.42461,0.59943,0.033747,0.483111,0.991799,8.117939,0.071048,0.585064,0.888942,0.104716,0.820628,0.934857,8.837546,0.469311,0.836742,1.485422,0.540436,50,0.142839
