### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
import torch
from torch import Tensor
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchmetrics import MetricCollection, AUROC, Recall, Precision, F1Score, Accuracy, MatthewsCorrCoef
from torchmetrics.functional.classification import multiclass_matthews_corrcoef
from pytorch_tabnet.tab_model import TabNetClassifier
import optuna


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import IterativeImputer
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, FunctionTransformer, OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder, TargetEncoder, LabelEncoder
from sklearn.pipeline import make_pipeline

In [4]:
import pickle
import random
import tqdm
import gc

In [5]:
# One seed shared by the Python, PyTorch and NumPy random number generators.
SEED = 13
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Prefix prepended to the data file names (train.csv / test.csv) when they are read.
PATH = './'
# Hold-out fraction; the (currently commented-out) train_test_split calls pass it as test_size.
EVAL_SIZE = 0.2

# Raise the pandas display limits to 500 rows / 500 columns.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

### Preprocessing

In [133]:
# Load the train and test tables from PATH, using the 'id' column as the index.
df_train = pd.read_csv(PATH+'train.csv', index_col='id')
df_test = pd.read_csv(PATH+'test.csv', index_col='id')

In [134]:
# Quick look at the training data: first rows, schema, numeric summary and
# per-column missing-value counts.
display(df_train.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in display() only rendered an extra "None".
df_train.info(show_counts=True)
display(df_train.describe())
display(df_train.isna().sum())

Unnamed: 0_level_0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,e,8.8,f,s,u,f,a,c,w,4.51,15.39,,,w,,,f,f,,d,a
1,p,4.51,x,h,o,f,a,c,n,4.79,6.48,,y,o,,,t,z,,d,w
2,e,6.94,f,s,b,f,x,c,w,6.85,9.93,,s,n,,,f,f,,l,w
3,e,3.88,f,y,g,f,s,,g,4.16,6.53,,,w,,,f,f,,d,u
4,e,5.85,x,l,w,f,d,,w,3.37,8.36,,,w,,,f,f,,g,a


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3116945 entries, 0 to 3116944
Data columns (total 21 columns):
 #   Column                Non-Null Count    Dtype  
---  ------                --------------    -----  
 0   class                 3116945 non-null  object 
 1   cap-diameter          3116941 non-null  float64
 2   cap-shape             3116905 non-null  object 
 3   cap-surface           2445922 non-null  object 
 4   cap-color             3116933 non-null  object 
 5   does-bruise-or-bleed  3116937 non-null  object 
 6   gill-attachment       2593009 non-null  object 
 7   gill-spacing          1858510 non-null  object 
 8   gill-color            3116888 non-null  object 
 9   stem-height           3116945 non-null  float64
 10  stem-width            3116945 non-null  float64
 11  stem-root             359922 non-null   object 
 12  stem-surface          1136084 non-null  object 
 13  stem-color            3116907 non-null  object 
 14  veil-type             159452 non-n

None

Unnamed: 0,cap-diameter,stem-height,stem-width
count,3116941.0,3116945.0,3116945.0
mean,6.309848,6.348333,11.15379
std,4.657931,2.699755,8.095477
min,0.03,0.0,0.0
25%,3.32,4.67,4.97
50%,5.75,5.88,9.65
75%,8.24,7.41,15.63
max,80.67,88.72,102.9


class                         0
cap-diameter                  4
cap-shape                    40
cap-surface              671023
cap-color                    12
does-bruise-or-bleed          8
gill-attachment          523936
gill-spacing            1258435
gill-color                   57
stem-height                   0
stem-width                    0
stem-root               2757023
stem-surface            1980861
stem-color                   38
veil-type               2957493
veil-color              2740947
has-ring                     24
ring-type                128880
spore-print-color       2849682
habitat                      45
season                        0
dtype: int64

In [135]:
# Same overview for the test data: first rows, schema, numeric summary and
# per-column missing-value counts.
display(df_test.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in display() only rendered an extra "None".
df_test.info(show_counts=True)
display(df_test.describe())
display(df_test.isna().sum())

Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
3116945,8.64,x,,n,t,,,w,11.13,17.12,b,,w,u,w,t,g,,d,a
3116946,6.9,o,t,o,f,,c,y,1.27,10.75,,,n,,,f,f,,d,a
3116947,2.0,b,g,n,f,,c,n,6.18,3.14,,,n,,,f,f,,d,s
3116948,3.47,x,t,n,f,s,c,n,4.98,8.51,,,w,,n,t,z,,d,u
3116949,6.17,x,h,y,f,p,,y,6.73,13.7,,,y,,y,t,,,d,u


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2077964 entries, 3116945 to 5194908
Data columns (total 20 columns):
 #   Column                Non-Null Count    Dtype  
---  ------                --------------    -----  
 0   cap-diameter          2077957 non-null  float64
 1   cap-shape             2077933 non-null  object 
 2   cap-surface           1631060 non-null  object 
 3   cap-color             2077951 non-null  object 
 4   does-bruise-or-bleed  2077954 non-null  object 
 5   gill-attachment       1728143 non-null  object 
 6   gill-spacing          1238369 non-null  object 
 7   gill-color            2077915 non-null  object 
 8   stem-height           2077963 non-null  float64
 9   stem-width            2077964 non-null  float64
 10  stem-root             239952 non-null   object 
 11  stem-surface          756476 non-null   object 
 12  stem-color            2077943 non-null  object 
 13  veil-type             106419 non-null   object 
 14  veil-color            251840

None

Unnamed: 0,cap-diameter,stem-height,stem-width
count,2077957.0,2077963.0,2077964.0
mean,6.306192,6.346509,11.14837
std,4.685462,2.698978,8.100181
min,0.0,0.0,0.0
25%,3.31,4.67,4.97
50%,5.74,5.88,9.64
75%,8.23,7.41,15.62
max,607.0,57.29,102.91


cap-diameter                  7
cap-shape                    31
cap-surface              446904
cap-color                    13
does-bruise-or-bleed         10
gill-attachment          349821
gill-spacing             839595
gill-color                   49
stem-height                   1
stem-width                    0
stem-root               1838012
stem-surface            1321488
stem-color                   21
veil-type               1971545
veil-color              1826124
has-ring                     19
ring-type                 86195
spore-print-color       1899617
habitat                      25
season                        0
dtype: int64

In [136]:
# for i in df_test.select_dtypes(include='object').columns:
#     print(i)
#     display(df_test[i].value_counts().head(10))
#     display(df_train[i].value_counts().head(10))
#     print()

In [137]:
# 'veil-type' is missing for the vast majority of rows (see the isna() counts above).
# For this column NaN is not kept as a category of its own, so the column is
# removed from both frames instead.

df_train.drop('veil-type', axis=1, inplace=True)
df_test.drop('veil-type', axis=1, inplace=True)

In [138]:
# Column groups used by the rest of the notebook.
# `target` is kept as a one-element list, so df[target] selects a one-column frame.
target = ['class']
# The three numeric measurements.
num_cols = ['cap-diameter', 'stem-height', 'stem-width']
# Categorical features; 'veil-color' is commented out and therefore excluded.
cat_cols = ['cap-shape', 'cap-surface', 'cap-color',
            'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
            'stem-root', 'stem-surface', 'stem-color', #'veil-color',
            'has-ring', 'ring-type', 'spore-print-color', 'habitat', 'season']

In [139]:
# Encode the target labels as integers: 'e' -> 0, 'p' -> 1.
class2idx = {'e':0, 'p':1}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form emits a FutureWarning on pandas 2.x
# and silently leaves df_train unchanged once Copy-on-Write is enabled.
# The explicit cast keeps the column int64 on pandas versions where replace()
# no longer downcasts an object column; re-running the cell is a no-op.
df_train['class'] = df_train['class'].replace(class2idx).astype('int64')

In [140]:
# Keep only the listed columns, ordered numeric -> categorical (-> target for train).
# Any column not in these lists (e.g. 'veil-color') is dropped by the reindex.
df_train = df_train.reindex(columns=num_cols+cat_cols+target, copy=False)
df_test = df_test.reindex(columns=num_cols+cat_cols, copy=False)

In [142]:
def tail_cutter(df, column, edge):
    """Blank out rare values of ``column`` in place.

    Every value that occurs fewer than ``edge`` times in ``df[column]`` is
    overwritten with NaN. The frame is modified in place; nothing is returned.
    """
    frequencies = df[column].value_counts()
    rare_values = frequencies.loc[frequencies.lt(edge)].index
    is_rare = df[column].isin(rare_values)
    df.loc[is_rare, column] = np.nan

# For every object-dtype column of the test frame, blank out (set to NaN) the values
# that occur fewer than 1000 times, in both the train and the test frame.
# NOTE(review): the counts are taken separately per frame, so a value can survive in
# one frame and be blanked in the other — confirm this is intended.
for col in df_test.select_dtypes(include='object'):
    tail_cutter(df_train, col, 1000)
    tail_cutter(df_test, col, 1000)

In [146]:
# Test frame: fill missing numeric values with that column's median (computed on df_test).
for num_col in num_cols:
    df_test.loc[df_test[num_col].isna(), num_col] = df_test[num_col].median()
# Train frame: rows with a missing numeric value are dropped instead.
df_train.dropna(subset=num_cols, axis=0, inplace=True)
# NOTE(review): after the median fill above this should find nothing to drop
# (unless a median itself is NaN) — presumably a safety net; confirm.
df_test.dropna(subset=num_cols, axis=0, inplace=True)

In [147]:
# Replace every remaining NaN with the literal string 'nan' in both frames.
# Presumably this makes "missing" an ordinary category for the encoder below — confirm.
df_train.fillna('nan', inplace=True)
df_test.fillna('nan', inplace=True)

In [17]:
def to_float32(x):
    """Return ``x`` converted to float32 via its ``astype`` method."""
    converted = x.astype(np.float32)
    return converted

# Preprocessing pipeline shared by every dataset in this notebook.
#   * num_cols: standardised with StandardScaler, then cast to float32.
#   * cat_cols: integer-coded with OrdinalEncoder, then cast to float32
#     (the models convert these codes back with .long() before the embedding lookup).
# The ColumnTransformer lists the numeric block first and the categorical block second;
# any other column (e.g. the target) is dropped.
# NOTE(review): OrdinalEncoder() is left at its defaults, so how transform() treats a
# category not seen during fit depends on those defaults — verify against the data.
pipeline = make_pipeline(
    ColumnTransformer([('num_encode',
                        make_pipeline(StandardScaler(),
                                      # previous variant, kept for reference:
                                      # FunctionTransformer(lambda x: x.astype(np.float32))
                                      FunctionTransformer(func=to_float32),
                                     ), num_cols),

                       ('cat_encode',
                       make_pipeline(OrdinalEncoder(),
                                     # previous variant, kept for reference:
                                     # FunctionTransformer(lambda x: x.astype(np.int32))
                                     FunctionTransformer(func=to_float32),
                                    ), cat_cols)],

                       remainder='drop')
)

### Embeddings Training

##### functions and classes

In [60]:
class Base_Dataset(Dataset):
    """Torch dataset that runs a frame through the module-level ``pipeline``.

    Args:
        df: input frame. Unless ``is_test`` is set it must contain the ``target``
            column(s), which are split off into ``self.y``.
        is_eval: transform with the already fitted pipeline instead of refitting it.
        is_test: the frame has no target; the pipeline is only applied (not fitted)
            and ``__getitem__`` yields ``-1`` in place of a label.

    With both flags False the pipeline is fitted on this frame (``fit_transform``).
    """

    def __init__(self, df, is_eval=False, is_test=False):

        self.df = df
        self.is_eval = is_eval
        self.is_test = is_test

        # Separate the features from the labels (test frames carry no labels).
        if is_test:
            features = df
        else:
            features = df.drop(target, axis=1)
            self.y = df[target].values

        # Only a plain training dataset is allowed to (re)fit the pipeline.
        reuse_fitted = is_test or is_eval
        if reuse_fitted:
            self.X = pipeline.transform(features)
        else:
            self.X = pipeline.fit_transform(features)
        gc.collect()

    def __getitem__(self, index):
        row = self.X[index]
        if self.is_test:
            return row, -1
        return row, self.y[index]

    def __len__(self):
        n_rows = self.X.shape[0]
        return n_rows

In [19]:
class FCNNet(nn.Module):
    """Fully connected classifier over numeric features plus embedded categoricals.

    The input tensor holds ``num_dim`` numeric columns followed by one
    integer-coded column per entry of ``emb_szs``. Each code column is looked up
    in its own ``nn.Embedding``; the embeddings are concatenated with the numeric
    columns and fed through Linear -> BatchNorm -> activation -> Dropout blocks,
    ending in a single-logit Linear layer.

    Args:
        input_dim: width of the concatenated vector, i.e.
            ``num_dim + sum(out_sz for _, out_sz in emb_szs)``. Required.
        layers_num: number of hidden blocks after the input block.
        layers_dim: width of every hidden layer.
        activation: activation class (instantiated without arguments).
        emb_szs: iterable of ``(num_categories, embedding_dim)`` pairs, one per
            categorical column. Required.
        dropout: dropout probability used after every activation.
        num_dim: number of leading numeric columns in the input (default 3,
            the value that used to be hard-coded in ``forward``).
    """

    def __init__(self,
                 input_dim = None,
                 layers_num = 2,
                 layers_dim = 32,
                 activation = nn.ReLU,
                 emb_szs = None,
                 dropout: float = 0.,
                 num_dim: int = 3,
                ):
        super(FCNNet, self).__init__()

        self.num_dim = num_dim
        self.embeddings = nn.ModuleList([nn.Embedding(in_sz, out_sz) for in_sz, out_sz in emb_szs])

        # The batch-norm width is known (layers_dim), so a regular BatchNorm1d is
        # used instead of LazyBatchNorm1d: every parameter is materialised at
        # construction time, before an optimizer is built from .parameters().
        fc_layers = []
        fc_layers.append(nn.Linear(input_dim, layers_dim))
        fc_layers.append(nn.BatchNorm1d(layers_dim))
        fc_layers.append(activation())
        fc_layers.append(nn.Dropout(p=dropout))
        for _ in range(layers_num):
            fc_layers.append(nn.Linear(layers_dim, layers_dim))
            fc_layers.append(nn.BatchNorm1d(layers_dim))
            fc_layers.append(activation())
            fc_layers.append(nn.Dropout(p=dropout))
        fc_layers.append(nn.Linear(layers_dim, 1))

        self.fc_layers = nn.Sequential(*fc_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one logit per row, shape ``(batch, 1)``."""
        x_num = x[:, :self.num_dim]
        # Categorical codes arrive as floats from the pipeline; embeddings need integer indices.
        x_cat = x[:, self.num_dim:].long()
        x_cat = [emb_layer(x_cat[:, i]) for i, emb_layer in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, dim=-1)

        x = torch.cat([x_num, x_cat], dim=-1).float()
        x = self.fc_layers(x)
        return x
    

In [20]:
def train_epoch_fcnn(model, optimizer, loss_fn, train_dataloader, altmetric=None):
    """Run one training epoch and return the mean batch loss.

    Each batch is moved to ``DEVICE``, scored by ``model`` and compared with the
    float-cast labels via ``loss_fn``; the optimizer then takes one step. When
    ``altmetric`` is given it is updated with the same predictions and labels.
    """
    model.train()
    running_loss = 0

    for features, labels in tqdm.tqdm(train_dataloader):
        features = features.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = model(features)
        optimizer.zero_grad()
        batch_loss = loss_fn(logits, labels.float())
        if altmetric:
            altmetric.update(logits, labels.float())

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    n_batches = len(train_dataloader)
    return running_loss / n_batches
    
def evaluate_fcnn(model, loss_fn, test_dataloader, altmetric=None):
    """Evaluate ``model`` on a dataloader and return the mean batch loss.

    The model is switched to eval mode and the whole pass runs under
    ``torch.no_grad()`` so no autograd graph is built during evaluation.
    When ``altmetric`` is given it is updated with every batch's predictions
    and float-cast labels; computing/resetting it is left to the caller.
    """
    model.eval()
    losses = 0

    with torch.no_grad():
        for X, y in test_dataloader:
            X, y = X.to(DEVICE), y.to(DEVICE)

            preds = model(X)
            loss = loss_fn(preds, y.float())
            if altmetric: altmetric.update(preds,y.float())
            losses += loss.item()

    return losses / len(test_dataloader)


##### training

In [21]:
# df_train_train, df_train_eval = train_test_split(df_train, test_size=EVAL_SIZE, random_state=SEED,
#                                                  shuffle=True, stratify=df_train[target])

# dataset_train = Base_Dataset(df_train_train)
# dataset_eval = Base_Dataset(df_train_eval, is_eval=True)

# display(len(dataset_train))
# display(len(dataset_eval))


In [22]:
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# INPUT_DIM = sum([df_train_train[cat_col].nunique()//2+1
#                  for cat_col in cat_cols]) + 3
# LAYERS_NUM = 2
# LAYERS_DIM = 256
# ACTIVATION = nn.ReLU
# EMB_SZS = [[df_train_train[cat_col].nunique(),
#             df_train_train[cat_col].nunique()//2+1]
#            for cat_col in cat_cols]
# DROPOUT = 0.25

# NUM_EPOCHS = 32
# BATCH_SIZE = 1024*8 #2048
# LR = 0.001 #0.001
# WEIGHT_DECAY = 2e-5

# fcnn = FCNNet(input_dim=INPUT_DIM,
#               layers_num=LAYERS_NUM,
#               layers_dim=LAYERS_DIM,
#               activation=ACTIVATION,
#               emb_szs=EMB_SZS,
#               dropout=DROPOUT
#               )

# altmetric_train = MetricCollection([AUROC(task='binary'),
#                                     Recall(task='binary'),
#                                     Precision(task='binary'),
#                                     F1Score(task='binary'),
#                                     Accuracy(task='binary'),
#                                     MatthewsCorrCoef(task='binary')
#                                    ])
# altmetric_eval = MetricCollection([AUROC(task='binary'),
#                                    Recall(task='binary'),
#                                    Precision(task='binary'),
#                                    F1Score(task='binary'),
#                                    Accuracy(task='binary'),
#                                    MatthewsCorrCoef(task='binary')
#                                   ])

# fcnn.to(DEVICE)
# altmetric_train.to(DEVICE)
# altmetric_eval.to(DEVICE)

# trainloader = torch.utils.data.DataLoader(dataset_train,
#                                           batch_size=BATCH_SIZE, shuffle=True,
#                                           num_workers=8, drop_last=False)
# evalloader = torch.utils.data.DataLoader(dataset_eval,
#                                          batch_size=BATCH_SIZE, shuffle=True,
#                                          num_workers=8, drop_last=False)

# loss_fn = nn.BCEWithLogitsLoss()
# optimizer = torch.optim.AdamW(fcnn.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)


In [23]:
# %%time
# PRINT_EVERY = 4

# for epoch in range(1, NUM_EPOCHS+1):
#     train_loss = train_epoch_fcnn(fcnn, optimizer, loss_fn, trainloader, altmetric=altmetric_train)
#     eval_loss = evaluate_fcnn(fcnn, loss_fn, evalloader, altmetric=altmetric_eval)
#     print((f"Epoch: {epoch}, Train loss: {train_loss:.5f}, Val loss: {eval_loss:.5f}"))

#     if ((epoch)%PRINT_EVERY==0):
#         print('Train')
#         for j in [(i, round(altmetric_train[i].compute().item(), 5))
#                   for i in altmetric_train.keys()]: print(j)
#         print()
#         print('Test')
#         for j in [(i, round(altmetric_eval[i].compute().item(), 5))
#                   for i in altmetric_eval.keys()]: print(j)
#         print()

#     altmetric_train.reset()
#     altmetric_eval.reset()

In [24]:
# with open(PATH+'pipeline', 'wb') as fp:
#     pickle.dump(pipeline, fp)

In [25]:
# with open(PATH+'embeddings', 'wb') as fp:
#     pickle.dump(fcnn.embeddings, fp)

##### Optuna fine-tuning

In [26]:
# df_train_train, df_train_eval = train_test_split(df_train, test_size=EVAL_SIZE, random_state=SEED,
#                                                  shuffle=True, stratify=df_train[target])

# dataset_train = Base_Dataset(df_train_train)
# dataset_eval = Base_Dataset(df_train_eval, is_eval=True)

In [27]:
# Maps an activation name to its torch.nn class. `objective` samples a name with
# Optuna and looks the class up here; only 'ReLU', 'SELU', 'GELU' and 'RReLU' are
# in its search list, the remaining entries are available but not sampled.
ACTIVATIONS = {'ReLU': nn.ReLU,
               'SELU': nn.SELU,
               'GELU': nn.GELU,
               'RReLU': nn.RReLU,
               'SiLU': nn.SiLU,
               'LeakyReLU': nn.LeakyReLU,
               'IDENTITY': nn.Identity,
              }

def objective(trial):
    """Optuna objective: train an FCNNet with sampled hyper-parameters.

    Returns the binary Matthews correlation coefficient on the evaluation set
    after the last epoch. Every 4th epoch the same metric is reported to the
    trial so the pruner can stop unpromising runs (``optuna.TrialPruned``).

    Relies on module-level ``df_train_train``, ``dataset_train``,
    ``dataset_eval``, ``cat_cols``, ``ACTIVATIONS`` and ``DEVICE``.
    NOTE(review): the cell that creates ``df_train_train`` / ``dataset_train`` /
    ``dataset_eval`` is commented out just above; it must be run first.
    """

    # Number of distinct values per categorical column, computed once per trial
    # (previously recomputed three times per column).
    cardinalities = [df_train_train[cat_col].nunique() for cat_col in cat_cols]

    # model's params
    # Each categorical column gets an embedding of width n // 2 + 1; the +3
    # accounts for the three numeric columns.
    INPUT_DIM = sum(n // 2 + 1 for n in cardinalities) + 3
    LAYERS_NUM = trial.suggest_int('LAYERS_NUM', 2, 4, step=1)
    LAYERS_DIM = trial.suggest_int('LAYERS_DIM', 256, 512, step=64) #64
    ACTIVATION_OPTIONS = trial.suggest_categorical('ACTIVATION', ['ReLU', 'SELU', 'GELU', 'RReLU'])
    ACTIVATION = ACTIVATIONS[ACTIVATION_OPTIONS]
    EMB_SZS = [[n, n // 2 + 1] for n in cardinalities]
    DROPOUT = trial.suggest_float('DROPOUT', 0, 0.5)

    # learning params
    NUM_EPOCHS = trial.suggest_int('NUM_EPOCHS', 8, 32, step=4)
    BATCH_SIZE = trial.suggest_int('BATCH_SIZE', 1024, 4096, step=1024)
    LR = trial.suggest_float('LR', 1e-5, 1e-3, log=True)
    WEIGHT_DECAY = trial.suggest_float('WEIGHT_DECAY', 1e-8, 1e-4, log=True)

    fcnn = FCNNet(input_dim=INPUT_DIM,
                  layers_num=LAYERS_NUM,
                  layers_dim=LAYERS_DIM,
                  activation=ACTIVATION,
                  emb_szs=EMB_SZS,
                  dropout=DROPOUT
                  )

    # Only MCC is tracked during the search; it is the optimisation target.
    altmetric_eval = MetricCollection([
                                       MatthewsCorrCoef(task='binary')
                                      ])

    fcnn.to(DEVICE)
    altmetric_eval.to(DEVICE)

    trainloader = torch.utils.data.DataLoader(dataset_train,
                                              batch_size=BATCH_SIZE, shuffle=True,
                                              num_workers=8, drop_last=False)
    evalloader = torch.utils.data.DataLoader(dataset_eval,
                                             batch_size=BATCH_SIZE, shuffle=True,
                                             num_workers=8, drop_last=False)

    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(fcnn.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

    last_mcc = None
    last_eval_epoch = 0
    try:
        for epoch in range(1, NUM_EPOCHS+1):
            train_epoch_fcnn(fcnn, optimizer, loss_fn, trainloader)

            if ((epoch)%4==0):
                evaluate_fcnn(fcnn, loss_fn, evalloader, altmetric=altmetric_eval)
                last_mcc = altmetric_eval['BinaryMatthewsCorrCoef'].compute().item()
                last_eval_epoch = epoch
                trial.report(last_mcc, epoch)
                altmetric_eval.reset()
                if trial.should_prune():
                    raise optuna.TrialPruned()

        # Evaluate once more only if the final epoch was not already evaluated
        # above; re-scoring an unchanged model would just repeat the same MCC.
        if last_eval_epoch != NUM_EPOCHS:
            evaluate_fcnn(fcnn, loss_fn, evalloader, altmetric=altmetric_eval)
            last_mcc = altmetric_eval['BinaryMatthewsCorrCoef'].compute().item()

        return last_mcc
    finally:
        # Runs for finished, pruned and failed trials alike.
        torch.cuda.empty_cache()
        gc.collect()

In [28]:
# sampler = optuna.samplers.TPESampler(seed=SEED)
# # storage = optuna.storages.InMemoryStorage()

# study = optuna.create_study(direction='maximize', sampler=sampler,
#                             study_name='fcnn-study_matthews1', storage='sqlite:///fcnn-study_matthews.db', load_if_exists=True,
#                             pruner=optuna.pruners.MedianPruner(n_startup_trials=16,
#                                                                n_warmup_steps=8)
#                            )
# study.optimize(objective, n_trials=200)


### DAE Training

##### functions and classes

In [29]:
class DAE(nn.Module):
    """Denoising autoencoder over numeric features plus embedded categoricals.

    The raw input holds 3 numeric columns followed by one integer-coded column
    per entry of ``emb_szs``. ``forward`` corrupts the raw input with swap noise,
    embeds it, and reconstructs the embedded representation of the *clean* input.

    Args:
        input_dim: width of the embedded vector
            (3 + sum of the embedding widths). Required.
        layers_num: number of hidden Linear blocks after the input block.
        layers_dim: width of every hidden layer.
        activation: activation class (instantiated without arguments).
        emb_szs: iterable of ``(num_categories, embedding_dim)`` pairs. Required.
        emb_weights: ``False``/``None`` for freshly initialised, trainable
            embeddings; otherwise a sequence of pretrained ``nn.Embedding``
            modules (e.g. an ``nn.ModuleList``), one per entry of ``emb_szs``,
            whose weights are copied in and then frozen.
        dropout: dropout probability after every activation.
        swapnoise_ratio: probability that a cell is replaced during ``forward``.
        return_obfuscation_mask: stored but currently unused.
    """

    def __init__(self,
                 input_dim = None,
                 layers_num: int = 3,
                 layers_dim: int = 64,
                 activation = nn.ReLU,
                 emb_szs = None,
                 emb_weights = False,
                 dropout: float = 0.,
                 swapnoise_ratio = 0.15,
                 return_obfuscation_mask = False,
                ):
        super(DAE, self).__init__()

        self.embeddings = nn.ModuleList([nn.Embedding(in_sz, out_sz) for in_sz, out_sz in emb_szs])
        # The former test `emb_weights == True` was never true for a ModuleList,
        # so pretrained embeddings were silently ignored.
        if emb_weights is not None and emb_weights is not False:
            self._load_pretrained_embeddings(emb_weights)
        self.swapnoise_ratio = swapnoise_ratio
        self.return_obfuscation_mask = return_obfuscation_mask

        dae_layers = []
        dae_layers.append(nn.Linear(input_dim, layers_dim))
        dae_layers.append(activation())
        dae_layers.append(nn.Dropout(p=dropout))
        for _ in range(layers_num):
            dae_layers.append(nn.Linear(layers_dim, layers_dim))
            dae_layers.append(activation())
            dae_layers.append(nn.Dropout(p=dropout))
        dae_layers.append(nn.Linear(layers_dim, input_dim))

        self.dae = nn.Sequential(*dae_layers)

        ###########################
        # distinctions for make_denoise
        # Prefixes of self.dae, each ending on a Linear layer: the first holds
        # only the input Linear, the following ones end on each hidden Linear.
        # They wrap the very same layer objects that self.dae already registers.
        children = list(self.dae.children())
        self.dae_layers = []
        self.dae_layers.append([children[0]])
        for i in range(1, len(self.dae)-3, 3):
            self.dae_layers.append(children[:i+2+1])
        self.dae_layers = [nn.Sequential(*i) for i in self.dae_layers]

    def _load_pretrained_embeddings(self, emb_weights):
        """Copy pretrained embedding weights into ``self.embeddings`` and freeze them."""
        if emb_weights is True:
            raise TypeError("emb_weights must be a sequence of pretrained nn.Embedding modules, not True")
        if len(emb_weights) != len(self.embeddings):
            raise ValueError(
                f"expected {len(self.embeddings)} pretrained embeddings, got {len(emb_weights)}")
        with torch.no_grad():
            for emb, pretrained in zip(self.embeddings, emb_weights):
                if emb.weight.shape != pretrained.weight.shape:
                    raise ValueError(
                        f"pretrained embedding shape {tuple(pretrained.weight.shape)} "
                        f"does not match {tuple(emb.weight.shape)}")
                # Copy rather than share the Parameter, so the source modules stay untouched.
                emb.weight.copy_(pretrained.weight)
        for p in self.embeddings.parameters():
            p.requires_grad_(False)

    def forward(self, x: torch.Tensor):
        """Return ``(reconstruction, target)``.

        ``target`` is the embedded clean input (detached); ``reconstruction`` is
        the network output for the swap-noised input. Previously the noise was
        applied to the target while the network saw the clean input, which
        trained the model to produce noise instead of removing it.
        """
        with torch.no_grad():
            x_orig = self.make_embedded(x).detach()
            x_noisy, _ = self.add_swapnoise(x.clone().detach(), ratio=self.swapnoise_ratio)

        x = self.make_embedded(x_noisy)
        x = self.dae(x)
        return x, x_orig

    def make_denoise(self, x: torch.Tensor):
        """Return the concatenated outputs of the hidden Linear layers for ``x``.

        A 1-D input is treated as a single row. The output of the input Linear
        layer (first prefix) is dropped.
        """
        if len(x.shape) == 1: x = x.unsqueeze(0)
        x = self.make_embedded(x)
        return torch.cat([i(x) for i in self.dae_layers][1:], dim=-1) #dropping first output

    def make_embedded(self, x: torch.Tensor):
        """Replace the categorical code columns of ``x`` by their embeddings."""
        x_num = x[:, :3]
        x_cat = x[:, 3:].long()
        x_cat = [emb_layer(x_cat[:, i]) for i, emb_layer in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, dim=-1)
        x = torch.cat([x_num, x_cat], dim=-1).float()
        return x

    # https://www.kaggle.com/code/ryanzhang/pytorch-dae-starter-code
    def add_swapnoise(self, x, ratio=0.15):
        """Swap noise: return ``(noisy_x, mask)``.

        Each cell is selected with probability ``ratio``; a selected cell takes
        the value found in the same column of a randomly permuted copy of the
        batch (one row permutation shared by all columns). ``mask`` is 1 where a
        cell was selected.
        """
        # Place the mask on the input's own device instead of the global DEVICE.
        obfuscation_mask = torch.bernoulli(ratio * torch.ones(x.shape)).to(x.device)

        # Alternative (row- and column-wise) noising, kept for reference:
        # obfuscated_x = torch.where(obfuscation_mask == 1, x[torch.randperm(x.shape[0])][:, torch.randperm(x.shape[1])], x)
        # Row-wise noising:
        obfuscated_x = torch.where(obfuscation_mask == 1, x[torch.randperm(x.shape[0])], x)

        return obfuscated_x, obfuscation_mask

In [30]:
class FCHead(nn.Module):
    """Classification head trained on top of a frozen DAE.

    The input is passed through ``dae_model.make_denoise`` and the resulting
    features go through two Linear -> BatchNorm -> activation -> Dropout blocks
    (widths ``dae_out_dim // 4`` and ``dae_out_dim // 16``) and a final
    single-logit Linear layer.

    Args:
        dae_model: module exposing ``make_denoise(x)``; all of its parameters
            are frozen (``requires_grad_(False)``) here.
        dae_out_dim: width of the ``make_denoise`` output.
        activation: activation class (instantiated without arguments).
        dropout: dropout probability after every activation.
        feature_dim: currently unused; ``feature_maker`` returns
            ``dae_out_dim // 16`` features.
    """

    def __init__(self,
                 dae_model,
                 dae_out_dim: int = 1024*4,
                 # layers_num: int = 2,
                 # layers_dim: int = 32,
                 activation = nn.ReLU,
                 dropout: float = 0.,
                 feature_dim = 256,
                ):
        super(FCHead, self).__init__()

        self.dae_model = dae_model
        for p in self.dae_model.parameters():
            p.requires_grad_(False)

        hidden_1 = dae_out_dim//4
        hidden_2 = dae_out_dim//16

        # Both batch-norm widths are known, so regular BatchNorm1d layers are
        # used instead of LazyBatchNorm1d: all parameters exist at construction
        # time, before an optimizer is built from .parameters().
        fc_layers = []
        fc_layers.append(nn.Linear(dae_out_dim, hidden_1))
        fc_layers.append(nn.BatchNorm1d(hidden_1))
        fc_layers.append(activation())
        fc_layers.append(nn.Dropout(p=dropout))
        fc_layers.append(nn.Linear(hidden_1, hidden_2))
        fc_layers.append(nn.BatchNorm1d(hidden_2))
        fc_layers.append(activation())
        fc_layers.append(nn.Dropout(p=dropout))
        fc_layers.append(nn.Linear(hidden_2, 1))

        self.fc_layers = nn.Sequential(*fc_layers)
        # First five layers (up to and including the second Linear); shares the
        # layer objects with self.fc_layers.
        self.feature_extractor = nn.Sequential(*list(self.fc_layers.children())[0:5])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one logit per row, shape ``(batch, 1)``."""
        x = self.dae_model.make_denoise(x)
        x = self.fc_layers(x)
        return x

    def feature_maker(self, x: torch.Tensor):
        """Return the output of the second Linear layer, shape ``(batch, dae_out_dim // 16)``."""
        x = self.dae_model.make_denoise(x)
        x = self.feature_extractor(x)
        return x
    

In [31]:
def train_epoch_dae(model, optimizer, loss_fn, train_dataloader):
    """Run one DAE training epoch and return the mean batch loss.

    Labels coming from the dataloader are ignored. The model returns a pair
    that is passed to ``loss_fn`` in the same order; the optimizer then takes
    one step per batch.
    """
    model.train()
    running_loss = 0

    for batch, _ in tqdm.tqdm(train_dataloader):
        batch = batch.to(DEVICE)

        reconstruction, reference = model(batch)
        optimizer.zero_grad()
        batch_loss = loss_fn(reconstruction, reference)

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    n_batches = len(train_dataloader)
    return running_loss / n_batches
    
def evaluate_dae(model, loss_fn, test_dataloader):
    """Evaluate the DAE on a dataloader and return the mean batch loss.

    The model is switched to eval mode and the pass runs under
    ``torch.no_grad()`` so no autograd graph is built. Labels coming from the
    dataloader are ignored.
    """
    model.eval()
    losses = 0

    with torch.no_grad():
        for X, _ in test_dataloader:
            X = X.to(DEVICE)

            preds, orig = model(X)
            loss = loss_fn(preds, orig)
            losses += loss.item()

    return losses / len(test_dataloader)


In [32]:
class MSE_Weighted(nn.Module):
    """Mean squared error with separate weights for masked and unmasked cells.

    Cells where ``mask`` is 1 are weighted by ``emphasis`` and cells where it is
    0 by ``1 - emphasis``. With no mask (treated as all ones) and
    ``emphasis = 1`` this is equivalent to plain MSE.

    Args:
        emphasis: weight in [0, 1] given to the masked cells.
    """

    def __init__(self, emphasis=1):
        # Initialise nn.Module before setting any attribute on it.
        super().__init__()
        self.emphasis = emphasis

    def forward(self, pred, actual, mask=None):
        """Return the weighted mean of the element-wise squared errors."""
        # Build the default mask from `pred` so it follows pred's device and dtype
        # instead of depending on a global DEVICE.
        if mask is None:
            mask = torch.ones_like(pred)
        loss_weights = mask * self.emphasis + (1 - mask) * (1 - self.emphasis)
        unweighted_loss = nn.functional.mse_loss(pred, actual, reduction='none')
        weighted_loss = loss_weights * unweighted_loss
        return weighted_loss.mean()


##### training

In [33]:
# with open(PATH+'pipeline', 'rb') as fp:
#     pipeline = pickle.load(fp)

In [34]:
# with open(PATH+'embeddings', 'rb') as fp:
#     embeddings = pickle.load(fp)

In [35]:
# df_combined = pd.concat([df_train, df_test], axis=0)
# df_combined['class'].fillna(-1, inplace=True)

In [36]:
# df_combined_train, df_combined_eval = train_test_split(df_combined, test_size=EVAL_SIZE, random_state=SEED, shuffle=True)

# # is_eval to not to override the pipeline and to not return the y
# dataset_dae_train = Base_Dataset(df_combined_train, is_eval=True)
# dataset_dae_eval = Base_Dataset(df_combined_eval, is_eval=True)

# display(len(dataset_train))
# display(len(dataset_eval))


In [37]:
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# INPUT_DIM = sum([df_combined_train[cat_col].nunique()//2+1
#                  for cat_col in cat_cols]) + 3
# LAYERS_NUM = 3
# LAYERS_DIM = 2048
# ACTIVATION = nn.ReLU
# EMB_SZS = [[df_combined_train[cat_col].nunique(),
#             df_combined_train[cat_col].nunique()//2+1]
#            for cat_col in cat_cols]
# EMB_WEIGHTS = embeddings
# DROPOUT = 0.
# SWAPNOISE_RATIO = 0.15

# NUM_EPOCHS = 16
# BATCH_SIZE = 1024*4 #96
# LR = 1e-4 #2e-4 
# WEIGHT_DECAY = 1e-6 #6e-5

# dae = DAE(input_dim = INPUT_DIM,
#           layers_num = LAYERS_NUM,
#           layers_dim = LAYERS_DIM,
#           activation = ACTIVATION,
#           emb_szs = EMB_SZS,
#           emb_weights = EMB_WEIGHTS,
#           dropout = DROPOUT,
#           swapnoise_ratio = SWAPNOISE_RATIO,
#           return_obfuscation_mask=False)
# dae.to(DEVICE)

# trainloader_dae = torch.utils.data.DataLoader(dataset_dae_train,
#                                           batch_size=BATCH_SIZE, shuffle=True,
#                                           num_workers=8, drop_last=False)
# evalloader_dae = torch.utils.data.DataLoader(dataset_dae_eval,
#                                          batch_size=BATCH_SIZE, shuffle=True,
#                                          num_workers=8, drop_last=False)

# # loss_fn = MSE_Weighted(emphasis=4/5)
# # loss_fn = MSE_Weighted(emphasis=1)
# loss_fn = nn.MSELoss()
# optimizer = torch.optim.AdamW(dae.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)


In [38]:
# %%time
# for epoch in range(1, NUM_EPOCHS+1):
#     train_loss = train_epoch_dae(dae, optimizer, loss_fn, trainloader_dae)
#     eval_loss = evaluate_dae(dae, loss_fn, evalloader_dae)
#     print((f"Epoch: {epoch}, Train loss: {train_loss:.5f}, Val loss: {eval_loss:.5f}"))


In [39]:
# df_train_train, df_train_eval = train_test_split(df_train, test_size=EVAL_SIZE, random_state=SEED,
#                                                  shuffle=True, stratify=df_train[target])

# dataset_train = Base_Dataset(df_train_train, is_eval=True)
# dataset_eval = Base_Dataset(df_train_eval, is_eval=True)

# display(len(dataset_train))
# display(len(dataset_eval))

In [40]:
# DAE_MODEL = dae
# DAE_OUT_DUM = 2048*3
# FEATURE_DIM = 256
# ACTIVATION = nn.ReLU
# DROPOUT = 0.

# NUM_EPOCHS = 8
# BATCH_SIZE = 1024*4 #96
# LR = 1e-3 #2e-4 
# WEIGHT_DECAY = 1e-6 #6e-5


# fchead = FCHead(dae_model = DAE_MODEL,
#                 dae_out_dim = DAE_OUT_DUM,
#                 feature_dim = FEATURE_DIM,
#                 activation = ACTIVATION,
#                 dropout = DROPOUT)

# altmetric_train = MetricCollection([AUROC(task='binary'),
#                                     Recall(task='binary'),
#                                     Precision(task='binary'),
#                                     F1Score(task='binary'),
#                                     Accuracy(task='binary'),
#                                     MatthewsCorrCoef(task='binary')
#                                    ])
# altmetric_eval = MetricCollection([AUROC(task='binary'),
#                                    Recall(task='binary'),
#                                    Precision(task='binary'),
#                                    F1Score(task='binary'),
#                                    Accuracy(task='binary'),
#                                    MatthewsCorrCoef(task='binary')
#                                   ])

# fchead.to(DEVICE)
# altmetric_train.to(DEVICE)
# altmetric_eval.to(DEVICE)

# trainloader = torch.utils.data.DataLoader(dataset_train,
#                                           batch_size=BATCH_SIZE, shuffle=True,
#                                           num_workers=8, drop_last=False)
# evalloader = torch.utils.data.DataLoader(dataset_eval,
#                                          batch_size=BATCH_SIZE, shuffle=True,
#                                          num_workers=8, drop_last=False)

# loss_fn = nn.BCEWithLogitsLoss()
# optimizer = torch.optim.AdamW(fchead.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)


In [41]:
# %%time
# PRINT_EVERY = 1

# for epoch in range(1, NUM_EPOCHS+1):
#     train_loss = train_epoch_fcnn(fchead, optimizer, loss_fn, trainloader, altmetric=altmetric_train)
#     eval_loss = evaluate_fcnn(fchead, loss_fn, evalloader, altmetric=altmetric_eval)
#     print((f"Epoch: {epoch}, Train loss: {train_loss:.5f}, Val loss: {eval_loss:.5f}"))

#     if ((epoch)%PRINT_EVERY==0):
#         print('Train')
#         for j in [(i, round(altmetric_train[i].compute().item(), 5))
#                   for i in altmetric_train.keys()]: print(j)
#         print()
#         print('Test')
#         for j in [(i, round(altmetric_eval[i].compute().item(), 5))
#                   for i in altmetric_eval.keys()]: print(j)
#         print()

#     altmetric_train.reset()
#     altmetric_eval.reset()

##### optuna fine-tuning

In [42]:
# with open(PATH+'pipeline', 'rb') as fp:
#     pipeline = pickle.load(fp)
# with open(PATH+'embeddings', 'rb') as fp:
#     embeddings = pickle.load(fp)

In [43]:
# df_combined = pd.concat([df_train, df_test], axis=0)
# df_combined['class'].fillna(-1, inplace=True)

# df_combined_train, df_combined_eval = train_test_split(df_combined, test_size=EVAL_SIZE, random_state=SEED, shuffle=True)
# dataset_dae_train = Base_Dataset(df_combined, is_eval=True) #df_combined_train
# dataset_dae_eval = Base_Dataset(df_combined_eval, is_eval=True)

# df_train_train, df_train_eval = train_test_split(df_train, test_size=EVAL_SIZE, random_state=SEED,
#                                                  shuffle=True, stratify=df_train[target])
# dataset_train = Base_Dataset(df_train_train, is_eval=True)
# dataset_eval = Base_Dataset(df_train_eval, is_eval=True)


In [44]:
# ACTIVATIONS = {'ReLU': nn.ReLU,
#                'SELU': nn.SELU,
#                'GELU': nn.GELU,
#                'RReLU': nn.RReLU,
#                'SiLU': nn.SiLU,
#                'LeakyReLU': nn.LeakyReLU,
#                'IDENTITY': nn.Identity,
#               }

# def objective(trial):

#     # model's params
#     INPUT_DIM = sum([df_train_train[cat_col].nunique()//2+1
#                  for cat_col in cat_cols]) + 3
#     LAYERS_NUM_DAE = trial.suggest_int('LAYERS_NUM_DAE', 2, 4, step=1)
#     LAYERS_DIM_DAE = trial.suggest_int('LAYERS_DIM_DAE', 512, 2048, step=512) #64
#     ACTIVATION_OPTIONS = trial.suggest_categorical('ACTIVATION_DAE', ['ReLU', 'SELU', 'GELU', 'RReLU'])
#     ACTIVATION_DAE = ACTIVATIONS[ACTIVATION_OPTIONS]
#     EMB_SZS = [[df_combined_train[cat_col].nunique(),
#                 df_combined_train[cat_col].nunique()//2+1]
#                for cat_col in cat_cols]
#     EMB_WEIGHTS = embeddings
#     DROPOUT_DAE = trial.suggest_float('DROPOUT_DAE', 0, 0.3)
#     SWAPNOISE_RATIO = trial.suggest_float('SWAPNOISE_RATIO', 0.1, 0.3)
    
#     # learning params
#     NUM_EPOCHS_DAE = trial.suggest_int('NUM_EPOCHS_DAE', 16, 64, step=8)
#     BATCH_SIZE_DAE = trial.suggest_int('BATCH_SIZE_DAE', 1024, 4096, step=1024)
#     LR_DAE = trial.suggest_float('LR_DAE', 1e-5, 1e-3, log=True)
#     WEIGHT_DECAY_DAE = trial.suggest_float('WEIGHT_DECAY_DAE', 1e-8, 1e-4, log=True)
    
#     dae = DAE(input_dim = INPUT_DIM,
#               layers_num = LAYERS_NUM_DAE,
#               layers_dim = LAYERS_DIM_DAE,
#               activation = ACTIVATION_DAE,
#               emb_szs = EMB_SZS,
#               emb_weights = EMB_WEIGHTS,
#               dropout = DROPOUT_DAE,
#               swapnoise_ratio = SWAPNOISE_RATIO,
#               return_obfuscation_mask=False)
#     dae.to(DEVICE)
    
#     trainloader_dae = torch.utils.data.DataLoader(dataset_dae_train,
#                                                   batch_size=BATCH_SIZE_DAE, shuffle=True,
#                                                   num_workers=4, drop_last=False)
#     evalloader_dae = torch.utils.data.DataLoader(dataset_dae_eval,
#                                                  batch_size=BATCH_SIZE_DAE, shuffle=True,
#                                                  num_workers=4, drop_last=False)
    
#     loss_fn = nn.MSELoss()
#     optimizer = torch.optim.AdamW(dae.parameters(), lr=LR_DAE, weight_decay=WEIGHT_DECAY_DAE)

#     for epoch in range(1, NUM_EPOCHS_DAE+1):
#         train_loss = train_epoch_dae(dae, optimizer, loss_fn, trainloader_dae)
        
#         if ((epoch)%4==0):
#             eval_loss = evaluate_dae(dae, loss_fn, evalloader_dae)
#             trial.report(eval_loss, epoch)
#             # if trial.should_prune():
#             #     raise optuna.TrialPruned()

#     DAE_MODEL = dae
#     DAE_OUT_DIM = LAYERS_NUM_DAE*LAYERS_DIM_DAE
#     FEATURE_DIM = 1024 # not using
#     ACTIVATION = nn.ReLU
#     DROPOUT = 0.

#     NUM_EPOCHS = trial.suggest_int('NUM_EPOCHS', 8, 16, step=4)
#     BATCH_SIZE = trial.suggest_int('BATCH_SIZE', 1024, 4096, step=1024)
#     LR = trial.suggest_float('LR', 1e-4, 1e-2, log=True)
#     WEIGHT_DECAY = trial.suggest_float('WEIGHT_DECAY', 1e-6, 1e-3, log=True)

#     fchead = FCHead(dae_model = DAE_MODEL,
#                     dae_out_dim = DAE_OUT_DIM,
#                     feature_dim = FEATURE_DIM,
#                     activation = ACTIVATION,
#                     dropout = DROPOUT)
    
#     altmetric_eval = MetricCollection([
#                                        # AUROC(task='binary'),
#                                        # Recall(task='binary'),
#                                        # Precision(task='binary'),
#                                        # F1Score(task='binary'),
#                                        # Accuracy(task='binary'),
#                                        MatthewsCorrCoef(task='binary')
#                                       ])
    
#     fchead.to(DEVICE)
#     altmetric_eval.to(DEVICE)

#     trainloader = torch.utils.data.DataLoader(dataset_train,
#                                               batch_size=BATCH_SIZE, shuffle=True,
#                                               num_workers=4, drop_last=False)
#     evalloader = torch.utils.data.DataLoader(dataset_eval,
#                                              batch_size=BATCH_SIZE, shuffle=True,
#                                              num_workers=4, drop_last=False)

#     loss_fn = nn.BCEWithLogitsLoss()
#     optimizer = torch.optim.AdamW(fchead.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)


#     for epoch in range(1, NUM_EPOCHS+1):
#         train_loss = train_epoch_fcnn(fchead, optimizer, loss_fn, trainloader)

#     eval_loss = evaluate_fcnn(fchead, loss_fn, evalloader, altmetric=altmetric_eval)

#     torch.cuda.empty_cache()
#     gc.collect()
#     # return eval_loss
#     return altmetric_eval['BinaryMatthewsCorrCoef'].compute().item()
    

In [45]:
# sampler = optuna.samplers.TPESampler(seed=SEED)
# # storage = optuna.storages.InMemoryStorage()

# study = optuna.create_study(direction='maximize', sampler=sampler,
#                             study_name='fchead-study_matthews1', storage='sqlite:///fcnn-study_matthews.db', load_if_exists=True,
#                             pruner=optuna.pruners.MedianPruner(n_startup_trials=16,
#                                                                n_warmup_steps=8)
#                            )
# study.optimize(objective, n_trials=200)


### TabNet

##### preprocessing

In [46]:
# Restore two previously pickled artifacts from PATH:
#   - `pipeline`: used elsewhere in this notebook via `pipeline.transform(...)`
#   - `embeddings`: iterated as a sequence of embedding layers in `make_embedded`
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# only load artifacts that were produced by this project.
with open(PATH+'pipeline', 'rb') as fp:
    pipeline = pickle.load(fp)
with open(PATH+'embeddings', 'rb') as fp:
    embeddings = pickle.load(fp)

  return torch.load(io.BytesIO(b))


In [47]:
from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.pretraining import TabNetPretrainer
from pytorch_tabnet.metrics import Metric as TabMetric
from sklearn.metrics import matthews_corrcoef

In [89]:
# Stratified hold-out split of the labelled frame: EVAL_SIZE of the rows go to
# `df_train_eval`, stratified on the target column, seeded with SEED.
df_train_train, df_train_eval = train_test_split(df_train, test_size=EVAL_SIZE, random_state=SEED,
                                                 shuffle=True, stratify=df_train[target])

# NOTE(review): the training dataset wraps the *full* `df_train`, not
# `df_train_train`, so every row of `df_train_eval` is also part of the
# training data. Any metric computed on the eval set downstream is therefore
# not out-of-sample. Confirm this is intentional (e.g. a final fit on all
# labelled data); otherwise switch back to `df_train_train`.
# NOTE(review): the train dataset is built without `is_eval=True` while the
# eval dataset sets it — presumably this toggles train-only behaviour inside
# Base_Dataset; verify against its definition.
dataset_train = Base_Dataset(df_train) #df_train_train
dataset_eval = Base_Dataset(df_train_eval, is_eval=True)

# Show how many samples each dataset holds.
display(len(dataset_train))
display(len(dataset_eval))


3116941

623389

2077956

In [90]:
BATCH_SIZE = 4096

# The two loaders are configured identically and differ only in the dataset
# they wrap, so the common settings are spelled out once.
_loader_kwargs = dict(batch_size=BATCH_SIZE,
                      shuffle=True,
                      num_workers=8,
                      drop_last=False)

trainloader = DataLoader(dataset_train, **_loader_kwargs)
evalloader = DataLoader(dataset_eval, **_loader_kwargs)


In [91]:
def make_embedded(embeddings, loader, n_num_cols=3, device=None):
    """Swap categorical codes for their embedding vectors and stack all batches.

    Every batch `X` from `loader` is split column-wise: the first `n_num_cols`
    columns pass through unchanged, and each remaining column is cast to
    integer indices and looked up in the embedding layer found at the same
    position in `embeddings`. The pieces are concatenated back along the
    feature axis.

    Args:
        embeddings: module that supports `.eval()` and iterates over embedding
            layers, one per categorical column in column order.
        loader: iterable yielding `(X, y)` tensor pairs with 2-D `X`.
        n_num_cols: number of leading pass-through columns in `X`
            (default 3, the value that used to be hard-coded).
        device: device on which the lookup runs. Defaults to the notebook-level
            `DEVICE`. `embeddings` is not moved, so it must already live there.

    Returns:
        Tuple `(X_array, y_array)` of NumPy arrays with all batches concatenated
        along axis 0; `X_array` is float32.

    Raises:
        ValueError: if `loader` yields no batches.
    """
    if device is None:
        device = DEVICE

    embeddings.eval()
    X_chunks, y_chunks = [], []

    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)

            X_num = X[:, :n_num_cols]
            X_cat = X[:, n_num_cols:].long()
            # One lookup per categorical column, joined along the feature axis.
            X_cat = torch.cat([emb_layer(X_cat[:, i])
                               for i, emb_layer in enumerate(embeddings)], dim=-1)
            X = torch.cat([X_num, X_cat], dim=-1).float()

            X_chunks.append(X.detach().cpu().numpy())
            # Targets are only collected, so they never need to visit `device`.
            y_chunks.append(y.detach().cpu().numpy())

    if not X_chunks:
        # np.concatenate would raise ValueError here too, with a vaguer message.
        raise ValueError("make_embedded: loader yielded no batches")

    X_array = np.concatenate(X_chunks, axis=0)
    y_array = np.concatenate(y_chunks, axis=0)
    return X_array, y_array
    

In [92]:
X_train, y_train = make_embedded(embeddings, trainloader)
X_eval, y_eval = make_embedded(embeddings, evalloader)


In [52]:
class matthews(TabMetric):
    """Matthews correlation coefficient packaged as a pytorch-tabnet metric.

    Reported under the name ``matthews``; larger values are treated as better.
    """

    def __init__(self):
        # Optimisation direction and display name stored on the metric instance.
        self._maximize = True
        self._name = "matthews"

    def __call__(self, y_true, y_score):
        """Return the two-class MCC of `y_score` against `y_true` as a Python float."""
        preds = torch.Tensor(y_score)
        target = torch.Tensor(y_true)
        mcc = multiclass_matthews_corrcoef(preds, target, num_classes=2)
        return mcc.item()

##### training

In [54]:
# # for direct training
# X_train, y_train = df_train_train[num_cols+cat_cols], df_train_train[target].values
# X_train = pipeline.transform(X_train)
# X_eval, y_eval = df_train_eval[num_cols+cat_cols], df_train_eval[target].values
# X_eval = pipeline.transform(X_eval)

# cat_idxs = list(range(3, 18))
# cat_dims = [df_train_train[cat_col].nunique() for cat_col in cat_cols]


In [55]:
# TabNet architecture / optimiser constants. They are module-level because the
# classifier cell below reuses them to build a network of matching shape.
MASK_TYPE = 'entmax'
LR = 0.04
# Passed to the pretrainer's fit() as `pretraining_ratio`.
PRETRAIN_RATIO = 0.8
N_D = 64
# n_a is deliberately tied to n_d.
N_A = N_D
N_STEPS = 6
GAMMA = 1.5
N_INDEPENDENT = 5
N_SHARED = 3

# Training-loop constants.
# NOTE: this rebinds BATCH_SIZE, which the DataLoader cell above set to 4096.
MAX_EPOCHES = 64
BATCH_SIZE = 1024*32
VIRT_BATCH_SIZE = 256

# Self-supervised TabNet pretrainer; its weights seed the classifier later via
# `from_unsupervised`.
unsupervised_model = TabNetPretrainer(n_d=N_D,
                                      n_a=N_A,
                                      n_steps=N_STEPS,
                                      gamma=GAMMA,
                                      n_independent=N_INDEPENDENT,
                                      n_shared=N_SHARED,
                                      optimizer_fn=torch.optim.Adam,
                                      optimizer_params=dict(lr=LR),
                                      mask_type=MASK_TYPE,
                                      device_name=DEVICE,
                                      seed=SEED)

# Pretraining uses features only (no targets); X_eval is the monitored
# validation set and `patience` bounds how long training continues without
# improvement on it.
unsupervised_model.fit(X_train=X_train,
                       eval_set=[X_eval],
                       pretraining_ratio=PRETRAIN_RATIO,
                       max_epochs=MAX_EPOCHES,
                       batch_size=BATCH_SIZE,
                       virtual_batch_size=VIRT_BATCH_SIZE,
                       num_workers=4,
                       patience=8)




epoch 0  | loss: 3.05603 | val_0_unsup_loss_numpy: 0.9530799984931946|  0:02:31s
epoch 1  | loss: 0.83271 | val_0_unsup_loss_numpy: 0.9931700229644775|  0:04:57s
epoch 2  | loss: 0.76134 | val_0_unsup_loss_numpy: 0.8679800033569336|  0:07:24s
epoch 3  | loss: 0.71029 | val_0_unsup_loss_numpy: 0.7047399878501892|  0:10:00s
epoch 4  | loss: 0.66579 | val_0_unsup_loss_numpy: 0.6416100263595581|  0:12:34s
epoch 5  | loss: 0.62994 | val_0_unsup_loss_numpy: 0.546209990978241|  0:15:05s
epoch 6  | loss: 0.60968 | val_0_unsup_loss_numpy: 0.5282899737358093|  0:17:34s
epoch 7  | loss: 0.58982 | val_0_unsup_loss_numpy: 0.5473200082778931|  0:20:02s
epoch 8  | loss: 0.57451 | val_0_unsup_loss_numpy: 0.5228999853134155|  0:22:32s
epoch 9  | loss: 0.55794 | val_0_unsup_loss_numpy: 0.5154399871826172|  0:25:00s
epoch 10 | loss: 0.54012 | val_0_unsup_loss_numpy: 0.5223900079727173|  0:27:30s
epoch 11 | loss: 0.52764 | val_0_unsup_loss_numpy: 0.5306500196456909|  0:29:58s
epoch 12 | loss: 0.52567 | va



In [56]:
# Supervised TabNet built from the same architecture constants as the
# pretrainer so the pretrained weights can be transferred.
clf = TabNetClassifier(n_d=N_D,
                       n_a=N_A,
                       n_steps=N_STEPS,
                       gamma=GAMMA,
                       n_independent=N_INDEPENDENT,
                       n_shared=N_SHARED,
                       optimizer_fn=torch.optim.Adam,
                       optimizer_params=dict(lr=LR),
                       mask_type=MASK_TYPE,
                       device_name=DEVICE,
                       seed=SEED)

# Targets are flattened to 1-D before fitting. The network starts from
# `unsupervised_model`, and the custom `matthews` metric on (X_eval, y_eval) is
# the only eval metric, so it is the quantity early stopping (patience=8)
# monitors.
clf.fit(X_train, y_train.reshape(-1,),
        eval_set=[(X_eval, y_eval.reshape(-1,))],
        eval_metric=[matthews],
        max_epochs=MAX_EPOCHES,
        batch_size=BATCH_SIZE,
        virtual_batch_size=VIRT_BATCH_SIZE,
        from_unsupervised=unsupervised_model,
        num_workers=4,
        patience=8)



epoch 0  | loss: 0.53561 | val_0_matthews: 0.85514 |  0:01:56s
epoch 1  | loss: 0.05024 | val_0_matthews: 0.96693 |  0:03:55s
epoch 2  | loss: 0.04503 | val_0_matthews: 0.98182 |  0:05:53s
epoch 3  | loss: 0.04313 | val_0_matthews: 0.98279 |  0:07:50s
epoch 4  | loss: 0.0417  | val_0_matthews: 0.98349 |  0:09:48s
epoch 5  | loss: 0.04087 | val_0_matthews: 0.98374 |  0:11:47s
epoch 6  | loss: 0.04028 | val_0_matthews: 0.98429 |  0:13:45s
epoch 7  | loss: 0.0398  | val_0_matthews: 0.98434 |  0:15:44s
epoch 8  | loss: 0.03964 | val_0_matthews: 0.98457 |  0:17:43s
epoch 9  | loss: 0.03921 | val_0_matthews: 0.98463 |  0:19:43s
epoch 10 | loss: 0.03886 | val_0_matthews: 0.98475 |  0:21:41s
epoch 11 | loss: 0.03839 | val_0_matthews: 0.9851  |  0:23:39s
epoch 12 | loss: 0.03843 | val_0_matthews: 0.98508 |  0:25:37s
epoch 13 | loss: 0.03813 | val_0_matthews: 0.98504 |  0:27:35s
epoch 14 | loss: 0.03808 | val_0_matthews: 0.98544 |  0:29:32s
epoch 15 | loss: 0.03752 | val_0_matthews: 0.98538 |  0



##### optuna fine-tuning

In [57]:
# X_train, y_train = make_embedded(embeddings, trainloader)
# X_eval, y_eval = make_embedded(embeddings, evalloader)


In [58]:
def objective(trial):
    """Optuna objective: pretrain a TabNet, fine-tune a classifier, report its score.

    Hyper-parameters are drawn from `trial`; the data comes from the
    notebook-level `X_train`, `y_train`, `X_eval` and `y_eval` arrays.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        `clf.best_cost` of the classifier fitted with the `matthews` eval metric.
    """
    # Search space. The suggest calls run in a fixed order; keep it when editing.
    mask_type = trial.suggest_categorical('MASK_TYPE', ['entmax', 'sparsemax'])
    lr = trial.suggest_float('LR', 1e-3, 1e-1, log=True)
    pretrain_ratio = trial.suggest_float('PRETRAIN_RATIO', 0.2, 0.8, log=True)
    n_d = trial.suggest_int('N_D', 16, 64, step=16)
    n_a = n_d  # attention width is tied to the decision width
    n_steps = trial.suggest_int('N_STEPS', 3, 6, step=1)
    gamma = trial.suggest_float('GAMMA', 1.0, 2.0, log=True)
    n_independent = trial.suggest_int('N_INDEPENDENT', 1, 5, step=1)
    n_shared = trial.suggest_int('N_SHARED', 1, 5, step=1)

    max_epochs = 32  # fixed budget, not part of the search
    batch_size = trial.suggest_int('BATCH_SIZE', 1024*8, 1024*32, step=1024*4)
    virt_batch_size = trial.suggest_int('VIRT_BATCH_SIZE', 512, 2048, step=512)

    def tabnet_params():
        # Constructor arguments common to both models; a fresh dict (and a
        # fresh optimizer_params dict) is built on every call.
        return dict(n_d=n_d,
                    n_a=n_a,
                    n_steps=n_steps,
                    gamma=gamma,
                    n_independent=n_independent,
                    n_shared=n_shared,
                    optimizer_fn=torch.optim.Adam,
                    optimizer_params=dict(lr=lr),
                    mask_type=mask_type,
                    device_name=DEVICE,
                    seed=SEED)

    # fit() settings that are the same for pretraining and fine-tuning.
    loop_params = dict(max_epochs=max_epochs,
                       batch_size=batch_size,
                       virtual_batch_size=virt_batch_size,
                       num_workers=8,
                       patience=4)

    # Stage 1: self-supervised pretraining on the features alone.
    unsupervised_model = TabNetPretrainer(**tabnet_params())
    unsupervised_model.fit(X_train=X_train,
                           eval_set=[X_eval],
                           pretraining_ratio=pretrain_ratio,
                           **loop_params)

    # Stage 2: supervised fit starting from the pretrained network.
    clf = TabNetClassifier(**tabnet_params())
    clf.fit(X_train, y_train.reshape(-1,),
            eval_set=[(X_eval, y_eval.reshape(-1,))],
            eval_metric=[matthews],
            from_unsupervised=unsupervised_model,
            **loop_params)

    return clf.best_cost

In [None]:
# sampler = optuna.samplers.TPESampler(seed=SEED)
# # storage = optuna.storages.InMemoryStorage()

# study = optuna.create_study(direction='maximize', sampler=sampler,
#                             study_name='tabnet', storage='sqlite:///fcnn-study_matthews.db', load_if_exists=True,
#                            )
# study.optimize(objective, n_trials=200)


[I 2024-08-26 23:02:34,256] A new study created in RDB with name: tabnet


epoch 0  | loss: 2.22341 | val_0_unsup_loss_numpy: 0.9936100244522095|  0:00:47s
epoch 1  | loss: 0.87779 | val_0_unsup_loss_numpy: 1.0253299474716187|  0:01:35s
epoch 2  | loss: 0.79187 | val_0_unsup_loss_numpy: 0.9890300035476685|  0:02:22s
epoch 3  | loss: 0.72855 | val_0_unsup_loss_numpy: 0.8093500137329102|  0:03:10s
epoch 4  | loss: 0.67873 | val_0_unsup_loss_numpy: 0.6462500095367432|  0:03:58s
epoch 5  | loss: 0.63331 | val_0_unsup_loss_numpy: 0.5638300180435181|  0:04:45s
epoch 6  | loss: 0.59683 | val_0_unsup_loss_numpy: 0.51569002866745|  0:05:33s
epoch 7  | loss: 0.56295 | val_0_unsup_loss_numpy: 0.46432000398635864|  0:06:21s
epoch 8  | loss: 0.53297 | val_0_unsup_loss_numpy: 0.49028998613357544|  0:07:08s
epoch 9  | loss: 0.50923 | val_0_unsup_loss_numpy: 0.4300999939441681|  0:07:56s
epoch 10 | loss: 0.50127 | val_0_unsup_loss_numpy: 0.40501999855041504|  0:08:45s
epoch 11 | loss: 0.49459 | val_0_unsup_loss_numpy: 0.44238999485969543|  0:09:35s
epoch 12 | loss: 0.46908 |



epoch 0  | loss: 0.23313 | val_0_matthews: 0.91206 |  0:00:42s
epoch 1  | loss: 0.04581 | val_0_matthews: 0.98031 |  0:01:24s
epoch 2  | loss: 0.04314 | val_0_matthews: 0.98251 |  0:02:06s
epoch 3  | loss: 0.0419  | val_0_matthews: 0.98299 |  0:02:48s
epoch 4  | loss: 0.04117 | val_0_matthews: 0.98307 |  0:03:31s
epoch 5  | loss: 0.04107 | val_0_matthews: 0.98304 |  0:04:12s
epoch 6  | loss: 0.04203 | val_0_matthews: 0.98315 |  0:04:54s
epoch 7  | loss: 0.04    | val_0_matthews: 0.98382 |  0:05:37s
epoch 8  | loss: 0.04086 | val_0_matthews: 0.98351 |  0:06:19s
epoch 9  | loss: 0.03965 | val_0_matthews: 0.98384 |  0:07:01s
epoch 10 | loss: 0.03914 | val_0_matthews: 0.98309 |  0:07:43s
epoch 11 | loss: 0.03919 | val_0_matthews: 0.9841  |  0:08:24s
epoch 12 | loss: 0.03847 | val_0_matthews: 0.9843  |  0:09:07s
epoch 13 | loss: 0.03802 | val_0_matthews: 0.98397 |  0:09:52s
epoch 14 | loss: 0.03784 | val_0_matthews: 0.98414 |  0:10:34s
epoch 15 | loss: 0.03756 | val_0_matthews: 0.9842  |  0

[I 2024-08-26 23:33:52,355] Trial 0 finished with value: 0.9843040704727173 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.04452019576176219, 'PRETRAIN_RATIO': 0.7629023443609105, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.5252465450906483, 'N_INDEPENDENT': 4, 'N_SHARED': 4, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 512}. Best is trial 0 with value: 0.9843040704727173.


epoch 0  | loss: 1.97859 | val_0_unsup_loss_numpy: 0.7658900022506714|  0:00:27s
epoch 1  | loss: 0.7222  | val_0_unsup_loss_numpy: 0.5882099866867065|  0:00:54s
epoch 2  | loss: 0.5933  | val_0_unsup_loss_numpy: 0.4421199858188629|  0:01:21s
epoch 3  | loss: 0.49845 | val_0_unsup_loss_numpy: 0.36754000186920166|  0:01:48s
epoch 4  | loss: 0.4319  | val_0_unsup_loss_numpy: 0.3180899918079376|  0:02:15s
epoch 5  | loss: 0.38211 | val_0_unsup_loss_numpy: 0.2925400137901306|  0:02:42s
epoch 6  | loss: 0.34839 | val_0_unsup_loss_numpy: 0.2752799987792969|  0:03:09s
epoch 7  | loss: 0.31944 | val_0_unsup_loss_numpy: 0.265720009803772|  0:03:36s
epoch 8  | loss: 0.29711 | val_0_unsup_loss_numpy: 0.25376999378204346|  0:04:03s
epoch 9  | loss: 0.27957 | val_0_unsup_loss_numpy: 0.24987000226974487|  0:04:30s
epoch 10 | loss: 0.26256 | val_0_unsup_loss_numpy: 0.2505599856376648|  0:04:57s
epoch 11 | loss: 0.24855 | val_0_unsup_loss_numpy: 0.25780999660491943|  0:05:23s
epoch 12 | loss: 0.23393 



epoch 0  | loss: 0.18178 | val_0_matthews: 0.97916 |  0:00:20s
epoch 1  | loss: 0.04512 | val_0_matthews: 0.98198 |  0:00:41s
epoch 2  | loss: 0.04298 | val_0_matthews: 0.98253 |  0:01:02s
epoch 3  | loss: 0.04164 | val_0_matthews: 0.98328 |  0:01:23s
epoch 4  | loss: 0.04093 | val_0_matthews: 0.98347 |  0:01:44s
epoch 5  | loss: 0.04043 | val_0_matthews: 0.9836  |  0:02:05s
epoch 6  | loss: 0.03999 | val_0_matthews: 0.98361 |  0:02:25s
epoch 7  | loss: 0.0396  | val_0_matthews: 0.98374 |  0:02:45s
epoch 8  | loss: 0.0394  | val_0_matthews: 0.9839  |  0:03:06s
epoch 9  | loss: 0.03897 | val_0_matthews: 0.9839  |  0:03:27s
epoch 10 | loss: 0.03877 | val_0_matthews: 0.98382 |  0:03:48s
epoch 11 | loss: 0.03855 | val_0_matthews: 0.98403 |  0:04:09s
epoch 12 | loss: 0.03841 | val_0_matthews: 0.98401 |  0:04:29s
epoch 13 | loss: 0.03813 | val_0_matthews: 0.98393 |  0:04:50s
epoch 14 | loss: 0.03813 | val_0_matthews: 0.98377 |  0:05:10s
epoch 15 | loss: 0.03811 | val_0_matthews: 0.9841  |  0

[I 2024-08-26 23:54:10,371] Trial 1 finished with value: 0.9841035008430481 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.05177521192046732, 'PRETRAIN_RATIO': 0.33535940346856735, 'N_D': 48, 'N_STEPS': 4, 'GAMMA': 1.2724255225713796, 'N_INDEPENDENT': 1, 'N_SHARED': 2, 'BATCH_SIZE': 32768, 'VIRT_BATCH_SIZE': 512}. Best is trial 0 with value: 0.9843040704727173.


epoch 0  | loss: 12.97664| val_0_unsup_loss_numpy: 0.952489972114563|  0:00:45s
epoch 1  | loss: 0.82532 | val_0_unsup_loss_numpy: 0.6208000183105469|  0:01:30s
epoch 2  | loss: 0.62313 | val_0_unsup_loss_numpy: 0.4847800135612488|  0:02:16s
epoch 3  | loss: 0.52428 | val_0_unsup_loss_numpy: 0.42309001088142395|  0:03:01s
epoch 4  | loss: 0.46515 | val_0_unsup_loss_numpy: 0.385670006275177|  0:03:47s
epoch 5  | loss: 0.42241 | val_0_unsup_loss_numpy: 0.3626300096511841|  0:04:32s
epoch 6  | loss: 0.38823 | val_0_unsup_loss_numpy: 0.3461500108242035|  0:05:17s
epoch 7  | loss: 0.35985 | val_0_unsup_loss_numpy: 0.3411000072956085|  0:06:03s
epoch 8  | loss: 0.33709 | val_0_unsup_loss_numpy: 0.3414599895477295|  0:06:48s
epoch 9  | loss: 0.31821 | val_0_unsup_loss_numpy: 0.3509899973869324|  0:07:34s
epoch 10 | loss: 0.30254 | val_0_unsup_loss_numpy: 0.36155998706817627|  0:08:19s
epoch 11 | loss: 0.28993 | val_0_unsup_loss_numpy: 0.36131998896598816|  0:09:05s

Early stopping occurred at



epoch 0  | loss: 0.16536 | val_0_matthews: 0.97491 |  0:00:39s
epoch 1  | loss: 0.05308 | val_0_matthews: 0.97809 |  0:01:19s
epoch 2  | loss: 0.04905 | val_0_matthews: 0.97947 |  0:01:58s
epoch 3  | loss: 0.04632 | val_0_matthews: 0.98047 |  0:02:38s
epoch 4  | loss: 0.04547 | val_0_matthews: 0.98084 |  0:03:17s
epoch 5  | loss: 0.04427 | val_0_matthews: 0.98149 |  0:03:57s
epoch 6  | loss: 0.04348 | val_0_matthews: 0.98186 |  0:04:37s
epoch 7  | loss: 0.04272 | val_0_matthews: 0.98202 |  0:05:16s
epoch 8  | loss: 0.04217 | val_0_matthews: 0.98229 |  0:05:56s
epoch 9  | loss: 0.0419  | val_0_matthews: 0.98263 |  0:06:36s
epoch 10 | loss: 0.04164 | val_0_matthews: 0.9828  |  0:07:15s
epoch 11 | loss: 0.04247 | val_0_matthews: 0.98231 |  0:07:55s
epoch 12 | loss: 0.04154 | val_0_matthews: 0.98288 |  0:08:34s
epoch 13 | loss: 0.04127 | val_0_matthews: 0.98296 |  0:09:15s
epoch 14 | loss: 0.04074 | val_0_matthews: 0.98295 |  0:09:54s
epoch 15 | loss: 0.0404  | val_0_matthews: 0.98312 |  0

[I 2024-08-27 00:53:43,159] Trial 2 finished with value: 0.9838061332702637 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.0011582633989429404, 'PRETRAIN_RATIO': 0.21888439051680197, 'N_D': 48, 'N_STEPS': 6, 'GAMMA': 1.0060595706405868, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 1536}. Best is trial 0 with value: 0.9843040704727173.


epoch 0  | loss: 1.91762 | val_0_unsup_loss_numpy: 0.8167099952697754|  0:00:42s
epoch 1  | loss: 0.77357 | val_0_unsup_loss_numpy: 0.7089999914169312|  0:01:25s
epoch 2  | loss: 0.70572 | val_0_unsup_loss_numpy: 0.6526399850845337|  0:02:08s
epoch 3  | loss: 0.67329 | val_0_unsup_loss_numpy: 0.632390022277832|  0:02:51s
epoch 4  | loss: 0.64781 | val_0_unsup_loss_numpy: 0.6293299794197083|  0:03:34s
epoch 5  | loss: 0.63398 | val_0_unsup_loss_numpy: 0.6268399953842163|  0:04:16s
epoch 6  | loss: 0.63471 | val_0_unsup_loss_numpy: 0.6197699904441833|  0:05:00s
epoch 7  | loss: 0.61774 | val_0_unsup_loss_numpy: 0.6104099750518799|  0:05:43s
epoch 8  | loss: 0.6028  | val_0_unsup_loss_numpy: 0.602869987487793|  0:06:26s
epoch 9  | loss: 0.58645 | val_0_unsup_loss_numpy: 0.6052899956703186|  0:07:09s
epoch 10 | loss: 0.57466 | val_0_unsup_loss_numpy: 0.6065899729728699|  0:07:52s
epoch 11 | loss: 0.57711 | val_0_unsup_loss_numpy: 0.609220027923584|  0:08:35s
epoch 12 | loss: 0.55701 | val_



epoch 0  | loss: 0.2708  | val_0_matthews: 0.97483 |  0:00:35s
epoch 1  | loss: 0.05012 | val_0_matthews: 0.98    |  0:01:11s
epoch 2  | loss: 0.04526 | val_0_matthews: 0.98114 |  0:01:46s
epoch 3  | loss: 0.04342 | val_0_matthews: 0.98214 |  0:02:22s
epoch 4  | loss: 0.04371 | val_0_matthews: 0.98202 |  0:02:57s
epoch 5  | loss: 0.04241 | val_0_matthews: 0.98271 |  0:03:33s
epoch 6  | loss: 0.04118 | val_0_matthews: 0.98321 |  0:04:09s
epoch 7  | loss: 0.04069 | val_0_matthews: 0.98291 |  0:04:44s
epoch 8  | loss: 0.0404  | val_0_matthews: 0.98323 |  0:05:20s
epoch 9  | loss: 0.03998 | val_0_matthews: 0.9833  |  0:05:55s
epoch 10 | loss: 0.04051 | val_0_matthews: 0.98313 |  0:06:31s
epoch 11 | loss: 0.03988 | val_0_matthews: 0.9832  |  0:07:07s
epoch 12 | loss: 0.03923 | val_0_matthews: 0.98358 |  0:07:42s
epoch 13 | loss: 0.03892 | val_0_matthews: 0.98358 |  0:08:18s
epoch 14 | loss: 0.03883 | val_0_matthews: 0.9837  |  0:08:54s
epoch 15 | loss: 0.03845 | val_0_matthews: 0.9835  |  0

[I 2024-08-27 01:21:17,080] Trial 3 finished with value: 0.983942985534668 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.08149825123176183, 'PRETRAIN_RATIO': 0.20000333644173357, 'N_D': 16, 'N_STEPS': 5, 'GAMMA': 1.2523015930295252, 'N_INDEPENDENT': 2, 'N_SHARED': 4, 'BATCH_SIZE': 32768, 'VIRT_BATCH_SIZE': 512}. Best is trial 0 with value: 0.9843040704727173.


epoch 0  | loss: 1.64606 | val_0_unsup_loss_numpy: 0.8087999820709229|  0:00:31s
epoch 1  | loss: 0.73114 | val_0_unsup_loss_numpy: 0.5806499719619751|  0:01:01s
epoch 2  | loss: 0.59355 | val_0_unsup_loss_numpy: 0.49055999517440796|  0:01:32s
epoch 3  | loss: 0.49961 | val_0_unsup_loss_numpy: 0.420740008354187|  0:02:03s
epoch 4  | loss: 0.42599 | val_0_unsup_loss_numpy: 0.3620699942111969|  0:02:34s
epoch 5  | loss: 0.3672  | val_0_unsup_loss_numpy: 0.43149998784065247|  0:03:06s
epoch 6  | loss: 0.31989 | val_0_unsup_loss_numpy: 0.39789000153541565|  0:03:37s
epoch 7  | loss: 0.28084 | val_0_unsup_loss_numpy: 0.4309999942779541|  0:04:08s
epoch 8  | loss: 0.2478  | val_0_unsup_loss_numpy: 0.4206799864768982|  0:04:39s

Early stopping occurred at epoch 8 with best_epoch = 4 and best_val_0_unsup_loss_numpy = 0.3620699942111969




epoch 0  | loss: 0.0687  | val_0_matthews: 0.98171 |  0:00:25s
epoch 1  | loss: 0.04347 | val_0_matthews: 0.98319 |  0:00:51s
epoch 2  | loss: 0.04166 | val_0_matthews: 0.98342 |  0:01:16s
epoch 3  | loss: 0.04096 | val_0_matthews: 0.98389 |  0:01:41s
epoch 4  | loss: 0.04031 | val_0_matthews: 0.98383 |  0:02:07s
epoch 5  | loss: 0.03986 | val_0_matthews: 0.9839  |  0:02:32s
epoch 6  | loss: 0.03942 | val_0_matthews: 0.98382 |  0:02:57s
epoch 7  | loss: 0.03903 | val_0_matthews: 0.98399 |  0:03:23s
epoch 8  | loss: 0.0388  | val_0_matthews: 0.98403 |  0:03:48s
epoch 9  | loss: 0.03851 | val_0_matthews: 0.98416 |  0:04:13s
epoch 10 | loss: 0.03818 | val_0_matthews: 0.98397 |  0:04:38s
epoch 11 | loss: 0.03789 | val_0_matthews: 0.9841  |  0:05:04s
epoch 12 | loss: 0.03758 | val_0_matthews: 0.98404 |  0:05:29s
epoch 13 | loss: 0.03736 | val_0_matthews: 0.98399 |  0:05:54s

Early stopping occurred at epoch 13 with best_epoch = 9 and best_val_0_matthews = 0.98416


[I 2024-08-27 01:38:09,315] Trial 4 finished with value: 0.9841607809066772 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.0057367587003765595, 'PRETRAIN_RATIO': 0.4623795708794746, 'N_D': 64, 'N_STEPS': 3, 'GAMMA': 1.6089116204639282, 'N_INDEPENDENT': 3, 'N_SHARED': 1, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 512}. Best is trial 0 with value: 0.9843040704727173.


epoch 0  | loss: 1.87276 | val_0_unsup_loss_numpy: 0.46355000138282776|  0:00:57s
epoch 1  | loss: 0.40422 | val_0_unsup_loss_numpy: 0.30410000681877136|  0:01:55s
epoch 2  | loss: 0.29376 | val_0_unsup_loss_numpy: 0.2790899872779846|  0:02:52s
epoch 3  | loss: 0.25174 | val_0_unsup_loss_numpy: 0.2829599976539612|  0:03:50s
epoch 4  | loss: 0.23333 | val_0_unsup_loss_numpy: 0.28220000863075256|  0:04:48s
epoch 5  | loss: 0.22073 | val_0_unsup_loss_numpy: 0.2897599935531616|  0:05:45s
epoch 6  | loss: 0.21843 | val_0_unsup_loss_numpy: 0.3172700107097626|  0:06:44s

Early stopping occurred at epoch 6 with best_epoch = 2 and best_val_0_unsup_loss_numpy = 0.2790899872779846




epoch 0  | loss: 0.10946 | val_0_matthews: 0.97889 |  0:00:49s
epoch 1  | loss: 0.04735 | val_0_matthews: 0.98099 |  0:01:38s
epoch 2  | loss: 0.04435 | val_0_matthews: 0.98271 |  0:02:28s
epoch 3  | loss: 0.04259 | val_0_matthews: 0.9825  |  0:03:18s
epoch 4  | loss: 0.0416  | val_0_matthews: 0.98346 |  0:04:07s
epoch 5  | loss: 0.04084 | val_0_matthews: 0.98359 |  0:04:57s
epoch 6  | loss: 0.04055 | val_0_matthews: 0.98376 |  0:05:46s
epoch 7  | loss: 0.03997 | val_0_matthews: 0.98376 |  0:06:35s
epoch 8  | loss: 0.03967 | val_0_matthews: 0.98348 |  0:07:25s
epoch 9  | loss: 0.04007 | val_0_matthews: 0.98359 |  0:08:14s
epoch 10 | loss: 0.03943 | val_0_matthews: 0.98395 |  0:09:04s
epoch 11 | loss: 0.03881 | val_0_matthews: 0.98401 |  0:09:54s
epoch 12 | loss: 0.03868 | val_0_matthews: 0.98398 |  0:10:43s
epoch 13 | loss: 0.03845 | val_0_matthews: 0.98402 |  0:11:33s
epoch 14 | loss: 0.03876 | val_0_matthews: 0.98385 |  0:12:23s
epoch 15 | loss: 0.03793 | val_0_matthews: 0.98428 |  0

[I 2024-08-27 02:21:16,545] Trial 5 finished with value: 0.9844775795936584 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.0229197581805364, 'PRETRAIN_RATIO': 0.2780448939884285, 'N_D': 48, 'N_STEPS': 5, 'GAMMA': 1.0517101911301066, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 12288, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.53531 | val_0_unsup_loss_numpy: 0.9700300097465515|  0:00:23s
epoch 1  | loss: 0.93629 | val_0_unsup_loss_numpy: 0.9398900270462036|  0:00:47s
epoch 2  | loss: 0.87278 | val_0_unsup_loss_numpy: 0.9209700226783752|  0:01:11s
epoch 3  | loss: 0.82744 | val_0_unsup_loss_numpy: 0.8045099973678589|  0:01:35s
epoch 4  | loss: 0.80474 | val_0_unsup_loss_numpy: 0.7067800164222717|  0:01:58s
epoch 5  | loss: 0.78587 | val_0_unsup_loss_numpy: 0.6767799854278564|  0:02:22s
epoch 6  | loss: 0.77453 | val_0_unsup_loss_numpy: 0.676360011100769|  0:02:46s
epoch 7  | loss: 0.76811 | val_0_unsup_loss_numpy: 0.7166100144386292|  0:03:10s
epoch 8  | loss: 0.77055 | val_0_unsup_loss_numpy: 0.6693599820137024|  0:03:34s
epoch 9  | loss: 0.75854 | val_0_unsup_loss_numpy: 0.671019971370697|  0:03:58s
epoch 10 | loss: 0.75209 | val_0_unsup_loss_numpy: 0.6676899790763855|  0:04:21s
epoch 11 | loss: 0.75568 | val_0_unsup_loss_numpy: 0.6711000204086304|  0:04:45s
epoch 12 | loss: 0.74971 | val



epoch 0  | loss: 0.56528 | val_0_matthews: 0.61688 |  0:00:19s
epoch 1  | loss: 0.23291 | val_0_matthews: 0.87052 |  0:00:38s
epoch 2  | loss: 0.14777 | val_0_matthews: 0.85945 |  0:00:58s
epoch 3  | loss: 0.13966 | val_0_matthews: 0.91359 |  0:01:17s
epoch 4  | loss: 0.11259 | val_0_matthews: 0.92275 |  0:01:36s
epoch 5  | loss: 0.1012  | val_0_matthews: 0.9422  |  0:01:55s
epoch 6  | loss: 0.0852  | val_0_matthews: 0.95196 |  0:02:15s
epoch 7  | loss: 0.07702 | val_0_matthews: 0.95452 |  0:02:34s
epoch 8  | loss: 0.07157 | val_0_matthews: 0.96719 |  0:02:54s
epoch 9  | loss: 0.06443 | val_0_matthews: 0.96781 |  0:03:14s
epoch 10 | loss: 0.0612  | val_0_matthews: 0.97184 |  0:03:33s
epoch 11 | loss: 0.05666 | val_0_matthews: 0.97438 |  0:03:52s
epoch 12 | loss: 0.0538  | val_0_matthews: 0.97828 |  0:04:12s
epoch 13 | loss: 0.04838 | val_0_matthews: 0.97937 |  0:04:31s
epoch 14 | loss: 0.04757 | val_0_matthews: 0.97697 |  0:04:50s
epoch 15 | loss: 0.04783 | val_0_matthews: 0.9804  |  0

[I 2024-08-27 02:40:41,939] Trial 6 finished with value: 0.9820330142974854 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.09329898420966148, 'PRETRAIN_RATIO': 0.6725180566058904, 'N_D': 32, 'N_STEPS': 3, 'GAMMA': 1.8832346980766994, 'N_INDEPENDENT': 2, 'N_SHARED': 2, 'BATCH_SIZE': 32768, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.90575 | val_0_unsup_loss_numpy: 0.985289990901947|  0:00:14s
epoch 1  | loss: 0.961   | val_0_unsup_loss_numpy: 0.9287999868392944|  0:00:29s
epoch 2  | loss: 0.93134 | val_0_unsup_loss_numpy: 0.8415300250053406|  0:00:44s
epoch 3  | loss: 0.90568 | val_0_unsup_loss_numpy: 0.7952600121498108|  0:00:59s
epoch 4  | loss: 0.8824  | val_0_unsup_loss_numpy: 0.7642199993133545|  0:01:14s
epoch 5  | loss: 0.86013 | val_0_unsup_loss_numpy: 0.73403000831604|  0:01:29s
epoch 6  | loss: 0.83611 | val_0_unsup_loss_numpy: 0.7079100012779236|  0:01:44s
epoch 7  | loss: 0.8141  | val_0_unsup_loss_numpy: 0.6870200037956238|  0:01:59s
epoch 8  | loss: 0.79692 | val_0_unsup_loss_numpy: 0.6717100143432617|  0:02:14s
epoch 9  | loss: 0.77944 | val_0_unsup_loss_numpy: 0.6693099737167358|  0:02:29s
epoch 10 | loss: 0.75918 | val_0_unsup_loss_numpy: 0.6583600044250488|  0:02:44s
epoch 11 | loss: 0.73885 | val_0_unsup_loss_numpy: 0.6457899808883667|  0:02:59s
epoch 12 | loss: 0.72221 | val_



epoch 0  | loss: 0.12802 | val_0_matthews: 0.87113 |  0:00:12s
epoch 1  | loss: 0.05737 | val_0_matthews: 0.96098 |  0:00:25s
epoch 2  | loss: 0.04821 | val_0_matthews: 0.98125 |  0:00:38s
epoch 3  | loss: 0.04616 | val_0_matthews: 0.98163 |  0:00:50s
epoch 4  | loss: 0.04424 | val_0_matthews: 0.98214 |  0:01:03s
epoch 5  | loss: 0.04294 | val_0_matthews: 0.98263 |  0:01:16s
epoch 6  | loss: 0.04219 | val_0_matthews: 0.98292 |  0:01:28s
epoch 7  | loss: 0.04264 | val_0_matthews: 0.98305 |  0:01:41s
epoch 8  | loss: 0.04065 | val_0_matthews: 0.98316 |  0:01:54s
epoch 9  | loss: 0.0403  | val_0_matthews: 0.98334 |  0:02:07s
epoch 10 | loss: 0.03988 | val_0_matthews: 0.98354 |  0:02:20s
epoch 11 | loss: 0.0399  | val_0_matthews: 0.9832  |  0:02:32s
epoch 12 | loss: 0.03955 | val_0_matthews: 0.98356 |  0:02:45s
epoch 13 | loss: 0.03934 | val_0_matthews: 0.98347 |  0:02:58s
epoch 14 | loss: 0.03962 | val_0_matthews: 0.97955 |  0:03:10s
epoch 15 | loss: 0.04021 | val_0_matthews: 0.98359 |  0

[I 2024-08-27 02:58:47,110] Trial 7 finished with value: 0.9840347766876221 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.013422864871720619, 'PRETRAIN_RATIO': 0.7557292508050206, 'N_D': 32, 'N_STEPS': 3, 'GAMMA': 1.6941571165474785, 'N_INDEPENDENT': 2, 'N_SHARED': 1, 'BATCH_SIZE': 20480, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.13202 | val_0_unsup_loss_numpy: 0.8613799810409546|  0:00:14s
epoch 1  | loss: 0.81093 | val_0_unsup_loss_numpy: 0.7103000283241272|  0:00:27s
epoch 2  | loss: 0.73423 | val_0_unsup_loss_numpy: 0.6508700251579285|  0:00:42s
epoch 3  | loss: 0.69959 | val_0_unsup_loss_numpy: 0.6380100250244141|  0:00:56s
epoch 4  | loss: 0.67829 | val_0_unsup_loss_numpy: 0.6300100088119507|  0:01:10s
epoch 5  | loss: 0.66574 | val_0_unsup_loss_numpy: 0.624779999256134|  0:01:25s
epoch 6  | loss: 0.65436 | val_0_unsup_loss_numpy: 0.6267600059509277|  0:01:40s
epoch 7  | loss: 0.64915 | val_0_unsup_loss_numpy: 0.6246500015258789|  0:01:53s
epoch 8  | loss: 0.64321 | val_0_unsup_loss_numpy: 0.6240699887275696|  0:02:08s
epoch 9  | loss: 0.64019 | val_0_unsup_loss_numpy: 0.6206300258636475|  0:02:22s
epoch 10 | loss: 0.63424 | val_0_unsup_loss_numpy: 0.620199978351593|  0:02:36s
epoch 11 | loss: 0.63122 | val_0_unsup_loss_numpy: 0.6183000206947327|  0:02:50s
epoch 12 | loss: 0.63071 | val



epoch 0  | loss: 0.1125  | val_0_matthews: 0.97685 |  0:00:11s
epoch 1  | loss: 0.04722 | val_0_matthews: 0.98045 |  0:00:23s
epoch 2  | loss: 0.04441 | val_0_matthews: 0.98117 |  0:00:34s
epoch 3  | loss: 0.04311 | val_0_matthews: 0.98185 |  0:00:45s
epoch 4  | loss: 0.042   | val_0_matthews: 0.98243 |  0:00:56s
epoch 5  | loss: 0.04148 | val_0_matthews: 0.98266 |  0:01:08s
epoch 6  | loss: 0.04099 | val_0_matthews: 0.98259 |  0:01:19s
epoch 7  | loss: 0.04058 | val_0_matthews: 0.98307 |  0:01:30s
epoch 8  | loss: 0.04044 | val_0_matthews: 0.98299 |  0:01:42s
epoch 9  | loss: 0.04007 | val_0_matthews: 0.98317 |  0:01:54s
epoch 10 | loss: 0.03973 | val_0_matthews: 0.98329 |  0:02:05s
epoch 11 | loss: 0.03953 | val_0_matthews: 0.98345 |  0:02:16s
epoch 12 | loss: 0.0394  | val_0_matthews: 0.98365 |  0:02:28s
epoch 13 | loss: 0.03909 | val_0_matthews: 0.98375 |  0:02:39s
epoch 14 | loss: 0.03906 | val_0_matthews: 0.98346 |  0:02:50s
epoch 15 | loss: 0.03897 | val_0_matthews: 0.98322 |  0

[I 2024-08-27 03:13:14,722] Trial 8 finished with value: 0.9837465882301331 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.04887840950671322, 'PRETRAIN_RATIO': 0.5058859879804221, 'N_D': 16, 'N_STEPS': 3, 'GAMMA': 1.9540427645572254, 'N_INDEPENDENT': 1, 'N_SHARED': 1, 'BATCH_SIZE': 12288, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.21198 | val_0_unsup_loss_numpy: 0.7141000032424927|  0:00:50s
epoch 1  | loss: 0.70246 | val_0_unsup_loss_numpy: 0.6797800064086914|  0:01:40s
epoch 2  | loss: 0.66758 | val_0_unsup_loss_numpy: 0.6875600218772888|  0:02:29s
epoch 3  | loss: 0.64813 | val_0_unsup_loss_numpy: 0.6765199899673462|  0:03:20s
epoch 4  | loss: 0.63873 | val_0_unsup_loss_numpy: 0.7062399983406067|  0:04:10s
epoch 5  | loss: 0.62944 | val_0_unsup_loss_numpy: 0.704259991645813|  0:05:00s
epoch 6  | loss: 0.61999 | val_0_unsup_loss_numpy: 0.7126700282096863|  0:05:50s
epoch 7  | loss: 0.61544 | val_0_unsup_loss_numpy: 0.6791599988937378|  0:06:40s

Early stopping occurred at epoch 7 with best_epoch = 3 and best_val_0_unsup_loss_numpy = 0.6765199899673462




epoch 0  | loss: 0.29075 | val_0_matthews: 0.90325 |  0:00:39s
epoch 1  | loss: 0.08468 | val_0_matthews: 0.96964 |  0:01:19s
epoch 2  | loss: 0.06596 | val_0_matthews: 0.97567 |  0:01:59s
epoch 3  | loss: 0.05507 | val_0_matthews: 0.97874 |  0:02:39s
epoch 4  | loss: 0.04834 | val_0_matthews: 0.96617 |  0:03:19s
epoch 5  | loss: 0.04545 | val_0_matthews: 0.98116 |  0:03:59s
epoch 6  | loss: 0.04375 | val_0_matthews: 0.98165 |  0:04:39s
epoch 7  | loss: 0.0434  | val_0_matthews: 0.98196 |  0:05:18s
epoch 8  | loss: 0.04218 | val_0_matthews: 0.98228 |  0:05:58s
epoch 9  | loss: 0.04154 | val_0_matthews: 0.98213 |  0:06:38s
epoch 10 | loss: 0.04096 | val_0_matthews: 0.98259 |  0:07:19s
epoch 11 | loss: 0.044   | val_0_matthews: 0.98248 |  0:07:59s
epoch 12 | loss: 0.04101 | val_0_matthews: 0.98282 |  0:08:39s
epoch 13 | loss: 0.04032 | val_0_matthews: 0.98261 |  0:09:19s
epoch 14 | loss: 0.04009 | val_0_matthews: 0.98296 |  0:09:58s
epoch 15 | loss: 0.0398  | val_0_matthews: 0.98155 |  0

[I 2024-08-27 03:54:58,169] Trial 9 finished with value: 0.983772337436676 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.08437157481807553, 'PRETRAIN_RATIO': 0.2939239784426557, 'N_D': 16, 'N_STEPS': 6, 'GAMMA': 1.1681394178262812, 'N_INDEPENDENT': 4, 'N_SHARED': 1, 'BATCH_SIZE': 12288, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.10349 | val_0_unsup_loss_numpy: 0.7509400248527527|  0:00:23s
epoch 1  | loss: 0.6358  | val_0_unsup_loss_numpy: 0.5214899778366089|  0:00:47s
epoch 2  | loss: 0.50498 | val_0_unsup_loss_numpy: 0.3989199995994568|  0:01:11s
epoch 3  | loss: 0.41786 | val_0_unsup_loss_numpy: 0.33917999267578125|  0:01:35s
epoch 4  | loss: 0.37317 | val_0_unsup_loss_numpy: 0.32315999269485474|  0:01:59s
epoch 5  | loss: 0.32741 | val_0_unsup_loss_numpy: 0.30972999334335327|  0:02:23s
epoch 6  | loss: 0.29176 | val_0_unsup_loss_numpy: 0.3078100085258484|  0:02:47s
epoch 7  | loss: 0.26631 | val_0_unsup_loss_numpy: 0.3121199905872345|  0:03:11s
epoch 8  | loss: 0.24881 | val_0_unsup_loss_numpy: 0.3095000088214874|  0:03:35s
epoch 9  | loss: 0.23795 | val_0_unsup_loss_numpy: 0.30605998635292053|  0:03:59s
epoch 10 | loss: 0.22107 | val_0_unsup_loss_numpy: 0.3049600124359131|  0:04:23s
epoch 11 | loss: 0.21179 | val_0_unsup_loss_numpy: 0.3023500144481659|  0:04:47s
epoch 12 | loss: 0.20309



epoch 0  | loss: 0.09553 | val_0_matthews: 0.97952 |  0:00:20s
epoch 1  | loss: 0.04678 | val_0_matthews: 0.98145 |  0:00:41s
epoch 2  | loss: 0.0438  | val_0_matthews: 0.98229 |  0:01:02s
epoch 3  | loss: 0.04275 | val_0_matthews: 0.98249 |  0:01:24s
epoch 4  | loss: 0.0418  | val_0_matthews: 0.98286 |  0:01:45s
epoch 5  | loss: 0.04115 | val_0_matthews: 0.98301 |  0:02:05s
epoch 6  | loss: 0.04087 | val_0_matthews: 0.98326 |  0:02:26s
epoch 7  | loss: 0.04045 | val_0_matthews: 0.98317 |  0:02:46s
epoch 8  | loss: 0.04022 | val_0_matthews: 0.98323 |  0:03:07s
epoch 9  | loss: 0.04041 | val_0_matthews: 0.98319 |  0:03:27s
epoch 10 | loss: 0.03994 | val_0_matthews: 0.98328 |  0:03:48s
epoch 11 | loss: 0.0396  | val_0_matthews: 0.98336 |  0:04:09s
epoch 12 | loss: 0.03934 | val_0_matthews: 0.9836  |  0:04:30s
epoch 13 | loss: 0.03906 | val_0_matthews: 0.98339 |  0:04:51s
epoch 14 | loss: 0.0389  | val_0_matthews: 0.98328 |  0:05:12s
epoch 15 | loss: 0.03917 | val_0_matthews: 0.98366 |  0

[I 2024-08-27 04:20:40,097] Trial 10 finished with value: 0.9838235378265381 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.01101255376421729, 'PRETRAIN_RATIO': 0.2782404414836022, 'N_D': 48, 'N_STEPS': 5, 'GAMMA': 1.0006548532386634, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 20480, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.63057 | val_0_unsup_loss_numpy: 0.914650022983551|  0:00:29s
epoch 1  | loss: 0.73476 | val_0_unsup_loss_numpy: 0.7121300101280212|  0:00:58s
epoch 2  | loss: 0.60238 | val_0_unsup_loss_numpy: 0.4980100095272064|  0:01:26s
epoch 3  | loss: 0.51505 | val_0_unsup_loss_numpy: 0.38923001289367676|  0:01:55s
epoch 4  | loss: 0.44354 | val_0_unsup_loss_numpy: 0.32019999623298645|  0:02:25s
epoch 5  | loss: 0.37832 | val_0_unsup_loss_numpy: 0.2679400146007538|  0:02:53s
epoch 6  | loss: 0.33072 | val_0_unsup_loss_numpy: 0.24591000378131866|  0:03:22s
epoch 7  | loss: 0.29397 | val_0_unsup_loss_numpy: 0.22915999591350555|  0:03:51s
epoch 8  | loss: 0.27836 | val_0_unsup_loss_numpy: 0.19242000579833984|  0:04:21s
epoch 9  | loss: 0.23881 | val_0_unsup_loss_numpy: 0.17564000189304352|  0:04:49s
epoch 10 | loss: 0.21475 | val_0_unsup_loss_numpy: 0.16798000037670135|  0:05:18s
epoch 11 | loss: 0.19821 | val_0_unsup_loss_numpy: 0.15062999725341797|  0:05:47s
epoch 12 | loss: 0.18



epoch 0  | loss: 0.15824 | val_0_matthews: 0.96023 |  0:00:26s
epoch 1  | loss: 0.04563 | val_0_matthews: 0.98131 |  0:00:51s
epoch 2  | loss: 0.04354 | val_0_matthews: 0.98233 |  0:01:17s
epoch 3  | loss: 0.04222 | val_0_matthews: 0.98297 |  0:01:43s
epoch 4  | loss: 0.04104 | val_0_matthews: 0.98324 |  0:02:09s
epoch 5  | loss: 0.04052 | val_0_matthews: 0.98277 |  0:02:35s
epoch 6  | loss: 0.04025 | val_0_matthews: 0.9837  |  0:03:00s
epoch 7  | loss: 0.03985 | val_0_matthews: 0.98355 |  0:03:26s
epoch 8  | loss: 0.03975 | val_0_matthews: 0.98379 |  0:03:52s
epoch 9  | loss: 0.03902 | val_0_matthews: 0.98387 |  0:04:18s
epoch 10 | loss: 0.0388  | val_0_matthews: 0.98393 |  0:04:43s
epoch 11 | loss: 0.03845 | val_0_matthews: 0.98401 |  0:05:09s
epoch 12 | loss: 0.03829 | val_0_matthews: 0.98399 |  0:05:35s
epoch 13 | loss: 0.03815 | val_0_matthews: 0.984   |  0:06:01s
epoch 14 | loss: 0.03793 | val_0_matthews: 0.98397 |  0:06:26s
epoch 15 | loss: 0.03769 | val_0_matthews: 0.98398 |  0

[I 2024-08-27 04:43:25,820] Trial 11 finished with value: 0.9840074777603149 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.024241404213293075, 'PRETRAIN_RATIO': 0.579885946256226, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.457306941491727, 'N_INDEPENDENT': 5, 'N_SHARED': 4, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.8761  | val_0_unsup_loss_numpy: 0.8228899836540222|  0:00:31s
epoch 1  | loss: 0.75145 | val_0_unsup_loss_numpy: 0.641789972782135|  0:01:02s
epoch 2  | loss: 0.65847 | val_0_unsup_loss_numpy: 0.5242699980735779|  0:01:34s
epoch 3  | loss: 0.59201 | val_0_unsup_loss_numpy: 0.4303700029850006|  0:02:05s
epoch 4  | loss: 0.52774 | val_0_unsup_loss_numpy: 0.3560900092124939|  0:02:37s
epoch 5  | loss: 0.51051 | val_0_unsup_loss_numpy: 0.3281700015068054|  0:03:08s
epoch 6  | loss: 0.48366 | val_0_unsup_loss_numpy: 0.3601199984550476|  0:03:39s
epoch 7  | loss: 0.46531 | val_0_unsup_loss_numpy: 0.29276999831199646|  0:04:11s
epoch 8  | loss: 0.42137 | val_0_unsup_loss_numpy: 0.2769100069999695|  0:04:43s
epoch 9  | loss: 0.39627 | val_0_unsup_loss_numpy: 0.26774001121520996|  0:05:14s
epoch 10 | loss: 0.37523 | val_0_unsup_loss_numpy: 0.2589299976825714|  0:05:45s
epoch 11 | loss: 0.35591 | val_0_unsup_loss_numpy: 0.2503199875354767|  0:06:17s
epoch 12 | loss: 0.3618  | 



epoch 0  | loss: 0.1315  | val_0_matthews: 0.97813 |  0:00:27s
epoch 1  | loss: 0.04451 | val_0_matthews: 0.98203 |  0:00:55s
epoch 2  | loss: 0.04188 | val_0_matthews: 0.98304 |  0:01:22s
epoch 3  | loss: 0.04056 | val_0_matthews: 0.9831  |  0:01:49s
epoch 4  | loss: 0.04025 | val_0_matthews: 0.98369 |  0:02:17s
epoch 5  | loss: 0.03975 | val_0_matthews: 0.98381 |  0:02:45s
epoch 6  | loss: 0.03954 | val_0_matthews: 0.98333 |  0:03:13s
epoch 7  | loss: 0.04003 | val_0_matthews: 0.98358 |  0:03:40s
epoch 8  | loss: 0.04298 | val_0_matthews: 0.98372 |  0:04:07s
epoch 9  | loss: 0.03842 | val_0_matthews: 0.98404 |  0:04:35s
epoch 10 | loss: 0.0377  | val_0_matthews: 0.98429 |  0:05:03s
epoch 11 | loss: 0.03743 | val_0_matthews: 0.98407 |  0:05:30s
epoch 12 | loss: 0.03975 | val_0_matthews: 0.96563 |  0:05:57s
epoch 13 | loss: 0.04363 | val_0_matthews: 0.98352 |  0:06:25s
epoch 14 | loss: 0.04073 | val_0_matthews: 0.98384 |  0:06:53s

Early stopping occurred at epoch 14 with best_epoch = 

[I 2024-08-27 05:14:16,031] Trial 12 finished with value: 0.9842938780784607 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.025968805038611074, 'PRETRAIN_RATIO': 0.38234469186296965, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.481263370029012, 'N_INDEPENDENT': 4, 'N_SHARED': 4, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.02853 | val_0_unsup_loss_numpy: 0.7715399861335754|  0:00:28s
epoch 1  | loss: 0.72774 | val_0_unsup_loss_numpy: 0.5714600086212158|  0:00:57s
epoch 2  | loss: 0.57478 | val_0_unsup_loss_numpy: 0.4307200014591217|  0:01:26s
epoch 3  | loss: 0.47881 | val_0_unsup_loss_numpy: 0.3632600009441376|  0:01:56s
epoch 4  | loss: 0.40961 | val_0_unsup_loss_numpy: 0.32210999727249146|  0:02:25s
epoch 5  | loss: 0.35436 | val_0_unsup_loss_numpy: 0.30619001388549805|  0:02:55s
epoch 6  | loss: 0.31414 | val_0_unsup_loss_numpy: 0.33831000328063965|  0:03:24s
epoch 7  | loss: 0.28149 | val_0_unsup_loss_numpy: 0.36531999707221985|  0:03:53s
epoch 8  | loss: 0.25087 | val_0_unsup_loss_numpy: 0.40062999725341797|  0:04:23s
epoch 9  | loss: 0.22842 | val_0_unsup_loss_numpy: 0.4408699870109558|  0:04:52s

Early stopping occurred at epoch 9 with best_epoch = 5 and best_val_0_unsup_loss_numpy = 0.30619001388549805




epoch 0  | loss: 0.0851  | val_0_matthews: 0.98121 |  0:00:26s
epoch 1  | loss: 0.0438  | val_0_matthews: 0.98307 |  0:00:52s
epoch 2  | loss: 0.04199 | val_0_matthews: 0.9834  |  0:01:18s
epoch 3  | loss: 0.04124 | val_0_matthews: 0.98356 |  0:01:43s
epoch 4  | loss: 0.04073 | val_0_matthews: 0.98368 |  0:02:09s
epoch 5  | loss: 0.04007 | val_0_matthews: 0.98408 |  0:02:35s
epoch 6  | loss: 0.03956 | val_0_matthews: 0.98388 |  0:03:00s
epoch 7  | loss: 0.03929 | val_0_matthews: 0.98402 |  0:03:26s
epoch 8  | loss: 0.03896 | val_0_matthews: 0.98411 |  0:03:53s
epoch 9  | loss: 0.03872 | val_0_matthews: 0.98402 |  0:04:19s
epoch 10 | loss: 0.03847 | val_0_matthews: 0.984   |  0:04:45s
epoch 11 | loss: 0.03824 | val_0_matthews: 0.98422 |  0:05:11s
epoch 12 | loss: 0.03796 | val_0_matthews: 0.9841  |  0:05:36s
epoch 13 | loss: 0.03778 | val_0_matthews: 0.98394 |  0:06:02s
epoch 14 | loss: 0.03757 | val_0_matthews: 0.98413 |  0:06:28s
epoch 15 | loss: 0.03737 | val_0_matthews: 0.98394 |  0

[I 2024-08-27 05:34:12,962] Trial 13 finished with value: 0.9842190146446228 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.005663005549620107, 'PRETRAIN_RATIO': 0.4145877830917317, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.314498325935449, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 16384, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.86406 | val_0_unsup_loss_numpy: 0.7875900268554688|  0:00:22s
epoch 1  | loss: 0.61296 | val_0_unsup_loss_numpy: 0.547569990158081|  0:00:46s
epoch 2  | loss: 0.48765 | val_0_unsup_loss_numpy: 0.39010998606681824|  0:01:09s
epoch 3  | loss: 0.41894 | val_0_unsup_loss_numpy: 0.32256999611854553|  0:01:32s
epoch 4  | loss: 0.38644 | val_0_unsup_loss_numpy: 0.2819499969482422|  0:01:56s
epoch 5  | loss: 0.33843 | val_0_unsup_loss_numpy: 0.2743400037288666|  0:02:19s
epoch 6  | loss: 0.31698 | val_0_unsup_loss_numpy: 0.27160000801086426|  0:02:43s
epoch 7  | loss: 0.28844 | val_0_unsup_loss_numpy: 0.2817099988460541|  0:03:06s
epoch 8  | loss: 0.27667 | val_0_unsup_loss_numpy: 0.28543999791145325|  0:03:30s
epoch 9  | loss: 0.2514  | val_0_unsup_loss_numpy: 0.31718000769615173|  0:03:53s
epoch 10 | loss: 0.2515  | val_0_unsup_loss_numpy: 0.31922000646591187|  0:04:17s

Early stopping occurred at epoch 10 with best_epoch = 6 and best_val_0_unsup_loss_numpy = 0.27160000801



epoch 0  | loss: 0.18932 | val_0_matthews: 0.97031 |  0:00:20s
epoch 1  | loss: 0.05311 | val_0_matthews: 0.97771 |  0:00:41s
epoch 2  | loss: 0.04784 | val_0_matthews: 0.97986 |  0:01:02s
epoch 3  | loss: 0.04563 | val_0_matthews: 0.98031 |  0:01:23s
epoch 4  | loss: 0.04418 | val_0_matthews: 0.98172 |  0:01:44s
epoch 5  | loss: 0.04254 | val_0_matthews: 0.98247 |  0:02:05s
epoch 6  | loss: 0.04158 | val_0_matthews: 0.98285 |  0:02:26s
epoch 7  | loss: 0.04098 | val_0_matthews: 0.98311 |  0:02:46s
epoch 8  | loss: 0.04041 | val_0_matthews: 0.98303 |  0:03:07s
epoch 9  | loss: 0.04005 | val_0_matthews: 0.98327 |  0:03:27s
epoch 10 | loss: 0.03961 | val_0_matthews: 0.9832  |  0:03:48s
epoch 11 | loss: 0.03984 | val_0_matthews: 0.98333 |  0:04:09s
epoch 12 | loss: 0.03921 | val_0_matthews: 0.98376 |  0:04:29s
epoch 13 | loss: 0.03885 | val_0_matthews: 0.98387 |  0:04:50s
epoch 14 | loss: 0.03855 | val_0_matthews: 0.98392 |  0:05:11s
epoch 15 | loss: 0.03828 | val_0_matthews: 0.98361 |  0

[I 2024-08-27 05:52:32,553] Trial 14 finished with value: 0.9839200377464294 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.03020337936732595, 'PRETRAIN_RATIO': 0.25579773143272894, 'N_D': 48, 'N_STEPS': 5, 'GAMMA': 1.1254722608882584, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.98079 | val_0_unsup_loss_numpy: 0.994949996471405|  0:00:39s
epoch 1  | loss: 0.9215  | val_0_unsup_loss_numpy: 0.9115099906921387|  0:01:18s
epoch 2  | loss: 0.8723  | val_0_unsup_loss_numpy: 0.7656000256538391|  0:01:57s
epoch 3  | loss: 0.82822 | val_0_unsup_loss_numpy: 0.6930199861526489|  0:02:36s
epoch 4  | loss: 0.78736 | val_0_unsup_loss_numpy: 0.6645399928092957|  0:03:15s
epoch 5  | loss: 0.75671 | val_0_unsup_loss_numpy: 0.6672000288963318|  0:03:54s
epoch 6  | loss: 0.7307  | val_0_unsup_loss_numpy: 0.6423100233078003|  0:04:33s
epoch 7  | loss: 0.71351 | val_0_unsup_loss_numpy: 0.6371099948883057|  0:05:12s
epoch 8  | loss: 0.6991  | val_0_unsup_loss_numpy: 0.626579999923706|  0:05:51s
epoch 9  | loss: 0.68629 | val_0_unsup_loss_numpy: 0.6345199942588806|  0:06:30s
epoch 10 | loss: 0.67342 | val_0_unsup_loss_numpy: 0.6355699896812439|  0:07:09s
epoch 11 | loss: 0.66514 | val_0_unsup_loss_numpy: 0.6053500175476074|  0:07:49s
epoch 12 | loss: 0.6573  | val



epoch 0  | loss: 0.11366 | val_0_matthews: 0.96235 |  0:00:32s
epoch 1  | loss: 0.06115 | val_0_matthews: 0.97373 |  0:01:04s
epoch 2  | loss: 0.05028 | val_0_matthews: 0.98025 |  0:01:37s
epoch 3  | loss: 0.04563 | val_0_matthews: 0.98131 |  0:02:10s
epoch 4  | loss: 0.04733 | val_0_matthews: 0.97999 |  0:02:42s
epoch 5  | loss: 0.04495 | val_0_matthews: 0.982   |  0:03:14s
epoch 6  | loss: 0.04266 | val_0_matthews: 0.98245 |  0:03:47s
epoch 7  | loss: 0.04193 | val_0_matthews: 0.98275 |  0:04:19s
epoch 8  | loss: 0.04149 | val_0_matthews: 0.98282 |  0:04:51s
epoch 9  | loss: 0.04103 | val_0_matthews: 0.98292 |  0:05:24s
epoch 10 | loss: 0.0406  | val_0_matthews: 0.98332 |  0:05:57s
epoch 11 | loss: 0.04026 | val_0_matthews: 0.98348 |  0:06:29s
epoch 12 | loss: 0.03991 | val_0_matthews: 0.98359 |  0:07:01s
epoch 13 | loss: 0.03968 | val_0_matthews: 0.98378 |  0:07:34s
epoch 14 | loss: 0.03953 | val_0_matthews: 0.98366 |  0:08:06s
epoch 15 | loss: 0.03934 | val_0_matthews: 0.98359 |  0

[I 2024-08-27 06:29:13,342] Trial 15 finished with value: 0.9840121269226074 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.016702256181798356, 'PRETRAIN_RATIO': 0.7999868571697746, 'N_D': 32, 'N_STEPS': 4, 'GAMMA': 1.64827666817602, 'N_INDEPENDENT': 3, 'N_SHARED': 3, 'BATCH_SIZE': 16384, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 9.23465 | val_0_unsup_loss_numpy: 0.8072999715805054|  0:00:29s
epoch 1  | loss: 0.71453 | val_0_unsup_loss_numpy: 0.542930006980896|  0:00:59s
epoch 2  | loss: 0.53268 | val_0_unsup_loss_numpy: 0.39699000120162964|  0:01:29s
epoch 3  | loss: 0.42271 | val_0_unsup_loss_numpy: 0.29596999287605286|  0:01:59s
epoch 4  | loss: 0.35228 | val_0_unsup_loss_numpy: 0.26221001148223877|  0:02:29s
epoch 5  | loss: 0.3033  | val_0_unsup_loss_numpy: 0.22846999764442444|  0:02:59s
epoch 6  | loss: 0.2632  | val_0_unsup_loss_numpy: 0.20924000442028046|  0:03:29s
epoch 7  | loss: 0.23299 | val_0_unsup_loss_numpy: 0.1987600028514862|  0:03:59s
epoch 8  | loss: 0.20974 | val_0_unsup_loss_numpy: 0.19131000339984894|  0:04:29s
epoch 9  | loss: 0.19598 | val_0_unsup_loss_numpy: 0.18630999326705933|  0:04:59s
epoch 10 | loss: 0.17608 | val_0_unsup_loss_numpy: 0.18352000415325165|  0:05:29s
epoch 11 | loss: 0.17861 | val_0_unsup_loss_numpy: 0.19267000257968903|  0:05:59s
epoch 12 | loss: 0.1



epoch 0  | loss: 0.09371 | val_0_matthews: 0.98021 |  0:00:26s
epoch 1  | loss: 0.04438 | val_0_matthews: 0.98243 |  0:00:53s
epoch 2  | loss: 0.0426  | val_0_matthews: 0.98295 |  0:01:19s
epoch 3  | loss: 0.04126 | val_0_matthews: 0.98362 |  0:01:46s
epoch 4  | loss: 0.04069 | val_0_matthews: 0.98339 |  0:02:12s
epoch 5  | loss: 0.04007 | val_0_matthews: 0.98396 |  0:02:39s
epoch 6  | loss: 0.03975 | val_0_matthews: 0.98397 |  0:03:05s
epoch 7  | loss: 0.03941 | val_0_matthews: 0.984   |  0:03:31s
epoch 8  | loss: 0.03908 | val_0_matthews: 0.98401 |  0:03:58s
epoch 9  | loss: 0.03902 | val_0_matthews: 0.98401 |  0:04:25s
epoch 10 | loss: 0.03905 | val_0_matthews: 0.98396 |  0:04:51s
epoch 11 | loss: 0.03866 | val_0_matthews: 0.98422 |  0:05:18s
epoch 12 | loss: 0.03823 | val_0_matthews: 0.98423 |  0:05:44s
epoch 13 | loss: 0.03809 | val_0_matthews: 0.98404 |  0:06:11s
epoch 14 | loss: 0.03807 | val_0_matthews: 0.98427 |  0:06:37s
epoch 15 | loss: 0.03794 | val_0_matthews: 0.98421 |  0

[I 2024-08-27 06:56:52,872] Trial 16 finished with value: 0.9843766093254089 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.006194136925369498, 'PRETRAIN_RATIO': 0.33362877660988904, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.3807046932303289, 'N_INDEPENDENT': 5, 'N_SHARED': 4, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 8.00532 | val_0_unsup_loss_numpy: 0.862309992313385|  0:00:29s
epoch 1  | loss: 0.78945 | val_0_unsup_loss_numpy: 0.6297000050544739|  0:00:58s
epoch 2  | loss: 0.67418 | val_0_unsup_loss_numpy: 0.521340012550354|  0:01:27s
epoch 3  | loss: 0.59338 | val_0_unsup_loss_numpy: 0.44604000449180603|  0:01:56s
epoch 4  | loss: 0.5322  | val_0_unsup_loss_numpy: 0.3994300067424774|  0:02:25s
epoch 5  | loss: 0.48579 | val_0_unsup_loss_numpy: 0.37817999720573425|  0:02:55s
epoch 6  | loss: 0.44691 | val_0_unsup_loss_numpy: 0.3656199872493744|  0:03:24s
epoch 7  | loss: 0.4157  | val_0_unsup_loss_numpy: 0.35229000449180603|  0:03:53s
epoch 8  | loss: 0.38778 | val_0_unsup_loss_numpy: 0.3381800055503845|  0:04:22s
epoch 9  | loss: 0.36287 | val_0_unsup_loss_numpy: 0.3262600004673004|  0:04:52s
epoch 10 | loss: 0.34287 | val_0_unsup_loss_numpy: 0.31325000524520874|  0:05:21s
epoch 11 | loss: 0.3248  | val_0_unsup_loss_numpy: 0.30542001128196716|  0:05:50s
epoch 12 | loss: 0.30982 



epoch 0  | loss: 0.11895 | val_0_matthews: 0.97862 |  0:00:25s
epoch 1  | loss: 0.0469  | val_0_matthews: 0.98021 |  0:00:50s
epoch 2  | loss: 0.04411 | val_0_matthews: 0.98171 |  0:01:15s
epoch 3  | loss: 0.04297 | val_0_matthews: 0.98218 |  0:01:40s
epoch 4  | loss: 0.04317 | val_0_matthews: 0.98128 |  0:02:05s
epoch 5  | loss: 0.04294 | val_0_matthews: 0.98244 |  0:02:30s
epoch 6  | loss: 0.04166 | val_0_matthews: 0.98272 |  0:02:55s
epoch 7  | loss: 0.04096 | val_0_matthews: 0.98295 |  0:03:20s
epoch 8  | loss: 0.04041 | val_0_matthews: 0.98323 |  0:03:45s
epoch 9  | loss: 0.04021 | val_0_matthews: 0.98341 |  0:04:11s
epoch 10 | loss: 0.03986 | val_0_matthews: 0.98336 |  0:04:36s
epoch 11 | loss: 0.03973 | val_0_matthews: 0.98335 |  0:05:01s
epoch 12 | loss: 0.03954 | val_0_matthews: 0.98358 |  0:05:25s
epoch 13 | loss: 0.03926 | val_0_matthews: 0.98359 |  0:05:50s
epoch 14 | loss: 0.0391  | val_0_matthews: 0.98374 |  0:06:15s
epoch 15 | loss: 0.03894 | val_0_matthews: 0.98376 |  0

[I 2024-08-27 07:32:49,789] Trial 17 finished with value: 0.9840087890625 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.003180533161346018, 'PRETRAIN_RATIO': 0.3297562337226995, 'N_D': 48, 'N_STEPS': 6, 'GAMMA': 1.0972649445534441, 'N_INDEPENDENT': 5, 'N_SHARED': 2, 'BATCH_SIZE': 16384, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 11.62206| val_0_unsup_loss_numpy: 0.9739400148391724|  0:00:30s
epoch 1  | loss: 0.92402 | val_0_unsup_loss_numpy: 0.8657600283622742|  0:01:00s
epoch 2  | loss: 0.85179 | val_0_unsup_loss_numpy: 0.8179000020027161|  0:01:29s
epoch 3  | loss: 0.79086 | val_0_unsup_loss_numpy: 0.7864099740982056|  0:02:00s
epoch 4  | loss: 0.73529 | val_0_unsup_loss_numpy: 0.6825000047683716|  0:02:30s
epoch 5  | loss: 0.68799 | val_0_unsup_loss_numpy: 0.6933900117874146|  0:03:00s
epoch 6  | loss: 0.64596 | val_0_unsup_loss_numpy: 0.6715099811553955|  0:03:30s
epoch 7  | loss: 0.61394 | val_0_unsup_loss_numpy: 0.5893300175666809|  0:04:00s
epoch 8  | loss: 0.5861  | val_0_unsup_loss_numpy: 0.5406100153923035|  0:04:30s
epoch 9  | loss: 0.56593 | val_0_unsup_loss_numpy: 0.5891100168228149|  0:05:00s
epoch 10 | loss: 0.54475 | val_0_unsup_loss_numpy: 0.5513899922370911|  0:05:30s
epoch 11 | loss: 0.53533 | val_0_unsup_loss_numpy: 0.5340499877929688|  0:06:01s
epoch 12 | loss: 0.51769 | v



epoch 0  | loss: 0.15094 | val_0_matthews: 0.96653 |  0:00:25s
epoch 1  | loss: 0.05742 | val_0_matthews: 0.97486 |  0:00:51s
epoch 2  | loss: 0.05723 | val_0_matthews: 0.97328 |  0:01:18s
epoch 3  | loss: 0.05805 | val_0_matthews: 0.97516 |  0:01:44s
epoch 4  | loss: 0.05528 | val_0_matthews: 0.97681 |  0:02:10s
epoch 5  | loss: 0.05172 | val_0_matthews: 0.97734 |  0:02:37s
epoch 6  | loss: 0.04934 | val_0_matthews: 0.97832 |  0:03:03s
epoch 7  | loss: 0.04788 | val_0_matthews: 0.97907 |  0:03:29s
epoch 8  | loss: 0.0465  | val_0_matthews: 0.9795  |  0:03:55s
epoch 9  | loss: 0.04642 | val_0_matthews: 0.97938 |  0:04:21s
epoch 10 | loss: 0.04631 | val_0_matthews: 0.98015 |  0:04:47s
epoch 11 | loss: 0.04465 | val_0_matthews: 0.97931 |  0:05:13s
epoch 12 | loss: 0.04449 | val_0_matthews: 0.98103 |  0:05:39s
epoch 13 | loss: 0.04326 | val_0_matthews: 0.98123 |  0:06:05s
epoch 14 | loss: 0.04403 | val_0_matthews: 0.97932 |  0:06:32s
epoch 15 | loss: 0.04529 | val_0_matthews: 0.98112 |  0

[I 2024-08-27 08:08:26,271] Trial 18 finished with value: 0.9820302128791809 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.002587392612860012, 'PRETRAIN_RATIO': 0.35164311333848935, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.3476519687005202, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 20480, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 5.50048 | val_0_unsup_loss_numpy: 0.5698999762535095|  0:00:32s
epoch 1  | loss: 0.49871 | val_0_unsup_loss_numpy: 0.4354200065135956|  0:01:04s
epoch 2  | loss: 0.37099 | val_0_unsup_loss_numpy: 0.38791000843048096|  0:01:37s
epoch 3  | loss: 0.30307 | val_0_unsup_loss_numpy: 0.35683000087738037|  0:02:09s
epoch 4  | loss: 0.2547  | val_0_unsup_loss_numpy: 0.3292900025844574|  0:02:41s
epoch 5  | loss: 0.22038 | val_0_unsup_loss_numpy: 0.31845998764038086|  0:03:14s
epoch 6  | loss: 0.19443 | val_0_unsup_loss_numpy: 0.31453999876976013|  0:03:46s
epoch 7  | loss: 0.17399 | val_0_unsup_loss_numpy: 0.3158999979496002|  0:04:19s
epoch 8  | loss: 0.15836 | val_0_unsup_loss_numpy: 0.31641000509262085|  0:04:51s
epoch 9  | loss: 0.14737 | val_0_unsup_loss_numpy: 0.32106998562812805|  0:05:24s
epoch 10 | loss: 0.13627 | val_0_unsup_loss_numpy: 0.3256399929523468|  0:05:57s

Early stopping occurred at epoch 10 with best_epoch = 6 and best_val_0_unsup_loss_numpy = 0.3145399987



epoch 0  | loss: 0.09526 | val_0_matthews: 0.98048 |  0:00:28s
epoch 1  | loss: 0.04516 | val_0_matthews: 0.9824  |  0:00:56s
epoch 2  | loss: 0.04281 | val_0_matthews: 0.98312 |  0:01:25s
epoch 3  | loss: 0.04304 | val_0_matthews: 0.98216 |  0:01:53s
epoch 4  | loss: 0.04227 | val_0_matthews: 0.98335 |  0:02:21s
epoch 5  | loss: 0.04132 | val_0_matthews: 0.98285 |  0:02:49s
epoch 6  | loss: 0.04096 | val_0_matthews: 0.98382 |  0:03:19s
epoch 7  | loss: 0.04048 | val_0_matthews: 0.98375 |  0:03:48s
epoch 8  | loss: 0.03981 | val_0_matthews: 0.98386 |  0:04:16s
epoch 9  | loss: 0.0395  | val_0_matthews: 0.98405 |  0:04:45s
epoch 10 | loss: 0.03918 | val_0_matthews: 0.98399 |  0:05:13s
epoch 11 | loss: 0.03912 | val_0_matthews: 0.98404 |  0:05:41s
epoch 12 | loss: 0.03887 | val_0_matthews: 0.98409 |  0:06:09s
epoch 13 | loss: 0.03865 | val_0_matthews: 0.98418 |  0:06:37s
epoch 14 | loss: 0.03855 | val_0_matthews: 0.98414 |  0:07:06s
epoch 15 | loss: 0.03839 | val_0_matthews: 0.98418 |  0

[I 2024-08-27 08:50:09,481] Trial 19 finished with value: 0.9842379093170166 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.005628031527250257, 'PRETRAIN_RATIO': 0.2331451385076096, 'N_D': 48, 'N_STEPS': 6, 'GAMMA': 1.2184648236988673, 'N_INDEPENDENT': 3, 'N_SHARED': 5, 'BATCH_SIZE': 12288, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 11.75081| val_0_unsup_loss_numpy: 0.9845399856567383|  0:00:34s
epoch 1  | loss: 0.91169 | val_0_unsup_loss_numpy: 0.79721999168396|  0:01:07s
epoch 2  | loss: 0.78269 | val_0_unsup_loss_numpy: 0.6720600128173828|  0:01:41s
epoch 3  | loss: 0.69418 | val_0_unsup_loss_numpy: 0.6045899987220764|  0:02:15s
epoch 4  | loss: 0.63268 | val_0_unsup_loss_numpy: 0.5675600171089172|  0:02:49s
epoch 5  | loss: 0.58798 | val_0_unsup_loss_numpy: 0.5561599731445312|  0:03:23s
epoch 6  | loss: 0.55497 | val_0_unsup_loss_numpy: 0.5737000107765198|  0:03:57s
epoch 7  | loss: 0.52715 | val_0_unsup_loss_numpy: 0.6079999804496765|  0:04:31s
epoch 8  | loss: 0.51073 | val_0_unsup_loss_numpy: 0.6695299744606018|  0:05:05s
epoch 9  | loss: 0.49011 | val_0_unsup_loss_numpy: 0.7868800163269043|  0:05:39s

Early stopping occurred at epoch 9 with best_epoch = 5 and best_val_0_unsup_loss_numpy = 0.5561599731445312




epoch 0  | loss: 0.18006 | val_0_matthews: 0.96785 |  0:00:29s
epoch 1  | loss: 0.0536  | val_0_matthews: 0.97715 |  0:00:58s
epoch 2  | loss: 0.04891 | val_0_matthews: 0.9788  |  0:01:28s
epoch 3  | loss: 0.04693 | val_0_matthews: 0.97994 |  0:01:58s
epoch 4  | loss: 0.04592 | val_0_matthews: 0.98066 |  0:02:28s
epoch 5  | loss: 0.04543 | val_0_matthews: 0.9807  |  0:02:58s
epoch 6  | loss: 0.04436 | val_0_matthews: 0.98118 |  0:03:27s
epoch 7  | loss: 0.04399 | val_0_matthews: 0.98055 |  0:03:57s
epoch 8  | loss: 0.04488 | val_0_matthews: 0.98143 |  0:04:26s
epoch 9  | loss: 0.04331 | val_0_matthews: 0.98161 |  0:04:56s
epoch 10 | loss: 0.04447 | val_0_matthews: 0.98135 |  0:05:26s
epoch 11 | loss: 0.04334 | val_0_matthews: 0.98208 |  0:05:55s
epoch 12 | loss: 0.04211 | val_0_matthews: 0.98183 |  0:06:25s
epoch 13 | loss: 0.04147 | val_0_matthews: 0.98258 |  0:06:55s
epoch 14 | loss: 0.04112 | val_0_matthews: 0.98243 |  0:07:24s
epoch 15 | loss: 0.04156 | val_0_matthews: 0.98012 |  0

[I 2024-08-27 09:21:34,340] Trial 20 finished with value: 0.9834973216056824 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.002886713387191909, 'PRETRAIN_RATIO': 0.29575047788146064, 'N_D': 32, 'N_STEPS': 5, 'GAMMA': 1.0775137388645561, 'N_INDEPENDENT': 5, 'N_SHARED': 4, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.83658 | val_0_unsup_loss_numpy: 0.9282799959182739|  0:00:20s
epoch 1  | loss: 0.82694 | val_0_unsup_loss_numpy: 0.7345899939537048|  0:00:40s
epoch 2  | loss: 0.70008 | val_0_unsup_loss_numpy: 0.585349977016449|  0:01:01s
epoch 3  | loss: 0.60786 | val_0_unsup_loss_numpy: 0.4814099967479706|  0:01:21s
epoch 4  | loss: 0.53784 | val_0_unsup_loss_numpy: 0.42239001393318176|  0:01:42s
epoch 5  | loss: 0.49509 | val_0_unsup_loss_numpy: 0.3740299940109253|  0:02:03s
epoch 6  | loss: 0.44296 | val_0_unsup_loss_numpy: 0.35749998688697815|  0:02:24s
epoch 7  | loss: 0.40546 | val_0_unsup_loss_numpy: 0.3252499997615814|  0:02:45s
epoch 8  | loss: 0.37231 | val_0_unsup_loss_numpy: 0.3297500014305115|  0:03:06s
epoch 9  | loss: 0.34095 | val_0_unsup_loss_numpy: 0.3359000086784363|  0:03:28s
epoch 10 | loss: 0.31434 | val_0_unsup_loss_numpy: 0.31637999415397644|  0:03:48s
epoch 11 | loss: 0.29246 | val_0_unsup_loss_numpy: 0.32308998703956604|  0:04:09s
epoch 12 | loss: 0.28176 



epoch 0  | loss: 0.13996 | val_0_matthews: 0.97133 |  0:00:18s
epoch 1  | loss: 0.04806 | val_0_matthews: 0.97998 |  0:00:37s
epoch 2  | loss: 0.04417 | val_0_matthews: 0.98156 |  0:00:55s
epoch 3  | loss: 0.0432  | val_0_matthews: 0.98214 |  0:01:13s
epoch 4  | loss: 0.0419  | val_0_matthews: 0.98243 |  0:01:31s
epoch 5  | loss: 0.04127 | val_0_matthews: 0.98281 |  0:01:49s
epoch 6  | loss: 0.04079 | val_0_matthews: 0.98311 |  0:02:07s
epoch 7  | loss: 0.04027 | val_0_matthews: 0.98333 |  0:02:26s
epoch 8  | loss: 0.04011 | val_0_matthews: 0.98348 |  0:02:44s
epoch 9  | loss: 0.03989 | val_0_matthews: 0.98329 |  0:03:03s
epoch 10 | loss: 0.03961 | val_0_matthews: 0.98359 |  0:03:21s
epoch 11 | loss: 0.03923 | val_0_matthews: 0.98356 |  0:03:40s
epoch 12 | loss: 0.03916 | val_0_matthews: 0.98365 |  0:03:59s
epoch 13 | loss: 0.03891 | val_0_matthews: 0.98358 |  0:04:17s
epoch 14 | loss: 0.0386  | val_0_matthews: 0.98351 |  0:04:36s
epoch 15 | loss: 0.03828 | val_0_matthews: 0.98366 |  0

[I 2024-08-27 09:38:55,848] Trial 21 finished with value: 0.9839977622032166 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.008137908306042444, 'PRETRAIN_RATIO': 0.41627064312560985, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.5464956271137225, 'N_INDEPENDENT': 4, 'N_SHARED': 4, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.76027 | val_0_unsup_loss_numpy: 0.8467100262641907|  0:00:28s
epoch 1  | loss: 0.76409 | val_0_unsup_loss_numpy: 0.7119200229644775|  0:00:56s
epoch 2  | loss: 0.65553 | val_0_unsup_loss_numpy: 0.5343300104141235|  0:01:24s
epoch 3  | loss: 0.56711 | val_0_unsup_loss_numpy: 0.3971099853515625|  0:01:53s
epoch 4  | loss: 0.49105 | val_0_unsup_loss_numpy: 0.34446999430656433|  0:02:21s
epoch 5  | loss: 0.43402 | val_0_unsup_loss_numpy: 0.29377999901771545|  0:02:49s
epoch 6  | loss: 0.38014 | val_0_unsup_loss_numpy: 0.25718000531196594|  0:03:18s
epoch 7  | loss: 0.33492 | val_0_unsup_loss_numpy: 0.24458999931812286|  0:03:46s
epoch 8  | loss: 0.30089 | val_0_unsup_loss_numpy: 0.2597399950027466|  0:04:15s
epoch 9  | loss: 0.27748 | val_0_unsup_loss_numpy: 0.24186000227928162|  0:04:43s
epoch 10 | loss: 0.25017 | val_0_unsup_loss_numpy: 0.25409001111984253|  0:05:11s
epoch 11 | loss: 0.23177 | val_0_unsup_loss_numpy: 0.24449999630451202|  0:05:39s
epoch 12 | loss: 0.21



epoch 0  | loss: 0.24625 | val_0_matthews: 0.97037 |  0:00:24s
epoch 1  | loss: 0.04512 | val_0_matthews: 0.98065 |  0:00:49s
epoch 2  | loss: 0.04253 | val_0_matthews: 0.98241 |  0:01:13s
epoch 3  | loss: 0.04128 | val_0_matthews: 0.98287 |  0:01:37s
epoch 4  | loss: 0.04023 | val_0_matthews: 0.98319 |  0:02:02s
epoch 5  | loss: 0.03955 | val_0_matthews: 0.98379 |  0:02:26s
epoch 6  | loss: 0.03941 | val_0_matthews: 0.98369 |  0:02:51s
epoch 7  | loss: 0.03884 | val_0_matthews: 0.98393 |  0:03:16s
epoch 8  | loss: 0.03868 | val_0_matthews: 0.98338 |  0:03:40s
epoch 9  | loss: 0.03875 | val_0_matthews: 0.98412 |  0:04:04s
epoch 10 | loss: 0.03809 | val_0_matthews: 0.98419 |  0:04:29s
epoch 11 | loss: 0.03778 | val_0_matthews: 0.98401 |  0:04:54s
epoch 12 | loss: 0.03768 | val_0_matthews: 0.98397 |  0:05:18s
epoch 13 | loss: 0.03803 | val_0_matthews: 0.98412 |  0:05:42s
epoch 14 | loss: 0.03749 | val_0_matthews: 0.9841  |  0:06:07s

Early stopping occurred at epoch 14 with best_epoch = 

[I 2024-08-27 10:00:28,955] Trial 22 finished with value: 0.9841939210891724 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.04213174040038784, 'PRETRAIN_RATIO': 0.5599231252169645, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.7757822438304518, 'N_INDEPENDENT': 4, 'N_SHARED': 3, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.99235 | val_0_unsup_loss_numpy: 0.733210027217865|  0:00:47s
epoch 1  | loss: 0.58387 | val_0_unsup_loss_numpy: 0.44315001368522644|  0:01:35s
epoch 2  | loss: 0.40865 | val_0_unsup_loss_numpy: 0.2972399890422821|  0:02:23s
epoch 3  | loss: 0.30066 | val_0_unsup_loss_numpy: 0.24461999535560608|  0:03:11s
epoch 4  | loss: 0.23616 | val_0_unsup_loss_numpy: 0.23799000680446625|  0:03:59s
epoch 5  | loss: 0.1931  | val_0_unsup_loss_numpy: 0.25380998849868774|  0:04:47s
epoch 6  | loss: 0.16319 | val_0_unsup_loss_numpy: 0.2863300144672394|  0:05:35s
epoch 7  | loss: 0.1405  | val_0_unsup_loss_numpy: 0.3037300109863281|  0:06:24s
epoch 8  | loss: 0.12309 | val_0_unsup_loss_numpy: 0.302480012178421|  0:07:12s

Early stopping occurred at epoch 8 with best_epoch = 4 and best_val_0_unsup_loss_numpy = 0.23799000680446625




epoch 0  | loss: 0.10462 | val_0_matthews: 0.98095 |  0:00:41s
epoch 1  | loss: 0.0444  | val_0_matthews: 0.98242 |  0:01:23s
epoch 2  | loss: 0.04269 | val_0_matthews: 0.98291 |  0:02:06s
epoch 3  | loss: 0.04174 | val_0_matthews: 0.98348 |  0:02:48s
epoch 4  | loss: 0.04104 | val_0_matthews: 0.98364 |  0:03:30s
epoch 5  | loss: 0.0405  | val_0_matthews: 0.98367 |  0:04:12s
epoch 6  | loss: 0.04016 | val_0_matthews: 0.98367 |  0:04:54s
epoch 7  | loss: 0.03986 | val_0_matthews: 0.98393 |  0:05:36s
epoch 8  | loss: 0.03958 | val_0_matthews: 0.98394 |  0:06:18s
epoch 9  | loss: 0.03929 | val_0_matthews: 0.98394 |  0:07:00s
epoch 10 | loss: 0.03904 | val_0_matthews: 0.98395 |  0:07:42s
epoch 11 | loss: 0.03891 | val_0_matthews: 0.98402 |  0:08:24s
epoch 12 | loss: 0.0387  | val_0_matthews: 0.98405 |  0:09:06s
epoch 13 | loss: 0.03848 | val_0_matthews: 0.98403 |  0:09:49s
epoch 14 | loss: 0.03832 | val_0_matthews: 0.98409 |  0:10:31s
epoch 15 | loss: 0.03817 | val_0_matthews: 0.98403 |  0

[I 2024-08-27 10:27:07,193] Trial 23 finished with value: 0.9843036532402039 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.016447022052991624, 'PRETRAIN_RATIO': 0.2636290023151164, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.390536856512239, 'N_INDEPENDENT': 5, 'N_SHARED': 4, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.88988 | val_0_unsup_loss_numpy: 0.7562800049781799|  0:00:37s
epoch 1  | loss: 0.68051 | val_0_unsup_loss_numpy: 0.6021299958229065|  0:01:15s
epoch 2  | loss: 0.55001 | val_0_unsup_loss_numpy: 0.46070998907089233|  0:01:52s
epoch 3  | loss: 0.47704 | val_0_unsup_loss_numpy: 0.3342300057411194|  0:02:30s
epoch 4  | loss: 0.4023  | val_0_unsup_loss_numpy: 0.3099200129508972|  0:03:08s
epoch 5  | loss: 0.36104 | val_0_unsup_loss_numpy: 0.3105100095272064|  0:03:46s
epoch 6  | loss: 0.33349 | val_0_unsup_loss_numpy: 0.324290007352829|  0:04:24s
epoch 7  | loss: 0.30589 | val_0_unsup_loss_numpy: 0.32592999935150146|  0:05:02s
epoch 8  | loss: 0.28655 | val_0_unsup_loss_numpy: 0.33652999997138977|  0:05:40s

Early stopping occurred at epoch 8 with best_epoch = 4 and best_val_0_unsup_loss_numpy = 0.3099200129508972




epoch 0  | loss: 0.24378 | val_0_matthews: 0.97412 |  0:00:33s
epoch 1  | loss: 0.04693 | val_0_matthews: 0.98099 |  0:01:06s
epoch 2  | loss: 0.04268 | val_0_matthews: 0.9823  |  0:01:39s
epoch 3  | loss: 0.04125 | val_0_matthews: 0.9826  |  0:02:12s
epoch 4  | loss: 0.04    | val_0_matthews: 0.98329 |  0:02:45s
epoch 5  | loss: 0.03951 | val_0_matthews: 0.98333 |  0:03:18s
epoch 6  | loss: 0.03939 | val_0_matthews: 0.98342 |  0:03:52s
epoch 7  | loss: 0.03867 | val_0_matthews: 0.98387 |  0:04:25s
epoch 8  | loss: 0.0382  | val_0_matthews: 0.98374 |  0:04:58s
epoch 9  | loss: 0.03784 | val_0_matthews: 0.98387 |  0:05:31s
epoch 10 | loss: 0.03757 | val_0_matthews: 0.98405 |  0:06:04s
epoch 11 | loss: 0.03748 | val_0_matthews: 0.98381 |  0:06:37s
epoch 12 | loss: 0.03727 | val_0_matthews: 0.9839  |  0:07:11s
epoch 13 | loss: 0.03726 | val_0_matthews: 0.98415 |  0:07:44s
epoch 14 | loss: 0.03691 | val_0_matthews: 0.98402 |  0:08:17s
epoch 15 | loss: 0.03685 | val_0_matthews: 0.98406 |  0

[I 2024-08-27 10:49:23,749] Trial 24 finished with value: 0.9841451644897461 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.035928624802915046, 'PRETRAIN_RATIO': 0.3156414411687494, 'N_D': 48, 'N_STEPS': 6, 'GAMMA': 1.5195229376764352, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.10735 | val_0_unsup_loss_numpy: 0.7707599997520447|  0:00:28s
epoch 1  | loss: 0.57459 | val_0_unsup_loss_numpy: 0.47446998953819275|  0:00:56s
epoch 2  | loss: 0.43367 | val_0_unsup_loss_numpy: 0.28696000576019287|  0:01:24s
epoch 3  | loss: 0.35658 | val_0_unsup_loss_numpy: 0.23989999294281006|  0:01:53s
epoch 4  | loss: 0.29232 | val_0_unsup_loss_numpy: 0.20919999480247498|  0:02:20s
epoch 5  | loss: 0.29192 | val_0_unsup_loss_numpy: 0.22116999328136444|  0:02:49s
epoch 6  | loss: 0.25777 | val_0_unsup_loss_numpy: 0.17839999496936798|  0:03:17s
epoch 7  | loss: 0.2151  | val_0_unsup_loss_numpy: 0.2323800027370453|  0:03:45s
epoch 8  | loss: 0.19394 | val_0_unsup_loss_numpy: 0.16356000304222107|  0:04:13s
epoch 9  | loss: 0.17707 | val_0_unsup_loss_numpy: 0.16322000324726105|  0:04:41s
epoch 10 | loss: 0.17615 | val_0_unsup_loss_numpy: 0.1505099982023239|  0:05:09s
epoch 11 | loss: 0.15935 | val_0_unsup_loss_numpy: 0.1624699980020523|  0:05:37s
epoch 12 | loss: 0.1



epoch 0  | loss: 0.22018 | val_0_matthews: 0.97981 |  0:00:24s
epoch 1  | loss: 0.04327 | val_0_matthews: 0.98229 |  0:00:49s
epoch 2  | loss: 0.04106 | val_0_matthews: 0.98304 |  0:01:13s
epoch 3  | loss: 0.03971 | val_0_matthews: 0.98344 |  0:01:38s
epoch 4  | loss: 0.03921 | val_0_matthews: 0.9837  |  0:02:03s
epoch 5  | loss: 0.03882 | val_0_matthews: 0.98367 |  0:02:29s
epoch 6  | loss: 0.03821 | val_0_matthews: 0.98384 |  0:02:54s
epoch 7  | loss: 0.03797 | val_0_matthews: 0.98415 |  0:03:19s
epoch 8  | loss: 0.03759 | val_0_matthews: 0.98405 |  0:03:43s
epoch 9  | loss: 0.03725 | val_0_matthews: 0.98393 |  0:04:08s
epoch 10 | loss: 0.03741 | val_0_matthews: 0.98429 |  0:04:33s
epoch 11 | loss: 0.03696 | val_0_matthews: 0.98409 |  0:04:57s
epoch 12 | loss: 0.03671 | val_0_matthews: 0.9841  |  0:05:22s
epoch 13 | loss: 0.03661 | val_0_matthews: 0.98404 |  0:05:47s
epoch 14 | loss: 0.03623 | val_0_matthews: 0.9841  |  0:06:12s

Early stopping occurred at epoch 14 with best_epoch = 

[I 2024-08-27 11:11:01,435] Trial 25 finished with value: 0.9842923283576965 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.06159356759916198, 'PRETRAIN_RATIO': 0.37210990281860357, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.7847484731883285, 'N_INDEPENDENT': 5, 'N_SHARED': 4, 'BATCH_SIZE': 20480, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.1787  | val_0_unsup_loss_numpy: 0.8519600033760071|  0:00:33s
epoch 1  | loss: 0.77526 | val_0_unsup_loss_numpy: 0.6941499710083008|  0:01:06s
epoch 2  | loss: 0.63317 | val_0_unsup_loss_numpy: 0.5088099837303162|  0:01:40s
epoch 3  | loss: 0.53492 | val_0_unsup_loss_numpy: 0.41280001401901245|  0:02:13s
epoch 4  | loss: 0.46067 | val_0_unsup_loss_numpy: 0.3429200053215027|  0:02:46s
epoch 5  | loss: 0.40096 | val_0_unsup_loss_numpy: 0.3046700060367584|  0:03:20s
epoch 6  | loss: 0.35724 | val_0_unsup_loss_numpy: 0.2782000005245209|  0:03:53s
epoch 7  | loss: 0.32411 | val_0_unsup_loss_numpy: 0.2681199908256531|  0:04:26s
epoch 8  | loss: 0.29796 | val_0_unsup_loss_numpy: 0.2576099932193756|  0:04:59s
epoch 9  | loss: 0.2803  | val_0_unsup_loss_numpy: 0.2536500096321106|  0:05:33s
epoch 10 | loss: 0.26116 | val_0_unsup_loss_numpy: 0.24539999663829803|  0:06:06s
epoch 11 | loss: 0.2445  | val_0_unsup_loss_numpy: 0.24834999442100525|  0:06:39s
epoch 12 | loss: 0.23103 



epoch 0  | loss: 0.10939 | val_0_matthews: 0.98049 |  0:00:27s
epoch 1  | loss: 0.04374 | val_0_matthews: 0.98276 |  0:00:55s
epoch 2  | loss: 0.04231 | val_0_matthews: 0.98328 |  0:01:22s
epoch 3  | loss: 0.04127 | val_0_matthews: 0.98369 |  0:01:50s
epoch 4  | loss: 0.0407  | val_0_matthews: 0.98349 |  0:02:18s
epoch 5  | loss: 0.04013 | val_0_matthews: 0.98393 |  0:02:45s
epoch 6  | loss: 0.03976 | val_0_matthews: 0.98375 |  0:03:13s
epoch 7  | loss: 0.03926 | val_0_matthews: 0.98395 |  0:03:41s
epoch 8  | loss: 0.03928 | val_0_matthews: 0.98383 |  0:04:08s
epoch 9  | loss: 0.03922 | val_0_matthews: 0.98399 |  0:04:36s
epoch 10 | loss: 0.03862 | val_0_matthews: 0.98398 |  0:05:04s
epoch 11 | loss: 0.0382  | val_0_matthews: 0.98421 |  0:05:32s
epoch 12 | loss: 0.03794 | val_0_matthews: 0.98414 |  0:06:00s
epoch 13 | loss: 0.03784 | val_0_matthews: 0.98412 |  0:06:27s
epoch 14 | loss: 0.0377  | val_0_matthews: 0.98403 |  0:06:54s
epoch 15 | loss: 0.03758 | val_0_matthews: 0.98422 |  0

[I 2024-08-27 11:36:19,108] Trial 26 finished with value: 0.9842337965965271 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.02093579859503632, 'PRETRAIN_RATIO': 0.459717202950811, 'N_D': 48, 'N_STEPS': 4, 'GAMMA': 1.4126584525314252, 'N_INDEPENDENT': 3, 'N_SHARED': 2, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 5.36365 | val_0_unsup_loss_numpy: 0.9721199870109558|  0:00:20s
epoch 1  | loss: 0.87385 | val_0_unsup_loss_numpy: 0.8472499847412109|  0:00:41s
epoch 2  | loss: 0.78632 | val_0_unsup_loss_numpy: 0.7091100215911865|  0:01:01s
epoch 3  | loss: 0.71754 | val_0_unsup_loss_numpy: 0.5979499816894531|  0:01:21s
epoch 4  | loss: 0.6548  | val_0_unsup_loss_numpy: 0.517769992351532|  0:01:42s
epoch 5  | loss: 0.61976 | val_0_unsup_loss_numpy: 0.49202001094818115|  0:02:02s
epoch 6  | loss: 0.56071 | val_0_unsup_loss_numpy: 0.42941001057624817|  0:02:22s
epoch 7  | loss: 0.52038 | val_0_unsup_loss_numpy: 0.40015000104904175|  0:02:42s
epoch 8  | loss: 0.48726 | val_0_unsup_loss_numpy: 0.3816699981689453|  0:03:03s
epoch 9  | loss: 0.46093 | val_0_unsup_loss_numpy: 0.35561999678611755|  0:03:24s
epoch 10 | loss: 0.43837 | val_0_unsup_loss_numpy: 0.34292998909950256|  0:03:45s
epoch 11 | loss: 0.42918 | val_0_unsup_loss_numpy: 0.3467099964618683|  0:04:05s
epoch 12 | loss: 0.40764



epoch 0  | loss: 0.09849 | val_0_matthews: 0.96098 |  0:00:17s
epoch 1  | loss: 0.04353 | val_0_matthews: 0.98207 |  0:00:35s
epoch 2  | loss: 0.04211 | val_0_matthews: 0.98285 |  0:00:52s
epoch 3  | loss: 0.04137 | val_0_matthews: 0.98308 |  0:01:10s
epoch 4  | loss: 0.04081 | val_0_matthews: 0.98346 |  0:01:28s
epoch 5  | loss: 0.04028 | val_0_matthews: 0.98364 |  0:01:46s
epoch 6  | loss: 0.04012 | val_0_matthews: 0.98378 |  0:02:04s
epoch 7  | loss: 0.03989 | val_0_matthews: 0.98392 |  0:02:22s
epoch 8  | loss: 0.03954 | val_0_matthews: 0.98386 |  0:02:40s
epoch 9  | loss: 0.03921 | val_0_matthews: 0.98387 |  0:02:58s
epoch 10 | loss: 0.03899 | val_0_matthews: 0.9841  |  0:03:16s
epoch 11 | loss: 0.0388  | val_0_matthews: 0.98415 |  0:03:34s
epoch 12 | loss: 0.03852 | val_0_matthews: 0.984   |  0:03:52s
epoch 13 | loss: 0.03849 | val_0_matthews: 0.98416 |  0:04:10s
epoch 14 | loss: 0.03823 | val_0_matthews: 0.98417 |  0:04:27s
epoch 15 | loss: 0.03811 | val_0_matthews: 0.9841  |  0

[I 2024-08-27 12:00:04,695] Trial 27 finished with value: 0.9844347834587097 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.009455017760220272, 'PRETRAIN_RATIO': 0.6069394203276622, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.5536294653496454, 'N_INDEPENDENT': 4, 'N_SHARED': 3, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 3.60178 | val_0_unsup_loss_numpy: 0.8803799748420715|  0:00:25s
epoch 1  | loss: 0.81728 | val_0_unsup_loss_numpy: 0.7169100046157837|  0:00:52s
epoch 2  | loss: 0.69587 | val_0_unsup_loss_numpy: 0.5940999984741211|  0:01:18s
epoch 3  | loss: 0.61379 | val_0_unsup_loss_numpy: 0.5241000056266785|  0:01:44s
epoch 4  | loss: 0.56117 | val_0_unsup_loss_numpy: 0.46682000160217285|  0:02:10s
epoch 5  | loss: 0.51841 | val_0_unsup_loss_numpy: 0.4895800054073334|  0:02:36s
epoch 6  | loss: 0.48661 | val_0_unsup_loss_numpy: 0.5471199750900269|  0:03:03s
epoch 7  | loss: 0.47349 | val_0_unsup_loss_numpy: 0.4704500138759613|  0:03:29s
epoch 8  | loss: 0.43813 | val_0_unsup_loss_numpy: 0.5678600072860718|  0:03:55s

Early stopping occurred at epoch 8 with best_epoch = 4 and best_val_0_unsup_loss_numpy = 0.46682000160217285




epoch 0  | loss: 0.10925 | val_0_matthews: 0.97452 |  0:00:22s
epoch 1  | loss: 0.0501  | val_0_matthews: 0.97964 |  0:00:44s
epoch 2  | loss: 0.04612 | val_0_matthews: 0.98083 |  0:01:06s
epoch 3  | loss: 0.04459 | val_0_matthews: 0.98176 |  0:01:29s
epoch 4  | loss: 0.04316 | val_0_matthews: 0.98162 |  0:01:51s
epoch 5  | loss: 0.04223 | val_0_matthews: 0.98236 |  0:02:14s
epoch 6  | loss: 0.04169 | val_0_matthews: 0.98284 |  0:02:37s
epoch 7  | loss: 0.04076 | val_0_matthews: 0.98321 |  0:03:00s
epoch 8  | loss: 0.04022 | val_0_matthews: 0.98334 |  0:03:22s
epoch 9  | loss: 0.04    | val_0_matthews: 0.98355 |  0:03:44s
epoch 10 | loss: 0.03979 | val_0_matthews: 0.98358 |  0:04:06s
epoch 11 | loss: 0.0398  | val_0_matthews: 0.98358 |  0:04:29s
epoch 12 | loss: 0.03922 | val_0_matthews: 0.9839  |  0:04:51s
epoch 13 | loss: 0.03897 | val_0_matthews: 0.98378 |  0:05:14s
epoch 14 | loss: 0.0388  | val_0_matthews: 0.9838  |  0:05:37s
epoch 15 | loss: 0.03875 | val_0_matthews: 0.98387 |  0

[I 2024-08-27 12:19:27,842] Trial 28 finished with value: 0.9841285347938538 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.008921861634883652, 'PRETRAIN_RATIO': 0.6733576106932886, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.2098961350480093, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 16384, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 10.93691| val_0_unsup_loss_numpy: 0.9609500169754028|  0:00:23s
epoch 1  | loss: 0.85688 | val_0_unsup_loss_numpy: 0.7841399908065796|  0:00:46s
epoch 2  | loss: 0.74852 | val_0_unsup_loss_numpy: 0.6861699819564819|  0:01:10s
epoch 3  | loss: 0.69735 | val_0_unsup_loss_numpy: 0.625249981880188|  0:01:33s
epoch 4  | loss: 0.6528  | val_0_unsup_loss_numpy: 0.6212499737739563|  0:01:56s
epoch 5  | loss: 0.619   | val_0_unsup_loss_numpy: 0.5776299834251404|  0:02:19s
epoch 6  | loss: 0.58391 | val_0_unsup_loss_numpy: 0.6214100122451782|  0:02:43s
epoch 7  | loss: 0.5485  | val_0_unsup_loss_numpy: 0.655019998550415|  0:03:07s
epoch 8  | loss: 0.51588 | val_0_unsup_loss_numpy: 0.49553000926971436|  0:03:30s
epoch 9  | loss: 0.48959 | val_0_unsup_loss_numpy: 0.4390200078487396|  0:03:54s
epoch 10 | loss: 0.46017 | val_0_unsup_loss_numpy: 0.40136000514030457|  0:04:18s
epoch 11 | loss: 0.43659 | val_0_unsup_loss_numpy: 0.3949100077152252|  0:04:41s
epoch 12 | loss: 0.41229 | v



epoch 0  | loss: 0.1633  | val_0_matthews: 0.96676 |  0:00:20s
epoch 1  | loss: 0.05357 | val_0_matthews: 0.97744 |  0:00:40s
epoch 2  | loss: 0.04944 | val_0_matthews: 0.97867 |  0:01:00s
epoch 3  | loss: 0.04781 | val_0_matthews: 0.98005 |  0:01:20s
epoch 4  | loss: 0.04635 | val_0_matthews: 0.97981 |  0:01:40s
epoch 5  | loss: 0.04774 | val_0_matthews: 0.98091 |  0:01:59s
epoch 6  | loss: 0.04397 | val_0_matthews: 0.98191 |  0:02:19s
epoch 7  | loss: 0.04319 | val_0_matthews: 0.98219 |  0:02:39s
epoch 8  | loss: 0.04179 | val_0_matthews: 0.98232 |  0:02:59s
epoch 9  | loss: 0.04221 | val_0_matthews: 0.982   |  0:03:19s
epoch 10 | loss: 0.04267 | val_0_matthews: 0.98202 |  0:03:40s
epoch 11 | loss: 0.0419  | val_0_matthews: 0.98262 |  0:04:00s
epoch 12 | loss: 0.04224 | val_0_matthews: 0.98225 |  0:04:20s
epoch 13 | loss: 0.04322 | val_0_matthews: 0.98253 |  0:04:40s
epoch 14 | loss: 0.04169 | val_0_matthews: 0.98253 |  0:05:01s
epoch 15 | loss: 0.04238 | val_0_matthews: 0.98227 |  0

[I 2024-08-27 12:38:36,009] Trial 29 finished with value: 0.9826229214668274 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.004183517701398176, 'PRETRAIN_RATIO': 0.2508010046415097, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.5812725309054223, 'N_INDEPENDENT': 4, 'N_SHARED': 3, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 14.0488 | val_0_unsup_loss_numpy: 1.0481699705123901|  0:00:19s
epoch 1  | loss: 0.99806 | val_0_unsup_loss_numpy: 0.9459900259971619|  0:00:39s
epoch 2  | loss: 0.94354 | val_0_unsup_loss_numpy: 0.8970800042152405|  0:00:59s
epoch 3  | loss: 0.90061 | val_0_unsup_loss_numpy: 0.8422200083732605|  0:01:19s
epoch 4  | loss: 0.85475 | val_0_unsup_loss_numpy: 0.7900699973106384|  0:01:39s
epoch 5  | loss: 0.8157  | val_0_unsup_loss_numpy: 0.7494699954986572|  0:01:58s
epoch 6  | loss: 0.78286 | val_0_unsup_loss_numpy: 0.707859992980957|  0:02:18s
epoch 7  | loss: 0.75315 | val_0_unsup_loss_numpy: 0.6827800273895264|  0:02:38s
epoch 8  | loss: 0.72526 | val_0_unsup_loss_numpy: 0.645330011844635|  0:02:57s
epoch 9  | loss: 0.69979 | val_0_unsup_loss_numpy: 0.6269299983978271|  0:03:17s
epoch 10 | loss: 0.67738 | val_0_unsup_loss_numpy: 0.6130200028419495|  0:03:37s
epoch 11 | loss: 0.65773 | val_0_unsup_loss_numpy: 0.602940022945404|  0:03:56s
epoch 12 | loss: 0.64017 | val_



epoch 0  | loss: 0.14152 | val_0_matthews: 0.95367 |  0:00:17s
epoch 1  | loss: 0.04965 | val_0_matthews: 0.97874 |  0:00:34s
epoch 2  | loss: 0.046   | val_0_matthews: 0.98072 |  0:00:51s
epoch 3  | loss: 0.0445  | val_0_matthews: 0.98131 |  0:01:07s
epoch 4  | loss: 0.04331 | val_0_matthews: 0.98207 |  0:01:24s
epoch 5  | loss: 0.04232 | val_0_matthews: 0.98253 |  0:01:41s
epoch 6  | loss: 0.04173 | val_0_matthews: 0.98275 |  0:01:58s
epoch 7  | loss: 0.04125 | val_0_matthews: 0.983   |  0:02:16s
epoch 8  | loss: 0.04087 | val_0_matthews: 0.98294 |  0:02:33s
epoch 9  | loss: 0.04053 | val_0_matthews: 0.98314 |  0:02:50s
epoch 10 | loss: 0.0402  | val_0_matthews: 0.98326 |  0:03:07s
epoch 11 | loss: 0.03993 | val_0_matthews: 0.98351 |  0:03:25s
epoch 12 | loss: 0.03971 | val_0_matthews: 0.98331 |  0:03:41s
epoch 13 | loss: 0.03942 | val_0_matthews: 0.98366 |  0:03:58s
epoch 14 | loss: 0.03918 | val_0_matthews: 0.98357 |  0:04:15s
epoch 15 | loss: 0.03906 | val_0_matthews: 0.98378 |  0

[I 2024-08-27 13:03:51,507] Trial 30 finished with value: 0.983896791934967 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.001857076340505676, 'PRETRAIN_RATIO': 0.5960392210993827, 'N_D': 48, 'N_STEPS': 5, 'GAMMA': 1.0535247050588514, 'N_INDEPENDENT': 4, 'N_SHARED': 2, 'BATCH_SIZE': 24576, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.88849 | val_0_unsup_loss_numpy: 0.9936800003051758|  0:00:40s
epoch 1  | loss: 0.94307 | val_0_unsup_loss_numpy: 0.967710018157959|  0:01:20s
epoch 2  | loss: 0.89883 | val_0_unsup_loss_numpy: 0.922249972820282|  0:02:00s
epoch 3  | loss: 0.851   | val_0_unsup_loss_numpy: 0.8320599794387817|  0:02:39s
epoch 4  | loss: 0.806   | val_0_unsup_loss_numpy: 0.7554799914360046|  0:03:19s
epoch 5  | loss: 0.7674  | val_0_unsup_loss_numpy: 0.6835799813270569|  0:03:59s
epoch 6  | loss: 0.732   | val_0_unsup_loss_numpy: 0.633870005607605|  0:04:39s
epoch 7  | loss: 0.70181 | val_0_unsup_loss_numpy: 0.6256300210952759|  0:05:19s
epoch 8  | loss: 0.67506 | val_0_unsup_loss_numpy: 0.5879499912261963|  0:05:59s
epoch 9  | loss: 0.64927 | val_0_unsup_loss_numpy: 0.5675699710845947|  0:06:39s
epoch 10 | loss: 0.62871 | val_0_unsup_loss_numpy: 0.5473099946975708|  0:07:19s
epoch 11 | loss: 0.60796 | val_0_unsup_loss_numpy: 0.5477499961853027|  0:07:59s
epoch 12 | loss: 0.5895  | val_



epoch 0  | loss: 0.18368 | val_0_matthews: 0.81089 |  0:00:34s
epoch 1  | loss: 0.05306 | val_0_matthews: 0.97619 |  0:01:08s
epoch 2  | loss: 0.04782 | val_0_matthews: 0.9796  |  0:01:41s
epoch 3  | loss: 0.04528 | val_0_matthews: 0.98164 |  0:02:15s
epoch 4  | loss: 0.04433 | val_0_matthews: 0.98223 |  0:02:49s
epoch 5  | loss: 0.04272 | val_0_matthews: 0.98283 |  0:03:23s
epoch 6  | loss: 0.04191 | val_0_matthews: 0.98307 |  0:03:57s
epoch 7  | loss: 0.0412  | val_0_matthews: 0.98314 |  0:04:31s
epoch 8  | loss: 0.04073 | val_0_matthews: 0.98345 |  0:05:06s
epoch 9  | loss: 0.0404  | val_0_matthews: 0.98374 |  0:05:40s
epoch 10 | loss: 0.04008 | val_0_matthews: 0.98365 |  0:06:14s
epoch 11 | loss: 0.03979 | val_0_matthews: 0.98374 |  0:06:48s
epoch 12 | loss: 0.03946 | val_0_matthews: 0.98393 |  0:07:21s
epoch 13 | loss: 0.03922 | val_0_matthews: 0.98411 |  0:07:55s
epoch 14 | loss: 0.03913 | val_0_matthews: 0.98406 |  0:08:29s
epoch 15 | loss: 0.0389  | val_0_matthews: 0.98416 |  0

[I 2024-08-27 13:36:59,285] Trial 31 finished with value: 0.9841606020927429 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.0069086029673481285, 'PRETRAIN_RATIO': 0.6849528408268879, 'N_D': 64, 'N_STEPS': 4, 'GAMMA': 1.7123024367803872, 'N_INDEPENDENT': 3, 'N_SHARED': 4, 'BATCH_SIZE': 32768, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 5.32387 | val_0_unsup_loss_numpy: 0.9839699864387512|  0:00:23s
epoch 1  | loss: 0.91502 | val_0_unsup_loss_numpy: 0.9300199747085571|  0:00:48s
epoch 2  | loss: 0.83512 | val_0_unsup_loss_numpy: 0.8297200202941895|  0:01:12s
epoch 3  | loss: 0.76954 | val_0_unsup_loss_numpy: 0.7752299904823303|  0:01:36s
epoch 4  | loss: 0.71546 | val_0_unsup_loss_numpy: 0.6403800249099731|  0:02:00s
epoch 5  | loss: 0.67707 | val_0_unsup_loss_numpy: 0.5692800283432007|  0:02:24s
epoch 6  | loss: 0.64724 | val_0_unsup_loss_numpy: 0.5508999824523926|  0:02:48s
epoch 7  | loss: 0.62085 | val_0_unsup_loss_numpy: 0.5321900248527527|  0:03:13s
epoch 8  | loss: 0.59763 | val_0_unsup_loss_numpy: 0.5166299939155579|  0:03:37s
epoch 9  | loss: 0.57863 | val_0_unsup_loss_numpy: 0.5042399764060974|  0:04:01s
epoch 10 | loss: 0.56141 | val_0_unsup_loss_numpy: 0.5051100254058838|  0:04:25s
epoch 11 | loss: 0.57187 | val_0_unsup_loss_numpy: 0.5547500252723694|  0:04:50s
epoch 12 | loss: 0.55307 | v



epoch 0  | loss: 0.18568 | val_0_matthews: 0.92838 |  0:00:21s
epoch 1  | loss: 0.04965 | val_0_matthews: 0.97821 |  0:00:41s
epoch 2  | loss: 0.04576 | val_0_matthews: 0.98117 |  0:01:02s
epoch 3  | loss: 0.04493 | val_0_matthews: 0.98146 |  0:01:23s
epoch 4  | loss: 0.04359 | val_0_matthews: 0.98211 |  0:01:44s
epoch 5  | loss: 0.04281 | val_0_matthews: 0.98243 |  0:02:05s
epoch 6  | loss: 0.04235 | val_0_matthews: 0.98254 |  0:02:26s
epoch 7  | loss: 0.04138 | val_0_matthews: 0.983   |  0:02:48s
epoch 8  | loss: 0.04093 | val_0_matthews: 0.98271 |  0:03:09s
epoch 9  | loss: 0.04098 | val_0_matthews: 0.98288 |  0:03:30s
epoch 10 | loss: 0.04067 | val_0_matthews: 0.98354 |  0:03:51s
epoch 11 | loss: 0.04002 | val_0_matthews: 0.98348 |  0:04:13s
epoch 12 | loss: 0.03982 | val_0_matthews: 0.98351 |  0:04:34s
epoch 13 | loss: 0.03961 | val_0_matthews: 0.98409 |  0:04:54s
epoch 14 | loss: 0.03945 | val_0_matthews: 0.98361 |  0:05:15s
epoch 15 | loss: 0.03926 | val_0_matthews: 0.9838  |  0

[I 2024-08-27 14:00:49,857] Trial 32 finished with value: 0.9840894341468811 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.0109565890067268, 'PRETRAIN_RATIO': 0.7209800616011073, 'N_D': 48, 'N_STEPS': 5, 'GAMMA': 1.3390487937800193, 'N_INDEPENDENT': 5, 'N_SHARED': 3, 'BATCH_SIZE': 28672, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.2393  | val_0_unsup_loss_numpy: 0.9389399886131287|  0:00:23s
epoch 1  | loss: 0.83843 | val_0_unsup_loss_numpy: 0.8856199979782104|  0:00:47s
epoch 2  | loss: 0.74906 | val_0_unsup_loss_numpy: 0.7801799774169922|  0:01:11s
epoch 3  | loss: 0.68338 | val_0_unsup_loss_numpy: 0.6786999702453613|  0:01:35s
epoch 4  | loss: 0.63212 | val_0_unsup_loss_numpy: 0.5717300176620483|  0:01:59s
epoch 5  | loss: 0.58545 | val_0_unsup_loss_numpy: 0.47661998867988586|  0:02:24s
epoch 6  | loss: 0.54912 | val_0_unsup_loss_numpy: 0.41266998648643494|  0:02:47s
epoch 7  | loss: 0.51342 | val_0_unsup_loss_numpy: 0.38655000925064087|  0:03:11s
epoch 8  | loss: 0.4774  | val_0_unsup_loss_numpy: 0.34558001160621643|  0:03:36s
epoch 9  | loss: 0.44077 | val_0_unsup_loss_numpy: 0.3303599953651428|  0:04:00s
epoch 10 | loss: 0.41297 | val_0_unsup_loss_numpy: 0.32881999015808105|  0:04:25s
epoch 11 | loss: 0.38115 | val_0_unsup_loss_numpy: 0.3193899989128113|  0:04:49s
epoch 12 | loss: 0.3640



epoch 0  | loss: 0.19359 | val_0_matthews: 0.87942 |  0:00:21s
epoch 1  | loss: 0.0466  | val_0_matthews: 0.97729 |  0:00:42s
epoch 2  | loss: 0.04301 | val_0_matthews: 0.98165 |  0:01:03s
epoch 3  | loss: 0.04138 | val_0_matthews: 0.9826  |  0:01:24s
epoch 4  | loss: 0.04046 | val_0_matthews: 0.98284 |  0:01:46s
epoch 5  | loss: 0.03992 | val_0_matthews: 0.98334 |  0:02:07s
epoch 6  | loss: 0.03935 | val_0_matthews: 0.98338 |  0:02:28s
epoch 7  | loss: 0.04166 | val_0_matthews: 0.9825  |  0:02:50s
epoch 8  | loss: 0.0401  | val_0_matthews: 0.983   |  0:03:10s
epoch 9  | loss: 0.03909 | val_0_matthews: 0.98348 |  0:03:31s
epoch 10 | loss: 0.03895 | val_0_matthews: 0.98361 |  0:03:53s
epoch 11 | loss: 0.03909 | val_0_matthews: 0.98185 |  0:04:14s
epoch 12 | loss: 0.04223 | val_0_matthews: 0.98258 |  0:04:35s
epoch 13 | loss: 0.03984 | val_0_matthews: 0.98353 |  0:04:56s
epoch 14 | loss: 0.06556 | val_0_matthews: 0.98152 |  0:05:17s

Early stopping occurred at epoch 14 with best_epoch = 

[I 2024-08-27 14:24:00,770] Trial 33 finished with value: 0.9836074709892273 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.017600148538125884, 'PRETRAIN_RATIO': 0.6239816804467341, 'N_D': 64, 'N_STEPS': 5, 'GAMMA': 1.508324293095727, 'N_INDEPENDENT': 4, 'N_SHARED': 4, 'BATCH_SIZE': 32768, 'VIRT_BATCH_SIZE': 1536}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.45998 | val_0_unsup_loss_numpy: 0.4886600077152252|  0:01:21s
epoch 1  | loss: 0.44507 | val_0_unsup_loss_numpy: 0.2527399957180023|  0:02:42s
epoch 2  | loss: 0.31934 | val_0_unsup_loss_numpy: 0.18241000175476074|  0:04:03s
epoch 3  | loss: 0.25163 | val_0_unsup_loss_numpy: 0.14868000149726868|  0:05:25s
epoch 4  | loss: 0.21411 | val_0_unsup_loss_numpy: 0.1342799961566925|  0:06:46s
epoch 5  | loss: 0.18617 | val_0_unsup_loss_numpy: 0.13583000004291534|  0:08:08s
epoch 6  | loss: 0.17815 | val_0_unsup_loss_numpy: 0.13752000033855438|  0:09:29s
epoch 7  | loss: 0.16414 | val_0_unsup_loss_numpy: 0.14890000224113464|  0:10:51s
epoch 8  | loss: 0.1549  | val_0_unsup_loss_numpy: 0.1345899999141693|  0:12:12s

Early stopping occurred at epoch 8 with best_epoch = 4 and best_val_0_unsup_loss_numpy = 0.1342799961566925




epoch 0  | loss: 0.08544 | val_0_matthews: 0.98083 |  0:01:09s
epoch 1  | loss: 0.04427 | val_0_matthews: 0.9699  |  0:02:19s
epoch 2  | loss: 0.043   | val_0_matthews: 0.98326 |  0:03:29s
epoch 3  | loss: 0.04096 | val_0_matthews: 0.9834  |  0:04:39s
epoch 4  | loss: 0.04011 | val_0_matthews: 0.98317 |  0:05:48s
epoch 5  | loss: 0.03942 | val_0_matthews: 0.98382 |  0:06:58s
epoch 6  | loss: 0.03903 | val_0_matthews: 0.98382 |  0:08:07s
epoch 7  | loss: 0.03922 | val_0_matthews: 0.9839  |  0:09:17s
epoch 8  | loss: 0.03883 | val_0_matthews: 0.98398 |  0:10:26s
epoch 9  | loss: 0.03828 | val_0_matthews: 0.98386 |  0:11:35s
epoch 10 | loss: 0.03808 | val_0_matthews: 0.98402 |  0:12:44s
epoch 11 | loss: 0.0379  | val_0_matthews: 0.98371 |  0:13:54s
epoch 12 | loss: 0.03873 | val_0_matthews: 0.98271 |  0:15:03s
epoch 13 | loss: 0.03892 | val_0_matthews: 0.98401 |  0:16:13s
epoch 14 | loss: 0.03826 | val_0_matthews: 0.98417 |  0:17:22s
epoch 15 | loss: 0.03738 | val_0_matthews: 0.98434 |  0

[I 2024-08-27 15:25:40,260] Trial 34 finished with value: 0.9843671321868896 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.033231158067555076, 'PRETRAIN_RATIO': 0.5249044735209788, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.4340765228296772, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 2.33293 | val_0_unsup_loss_numpy: 0.49215999245643616|  0:01:26s
epoch 1  | loss: 0.43905 | val_0_unsup_loss_numpy: 0.2746100127696991|  0:02:52s
epoch 2  | loss: 0.32454 | val_0_unsup_loss_numpy: 0.2367600053548813|  0:04:19s
epoch 3  | loss: 0.26257 | val_0_unsup_loss_numpy: 0.24063000082969666|  0:05:46s
epoch 4  | loss: 0.22645 | val_0_unsup_loss_numpy: 0.2093999981880188|  0:07:13s
epoch 5  | loss: 0.22041 | val_0_unsup_loss_numpy: 0.1975799947977066|  0:08:40s
epoch 6  | loss: 0.20982 | val_0_unsup_loss_numpy: 0.2429399937391281|  0:10:07s
epoch 7  | loss: 0.18795 | val_0_unsup_loss_numpy: 0.1843400001525879|  0:11:34s
epoch 8  | loss: 0.16872 | val_0_unsup_loss_numpy: 0.19351999461650848|  0:13:01s
epoch 9  | loss: 0.15669 | val_0_unsup_loss_numpy: 0.17419999837875366|  0:14:28s
epoch 10 | loss: 0.14623 | val_0_unsup_loss_numpy: 0.16106000542640686|  0:15:55s
epoch 11 | loss: 0.14098 | val_0_unsup_loss_numpy: 0.17642000317573547|  0:17:22s
epoch 12 | loss: 0.149



epoch 0  | loss: 0.06895 | val_0_matthews: 0.98192 |  0:01:15s
epoch 1  | loss: 0.04434 | val_0_matthews: 0.98305 |  0:02:30s
epoch 2  | loss: 0.04195 | val_0_matthews: 0.98349 |  0:03:46s
epoch 3  | loss: 0.0412  | val_0_matthews: 0.98383 |  0:05:01s
epoch 4  | loss: 0.04026 | val_0_matthews: 0.98384 |  0:06:16s
epoch 5  | loss: 0.04035 | val_0_matthews: 0.98388 |  0:07:32s
epoch 6  | loss: 0.03973 | val_0_matthews: 0.98364 |  0:08:47s
epoch 7  | loss: 0.0397  | val_0_matthews: 0.98366 |  0:10:02s
epoch 8  | loss: 0.0392  | val_0_matthews: 0.98412 |  0:11:18s
epoch 9  | loss: 0.03888 | val_0_matthews: 0.98413 |  0:12:33s
epoch 10 | loss: 0.03871 | val_0_matthews: 0.98394 |  0:13:49s
epoch 11 | loss: 0.03856 | val_0_matthews: 0.98372 |  0:15:05s
epoch 12 | loss: 0.03827 | val_0_matthews: 0.98381 |  0:16:20s
epoch 13 | loss: 0.03824 | val_0_matthews: 0.98353 |  0:17:36s

Early stopping occurred at epoch 13 with best_epoch = 9 and best_val_0_matthews = 0.98413


[I 2024-08-27 16:27:27,806] Trial 35 finished with value: 0.9841291904449463 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.01276935550122239, 'PRETRAIN_RATIO': 0.5419406746097819, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.2925695524248726, 'N_INDEPENDENT': 5, 'N_SHARED': 5, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.23932 | val_0_unsup_loss_numpy: 0.4798400104045868|  0:00:41s
epoch 1  | loss: 0.47351 | val_0_unsup_loss_numpy: 0.3572799861431122|  0:01:22s
epoch 2  | loss: 0.36411 | val_0_unsup_loss_numpy: 0.2829599976539612|  0:02:04s
epoch 3  | loss: 0.30466 | val_0_unsup_loss_numpy: 0.2859100103378296|  0:02:46s
epoch 4  | loss: 0.26769 | val_0_unsup_loss_numpy: 0.29673001170158386|  0:03:28s
epoch 5  | loss: 0.2523  | val_0_unsup_loss_numpy: 0.26131001114845276|  0:04:10s
epoch 6  | loss: 0.23063 | val_0_unsup_loss_numpy: 0.2656799852848053|  0:04:52s
epoch 7  | loss: 0.25872 | val_0_unsup_loss_numpy: 0.27862000465393066|  0:05:33s
epoch 8  | loss: 0.23349 | val_0_unsup_loss_numpy: 0.2698799967765808|  0:06:15s
epoch 9  | loss: 0.21399 | val_0_unsup_loss_numpy: 0.3224300146102905|  0:06:57s

Early stopping occurred at epoch 9 with best_epoch = 5 and best_val_0_unsup_loss_numpy = 0.26131001114845276




epoch 0  | loss: 0.13858 | val_0_matthews: 0.97949 |  0:00:35s
epoch 1  | loss: 0.04701 | val_0_matthews: 0.98145 |  0:01:11s
epoch 2  | loss: 0.06053 | val_0_matthews: 0.88464 |  0:01:48s
epoch 3  | loss: 0.05088 | val_0_matthews: 0.98181 |  0:02:24s
epoch 4  | loss: 0.0429  | val_0_matthews: 0.98197 |  0:03:00s
epoch 5  | loss: 0.04028 | val_0_matthews: 0.98324 |  0:03:36s
epoch 6  | loss: 0.03884 | val_0_matthews: 0.98349 |  0:04:12s
epoch 7  | loss: 0.03845 | val_0_matthews: 0.98376 |  0:04:48s
epoch 8  | loss: 0.03823 | val_0_matthews: 0.98392 |  0:05:24s
epoch 9  | loss: 0.03788 | val_0_matthews: 0.98361 |  0:06:00s
epoch 10 | loss: 0.03761 | val_0_matthews: 0.9839  |  0:06:36s
epoch 11 | loss: 0.03737 | val_0_matthews: 0.98401 |  0:07:12s
epoch 12 | loss: 0.0372  | val_0_matthews: 0.98395 |  0:07:48s
epoch 13 | loss: 0.03694 | val_0_matthews: 0.98397 |  0:08:24s
epoch 14 | loss: 0.0377  | val_0_matthews: 0.98378 |  0:09:00s
epoch 15 | loss: 0.03698 | val_0_matthews: 0.98388 |  0

[I 2024-08-27 17:08:01,727] Trial 36 finished with value: 0.9840121865272522 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.06043109034968528, 'PRETRAIN_RATIO': 0.4888862851092519, 'N_D': 48, 'N_STEPS': 6, 'GAMMA': 1.3816691263021421, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 2048}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 4.89402 | val_0_unsup_loss_numpy: 0.7171099781990051|  0:01:21s
epoch 1  | loss: 0.71874 | val_0_unsup_loss_numpy: 0.5462300181388855|  0:02:42s
epoch 2  | loss: 0.60504 | val_0_unsup_loss_numpy: 0.45263999700546265|  0:04:04s
epoch 3  | loss: 0.53089 | val_0_unsup_loss_numpy: 0.3870599865913391|  0:05:26s
epoch 4  | loss: 0.47427 | val_0_unsup_loss_numpy: 0.35227999091148376|  0:06:47s
epoch 5  | loss: 0.43151 | val_0_unsup_loss_numpy: 0.34251999855041504|  0:08:09s
epoch 6  | loss: 0.40022 | val_0_unsup_loss_numpy: 0.32927000522613525|  0:09:30s
epoch 7  | loss: 0.37938 | val_0_unsup_loss_numpy: 0.3061099946498871|  0:10:50s
epoch 8  | loss: 0.35627 | val_0_unsup_loss_numpy: 0.2902100086212158|  0:12:11s
epoch 9  | loss: 0.33632 | val_0_unsup_loss_numpy: 0.2748500108718872|  0:13:32s
epoch 10 | loss: 0.31481 | val_0_unsup_loss_numpy: 0.2649500072002411|  0:14:53s
epoch 11 | loss: 0.29413 | val_0_unsup_loss_numpy: 0.24883000552654266|  0:16:13s
epoch 12 | loss: 0.2785



epoch 0  | loss: 0.10027 | val_0_matthews: 0.98035 |  0:01:09s
epoch 1  | loss: 0.0466  | val_0_matthews: 0.97883 |  0:02:19s
epoch 2  | loss: 0.04566 | val_0_matthews: 0.98229 |  0:03:28s
epoch 3  | loss: 0.04243 | val_0_matthews: 0.98286 |  0:04:37s
epoch 4  | loss: 0.04162 | val_0_matthews: 0.98275 |  0:05:46s
epoch 5  | loss: 0.04131 | val_0_matthews: 0.983   |  0:06:55s
epoch 6  | loss: 0.04117 | val_0_matthews: 0.98333 |  0:08:04s
epoch 7  | loss: 0.03979 | val_0_matthews: 0.9837  |  0:09:13s
epoch 8  | loss: 0.03985 | val_0_matthews: 0.98351 |  0:10:22s
epoch 9  | loss: 0.03962 | val_0_matthews: 0.98364 |  0:11:32s
epoch 10 | loss: 0.0388  | val_0_matthews: 0.98403 |  0:12:41s
epoch 11 | loss: 0.03825 | val_0_matthews: 0.98402 |  0:13:50s
epoch 12 | loss: 0.03792 | val_0_matthews: 0.984   |  0:14:59s
epoch 13 | loss: 0.03776 | val_0_matthews: 0.98406 |  0:16:09s
epoch 14 | loss: 0.03759 | val_0_matthews: 0.98402 |  0:17:18s
epoch 15 | loss: 0.03742 | val_0_matthews: 0.98417 |  0

[I 2024-08-27 18:27:34,465] Trial 37 finished with value: 0.9841725826263428 and parameters: {'MASK_TYPE': 'sparsemax', 'LR': 0.004336589083005467, 'PRETRAIN_RATIO': 0.20161218651854507, 'N_D': 64, 'N_STEPS': 6, 'GAMMA': 1.4468071120637354, 'N_INDEPENDENT': 4, 'N_SHARED': 5, 'BATCH_SIZE': 8192, 'VIRT_BATCH_SIZE': 512}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 1.57443 | val_0_unsup_loss_numpy: 0.5666599869728088|  0:00:43s
epoch 1  | loss: 0.5268  | val_0_unsup_loss_numpy: 0.4343799948692322|  0:01:26s
epoch 2  | loss: 0.44146 | val_0_unsup_loss_numpy: 0.41635000705718994|  0:02:10s
epoch 3  | loss: 0.39534 | val_0_unsup_loss_numpy: 0.4011000096797943|  0:02:52s
epoch 4  | loss: 0.35592 | val_0_unsup_loss_numpy: 0.4153499901294708|  0:03:34s
epoch 5  | loss: 0.33919 | val_0_unsup_loss_numpy: 0.6183000206947327|  0:04:15s
epoch 6  | loss: 0.32704 | val_0_unsup_loss_numpy: 0.42208999395370483|  0:04:57s
epoch 7  | loss: 0.31253 | val_0_unsup_loss_numpy: 0.41885000467300415|  0:05:38s

Early stopping occurred at epoch 7 with best_epoch = 3 and best_val_0_unsup_loss_numpy = 0.4011000096797943




epoch 0  | loss: 0.12545 | val_0_matthews: 0.97667 |  0:00:34s
epoch 1  | loss: 0.04789 | val_0_matthews: 0.98025 |  0:01:09s
epoch 2  | loss: 0.04458 | val_0_matthews: 0.98159 |  0:01:44s
epoch 3  | loss: 0.04314 | val_0_matthews: 0.98179 |  0:02:19s
epoch 4  | loss: 0.0421  | val_0_matthews: 0.98264 |  0:02:55s
epoch 5  | loss: 0.04129 | val_0_matthews: 0.98294 |  0:03:31s
epoch 6  | loss: 0.04076 | val_0_matthews: 0.98286 |  0:04:07s
epoch 7  | loss: 0.04029 | val_0_matthews: 0.98331 |  0:04:44s
epoch 8  | loss: 0.04024 | val_0_matthews: 0.98345 |  0:05:20s
epoch 9  | loss: 0.04049 | val_0_matthews: 0.98108 |  0:05:56s
epoch 10 | loss: 0.04167 | val_0_matthews: 0.98295 |  0:06:32s
epoch 11 | loss: 0.03968 | val_0_matthews: 0.98338 |  0:07:08s
epoch 12 | loss: 0.04561 | val_0_matthews: 0.98238 |  0:07:43s

Early stopping occurred at epoch 12 with best_epoch = 8 and best_val_0_matthews = 0.98345


[I 2024-08-27 18:57:20,812] Trial 38 finished with value: 0.9834517240524292 and parameters: {'MASK_TYPE': 'entmax', 'LR': 0.03596624603487079, 'PRETRAIN_RATIO': 0.4488193307896435, 'N_D': 32, 'N_STEPS': 6, 'GAMMA': 1.2540991633779877, 'N_INDEPENDENT': 5, 'N_SHARED': 2, 'BATCH_SIZE': 12288, 'VIRT_BATCH_SIZE': 1024}. Best is trial 5 with value: 0.9844775795936584.


epoch 0  | loss: 10.68997| val_0_unsup_loss_numpy: 0.9256200194358826|  0:00:45s
epoch 1  | loss: 0.90927 | val_0_unsup_loss_numpy: 0.800819993019104|  0:01:29s
epoch 2  | loss: 0.83755 | val_0_unsup_loss_numpy: 0.7321799993515015|  0:02:11s
epoch 3  | loss: 0.78804 | val_0_unsup_loss_numpy: 0.6801900267601013|  0:02:53s
epoch 4  | loss: 0.74276 | val_0_unsup_loss_numpy: 0.6269599795341492|  0:03:35s
epoch 5  | loss: 0.70276 | val_0_unsup_loss_numpy: 0.5743200182914734|  0:04:17s
epoch 6  | loss: 0.6666  | val_0_unsup_loss_numpy: 0.5295600295066833|  0:04:59s
epoch 7  | loss: 0.63477 | val_0_unsup_loss_numpy: 0.4880799949169159|  0:05:40s
epoch 8  | loss: 0.60644 | val_0_unsup_loss_numpy: 0.46779999136924744|  0:06:22s
epoch 9  | loss: 0.57965 | val_0_unsup_loss_numpy: 0.4389300048351288|  0:07:04s
epoch 10 | loss: 0.55494 | val_0_unsup_loss_numpy: 0.43615999817848206|  0:07:46s
epoch 11 | loss: 0.53269 | val_0_unsup_loss_numpy: 0.40639999508857727|  0:08:28s
epoch 12 | loss: 0.51163 |

### Submission

In [148]:
# Wrap the raw test frame in the project dataset (test mode) and show how many rows it holds.
dataset_test = Base_Dataset(df_test, is_test=True)
display(len(dataset_test))

# Inference loader: shuffle=False and drop_last=False keep every row, in dataset order.
BATCH_SIZE = 4096
testloader = DataLoader(
    dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=8,
)

# Pass the test batches through `embeddings`; X_test is what the classifier consumes below.
# NOTE(review): y_test is unpacked but not used in this section -- presumably a placeholder for test data.
X_test, y_test = make_embedded(embeddings, testloader)


2077964

In [159]:
# Alternative kept for reference: sub = clf.predict(X_test)
# Hard 0/1 labels from the second predict_proba column, using a custom 0.47 cutoff.
proba_class1 = clf.predict_proba(X_test)[:, 1]
sub = (proba_class1 > 0.47).astype('int')


In [160]:
# Build the submission file: take the `id` index from the sample submission,
# attach the predicted labels, and write the result next to the input data.

# Class index -> label string written to the 'class' column.
idx2class = {0: 'e', 1: 'p'}

# Read through PATH like the train/test CSVs so a changed PATH is honoured here too.
submission = pd.read_csv(PATH+'sample_submission.csv', index_col='id')

# Map predictions to labels BEFORE assigning. Writing the integer array into the
# string column via `.loc[:, 'class'] = sub` and then calling a frame-wide
# `replace(..., inplace=True)` triggered a pandas warning and could rewrite
# unrelated cells. Building the Series against the submission index also fails
# loudly if the number of predictions does not match the number of ids.
labels = pd.Series(np.asarray(sub).ravel(), index=submission.index).map(idx2class)

# Any prediction outside idx2class would become NaN; refuse to write such a file.
assert labels.notna().all(), "predictions contain values outside idx2class"

submission['class'] = labels
submission.to_csv(PATH+'submission.csv')


  submission.loc[:, 'class'] = sub
