In [1]:

from sklearn.datasets import fetch_openml
import numpy as np
from types import SimpleNamespace
from typing import Tuple, List
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
    
adult = fetch_openml(data_id = 1590, data_home='./data_cache')

data = adult.data

le = LabelEncoder()
label = pd.Series(le.fit_transform(adult.target))


category_cols = ['workclass', 'education', 'race', 'sex', "marital-status", "occupation", "relationship", "native-country"]
continuous_cols = [x for x in data.columns if x not in category_cols]

for col in category_cols:
    data[col] = le.fit_transform(data[col])


    
    
temp = None
for col in category_cols:
    oh_values = OneHotEncoder().fit_transform(data[col].values.reshape((-1, 1))).toarray()
    new_cols = [col + "-" + str(i) for i in range(len(data[col].unique()))]
    oh_values = pd.DataFrame(oh_values, columns = new_cols, dtype=np.int8, index=data.index)
    if temp is None:
        temp = oh_values
    else:
        temp = temp.merge(oh_values, left_index=True, right_index=True)

data = data.merge(temp, left_index=True, right_index=True)
data.drop(category_cols, inplace=True, axis=1)

category_cols = temp.columns

scaler = MinMaxScaler()
data[continuous_cols] = scaler.fit_transform(data[continuous_cols])

  warn(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using 

In [2]:
import torch.nn as nn

num_categoricals = len(continuous_cols)
num_continuous = len(continuous_cols)
loss_fn = nn.CrossEntropyLoss
metric =  "accuracy_score"
metric_params = {}
random_seed = 0

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from misc.scorer import BaseScorer


class AccuracyScorer(BaseScorer):
    def __init__(self, metric: str) -> None:
        super().__init__(metric)
    
    def __call__(self, y, y_hat) -> float:
        return self.metric(y, y_hat.argmax(1))

In [4]:
from pl_vime import PLVIME


In [5]:
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(data, label, train_size = 0.7, random_state=random_seed, stratify=label)

X_train, X_unlabeled, y_train, _ = train_test_split(X_train, y_train, train_size = 0.1, random_state=random_seed, stratify=y_train)

In [6]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from data_utils import *

gpus = [1]
n_jobs = 32
max_epochs = 30
batch_size = 512

pretraining_patience = 10
early_stopping_patience = 10

batch_size = 512

def fit_model(
            model,
            data_hparams
    ):
    
    train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
    test_ds = VIMESelfDataset(X_valid, data_hparams, continuous_cols, category_cols)
    
    pl_datamodule = PLDataModule(train_ds, test_ds, batch_size=batch_size)

    model.do_pretraining()

    callbacks = [
        EarlyStopping(
            monitor= 'val_loss', 
            mode = 'min',
            patience = pretraining_patience,
            verbose = False
        )
    ]
    pretraining_path = f'temporary_ckpt_data/pretraining'
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        dirpath=pretraining_path,
        filename='pretraining-{epoch:02d}-{val_f1:.4f}',
        save_top_k=1,
        mode = 'min'
    )

    callbacks.append(checkpoint_callback)

    trainer = Trainer(
                    devices = gpus,
                    accelerator = "cuda" if len(gpus) >= 1 else 'cpu',
                    max_epochs = max_epochs,
                    num_sanity_val_steps = 2,
                    callbacks = callbacks,
    )

    trainer.fit(model, pl_datamodule)
    
    pretraining_path = checkpoint_callback.best_model_path

    model = model.load_from_checkpoint(pretraining_path)

    model.do_finetunning()
    
        
    train_ds = VIMEClassificationDataset(X_train, y_train.values, data_hparams, X_unlabeled, continuous_cols, category_cols)
    test_ds = VIMEClassificationDataset(X_valid, y_valid.values, data_hparams, None, continuous_cols, category_cols)

    pl_datamodule = PLDataModule(train_ds, test_ds, batch_size = batch_size)
        
    callbacks = [
        EarlyStopping(
            monitor= 'val_' + metric, 
            mode = 'max',
            patience = early_stopping_patience,
            verbose = False
        )
    ]

    checkpoint_path = None

    checkpoint_path = f'temporary_ckpt_data/'
    checkpoint_callback = ModelCheckpoint(
        monitor='val_' + metric,
        dirpath=checkpoint_path,
        filename='{epoch:02d}-{val_f1:.4f}',
        save_top_k=1,
        mode = 'max'
    )

    callbacks.append(checkpoint_callback)

    trainer = Trainer(
                    devices = gpus,
                    accelerator = "cuda" if len(gpus) >= 1 else 'cpu',
                    max_epochs = max_epochs,
                    num_sanity_val_steps = 2,
                    callbacks = callbacks,
    )

    trainer.fit(model, pl_datamodule)

    model = model.load_from_checkpoint(checkpoint_callback.best_model_path)
    
    return model

In [7]:
hparams_range = {
    
    'predictor_hidden_dim' : ['suggest_int', ['predictor_hidden_dim', 16, 512]],
    # 'predictor_output_dim' : ['suggest_int', ['emb_dim', 16, 512]],
    
    'p_m' : ["suggest_float", ["p_m", 0.1, 0.9]],
    'alpha1' : ["suggest_float", ["alpha1", 0.1, 5]],
    'alpha2' : ["suggest_float", ["alpha2", 0.1, 5]],
    'beta' : ["suggest_float", ["beta", 0.1, 10]],
    'K' : ["suggest_int", ["K", 2, 20]],


    'lr' : ['suggest_float', ['lr', 0.0001, 0.05]],
    'gamma' : ['suggest_float', ['gamma', 0.1, 0.95]],
    'step_size' : ['suggest_int', ['step_size', 10, 100]],
}

In [8]:
import optuna
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

def objective(      trial: optuna.trial.Trial,
        ) -> float:
        """Objective function for optuna

        Args:
            trial: A object which returns hyperparameters of a model of hyperparameter search trial.
            train_idx: Indices of training data in self.data and self.label.
            test_idx: Indices of test data in self.data and self.label.
            fold_idx: A fold index that denotes which fold under the given k-fold cross validation.
        
        Returns:
            A score of given hyperparameters.
        """
        model_hparams = {
            "encoder_dim" : data.shape[1],
            "predictor_hidden_dim" : None,
            "predictor_output_dim" : 2,
            'alpha1' : None,
            'alpha2' : None,
            'beta' : None,
            'K' : None
        }
        
        data_hparams = {
            "K" : None,
            "p_m" : None
        }
        optim_hparams = {
            "lr" : None
        }
        scheduler_hparams = {
            'gamma' : None,
            'step_size' : None
        }

        for k, v in hparams_range.items():
            if k in model_hparams.keys():
                model_hparams[k] = getattr(trial, v[0])(*v[1])
            if k in data_hparams.keys():
                data_hparams[k] = getattr(trial, v[0])(*v[1])
            if k in optim_hparams.keys():
                optim_hparams[k] = getattr(trial, v[0])(*v[1])
            if k in scheduler_hparams.keys():
                scheduler_hparams[k] = getattr(trial, v[0])(*v[1])

        
        pl_vime = PLVIME(model_hparams, "Adam", optim_hparams, "StepLR", scheduler_hparams, 
        num_categoricals, num_continuous, -1, loss_fn,
        AccuracyScorer("accuracy_score"), random_seed)
        
        pl_vime = fit_model(pl_vime, data_hparams)
        

        trainer = Trainer(
                    devices = gpus,
                    accelerator = "cuda" if len(gpus)>= 1 else 'cpu',
                    max_epochs = max_epochs,
                    num_sanity_val_steps = 2,
                    callbacks = None,
        )
        test_ds = VIMEClassificationDataset(X_valid, y_valid.values, data_hparams, None, continuous_cols, category_cols)
        test_dl = DataLoader(test_ds, batch_size, shuffle=False, sampler = SequentialSampler(test_ds), num_workers=n_jobs)

        preds = trainer.predict(pl_vime, test_dl)

        preds = F.softmax(torch.concat([out.cpu() for out in preds]).squeeze(),dim=1)

        accuracy = accuracy_score(y_valid, preds.argmax(1))

        return accuracy

In [9]:
study = optuna.create_study(direction="maximize",sampler=optuna.samplers.TPESampler(seed=random_seed))
study.optimize(objective, n_trials=10, show_progress_bar=False)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")


trial = study.best_trial

print("  Accuracy: {}".format(trial.value))
print("  Best hyperparameters: ", trial)

[I 2023-12-18 16:33:37,870] A new study created in memory with name: no-name-c012dc04-28cf-40ce-9e1e-92be0cc74293
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0

Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.99it/s, v_num=384, train_loss=3.760, val_loss=3.760]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.99it/s, v_num=384, train_loss=3.760, val_loss=3.760]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 150 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
150 K     Trainable params
0         Non-trainable params
150 K     Total params
0.602     Total estimated model params size (MB)


Epoch 29: 100%|██████████| 67/67 [00:11<00:00,  5.59it/s, v_num=385, train_loss=0.626, train_accuracy_score=0.812, val_accuracy_score=0.836, val_loss=0.602]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:11<00:00,  5.59it/s, v_num=385, train_loss=0.626, train_accuracy_score=0.812, val_accuracy_score=0.836, val_loss=0.602]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 666.54it/s]

[I 2023-12-18 16:42:35,212] Trial 0 finished with value: 0.8394185491025729 and parameters: {'predictor_hidden_dim': 288, 'p_m': 0.6721514930979355, 'alpha1': 3.053540542751055, 'alpha2': 2.769927596684795, 'beta': 4.294182513455157, 'K': 14, 'lr': 0.021935601842008354, 'gamma': 0.8580070506647678, 'step_size': 97}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 100 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
100 K     Trainable params
0         Non-trainable params
100 K     Total params
0.403     Total estimated model params size (MB)


Epoch 16: 100%|██████████| 67/67 [00:05<00:00, 13.28it/s, v_num=387, train_loss=3.400, val_loss=3.400]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 100 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
100 K     Trainable params
0         Non-trainable params
100 K     Total params
0.403     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 67/67 [00:05<00:00, 12.90it/s, v_num=388, train_loss=0.673, train_accuracy_score=0.824, val_accuracy_score=0.814, val_loss=0.661]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 910.76it/s]

[I 2023-12-18 16:45:09,889] Trial 1 finished with value: 0.8334811983894083 and parameters: {'predictor_hidden_dim': 206, 'p_m': 0.7333800304661316, 'alpha1': 2.691585106789232, 'alpha2': 2.8834183493602685, 'beta': 9.263406719097345, 'K': 3, 'lr': 0.004447752055106882, 'gamma': 0.11718563782427686, 'step_size': 85}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0





  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 241 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
241 K     Trainable params
0         Non-trainable params
241 K     Total params
0.968  

Epoch 22: 100%|██████████| 67/67 [00:04<00:00, 14.72it/s, v_num=390, train_loss=5.990, val_loss=6.000]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 241 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
241 K     Trainable params
0         Non-trainable params
241 K     Total params
0.968     Total estimated model params size (MB)


Epoch 11: 100%|██████████| 67/67 [00:12<00:00,  5.28it/s, v_num=391, train_loss=0.662, train_accuracy_score=0.819, val_accuracy_score=0.818, val_loss=0.641]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 909.43it/s] 

[I 2023-12-18 16:50:13,419] Trial 2 finished with value: 0.8344366341363544 and parameters: {'predictor_hidden_dim': 402, 'p_m': 0.7960097185974554, 'alpha1': 4.895229876940544, 'alpha2': 4.015876964661945, 'beta': 4.668645686304025, 'K': 16, 'lr': 0.0060018938508597675, 'gamma': 0.6439328681283952, 'step_size': 23}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 324 K 
1 | pretraining_mask_loss     | BCELoss          | 0  


Epoch 14: 100%|██████████| 67/67 [00:05<00:00, 12.05it/s, v_num=393, train_loss=2.650, val_loss=2.650]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 324 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
324 K     Trainable params
0         Non-trainable params
324 K     Total params
1.299     Total estimated model params size (MB)


Epoch 11: 100%|██████████| 67/67 [00:09<00:00,  6.79it/s, v_num=394, train_loss=0.671, train_accuracy_score=0.807, val_accuracy_score=0.788, val_loss=0.634]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 675.21it/s]

[I 2023-12-18 16:53:49,137] Trial 3 finished with value: 0.824336313382925 and parameters: {'predictor_hidden_dim': 485, 'p_m': 0.5174786574000574, 'alpha1': 2.1318435059535656, 'alpha2': 1.3963224993126724, 'beta': 7.764913525398745, 'K': 10, 'lr': 0.02846485404854556, 'gamma': 0.11597133037090188, 'step_size': 66}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 173 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
173 K     Trainable params
0         Non-trainable params
173 K     Total params
0.694     Total estimated model params size (MB)


Epoch 20: 100%|██████████| 67/67 [00:04<00:00, 13.46it/s, v_num=396, train_loss=5.730, val_loss=5.730]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 173 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
173 K     Trainable params
0         Non-trainable params
173 K     Total params
0.694     Total estimated model params size (MB)


Epoch 12: 100%|██████████| 67/67 [00:09<00:00,  7.16it/s, v_num=397, train_loss=0.678, train_accuracy_score=0.795, val_accuracy_score=0.746, val_loss=0.617]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 1091.21it/s]

[I 2023-12-18 16:57:53,123] Trial 4 finished with value: 0.8014741008667167 and parameters: {'predictor_hidden_dim': 320, 'p_m': 0.5935471974998056, 'alpha1': 4.724365584721658, 'alpha2': 3.440919465607069, 'beta': 3.659128215680482, 'K': 10, 'lr': 0.034911796676770517, 'gamma': 0.15119165088487935, 'step_size': 70}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 196 K 
1 | pretraining_mask_loss     | BCELoss          | 0  


Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.77it/s, v_num=399, train_loss=0.884, val_loss=0.885]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.76it/s, v_num=399, train_loss=0.884, val_loss=0.885]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 196 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
196 K     Trainable params
0         Non-trainable params
196 K     Total params
0.785     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 67/67 [00:11<00:00,  6.02it/s, v_num=400, train_loss=0.542, train_accuracy_score=0.847, val_accuracy_score=0.787, val_loss=0.490]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 640.92it/s]

[I 2023-12-18 17:02:43,184] Trial 5 finished with value: 0.8165563365863646 and parameters: {'predictor_hidden_dim': 349, 'p_m': 0.2683060488590727, 'alpha1': 0.7317388585087812, 'alpha2': 1.6455989195285012, 'beta': 3.700736632331964, 'K': 12, 'lr': 0.021986215521769784, 'gamma': 0.9401177623503422, 'step_size': 19}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0





  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 62.8 K
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
62.8 K    Trainable params
0         Non-trainable params
62.8 K    Total params
0.251  

Epoch 21: 100%|██████████| 67/67 [00:04<00:00, 13.67it/s, v_num=402, train_loss=3.780, val_loss=3.780]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 62.8 K
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
62.8 K    Trainable params
0         Non-trainable params
62.8 K    Total params
0.251     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 67/67 [00:06<00:00,  9.83it/s, v_num=403, train_loss=0.566, train_accuracy_score=0.819, val_accuracy_score=0.781, val_loss=0.519]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 704.04it/s]

[I 2023-12-18 17:06:15,486] Trial 6 finished with value: 0.8060465433699584 and parameters: {'predictor_hidden_dim': 119, 'p_m': 0.22904761430799703, 'alpha1': 3.3002307947804526, 'alpha2': 1.3411288524449325, 'beta': 4.716476651277432, 'K': 6, 'lr': 0.008032582223911433, 'gamma': 0.19381886998965936, 'step_size': 69}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 51.8 K
1 | pretraining_mask_loss     | BCELoss          | 0


Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.41it/s, v_num=405, train_loss=2.170, val_loss=2.170]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:05<00:00, 13.40it/s, v_num=405, train_loss=2.170, val_loss=2.170]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 51.8 K
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
51.8 K    Trainable params
0         Non-trainable params
51.8 K    Total params
0.207     Total estimated model params size (MB)


Epoch 14: 100%|██████████| 67/67 [00:13<00:00,  4.84it/s, v_num=406, train_loss=0.389, train_accuracy_score=0.899, val_accuracy_score=0.815, val_loss=0.405]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 565.88it/s]

[I 2023-12-18 17:12:40,726] Trial 7 finished with value: 0.8186036989012488 and parameters: {'predictor_hidden_dim': 84, 'p_m': 0.2572658893440428, 'alpha1': 1.9067533362387243, 'alpha2': 4.1228668262548815, 'beta': 1.0613026303513067, 'K': 17, 'lr': 0.004895310553908757, 'gamma': 0.9299905452613864, 'step_size': 52}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 342 K 
1 | pretraining_mask_loss     | BCELoss          | 0 


Epoch 14: 100%|██████████| 67/67 [00:05<00:00, 12.44it/s, v_num=408, train_loss=4.470, val_loss=4.470]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 342 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
342 K     Trainable params
0         Non-trainable params
342 K     Total params
1.370     Total estimated model params size (MB)


Epoch 11: 100%|██████████| 67/67 [00:05<00:00, 12.51it/s, v_num=409, train_loss=0.628, train_accuracy_score=0.826, val_accuracy_score=0.798, val_loss=0.593]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 1007.06it/s]

[I 2023-12-18 17:15:26,074] Trial 8 finished with value: 0.8104824950522077 and parameters: {'predictor_hidden_dim': 501, 'p_m': 0.5838764157960368, 'alpha1': 3.7223915390516784, 'alpha2': 0.2920201820461713, 'beta': 2.899788929506455, 'K': 4, 'lr': 0.014877395856355032, 'gamma': 0.20091856111110745, 'step_size': 38}. Best is trial 0 with value: 0.8394185491025729.
Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 108 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.436     Total estimated model params size (MB)


Epoch 18: 100%|██████████| 67/67 [00:04<00:00, 13.65it/s, v_num=411, train_loss=3.780, val_loss=3.790]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 108 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.436     Total estimated model params size (MB)


Epoch 17: 100%|██████████| 67/67 [00:09<00:00,  6.84it/s, v_num=412, train_loss=0.447, train_accuracy_score=0.854, val_accuracy_score=0.787, val_loss=0.433]


Global seed set to 0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 650.81it/s]

[I 2023-12-18 17:20:31,636] Trial 9 finished with value: 0.8075479424008736 and parameters: {'predictor_hidden_dim': 221, 'p_m': 0.1513179970790275, 'alpha1': 3.4931133849130975, 'alpha2': 2.8763471256122184, 'beta': 2.72735596030051, 'K': 11, 'lr': 0.00478763148684624, 'gamma': 0.5895545212227524, 'step_size': 94}. Best is trial 0 with value: 0.8394185491025729.



Number of finished trials:  10
Best trial:
  Accuracy: 0.8394185491025729
  Best hyperparameters:  FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.8394185491025729], datetime_start=datetime.datetime(2023, 12, 18, 16, 33, 37, 871513), datetime_complete=datetime.datetime(2023, 12, 18, 16, 42, 35, 212155), params={'predictor_hidden_dim': 288, 'p_m': 0.6721514930979355, 'alpha1': 3.053540542751055, 'alpha2': 2.769927596684795, 'beta': 4.294182513455157, 'K': 14, 'lr': 0.021935601842008354, 'gamma': 0.8580070506647678, 'step_size': 97}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'predictor_hidden_dim': IntDistribution(high=512, log=False, low=16, step=1), 'p_m': FloatDistribution(high=0.9, log=False, low=0.1, step=None), 'alpha1': FloatDistribution(high=5.0, log=False, low=0.1, step=None), 'alpha2': FloatDistribution(high=5.0, log=False, low=0.1, step=None), 'beta': FloatDistribution(high=10.0, log=False, low=0.1, step=None), 'K': IntDistribution(high

In [18]:
model_hparams = {
            "encoder_dim" : data.shape[1],
            "predictor_hidden_dim" : None,
            "predictor_output_dim" : 2,
            'alpha1' : None,
            'alpha2' : None,
            'beta' : None,
            'K' : None
        }
        
data_hparams = {
        "K" : None,
        "p_m" : None
}
optim_hparams = {
        "lr" : None
}
scheduler_hparams = {
        'gamma' : None,
        'step_size' : None
}

for k, v in study.best_trial.params.items():
        if k in model_hparams.keys():
                model_hparams[k] = study.best_trial.params[k]
        if k in data_hparams.keys():
                data_hparams[k] = study.best_trial.params[k]
        if k in optim_hparams.keys():
                optim_hparams[k] = study.best_trial.params[k]
        if k in scheduler_hparams.keys():
                scheduler_hparams[k] = study.best_trial.params[k]


pl_vime = PLVIME(model_hparams, "Adam", optim_hparams, "StepLR", scheduler_hparams, 
        num_categoricals, num_continuous, -1, loss_fn,
        AccuracyScorer("accuracy_score"), random_seed)
pl_vime = fit_model(pl_vime, data_hparams)

Global seed set to 0
  train_ds = VIMESelfDataset(X_train.append(X_unlabeled), data_hparams, continuous_cols, category_cols)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 150 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
150 K     Trainable params
0         Non-trainable params
150 K    

Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.58it/s, v_num=414, train_loss=3.760, val_loss=3.760]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:04<00:00, 13.58it/s, v_num=414, train_loss=3.760, val_loss=3.760]


Global seed set to 0
  X = X.append(unlabeled_data)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                      | Type             | Params
---------------------------------------------------------------
0 | model                     | VIME             | 150 K 
1 | pretraining_mask_loss     | BCELoss          | 0     
2 | pretraining_feature_loss1 | CrossEntropyLoss | 0     
3 | pretraining_feature_loss2 | MSELoss          | 0     
4 | consistency_loss          | MSELoss          | 0     
5 | loss_fn                   | CrossEntropyLoss | 0     
---------------------------------------------------------------
150 K     Trainable params
0         Non-trainable params
150 K     Total params
0.602     Total estimated model params size (MB)


Epoch 29: 100%|██████████| 67/67 [00:12<00:00,  5.41it/s, v_num=415, train_loss=0.626, train_accuracy_score=0.812, val_accuracy_score=0.836, val_loss=0.602]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 67/67 [00:12<00:00,  5.41it/s, v_num=415, train_loss=0.626, train_accuracy_score=0.812, val_accuracy_score=0.836, val_loss=0.602]


Global seed set to 0


In [19]:
import torch.nn.functional as F

trainer = Trainer(
                    devices = gpus,
                    accelerator = "cuda" if len(gpus)>= 1 else 'cpu',
                    max_epochs = max_epochs,
                    num_sanity_val_steps = 2,
                    callbacks = None,
    )
test_ds = VIMEClassificationDataset(X_valid, y_valid.values, data_hparams, None, continuous_cols, category_cols)
test_dl = DataLoader(test_ds, batch_size, shuffle=False, sampler = SequentialSampler(test_ds), num_workers=n_jobs)

preds = trainer.predict(pl_vime, test_dl)

preds = F.softmax(torch.concat([out.cpu() for out in preds]).squeeze(),dim=1)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 668.86it/s]


In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(y_valid, preds.argmax(1))

0.8394185491025729