In [None]:
# Drive version:
# !pip uninstall TabularExperimentTrackerClient --y
# !pip install git+https://github.com/DanielWarfield1/TabularExperimentTrackerClient
# !pip uninstall NeuralNetworksTrainingPackage --y
# !pip install git+https://github.com/Bartosz-G/NeuralNetworksTrainingPackage

In [2]:
%pip install git+https://github.com/DanielWarfield1/TabularExperimentTrackerClient
%pip install git+https://github.com/Bartosz-G/NeuralNetworksTrainingPackage

Collecting git+https://github.com/DanielWarfield1/TabularExperimentTrackerClient
  Cloning https://github.com/DanielWarfield1/TabularExperimentTrackerClient to /tmp/pip-req-build-vd8ci520
  Running command git clone --filter=blob:none --quiet https://github.com/DanielWarfield1/TabularExperimentTrackerClient /tmp/pip-req-build-vd8ci520
  Resolved https://github.com/DanielWarfield1/TabularExperimentTrackerClient to commit 780933411aa8c4e394478a26dec4a39447f8f012
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting openml (from TabularExperimentTrackerClient==0.0.1)
  Using cached openml-0.14.1-py3-none-any.whl
Collecting liac-arff>=2.4.0 (from openml->TabularExperimentTrackerClient==0.0.1)
  Using cached liac_arff-2.5.0-py3-none-any.whl
Collecting xmltodict (from openml->TabularExperimentTrackerClient==0.0.1)
  Using cached xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Collecting minio (from openml->TabularExperimentTrackerClient==0.0.1)
  Using cached minio-7.1.16-py3-none-

In [3]:
import numpy as np
import pandas as pd
import sklearn
import torch
import time

In [None]:
# Importing NeuralNetworksPackage
#from NeuralNetworksTrainingPackage.dataprocessing.basic_pre_processing import *
# Global namespace:
# Hyperparams(**run_info.get('hyp'))
# CustomDataset(X, Y, relative_indices, tensor_type=torch.float)
# CustomDatasetWrapper(train_dataset, relative_indices)
# kfold_dataloader_iterator(dataset, n_splits=10, random_state=42, batch_size=16, shuffle_kfold=True, shuffle_dataloader=True)
# get_train_test(X, y, categorical_indicator, attribute_names, train_split, seed)
# get_train_val_test
#from NeuralNetworksTrainingPackage.metrics.basic_metrics import *
# test(args, model, device, test_loader, test_set_name)
# train(args, model, device, train_loader, optimizer, epoch, anneal, alpha=1)
# calc_metrics(y, yhat, is_categorical)

In [4]:
import os
from TabularExperimentTrackerClient.ExperimentClient import ExperimentClient

# Location of the credential files, relative to the notebook's working directory.
path =  '../creds/'
creds_orch_file = "creds-orch.txt"
creds_openml_file = "creds-openml.txt"

# Orchestrator credentials: line 1 is the user name, line 2 is the secret.
with open(os.path.join(path, creds_orch_file), 'r') as orch_handle:
    lines = orch_handle.readlines()
if len(lines) < 2:
    raise ValueError(
        f"{creds_orch_file} must contain the orchestrator name on line 1 and the secret on line 2"
    )
orchname = lines[0].strip()
orchsecret = lines[1].strip()

# OpenML API key: strip surrounding whitespace so a trailing newline in the
# file does not become part of the key (the orchestrator values are stripped too).
with open(os.path.join(path, creds_openml_file), "r") as openml_handle:
    openMLAPIKey = openml_handle.read().strip()

ex = ExperimentClient(verbose = True)

ex.define_orch_cred(orchname, orchsecret)
ex.define_opml_cred(openMLAPIKey)

# Colab version
# ex.define_opml_cred_drive('/My Drive/research/non-homogenous-data/creds/creds-openml.txt')
# ex.define_orch_cred_drive('bart', '/My Drive//research/non-homogenous-data/creds/creds-colab.txt')

# 1. Data pre-processing

In [5]:
from NeuralNetworksTrainingPackage.event_handler import dataPreProcessingEventEmitter
from NeuralNetworksTrainingPackage.dataprocessing.basic_pre_processing import filterCardinality, quantileTransform, trunctuateData
from NeuralNetworksTrainingPackage.dataprocessing.basic_pre_processing import oneHotEncodePredictors, oneHotEncodeTargets, toDataFrame

# Pipeline settings: value handed to trunctuateData and the output distribution
# handed to quantileTransform.
n_sample = 20000
quantile_transform_distribution='uniform'

data_pre_processing = dataPreProcessingEventEmitter()


def _register(step, tasks=('regression', 'classification')):
    """Attach `step` to the emitter for each task name in `tasks` and hand it back."""
    for task_name in tasks:
        data_pre_processing.add_pre_processing(task_name, step)
    return step


# Steps are created and registered one after another, in pipeline order.
# The same step instance is shared by every task it is registered for.
filter_cardinality = _register(filterCardinality())
quantile_transform = _register(quantileTransform(output_distribution = quantile_transform_distribution))
trunctuate_data = _register(trunctuateData(n = n_sample))
one_hot_encode_predictors = _register(oneHotEncodePredictors())
# Target encoding is registered for classification only.
one_hot_encode_targets = _register(oneHotEncodeTargets(), tasks=('classification',))
to_data_frame = _register(toDataFrame())

# 2. Defining the experiment

In [6]:
experiment_name = 'experiment_LCN_1'

# LCN and LLN Parameters
# Each entry describes how one hyperparameter is sampled: a 'distribution' name
# plus the arguments of that distribution.
depth = dict(distribution='int_uniform', min=1, max=11)
seed = dict(distribution='constant', value=42)
drop_type = dict(distribution='categorical', values=['node_dropconnect', 'none'])
p = dict(distribution='float_uniform', min=0.25, max=0.75)
back_n = dict(distribution='categorical', values=[0, 0, 0, 1])
hidden_dim = dict(distribution='constant', value=1)  # Assertion error coming from Net if not 1
anneal = dict(distribution='categorical', values=['interpolation', 'none', 'approx'])
batch_size = dict(distribution='categorical', values=[16, 32, 64, 64, 64, 128, 256])
epochs = dict(distribution='constant', value=30)
lr = dict(distribution='log_uniform', min=0.05, max=0.2)  # yields mean = 0.1082, median 0.1
momentum = dict(distribution='constant', value=0.9)
no_cuda = dict(distribution='constant', value=False)
lr_step_size = dict(distribution='categorical', values=[10, 10, 15, 20])
gamma = dict(distribution='constant', value=0.1)

In [7]:
def _make_search_space(net_type, optimizer, task):
    """Build the hyperparameter search space for one model group.

    All eight spaces share the distributions defined in the previous cell
    (depth, seed, drop_type, ...); they differ only in three constants.

    Parameters
    ----------
    net_type : str
        Constant value for 'net_type' ('locally_constant' or 'locally_linear').
    optimizer : str
        Constant value for 'optimizer' ('SGD' or 'AMSGrad').
    task : str
        Constant value for 'task' ('regression' or 'classification').

    Returns
    -------
    dict
        Maps each hyperparameter name to its distribution description.
        The shared distribution dicts are referenced, not copied.
    """
    def _constant(value):
        # A hyperparameter that always takes the same value.
        return {'distribution': 'constant', 'value': value}

    return {
        'depth': depth,
        'seed': seed,
        'drop_type': drop_type,
        'p': p,
        'ensemble_n': _constant(1),
        'shrinkage': _constant(1),
        'back_n': back_n,
        'net_type': _constant(net_type),
        'hidden_dim': hidden_dim,
        'anneal': anneal,
        'optimizer': _constant(optimizer),
        'batch_size': batch_size,
        'epochs': epochs,
        'lr': lr,
        'momentum': momentum,
        'no_cuda': no_cuda,
        'lr_step_size': lr_step_size,
        'gamma': gamma,
        'task': _constant(task),
        }


#============================================================
# Regression: Locally Constant Networks
#============================================================
LCN_reg_SGD_space = _make_search_space('locally_constant', 'SGD', 'regression')
LCN_reg_AMSGrad_space = _make_search_space('locally_constant', 'AMSGrad', 'regression')

#============================================================
# Regression: Locally Linear Networks
#============================================================
LLN_reg_SGD_space = _make_search_space('locally_linear', 'SGD', 'regression')
LLN_reg_AMSGrad_space = _make_search_space('locally_linear', 'AMSGrad', 'regression')

#============================================================
# Classification: Locally Constant Networks
#============================================================
LCN_cls_SGD_space = _make_search_space('locally_constant', 'SGD', 'classification')
LCN_cls_AMSGrad_space = _make_search_space('locally_constant', 'AMSGrad', 'classification')

#============================================================
# Classification: Locally Linear Networks
#============================================================
LLN_cls_SGD_space = _make_search_space('locally_linear', 'SGD', 'classification')
LLN_cls_AMSGrad_space = _make_search_space('locally_linear', 'AMSGrad', 'classification')

In [8]:
# (model name, search space) pairs, in the order they are registered.
_search_spaces = (
    ('LCN_reg_SGD', LCN_reg_SGD_space),
    ('LCN_reg_AMSGrad', LCN_reg_AMSGrad_space),
    ('LLN_reg_SGD', LLN_reg_SGD_space),
    ('LLN_reg_AMSGrad', LLN_reg_AMSGrad_space),
    ('LCN_cls_SGD', LCN_cls_SGD_space),
    ('LCN_cls_AMSGrad', LCN_cls_AMSGrad_space),
    ('LLN_cls_SGD', LLN_cls_SGD_space),
    ('LLN_cls_AMSGrad', LLN_cls_AMSGrad_space),
)

# Every group is keyed by its model name and repeats that name under 'model'.
model_groups = {
    group_name: {'model': group_name, 'hype': space}
    for group_name, space in _search_spaces
}

ex.def_model_groups(model_groups)

In [9]:
ex.def_data_groups_opml()
print(f'automatically defined data groups: {ex.data_groups.keys()}')

# Split the model group names by the task tag embedded in each name.
classification_models = [name for name in model_groups if '_cls' in name]
regression_models = [name for name in model_groups if '_reg' in name]

# Map each data group to the model groups that should be run on it.
applications = dict(
    opml_reg_purnum_group=regression_models,
    opml_reg_numcat_group=regression_models,
    opml_class_purnum_group=classification_models,
    opml_class_numcat_group=classification_models,
)

ex.def_applications(applications)
ex.reg_experiment(experiment_name)

automatically defined data groups: dict_keys(['opml_reg_purnum_group', 'opml_class_purnum_group', 'opml_reg_numcat_group', 'opml_class_numcat_group'])
existing experiment found


'existing experiment found'

In [10]:
# Read the run counters reported for the registered experiment.
exp_info = ex.experiment_info()
successful_runs = exp_info['successful_runs']
required_runs = exp_info['required_runs']
print(f'total required runs: {required_runs}')

total required runs: 14160


# 3. Main training loop:

In [None]:
from NeuralNetworksTrainingPackage.dataprocessing.basic_pre_processing import get_train_val_test, CustomDataset
from NeuralNetworksTrainingPackage.metrics.basic_metrics import calc_metrics
from models.LcnNetwork import *
from training.LcnTrain import *


# One iteration = one run handed out by the experiment server.
# The iteration count comes from `required_runs` (read from ex.experiment_info()
# in the cell that prints "total required runs") instead of a hard-coded literal.
for i in range(int(required_runs)):
    print(f'==== Begin run:{i} ====')
    print('---- Initialising parameters for the run ----')
    run_info = ex.begin_run_sticky()
    args = Hyperparams(**run_info.get('hyp')) # hyperparameters for LCN need to be in form of an object (you can ignore this)
    print(run_info)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Seed the pre-processing steps, torch and numpy with the run's seed.
    data_pre_processing.set_seed_for_all(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print('---- Loading datasets ----')
    X, y, categorical_indicator, attribute_names = ex.opml_load_task(run_info['mtpair_task'])
    # NOTE(review): args.input_dim / args.output_dim are not part of run_info['hyp'];
    # presumably get_train_val_test sets them on `args` - confirm.
    train_data, val_data, test_data = get_train_val_test(X, y, categorical_indicator, attribute_names,
                                                         data_pre_processing,
                                                         task = args.task,
                                                         model = run_info.get('model'),
                                                         split = [0.5, 0.25, 0.25],
                                                         args = args) # Returns CustomDataset obj instances

    # Mini-batch loader used for optimisation.
    train_dataloader = torch.utils.data.DataLoader(train_data, # CustomDataset obj can be directly passed to dataloader
                                                   batch_size=args.batch_size,
                                                   shuffle= True)

    # Full-batch loaders: each yields its whole split in a single batch.
    train_eval_dataloader = torch.utils.data.DataLoader(train_data, # CustomDataset obj can be directly passed to dataloader
                                                        batch_size=len(train_data),
                                                        shuffle= True) # Required for test_metrics()

    val_dataloader = torch.utils.data.DataLoader(val_data, # CustomDataset obj can be directly passed to dataloader
                                                 batch_size=len(val_data),
                                                 shuffle= True)

    test_dataloader = torch.utils.data.DataLoader(test_data, # CustomDataset obj can be directly passed to dataloader
                                                  batch_size=len(test_data),
                                                  shuffle= True)

    model = Net(input_dim= args.input_dim,
                output_dim= args.output_dim,
                hidden_dim= args.hidden_dim,
                num_layer= args.depth,
                num_back_layer= args.back_n,
                dense= True,
                drop_type= args.drop_type,
                net_type= args.net_type,
                approx= args.anneal).to(device)

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, nesterov=True)
    elif args.optimizer == 'AMSGrad':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    else:
        # Without this branch an unknown name would silently reuse the optimizer
        # of the previous run (bound to the previous model) or raise NameError.
        raise ValueError(f"unsupported optimizer: {args.optimizer!r}")
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma)

    metrics = {}
    start_epoch = 1  # start from epoch 1 or last checkpoint epoch
    # The model above was built with the original net_type; only args is renamed here.
    if args.anneal == 'approx':
        args.net_type = 'approx_' + args.net_type

    start_time = time.time()
    for epoch in range(start_epoch, args.epochs + start_epoch):
        print(f"----{epoch}th training epoch ----")
        epoch_metrics = {}

        # NOTE(review): passing the epoch to step() is deprecated in recent PyTorch
        # releases; left unchanged so the learning-rate schedule of runs already
        # recorded for this experiment is not altered - confirm before changing.
        scheduler.step(epoch)

        alpha = get_alpha(epoch, args.epochs)
        train_approximate_loss = train(args, model, device, train_dataloader, optimizer, epoch, args.anneal, alpha)

        train_loss = get_loss(args, model, device, train_dataloader, 'train')
        val_loss = get_loss(args, model, device, val_dataloader, 'valid')
        test_loss = get_loss(args, model, device, test_dataloader, 'test')

        # start_epoch is 1, so this is the last epoch: its losses go into the
        # final `metrics` update below rather than a per-epoch update.
        # This also skips the NaN check for the last epoch.
        if epoch == args.epochs:
            continue

        epoch_metrics['train_loss'] = train_loss
        epoch_metrics['val_loss'] = val_loss
        epoch_metrics['test_loss'] = test_loss
        epoch_metrics['epoch'] = epoch

        ex.update_run(epoch_metrics)

        if torch.isnan(torch.tensor(train_loss)).item():
            print('---Stopping training due to loss being nan!---')
            break

    else:
        # for/else: reached only when the epoch loop finished without `break`.
        # A run stopped on a NaN loss therefore never reaches ex.end_run().
        metrics['epoch_time'] = time.time()-start_time

        train_metrics = get_metrics(args, model, device, train_eval_dataloader, calc_metrics, 'train') #Requires batch_size to be entire dataset
        val_metrics = get_metrics(args, model, device, val_dataloader, calc_metrics, 'valid')
        test_metrics = get_metrics(args, model, device, test_dataloader, calc_metrics, 'test')

        metrics['train_loss'] = train_loss
        metrics['val_loss'] = val_loss
        metrics['test_loss'] = test_loss
        metrics['train_metrics'] = train_metrics
        metrics['validate_metrics'] = val_metrics
        metrics['test_metrics'] = test_metrics

        ex.update_run(metrics)
        ex.end_run()

## Checking whether the code works as intended

In [11]:
# Load one task by a fixed id (not taken from a run) for the manual checks in the following cells.
X, y, categorical_indicator, attribute_names = ex.opml_load_task('335-361103')

downloading task 335-361103
task different than previous task, downloading...


In [None]:
from copy import deepcopy

# Keep an untouched snapshot of the loaded task so the pre-processing cells can
# be re-run from the same starting point. The snapshot is taken only on the
# first execution; `del X_c` (or restart the kernel) to snapshot a newly loaded task.
# Previously the snapshot line was commented out, so this cell raised NameError
# on a fresh kernel.
if 'X_c' not in globals():
    X_c, y_c, categorical_indicator_c, attribute_names_c = deepcopy(X), deepcopy(y), deepcopy(categorical_indicator), deepcopy(attribute_names)

# Restore working copies from the snapshot.
X, y, categorical_indicator, attribute_names = deepcopy(X_c), deepcopy(y_c), deepcopy(categorical_indicator_c), deepcopy(attribute_names_c)

In [None]:
# Select the row at position 5 of the training features; the list index [[5]] keeps the result two-dimensional.
# NOTE(review): assumes train_data.X is a pandas DataFrame (uses .iloc / .values) - confirm.
some_val = train_data.X.iloc[[5], :]
print(some_val)
# Drop the leading length-1 axis and build a float tensor from the row's values.
torch.tensor(some_val.values.squeeze(axis=0), dtype=torch.float)

In [None]:
# Display the first batch produced by a fresh iterator over the training loader.
next(iter(train_dataloader))

In [None]:
# Run the steps registered under 'regression' on the loaded task, then those registered
# under the model name. Both calls overwrite X, y, categorical_indicator and
# attribute_names, so re-running this cell applies the steps to already-processed data.
X, y, categorical_indicator, attribute_names = data_pre_processing.apply('regression', X, y, categorical_indicator, attribute_names)
# NOTE(review): no step is registered under 'LCN_reg_AMSGrad' in this notebook - confirm what apply() does for such a key.
X, y, categorical_indicator, attribute_names = data_pre_processing.apply('LCN_reg_AMSGrad', X, y, categorical_indicator, attribute_names)

# Testing New Metrics Code

In [13]:
# run_info = ex.begin_run()
# Display the run currently held in `run_info`. With the line above commented out,
# this relies on `run_info` having been set by an earlier cell.
run_info

{'_id': '64faff7eb9df67c3dfb9bda7', 'metrics_per_epoch': [], 'experiment_id': '64f0f2556e02727fe9a6ff59', 'experiment_name': 'experiment_LCN_1', 'mtpair_index': 162, 'mtpair_model': 'LLN_cls_SGD', 'mtpair_task': '337-361063', 'is_completed': False, 'user_id': '64d3a7457658d6ec6db139d0', 'user_name': 'bart', 'hyp': {'depth': 7, 'seed': 42, 'drop_type': 'node_dropconnect', 'p': 0.34397587452213385, 'ensemble_n': 1, 'shrinkage': 1, 'back_n': 0, 'net_type': 'locally_linear', 'hidden_dim': 1, 'anneal': 'approx', 'optimizer': 'SGD', 'batch_size': 64, 'epochs': 30, 'lr': 0.05644720473737607, 'momentum': 0.9, 'no_cuda': False, 'lr_step_size': 10, 'gamma': 0.1, 'task': 'classification'}, 'model': 'LLN_cls_SGD', 'task': '337-361063'}
4faff7eb9df67c3dfb9bda


{'_id': '64faff7eb9df67c3dfb9bda7',
 'metrics_per_epoch': [],
 'experiment_id': '64f0f2556e02727fe9a6ff59',
 'experiment_name': 'experiment_LCN_1',
 'mtpair_index': 162,
 'mtpair_model': 'LLN_cls_SGD',
 'mtpair_task': '337-361063',
 'is_completed': False,
 'user_id': '64d3a7457658d6ec6db139d0',
 'user_name': 'bart',
 'hyp': {'depth': 7,
  'seed': 42,
  'drop_type': 'node_dropconnect',
  'p': 0.34397587452213385,
  'ensemble_n': 1,
  'shrinkage': 1,
  'back_n': 0,
  'net_type': 'locally_linear',
  'hidden_dim': 1,
  'anneal': 'approx',
  'optimizer': 'SGD',
  'batch_size': 64,
  'epochs': 30,
  'lr': 0.05644720473737607,
  'momentum': 0.9,
  'no_cuda': False,
  'lr_step_size': 10,
  'gamma': 0.1,
  'task': 'classification'},
 'model': 'LLN_cls_SGD',
 'task': '337-361063'}

In [14]:
from NeuralNetworksTrainingPackage.dataprocessing.basic_pre_processing import get_train_val_test, CustomDataset
from NeuralNetworksTrainingPackage.metrics.basic_metrics import calc_metrics
from models.LcnNetwork import *
from training.LcnTrain import *

# Train a single model for the run currently held in `run_info`.
# This repeats the setup of the main training loop without reporting to the server.
args = Hyperparams(**run_info.get('hyp'))
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Seed the pre-processing steps, torch and numpy with the run's seed.
data_pre_processing.set_seed_for_all(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)


print('---- Loading datasets ----')
X, y, categorical_indicator, attribute_names = ex.opml_load_task(run_info['mtpair_task'])
# NOTE(review): args.input_dim / args.output_dim are not part of run_info['hyp'];
# presumably get_train_val_test sets them on `args` - confirm.
train_data, val_data, test_data = get_train_val_test(X, y, categorical_indicator, attribute_names,
                                                        data_pre_processing,
                                                        task = args.task,
                                                        model = run_info.get('model'),
                                                        split = [0.5, 0.25, 0.25],
                                                        args = args) # Returns CustomDataset obj instances


# Mini-batch loader used for optimisation.
train_dataloader = torch.utils.data.DataLoader(train_data, # CustomDataset obj can be directly passed to dataloader
                                                batch_size=args.batch_size,
                                                shuffle= True)

# Full-batch loaders: each yields its whole split in a single batch.
train_eval_dataloader = torch.utils.data.DataLoader(train_data, # CustomDataset obj can be directly passed to dataloader
                                                    batch_size=len(train_data),
                                                    shuffle= True) # Required for test_metrics()

val_dataloader = torch.utils.data.DataLoader(val_data, # CustomDataset obj can be directly passed to dataloader
                                                batch_size=len(val_data),
                                                shuffle= True)

test_dataloader = torch.utils.data.DataLoader(test_data, # CustomDataset obj can be directly passed to dataloader
                                                batch_size=len(test_data),
                                                shuffle= True)


model = Net(input_dim= args.input_dim,
            output_dim= args.output_dim,
            hidden_dim= args.hidden_dim,
            num_layer= args.depth,
            num_back_layer= args.back_n,
            dense= True,
            drop_type= args.drop_type,
            net_type= args.net_type,
            approx= args.anneal).to(device)


if args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, nesterov=True)
elif args.optimizer == 'AMSGrad':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
else:
    # Without this branch an unknown name would reuse an optimizer left in the
    # kernel by an earlier cell (bound to another model) or raise NameError.
    raise ValueError(f"unsupported optimizer: {args.optimizer!r}")
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma)


metrics = {}
start_epoch = 1  # start from epoch 1 or last checkpoint epoch
# The model above was built with the original net_type; only args is renamed here.
if args.anneal == 'approx':
    args.net_type = 'approx_' + args.net_type


start_time = time.time()
for epoch in range(start_epoch, args.epochs + start_epoch):
    print(f"----{epoch}th training epoch ----")
    epoch_metrics = {}

    # NOTE(review): passing the epoch to step() is deprecated in recent PyTorch
    # releases; left unchanged to match the main training loop - confirm before changing.
    scheduler.step(epoch)

    alpha = get_alpha(epoch, args.epochs)
    train_approximate_loss = train(args, model, device, train_dataloader, optimizer, epoch, args.anneal, alpha)

---- Loading datasets ----
downloading task 337-361063
task different than previous task, downloading...
----1th training epoch ----
----2th training epoch ----
----3th training epoch ----
----4th training epoch ----
----5th training epoch ----
----6th training epoch ----
----7th training epoch ----
----8th training epoch ----
----9th training epoch ----
----10th training epoch ----
----11th training epoch ----
----12th training epoch ----
----13th training epoch ----
----14th training epoch ----
----15th training epoch ----
----16th training epoch ----
----17th training epoch ----
----18th training epoch ----
----19th training epoch ----
----20th training epoch ----
----21th training epoch ----
----22th training epoch ----
----23th training epoch ----
----24th training epoch ----
----25th training epoch ----
----26th training epoch ----
----27th training epoch ----
----28th training epoch ----
----29th training epoch ----
----30th training epoch ----


In [17]:
%pip install torcheval

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [39]:
# For evaluating time
import time

def timer_decorator(func):
    """Wrap `func` so that every call prints how long it took.

    Parameters
    ----------
    func : callable
        The function to time.

    Returns
    -------
    callable
        A wrapper that forwards all arguments to `func`, prints the elapsed
        time in seconds and returns `func`'s result unchanged. The wrapper keeps
        `func`'s name and docstring. Nothing is printed if `func` raises.
    """
    import functools  # local import: the cell's import block is outside this definition

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by clock adjustments.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"The function took {end_time - start_time} seconds to execute.")
        return result
    return wrapper

In [75]:
# Timing check: call get_metrics 20 times on the validation loader. Each call prints its
# elapsed time because the method is wrapped in timer_decorator.
# NOTE: LcnMetricsClassification is defined in a cell further down the notebook, so this
# cell only works after that cell has been executed.
metrics = LcnMetricsClassification()
for _ in range(20):
    score = metrics.get_metrics(model, val_dataloader, device)
# Display the result of the last call.
score

The function took 0.9189419746398926 seconds to execute.
The function took 1.0482335090637207 seconds to execute.
The function took 0.9133145809173584 seconds to execute.
The function took 0.9159131050109863 seconds to execute.
The function took 0.9191830158233643 seconds to execute.
The function took 0.9187009334564209 seconds to execute.
The function took 0.9281127452850342 seconds to execute.
The function took 0.9227919578552246 seconds to execute.
The function took 1.0493524074554443 seconds to execute.
The function took 0.9110701084136963 seconds to execute.
The function took 0.9193122386932373 seconds to execute.
The function took 0.9211428165435791 seconds to execute.
The function took 0.9248418807983398 seconds to execute.
The function took 0.9229333400726318 seconds to execute.
The function took 0.9151473045349121 seconds to execute.
The function took 0.9212911128997803 seconds to execute.
The function took 0.9221484661102295 seconds to execute.
The function took 1.05370259284

(tensor(0.8692, dtype=torch.float64),
 tensor([0.9390, 0.9390], dtype=torch.float64),
 tensor([[1451.,  237.],
         [ 204., 1480.]], device='cuda:0'))

In [77]:
# Second timing pass over get_metrics (same code as the previous timing cell): 20 calls on
# the validation loader, each printing its elapsed time via timer_decorator.
# NOTE: depends on LcnMetricsClassification, which is defined in a cell further down.
metrics = LcnMetricsClassification()
for _ in range(20):
    score = metrics.get_metrics(model, val_dataloader, device)
# Display the result of the last call.
score

The function took 0.9419999122619629 seconds to execute.
The function took 0.9360606670379639 seconds to execute.
The function took 0.9318320751190186 seconds to execute.
The function took 0.926185131072998 seconds to execute.
The function took 0.9490854740142822 seconds to execute.
The function took 1.0689499378204346 seconds to execute.
The function took 0.9393575191497803 seconds to execute.
The function took 0.9274029731750488 seconds to execute.
The function took 0.9264748096466064 seconds to execute.
The function took 0.9338750839233398 seconds to execute.
The function took 0.9263033866882324 seconds to execute.
The function took 0.9285609722137451 seconds to execute.
The function took 1.0509958267211914 seconds to execute.
The function took 0.9219207763671875 seconds to execute.
The function took 0.9202761650085449 seconds to execute.
The function took 0.9375119209289551 seconds to execute.
The function took 0.9305665493011475 seconds to execute.
The function took 0.923996448516

(tensor(0.8692, dtype=torch.float64),
 tensor([0.9390, 0.9390], dtype=torch.float64),
 tensor([[1451.,  237.],
         [ 204., 1480.]], device='cuda:0'))

In [80]:
# Timing check for get_loss: 20 calls on the validation loader, each printing its elapsed
# time via timer_decorator.
# NOTE: depends on LcnMetricsClassification, which is defined in a cell further down.
metrics = LcnMetricsClassification()
for _ in range(20):
    score = metrics.get_loss(model, val_dataloader, device)
# Display the result of the last call (a dict holding the loss).
score

The function took 1.0151679515838623 seconds to execute.
The function took 0.8488566875457764 seconds to execute.
The function took 0.8456614017486572 seconds to execute.
The function took 0.8461451530456543 seconds to execute.
The function took 0.8485949039459229 seconds to execute.
The function took 0.848900556564331 seconds to execute.
The function took 0.8555574417114258 seconds to execute.
The function took 0.8454968929290771 seconds to execute.
The function took 0.8552320003509521 seconds to execute.
The function took 0.8611528873443604 seconds to execute.
The function took 0.8606765270233154 seconds to execute.
The function took 0.9763870239257812 seconds to execute.
The function took 0.858640193939209 seconds to execute.
The function took 0.8529043197631836 seconds to execute.
The function took 0.851269006729126 seconds to execute.
The function took 0.8486208915710449 seconds to execute.
The function took 0.8478198051452637 seconds to execute.
The function took 0.84466600418090

{'loss': 0.31648332227025877}

In [79]:
from torcheval.metrics import BinaryAUROC
from torcheval.metrics import BinaryAccuracy
from torcheval.metrics.toolkit import sync_and_compute


class LcnMetricsClassification():
    """Evaluation helpers (loss and classification metrics) for LCN-style models.

    Both public methods iterate over a loader yielding ``(data, target)`` batches.
    ``target`` is treated as a 2-D tensor with one column per class: the class
    index is recovered with ``argmax(dim=1)`` and the number of classes is read
    from ``target.shape[1]``.  The AUROC accumulator is created with
    ``num_tasks=2``, so ``get_metrics`` only supports two-class targets.
    """

    def __init__(self):
        pass

    @staticmethod
    def _forward(model, data):
        """Run the inference forward pass selected by ``model.net_type``.

        Returns only the model output; the ReLU masks returned alongside it are
        not needed for evaluation.

        Raises:
            ValueError: if ``model.net_type`` is neither ``'locally_constant'``
                nor ``'locally_linear'`` (previously this surfaced later as an
                unbound-variable error).
        """
        # Kept from the original code: the input is flagged as requiring grad
        # even though evaluation runs under torch.no_grad().
        # NOTE(review): presumably the model's forward expects this -- confirm.
        data.requires_grad = True
        if model.net_type == 'locally_constant':
            output, _relu_masks = model(data, p=0, training=False)
        elif model.net_type == 'locally_linear':
            output, _relu_masks = model.normal_forward(data, p=0, training=False)
        else:
            raise ValueError(f"Unsupported model.net_type: {model.net_type!r}")
        return output

    @timer_decorator
    def get_loss(self, model, test_loader, device, test_set_name = None):
        """Compute the mean cross-entropy loss over every sample in ``test_loader``.

        Args:
            model: network exposing ``net_type``, ``eval()`` and the forward
                entry point matching its ``net_type``.
            test_loader: iterable of ``(data, target)`` batches.
            device: device the batches are moved to before the forward pass.
            test_set_name: optional string prefix for the result key,
                e.g. ``'val'`` gives ``{'val_loss': ...}``.

        Returns:
            dict with a single entry: ``'<test_set_name>_loss'`` when a name is
            given, otherwise ``'loss'``.

        Raises:
            AssertionError: if ``test_set_name`` is truthy but not a string.
            ValueError: if the loader yields no samples or ``net_type`` is unknown.
        """
        with torch.no_grad():
            model.eval()
            test_loss = 0
            dataset_len = 0

            for data, target in test_loader:
                dataset_len += len(target)
                data, target = data.to(device), target.to(device)

                # Cast on whatever device the batch lives on instead of forcing
                # torch.cuda.LongTensor, which breaks CPU-only evaluation.
                target = target.long()

                output = self._forward(model, data)

                target_one_dim = torch.argmax(target, dim=1)
                # Sum per batch, divide once at the end: a smaller final batch
                # must not be over-weighted.
                test_loss += F.cross_entropy(output, target_one_dim, reduction='sum').item()

            if dataset_len == 0:
                raise ValueError("test_loader yielded no samples; cannot compute a mean loss")

            test_loss /= dataset_len

            if test_set_name:
                assert isinstance(test_set_name, str), "test_set_name must be a string, such as train, val, test"
                test_set_name = f'{test_set_name}_loss'
                final_metrics = {test_set_name: test_loss}
            else:
                final_metrics = {'loss': test_loss}

            return final_metrics

    @timer_decorator
    def get_metrics(self, model, test_loader, device):
        """Accumulate accuracy, AUROC and a confusion matrix over ``test_loader``.

        Args:
            model: network exposing ``net_type``, ``eval()`` and the forward
                entry point matching its ``net_type``.
            test_loader: iterable of ``(data, target)`` batches.
            device: device the batches are moved to before the forward pass.

        Returns:
            Tuple ``(accuracy, roc_auc, confusion_matrix)`` where ``roc_auc``
            holds one value per class column and ``confusion_matrix`` is
            whatever ``ConfusionMatrix.compute()`` returns.

        Raises:
            ValueError: if ``model.net_type`` is unknown.
        """
        with torch.no_grad():
            model.eval()

            roc_auc_accumulator = BinaryAUROC(num_tasks=2)
            # Fix: this accumulator used to be a second BinaryAUROC, so the value
            # returned as "accuracy" was the AUROC of the hard predictions.
            acc_accumulator = BinaryAccuracy()
            confusion_accumulator = ConfusionMatrix()

            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                target = target.long()

                output = self._forward(model, data)

                # Metric accumulation happens on the CPU.
                probabilities = torch.softmax(output, dim=-1).cpu()
                target = target.cpu()

                preds = torch.argmax(probabilities, dim=1)
                target_one_dim = torch.argmax(target, dim=1)

                # BinaryAUROC with num_tasks=2 is fed one row per class, hence
                # the transposes of the (batch, class) tensors.
                roc_auc_accumulator.update(probabilities.t(), target.t())
                acc_accumulator.update(preds, target_one_dim)
                confusion_accumulator.update(preds, target_one_dim, num_classes = target.shape[1])

            roc_auc = sync_and_compute(roc_auc_accumulator)
            accuracy = sync_and_compute(acc_accumulator)
            confusion_matrix = confusion_accumulator.compute()

            return accuracy, roc_auc, confusion_matrix

In [57]:
class ConfusionMatrix:
    """Running confusion matrix accumulated over batches of class indices.

    Entry ``[t, p]`` counts the samples whose true class is ``t`` and whose
    predicted class is ``p``.  Counts are stored in a floating-point tensor
    (the default ``torch.zeros`` dtype), as before.

    Args:
        num_classes: number of classes.  When omitted (or falsy) the matrix is
            allocated lazily on the first ``update`` call.
        device: device that holds the matrix.  Defaults to ``'cuda'`` when CUDA
            is available (the previous hard-coded behaviour) and to ``'cpu'``
            otherwise, so the class also works on CPU-only machines.
    """

    def __init__(self, num_classes = None, device = None):
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device
        self.num_classes = num_classes
        # A falsy num_classes (None or 0) defers allocation to update().
        self.matrix = self._zeros(num_classes) if num_classes else None

    def _zeros(self, num_classes):
        """Allocate an empty ``num_classes x num_classes`` count matrix."""
        return torch.zeros((num_classes, num_classes), device=self.device)

    def update(self, outputs, targets, num_classes = 2):
        """Add one batch of predictions to the running counts.

        Args:
            outputs: tensor of predicted class indices (any shape; flattened).
            targets: tensor of true class indices with the same number of
                elements as ``outputs``.
            num_classes: matrix size to use if the matrix was not allocated in
                the constructor; ignored otherwise.

        Raises:
            ValueError: if ``outputs`` and ``targets`` differ in element count.
            IndexError: if any index lies outside ``[0, num_classes)``.
        """
        if self.matrix is None:
            self.num_classes = num_classes
            self.matrix = self._zeros(num_classes)

        with torch.no_grad():
            size = self.matrix.shape[0]
            true_idx = targets.reshape(-1).long()
            pred_idx = outputs.reshape(-1).long().to(true_idx.device)

            if true_idx.numel() != pred_idx.numel():
                raise ValueError(
                    f"outputs and targets must have the same number of elements, "
                    f"got {pred_idx.numel()} and {true_idx.numel()}"
                )
            if true_idx.numel() == 0:
                return

            out_of_range = (true_idx < 0) | (true_idx >= size) | (pred_idx < 0) | (pred_idx >= size)
            if out_of_range.any():
                raise IndexError(f"class indices must lie in [0, {size})")

            # Encode each (true, predicted) pair as one flat index and count all
            # pairs in a single bincount instead of a per-sample Python loop.
            flat_idx = true_idx * size + pred_idx
            counts = torch.bincount(flat_idx, minlength=size * size).reshape(size, size)
            self.matrix += counts.to(device=self.matrix.device, dtype=self.matrix.dtype)

    def compute(self):
        """Return the accumulated matrix (``None`` if nothing was ever allocated)."""
        return self.matrix

In [54]:
# Timing experiment: call the module-level get_metrics helper 10 times; only the
# result of the last repetition is kept.
# NOTE(review): the result is named `train_metrics` and the label passed is
# 'train', but the loader is `val_dataloader` -- confirm which split is intended.
for _ in range(10):
    train_metrics = get_metrics(args, model, device, val_dataloader, calc_metrics, 'train')
    
# Bare last expression so the notebook displays the (metrics, loss) tuple.
train_metrics

The function took 1.7190251350402832 seconds to execute.
The function took 1.8374691009521484 seconds to execute.
The function took 1.7214810848236084 seconds to execute.
The function took 1.7017178535461426 seconds to execute.
The function took 1.692218542098999 seconds to execute.
The function took 1.811413049697876 seconds to execute.
The function took 1.6977145671844482 seconds to execute.
The function took 1.693662166595459 seconds to execute.
The function took 1.719682216644287 seconds to execute.
The function took 1.704272985458374 seconds to execute.


({'accuracy_score': 0.8692170818505338,
  'roc_auc_score': 0.9390042608999112,
  'confusion_matrix': [[1451, 237], [204, 1480]]},
 0.31648335847141906)

In [29]:
# Shape of the second element (index 1) of the first training batch; the
# evaluation loops in this notebook unpack batches as (data, target), so this is
# presumably the target tensor.
next(iter(train_dataloader))[1].shape

torch.Size([64, 2])

In [53]:
@timer_decorator
def get_metrics(args, model, device, test_loader, metrics_func, test_set_name):
    """Compute task metrics and the loss over the whole of ``test_loader``.

    Previously only the first batch of the loader was scored while the loss was
    computed over every batch.  Targets and outputs are now gathered from all
    batches and concatenated along dim 0 before ``metrics_func`` is called, so
    both returned values describe the same samples.  For a loader with a single
    batch the result is unchanged.

    Args:
        args: namespace whose ``task`` is ``'classification'`` or ``'regression'``.
        model: network exposing ``net_type``, ``eval()`` and the forward entry
            point matching its ``net_type``.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches.
        metrics_func: callable invoked as ``metrics_func(target, output, flag)``
            with ``flag=True`` for classification and ``False`` for regression.
        test_set_name: label forwarded to ``get_loss`` (formerly a hard-coded
            ``'val'`` was passed regardless of this argument).

    Returns:
        Tuple ``(metrics, loss)``: the value returned by ``metrics_func`` and the
        value returned by ``get_loss``.

    Raises:
        ValueError: if ``args.task`` or ``model.net_type`` is not supported, or
            if the loader yields no batches.
    """
    if args.task not in ('classification', 'regression'):
        raise ValueError(f"Unsupported args.task: {args.task!r}")
    is_classification = args.task == 'classification'

    with torch.no_grad():
        model.eval()

        collected_targets = []
        collected_outputs = []

        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            if is_classification:
                # Cast on the batch's own device instead of forcing
                # torch.cuda.LongTensor, which breaks CPU-only evaluation.
                target = target.long()

            # Kept from the original code: the input is flagged as requiring
            # grad even though evaluation runs under torch.no_grad().
            # NOTE(review): presumably the model's forward expects this -- confirm.
            data.requires_grad = True
            if model.net_type == 'locally_constant':
                output, relu_masks = model(data, p=0, training=False)
            elif model.net_type == 'locally_linear':
                output, relu_masks = model.normal_forward(data, p=0, training=False)
            else:
                raise ValueError(f"Unsupported model.net_type: {model.net_type!r}")

            if is_classification:
                # metrics_func receives class probabilities, not raw logits.
                output = torch.softmax(output, dim=-1)

            collected_targets.append(target)
            collected_outputs.append(output)

        if not collected_targets:
            raise ValueError("test_loader yielded no batches; cannot compute metrics")

        target = torch.cat(collected_targets, dim=0)
        output = torch.cat(collected_outputs, dim=0)

        metrics = metrics_func(target, output, is_classification)

        loss = get_loss(args, model, device, test_loader, test_set_name)

        return metrics, loss

In [50]:
def get_loss(args, model, device, test_loader, test_set_name):
    """Return the mean per-sample loss of ``model`` over ``test_loader``.

    Classification uses cross-entropy against ``argmax(target, dim=1)`` (the
    target is a 2-D tensor with one column per class); regression uses the
    squared error between the squeezed output and the target.

    Args:
        args: namespace whose ``task`` is ``'classification'`` or ``'regression'``.
        model: network exposing ``net_type``, ``eval()`` and the forward entry
            point matching its ``net_type``.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches.
        test_set_name: accepted for interface compatibility; not used here.

    Returns:
        float: summed loss divided by the number of samples seen.  As before, a
        task other than the two supported ones contributes nothing, so the
        result is 0.

    Raises:
        ValueError: if ``model.net_type`` is not supported, or if the loader
            yields no samples (formerly a ``ZeroDivisionError``).
    """
    with torch.no_grad():
        model.eval()
        test_loss = 0
        dataset_len = 0

        for data, target in test_loader:
            dataset_len += len(target)
            data, target = data.to(device), target.to(device)
            if args.task == 'classification':
                # Cast on the batch's own device instead of forcing
                # torch.cuda.LongTensor, which breaks CPU-only evaluation.
                target = target.long()

            # Kept from the original code: the input is flagged as requiring
            # grad even though evaluation runs under torch.no_grad().
            # NOTE(review): presumably the model's forward expects this -- confirm.
            data.requires_grad = True
            if model.net_type == 'locally_constant':
                output, relu_masks = model(data, p=0, training=False)
            elif model.net_type == 'locally_linear':
                output, relu_masks = model.normal_forward(data, p=0, training=False)
            else:
                raise ValueError(f"Unsupported model.net_type: {model.net_type!r}")

            if args.task == 'classification':
                target_one_dim = torch.argmax(target, dim=1)
                # Sum per batch, divide once at the end: a smaller final batch
                # must not be over-weighted.
                test_loss += F.cross_entropy(output, target_one_dim, reduction='sum').item()
            elif args.task == 'regression':
                # NOTE(review): assumes target has the same shape as the squeezed
                # output; a (N, 1) target would broadcast to (N, N) -- confirm.
                output = output.squeeze(-1)
                test_loss += ((output - target) ** 2).mean().item() * len(target)

        if dataset_len == 0:
            raise ValueError("test_loader yielded no samples; cannot compute a mean loss")

        test_loss /= dataset_len

        return test_loss
