In [1]:
# Reinstall both project packages straight from GitHub: each one is uninstalled
# first and then installed again from its repository.
# NOTE(review): the installs are not pinned to a commit or tag — consider pinning
# for reproducibility.
!pip uninstall TabularExperimentTrackerClient --y
!pip install git+https://github.com/DanielWarfield1/TabularExperimentTrackerClient
!pip uninstall NeuralNetworksTrainingPackage --y
!pip install git+https://github.com/Bartosz-G/NeuralNetworksTrainingPackage

[0mCollecting git+https://github.com/DanielWarfield1/TabularExperimentTrackerClient
  Cloning https://github.com/DanielWarfield1/TabularExperimentTrackerClient to /tmp/pip-req-build-38wshg1p
  Running command git clone --filter=blob:none --quiet https://github.com/DanielWarfield1/TabularExperimentTrackerClient /tmp/pip-req-build-38wshg1p
  Resolved https://github.com/DanielWarfield1/TabularExperimentTrackerClient to commit df52eac0ce37df983d93a1b76cb9f4380a27b40d
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting openml (from TabularExperimentTrackerClient==0.0.1)
  Downloading openml-0.14.1.tar.gz (131 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.3/131.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting liac-arff>=2.4.0 (from openml->TabularExperimentTrackerClient==0.0.1)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting x

In [2]:
import numpy as np
import pandas as pd
import sklearn
import torch
import time

In [3]:
from NNTraining import *
# Global namespace:
# Hyperparams(**run_info.get('hyp'))
# CustomDataset(X, Y, relative_indices, tensor_type=torch.float)
# CustomDatasetWrapper(train_dataset, relative_indices)
# kfold_dataloader_iterator(dataset, n_splits=10, random_state=42, batch_size=16, shuffle_kfold=True, shuffle_dataloader=True)
# get_train_test(X, y, categorical_indicator, attribute_names, train_split, seed)
# Net(input_dim,output_dim,hidden_dim,num_layer,num_back_layer, dense = False,drop_type = 'none',net_type = 'locally_constant',approx = 'none')
# test(args, model, device, test_loader, test_set_name)
# train(args, model, device, train_loader, optimizer, epoch, anneal, alpha=1)
# calc_metrics(y, yhat, is_categorical)

In [4]:
from TabularExperimentTrackerClient.ExperimentClient import ExperimentClient

# Paths handed to the client's credential setup calls.
# NOTE(review): presumably resolved relative to the mounted Google Drive — confirm.
OPENML_CREDS_PATH = '/My Drive/research/non-homogenous-data/creds/creds-openml.txt'
ORCHESTRATOR_CREDS_PATH = '/My Drive//research/non-homogenous-data/creds/creds-colab.txt'
ORCHESTRATOR_USER = 'bart'

# Verbose experiment-tracker client used by every later cell.
ex = ExperimentClient(verbose=True)
ex.define_opml_cred_drive(OPENML_CREDS_PATH)
ex.define_orch_cred_drive(ORCHESTRATOR_USER, ORCHESTRATOR_CREDS_PATH)

Mounted at /content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1. Defining the experiment

In [5]:
experiment_name = 'experiment_LCN_1'


def _constant(value):
    """Build a spec dict with distribution 'constant' and the given value."""
    return {'distribution': 'constant', 'value': value}


def _categorical(values):
    """Build a spec dict with distribution 'categorical' over the given list of values."""
    return {'distribution': 'categorical', 'values': values}


def _bounded(kind, low, high):
    """Build a spec dict of the named distribution with 'min' and 'max' bounds."""
    return {'distribution': kind, 'min': low, 'max': high}


# LCN and LLN parameters shared by every search space.
# NOTE(review): repeated entries in the categorical lists presumably weight the
# draw towards that value — confirm against the tracker client.
depth = _bounded('int_uniform', 1, 11)
seed = _constant(42)
drop_type = _categorical(['node_dropconnect', 'none'])
p = _bounded('float_uniform', 0.25, 0.75)
back_n = _categorical([0, 0, 0, 1])
hidden_dim = _constant(1)  # any other value triggers an assertion error coming from Net
anneal = _categorical(['interpolation', 'none', 'approx'])
batch_size = _categorical([16, 32, 64, 64, 64, 128, 256])
epochs = _constant(30)
lr = _bounded('log_uniform', 0.05, 0.2)  # yields mean = 0.1082, median 0.1
momentum = _constant(0.9)
no_cuda = _constant(False)
lr_step_size = _categorical([10, 10, 15, 20])
gamma = _constant(0.1)

In [6]:
#============================================================
# Search spaces: {LCN, LLN} x {regression, classification} x {SGD, AMSGrad}
#============================================================
def _build_search_space(net_type, optimizer, task):
    """Return the hyperparameter search space for one model configuration.

    All eight spaces share the same module-level distributions (depth, seed,
    drop_type, ...) and differ only in three constant entries.

    Args:
        net_type: value for the constant 'net_type' entry
            ('locally_constant' or 'locally_linear').
        optimizer: value for the constant 'optimizer' entry ('SGD' or 'AMSGrad').
        task: value for the constant 'task' entry
            ('regression' or 'classification').

    Returns:
        A new dict mapping each hyperparameter name to its distribution spec.
        The shared distribution dicts are referenced, not copied.
    """
    return {
        'depth': depth,
        'seed': seed,
        'drop_type': drop_type,
        'p': p,
        'ensemble_n': {'distribution': 'constant', 'value': 1},
        'shrinkage': {'distribution': 'constant', 'value': 1},
        'back_n': back_n,
        'net_type': {'distribution': 'constant', 'value': net_type},
        'hidden_dim': hidden_dim,
        'anneal': anneal,
        'optimizer': {'distribution': 'constant', 'value': optimizer},
        'batch_size': batch_size,
        'epochs': epochs,
        'lr': lr,
        'momentum': momentum,
        'no_cuda': no_cuda,
        'lr_step_size': lr_step_size,
        'gamma': gamma,
        'task': {'distribution': 'constant', 'value': task},
    }


# Regression: Locally Constant Networks
LCN_reg_SGD_space = _build_search_space('locally_constant', 'SGD', 'regression')
LCN_reg_AMSGrad_space = _build_search_space('locally_constant', 'AMSGrad', 'regression')

# Regression: Locally Linear Networks
LLN_reg_SGD_space = _build_search_space('locally_linear', 'SGD', 'regression')
LLN_reg_AMSGrad_space = _build_search_space('locally_linear', 'AMSGrad', 'regression')

# Classification: Locally Constant Networks
LCN_cls_SGD_space = _build_search_space('locally_constant', 'SGD', 'classification')
LCN_cls_AMSGrad_space = _build_search_space('locally_constant', 'AMSGrad', 'classification')

# Classification: Locally Linear Networks
LLN_cls_SGD_space = _build_search_space('locally_linear', 'SGD', 'classification')
LLN_cls_AMSGrad_space = _build_search_space('locally_linear', 'AMSGrad', 'classification')

In [7]:
# Search space for each model-group name, in registration order.
_search_space_by_model = {
    'LCN_reg_SGD': LCN_reg_SGD_space,
    'LCN_reg_AMSGrad': LCN_reg_AMSGrad_space,
    'LLN_reg_SGD': LLN_reg_SGD_space,
    'LLN_reg_AMSGrad': LLN_reg_AMSGrad_space,
    'LCN_cls_SGD': LCN_cls_SGD_space,
    'LCN_cls_AMSGrad': LCN_cls_AMSGrad_space,
    'LLN_cls_SGD': LLN_cls_SGD_space,
    'LLN_cls_AMSGrad': LLN_cls_AMSGrad_space,
}

# Each group is keyed by its model name and carries that name under 'model'
# and its hyperparameter search space under 'hype'.
model_groups = {
    model_name: {'model': model_name, 'hype': search_space}
    for model_name, search_space in _search_space_by_model.items()
}

ex.def_model_groups(model_groups)

In [8]:
# Have the client define its OpenML data groups, then show which ones exist.
ex.def_data_groups_opml()
print(f'automatically defined data groups: {ex.data_groups.keys()}')

# Split the model-group names by task using the '_cls' / '_reg' marker in each name.
classification_models = [name for name in model_groups if '_cls' in name]
regression_models = [name for name in model_groups if '_reg' in name]

# Pair every data group with the model groups that are run on it:
# regression models on the 'reg' groups, classification models on the 'class' groups.
applications = {
    'opml_reg_purnum_group': regression_models,
    'opml_reg_numcat_group': regression_models,
    'opml_class_purnum_group': classification_models,
    'opml_class_numcat_group': classification_models,
}

ex.def_applications(applications)
ex.reg_experiment(experiment_name)

automatically defined data groups: dict_keys(['opml_reg_purnum_group', 'opml_class_purnum_group', 'opml_reg_numcat_group', 'opml_class_numcat_group'])
existing experiment found


'existing experiment found'

In [9]:
# Fetch the experiment's bookkeeping from the tracker and keep the two run counters.
exp_info = ex.experiment_info()
successful_runs, required_runs = exp_info['successful_runs'], exp_info['required_runs']
print('total required runs: {}'.format(required_runs))

total required runs: 14160


# 2. Main training loop:

In [None]:
# Main experiment loop. Each iteration asks the tracker for a run, trains one
# network on that run's OpenML task, reports the losses after every epoch and,
# if training finishes without a NaN loss, reports final metrics and closes the run.
# The iteration count is taken from `required_runs` (read from ex.experiment_info())
# rather than a hard-coded copy of that number.
for i in range(int(required_runs)):
  print(f'==== Begin run:{i} ====')
  print('---- Initialising parameters for the run ----')
  run_info = ex.begin_run_sticky()
  # The training helpers read hyperparameters as attributes, so wrap the dict in an object.
  args = Hyperparams(**run_info.get('hyp'))
  print(run_info)

  # Seed torch and numpy per run and pick the device.
  use_cuda = not args.no_cuda and torch.cuda.is_available()
  torch.manual_seed(args.seed)
  np.random.seed(args.seed)
  device = torch.device("cuda" if use_cuda else "cpu")

  print('---- Loading datasets ----')
  X, y, categorical_indicator, attribute_names = ex.opml_load_task(run_info['mtpair_task'])
  # NOTE(review): args.input_dim / args.output_dim are not in the search space;
  # presumably get_train_val_test sets them on `args` — confirm.
  train_data, val_data, test_data = get_train_val_test(X, y, categorical_indicator, attribute_names,
                                                       split = [0.5, 0.25, 0.25],
                                                       trunctuate = 20000,
                                                       seed = args.seed,
                                                       args = args)

  # Mini-batch loader used for the optimisation steps and the per-epoch training loss.
  train_dataloader = torch.utils.data.DataLoader(train_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=True)

  # Single-batch loaders: get_metrics() needs the whole split in one batch.
  train_eval_dataloader = torch.utils.data.DataLoader(train_data,
                                                      batch_size=len(train_data),
                                                      shuffle=True)

  val_dataloader = torch.utils.data.DataLoader(val_data,
                                               batch_size=len(val_data),
                                               shuffle=True)

  test_dataloader = torch.utils.data.DataLoader(test_data,
                                                batch_size=len(test_data),
                                                shuffle=True)

  model = Net(input_dim=args.input_dim, output_dim=args.output_dim,
              hidden_dim=args.hidden_dim,
              num_layer=args.depth,
              num_back_layer=args.back_n,
              dense=True,
              drop_type=args.drop_type,
              net_type=args.net_type,
              approx=args.anneal).to(device)

  if args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, nesterov=True)
  elif args.optimizer == 'AMSGrad':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
  else:
    # Without this branch an unknown name would silently reuse the previous
    # iteration's optimizer (tied to the previous model) or hit a NameError.
    raise ValueError(f'Unsupported optimizer: {args.optimizer!r}')
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma)

  metrics = {}
  start_epoch = 1  # start from epoch 1 or last checkpoint epoch
  # The model above is built with the plain net_type; the 'approx_' prefix is only
  # applied to args afterwards.
  if args.anneal == 'approx':
    args.net_type = 'approx_' + args.net_type

  start_time = time.time()
  for epoch in range(start_epoch, args.epochs + start_epoch):
    print(f"----{epoch}th training epoch ----")
    epoch_metrics = {}

    # NOTE(review): passing the epoch to step() and stepping before the optimizer
    # is deprecated in recent PyTorch; left as is so the LR schedule is unchanged.
    scheduler.step(epoch)

    alpha = get_alpha(epoch, args.epochs)
    train_approximate_loss = train(args, model, device, train_dataloader, optimizer, epoch, args.anneal, alpha)

    train_loss = get_loss(args, model, device, train_dataloader, 'train')
    val_loss = get_loss(args, model, device, val_dataloader, 'valid')
    test_loss = get_loss(args, model, device, test_dataloader, 'test')

    # The last epoch's losses are sent together with the final metrics in the
    # for-else block below, so skip the per-epoch update (and the NaN check) here.
    if epoch == args.epochs:
      continue

    epoch_metrics['train_loss'] = train_loss
    epoch_metrics['val_loss'] = val_loss
    epoch_metrics['test_loss'] = test_loss
    epoch_metrics['epoch'] = epoch

    ex.update_run(epoch_metrics)
    print(f'epoch_metrics:{epoch_metrics}')

    # Breaking out skips the else block: no final metrics are reported and
    # ex.end_run() is not called for this run.
    if torch.isnan(torch.tensor(train_loss)).item():
      print('---Stopping training due to loss being nan!---')
      break

  else:
    # Reached only when the epoch loop finished without a break.
    # 'epoch_time' holds the wall-clock time of the whole epoch loop.
    metrics['epoch_time'] = time.time()-start_time

    train_metrics = get_metrics(args, model, device, train_eval_dataloader, calc_metrics, 'train')  # needs the full split as one batch
    val_metrics = get_metrics(args, model, device, val_dataloader, calc_metrics, 'valid')
    test_metrics = get_metrics(args, model, device, test_dataloader, calc_metrics, 'test')

    metrics['train_loss'] = train_loss
    metrics['val_loss'] = val_loss
    metrics['test_loss'] = test_loss
    metrics['train_metrics'] = train_metrics
    metrics['validate_metrics'] = val_metrics
    metrics['test_metrics'] = test_metrics

    ex.update_run(metrics)
    print(ex.get_run())
    ex.end_run()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
---- Loading datasets ----
downloading task 336-361279
using values from previous task load, skipped download
----1th training epoch ----
run updated
epoch_metrics:{'train_loss': 0.00234488905409585, 'val_loss': 0.002418084070086479, 'test_loss': 0.0028055666480213404, 'epoch': 1}
----2th training epoch ----
run updated
epoch_metrics:{'train_loss': 0.0013711015958647599, 'val_loss': 0.0014395462349057198, 'test_loss': 0.001805896987207234, 'epoch': 2}
----3th training epoch ----
run updated
epoch_metrics:{'train_loss': 0.0008221803449661643, 'val_loss': 0.0008415898191742599, 'test_loss': 0.0012594660511240363, 'epoch': 3}
----4th training epoch ----
run updated
epoch_metrics:{'train_loss': 0.000906876234313786, 'val_loss': 0.0009050571243278682, 'test_loss': 0.001353468163870275, 'epoch': 4}
----5th training epoch ----
run updated
epoch_metrics:{'train_loss': 0.0008354559753294355, 'val_loss': 0.000877450976986438, 'test

## Checking whether the code works as intended