In [None]:
import importlib
import numpy as np
import pandas as pd
import os, sys
import tensorflow as tf
from imblearn.over_sampling import SMOTE
import bayesopt

import Models.Reversible_Classifier as MC
import Models.Net as Net

import Data_Handler.Classifier_data as CD

# Short aliases for the project classes this notebook instantiates:
# the data wrapper from Data_Handler.Classifier_data and the classifier
# from Models.Reversible_Classifier.
Classifier_data = CD.Classifier_data
CNet = MC.Classifier

# Suppress print statements while training
class HiddenPrints:
    """Context manager that discards everything written to ``sys.stdout``.

    On entry ``sys.stdout`` is swapped for a handle on ``os.devnull``; on exit
    (normal or via an exception) the original stream is put back and the
    devnull handle is closed.

    A real file object is used rather than ``None`` so that code calling
    ``sys.stdout.write()`` or ``sys.stdout.flush()`` directly keeps working
    while output is hidden, instead of failing with ``AttributeError``.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable even if closing devnull fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [None]:
# --- Load the raw inputs -----------------------------------------------------
# g: gene-expression feature table, l: per-patient diagnosis labels.
data_dir = 'Data'
g = pd.read_csv(
    os.path.join(data_dir, 'LupusGeneExpressionCompendium_AllGfeatures.csv'))
l = pd.read_csv(
    os.path.join(data_dir, 'PatientDx_Labels.csv'))

# Lookup table assigning an integer class id ('trt') to each diagnosis string.
df = pd.DataFrame({
    'trt': [0, 1, 2],
    'sle_class': ['Healthy_No_Treatment', 'SLE_No_Treatment', 'SLE_Treatment'],
})

# Attach the integer class id to the label rows. The join is an outer join on
# 'sle_class', sorted by the key; all other merge options are pandas defaults.
df2 = l.merge(df, how='outer', on='sle_class', sort=True)

# Research-ready dataset: keep only patients present in both the feature
# table and the labelled table (inner join on 'patid', sorted by the key).
df3 = g.merge(df2, how='inner', on='patid', sort=True)

# Split into features and labels.
# NOTE(review): the positional slice drops the first column and the last two;
# presumably these are 'patid', 'sle_class' and 'trt' - confirm against the CSVs.
df_data = df3.iloc[:, 1:-2]
df_labels = df3['trt']

In [None]:
# Re-bind the class aliases (same objects as in the import cell) so this cell
# can be re-run on its own.
CNet = MC.Classifier
Classifier_data = CD.Classifier_data
# Wrap the feature matrix and label vector in the project's data handler.
# NOTE(review): the first argument (100) is interpreted by Classifier_data;
# presumably a batch size - confirm against Data_Handler.Classifier_data.
Lupus_data = Classifier_data(100, df_data.values, df_labels.values)
# Network dimensions are taken from the data handler.
N_outputs = Lupus_data.Num_of_classes
N_inputs = Lupus_data.Num_of_features
# Start from the classifier's default configuration for these dimensions.
config = CNet.get_default_configs(N_inputs, N_outputs)
# Optional manual override of the learning rate (disabled):
# config['hyperparameter']['lr'] = 3e-5
# NOTE(review): the third argument ([0.6, 0.2]) is interpreted by the
# classifier; presumably dataset split fractions - confirm in Models.Reversible_Classifier.
cnet = CNet(config, Lupus_data, [0.6, 0.2])

In [None]:
# Number of training runs averaged per objective evaluation.
N = 3

def BO_loss_functions(N):
    """Build the two objective functions handed to the Bayesian optimiser.

    Each returned function takes a 1-D numpy array ``i`` holding, in order,
    ``lr, beta1, beta2, epsilon, max_epochs``; trains the notebook-level
    ``cnet`` ``N`` times with those hyperparameters (stdout suppressed) and
    returns the mean of ``cnet.val_loss`` over the ``N`` runs.

    Parameters
    ----------
    N : int
        Number of training runs to average per evaluation; must be >= 1.

    Returns
    -------
    (classifierLoss, generatorLoss) : tuple of callables
        ``classifierLoss`` trains via ``cnet.train``; ``generatorLoss`` trains
        via ``cnet.train_generator``.

    Raises
    ------
    ValueError
        If ``N`` is smaller than 1 (the average would be undefined).
    """
    if N < 1:
        raise ValueError('N must be at least 1, got %r' % (N,))

    tunables = ('lr', 'beta1', 'beta2', 'epsilon', 'max_epochs')

    def _mean_val_loss(train_method, i):
        # Shared body of both objectives; only the training method differs.
        hp = dict(zip(tunables, i.tolist()))

        #Casting to an int to allow max epochs to be a valid value
        hp['max_epochs'] = int(hp['max_epochs'])

        total_loss = 0
        # N is read from the enclosing scope. Re-assigning it here (the old
        # `N = N`) would make it a local and raise UnboundLocalError.
        for _ in range(N):
            with HiddenPrints():
                # Look the method up on the current global `cnet` at call
                # time, so a re-created network is picked up automatically.
                getattr(cnet, train_method)(save_every=0, eval_every=10000,
                                            hyperparameters=hp)
                total_loss += cnet.val_loss
        return total_loss / N

    def classifierLoss(i):
        """Mean validation loss over N runs of ``cnet.train``."""
        return _mean_val_loss('train', i)

    def generatorLoss(i):
        """Mean validation loss over N runs of ``cnet.train_generator``."""
        return _mean_val_loss('train_generator', i)

    return classifierLoss, generatorLoss

classifierLoss, generatorLoss = BO_loss_functions(N)

In [None]:
## Getting noise
# Retrain the classifier 5 * N times with one fixed hyperparameter set and use
# the spread of the resulting validation losses as the noise estimate.
print('Finding noise with %d samples' % (5 * N))

val_losses = []
with HiddenPrints():
    for i in range(5 * N):
        # A fresh hyperparameter dict is built for every run.
        cnet.train(hyperparameters=dict(lr=3e-5, max_epochs=10),
                   save_every=0, eval_every=1000)
        val_losses.append(cnet.val_loss)

# Standard deviation (numpy default, ddof=0) of the collected losses.
noise = np.asarray(val_losses).std()
print('Classifier Validation Loss Noise: %02.4f' % noise)

In [None]:
# Lower / upper bounds of the search box, one entry per tunable in the order
# the loss functions unpack them: lr, beta1, beta2, epsilon, max_epochs.
lb = np.array([1e-10, 1e-10, 1e-10, 1e-15, 10])
ub = np.array([1e-2, 0.99, 0.99, 1, 100])

# Optimiser options are keyed by name, so this must be a dict (a list would
# raise TypeError on the string-key assignments below).
params = {}
params['n_iterations'] = 75
# Each objective evaluation averages N runs, so the measured single-run noise
# is scaled by 1/sqrt(N); the factor 2 keeps the estimate conservative.
params['noise'] = 2 * noise / np.sqrt(N) #Conservative noise estimate

In [None]:
# Bayesian optimisation of the classifier hyperparameters over the 5-D box
# [lb, ub]; C_hp is the best point found, m_loss the objective value there.
m_loss, C_hp, error = bayesopt.optimize(classifierLoss,
                                       5, lb, ub, params)

# Convert the optimum back into the named hyperparameter dict.
inputlist = C_hp.tolist()
tunables = ['lr','beta1', 'beta2', 'epsilon', 'max_epochs']
hp = dict(zip(tunables, inputlist))
hp['max_epochs'] = int(hp['max_epochs'])  # epochs must be an integer

# Final training run with the tuned hyperparameters. The keyword is spelled
# `hyperparameters`, matching every other cnet.train call in this notebook.
cnet.train(save_every = 5, eval_every = 5,
          hyperparameters = hp)

In [None]:
## Getting noise
# Same procedure as for the classifier, but training through
# cnet.train_generator: retrain 5 * N times with fixed hyperparameters and
# take the standard deviation of the validation losses.
val_losses = []
print('Finding noise with %d samples' % (5 * N))
with HiddenPrints():
    for i in range (0, 5 * N):
        cnet.train_generator(save_every=0, eval_every=1000, 
                   hyperparameters={'lr':3e-5, 'max_epochs': 10})
        val_losses.append(cnet.val_loss)

noise = np.std(val_losses) 
# This cell measures the generator, so label the output accordingly.
print('Generator Validation Loss Noise: %02.4f' % noise)

In [None]:
# Use the noise just measured for the generator (previous cell) instead of the
# stale classifier estimate still stored in params.
params['noise'] = 2 * noise / np.sqrt(N) #Conservative noise estimate

# Bayesian optimisation of the generator hyperparameters over the same 5-D
# box; G_hp is the best point found.
m_loss, G_hp, error = bayesopt.optimize(generatorLoss,
                                       5, lb, ub, params)

# Convert the *generator* optimum (G_hp, not the classifier's C_hp) back into
# the named hyperparameter dict.
inputlist = G_hp.tolist()
tunables = ['lr','beta1', 'beta2', 'epsilon', 'max_epochs']
hp = dict(zip(tunables, inputlist))
hp['max_epochs'] = int(hp['max_epochs'])  # epochs must be an integer

# Final generator training run with the tuned hyperparameters. The keyword is
# spelled `hyperparameters`, matching the other train_generator calls.
cnet.train_generator(save_every = 5, eval_every = 5,
          hyperparameters = hp)

In [None]:
# Persist the trained network.
# NOTE(review): the argument (1000) is interpreted by cnet.save; presumably a
# step/checkpoint id - confirm in Models.Reversible_Classifier.
cnet.save(1000)
# Close the session object held by the network now that training is done
# (presumably a TensorFlow session - confirm).
cnet.sess.close()