In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils import data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
import cProfile, pstats, io
from pstats import SortKey


print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)
#print(torch.cuda.nccl.is_available())
torch.manual_seed(0)   # reproducible


OPTIMIZATION_PLUGIN = 'Scikit' # 'Bayesian' or 'Scikit'
GET_STATS = False
GPU_SELECT = 2 # can be 0, 1, 2 (both)
PARALLEL_PROCESSES = 2
TRIALS = 20
RANDOM_STARTS = 10
LR  = 1e-5                # learning rate
SCI_LR =  1e-5
LR2 = 1e-5
SCI_MM = 0.5                 # momentum - used only with SGD optimizer
MM = 0.5
L_FIRST = 1
KERNEL_X = 7
patience = 12             # if validation loss not going down, wait "patience" number of epochs
accuracy = 0

PyTorch Version:  1.1.0
Torchvision Version:  0.3.0


In [2]:
#DATASET

CLASSES = 2
TRAIN_SIZE = 256
VALIDATION_SIZE = 90
TEST_SIZE = 654
TESTED_ELEMENTS = torch.tensor([527,127]).type(torch.FloatTensor) 
TEST_RESULTS = torch.zeros(1,2)
LAST_DATA_ELEMENT = 24
file = ['train6.csv','validate6.csv','test6.csv']

pr = cProfile.Profile()

if GET_STATS:
    pr.enable()
    

if GPU_SELECT == 2:
    if torch.cuda.device_count() > 1:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("Using", torch.cuda.device_count(), "NVIDIA 1080TI GPUs!")

if GPU_SELECT == 1:
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")    
    print("Using one (the second) NVIDIA 1080TI GPU!")

if GPU_SELECT == 0:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")       
    print("Using one (the first) NVIDIA 1080TI GPU!")


def CreateDataset(file, SIZE, LAST_DATA_COLUMN):
    file_reader = pd.read_csv(file, header=None)
    temp_tensor = torch.tensor(file_reader.values)

    target = torch.zeros(SIZE, device = device)
    target = temp_tensor[:,LAST_DATA_COLUMN]
    target.requires_grad = False
    target = torch.t(target).type(torch.LongTensor).cuda()

    data = torch.zeros(1,1,SIZE,LAST_DATA_COLUMN, device = device)
    data[0,0,:,:] = temp_tensor[:,0:LAST_DATA_COLUMN]
    data = data.permute(2,1,3,0)
    data.requires_grad = False

    return data, target

train_data, train_target = CreateDataset(file[0], TRAIN_SIZE, LAST_DATA_ELEMENT)
validation_data, validation_target = CreateDataset(file[1], VALIDATION_SIZE, LAST_DATA_ELEMENT)
test_data, test_target = CreateDataset(file[2], TEST_SIZE, LAST_DATA_ELEMENT)

train_dataset = Data.TensorDataset(train_data, train_target)
validation_dataset = Data.TensorDataset(validation_data, validation_target)
test_dataset = Data.TensorDataset(test_data, test_target)

Using 2 NVIDIA 1080TI GPUs!


In [3]:
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)        

            
def weights_reset(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        m.reset_parameters()


from early_stopping import EarlyStopping

early_stopping = EarlyStopping(patience=patience, verbose=True)  # initialize the early_stopping object

# Counter for the execution time
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()

In [None]:
if OPTIMIZATION_PLUGIN == 'Scikit' :
    from skopt import gp_minimize
    from sklearn.datasets import load_boston
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import cross_val_score
    from skopt.space import Real, Integer
    from skopt.utils import use_named_args
    from skopt.plots import plot_convergence
    from functools import partial
    from skopt.plots import plot_evaluations
    from skopt import gp_minimize, forest_minimize, dummy_minimize, gbrt_minimize
    from skopt.plots import plot_objective
    from sklearn.preprocessing import LabelEncoder
    from sklearn.preprocessing import CategoricalEncoder
    from skopt.space import Real, Categorical, Integer
    from sklearn.externals.joblib import Parallel, delayed

    #SCI_LR = Categorical(categories=[1e-1, 3e-1, 5e-1, 7e-1, 1e-2, 3e-2, 5e-2, 7e-2, 1e-3, 3e-3, 5e-3, 7e-3, 1e-4, 3e-4, 0.1, 0.2, 0.3, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.001, 0.0001, 1e-5],name= 'SCI_LR')
    SCI_LR = Categorical(categories=[1e-1, 3e-1, 5e-1, 7e-1, 1e-2, 3e-2, 5e-2, 7e-2, 0.1, 0.2, 0.3, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.001],name= 'SCI_LR')
    SCI_MM = Categorical(categories=[0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 0.999], name='SCI_MM')
    SCI_REGULARIZATION = Categorical(categories=[0.0001, 0.0003, 0.0007, 0.001, 0.003, 0.007, 0.01, 0.03, 0.07, 0.1, 0.3, 0.7], name='SCI_REGULARIZATION')
    SCI_EPOCHS = Categorical(categories=[20000, 10000], name='SCI_EPOCHS')
    SCI_optimizer = Categorical(categories=['Adam', 'Adadelta', 'SGD', 'Adagrad', 'AMSGrad', 'AdamW'],name='SCI_optimizer') #
    SCI_loss_type = Categorical(categories=['CrossEntropyLoss', 'MultiMarginLoss','NLLLoss'],name='SCI_loss_type') # 
    SCI_BATCH_SIZE = Categorical(categories=[4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 160, 192, 224, 256], name='SCI_BATCH_SIZE')
    SCI_DROPOUT = Categorical(categories=[0, 0.01, 0.03, 0.07, 0.1, 0.13, 0.17, 0.2, 0.23, 0.27, 0.3, 0.33, 0.37, 0.4] , name = 'SCI_DROPOUT')
    SCI_RELU = Categorical(categories=['True', 'False'] , name = 'SCI_RELU')
    SCI_BIAS = Categorical(categories=['True', 'False'] , name = 'SCI_BIAS')
    SCI_L_SECOND = Categorical(categories=[2, 4, 6, 8, 12, 16, 20, 24, 32, 48, 64], name='SCI_L_SECOND')
    SCI_BN_MOMENTUM = Categorical(categories=[0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99] , name = 'SCI_BN_MOMENTUM') 
    SCI_SGD_MOMENTUM = Categorical(categories=[0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99] , name = 'SCI_SGD_MOMENTUM') 

    dimensions = [SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_optimizer, SCI_LR, SCI_loss_type, SCI_DROPOUT, SCI_RELU, SCI_BIAS, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM]

    @use_named_args(dimensions = dimensions)

    def objective(SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_optimizer, SCI_LR, SCI_loss_type, SCI_DROPOUT, SCI_RELU, SCI_BIAS, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM):
        global device  

        from cnn_model import CNN4
                
        cnn = CNN4(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, SCI_DROPOUT, CLASSES)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn) 
            cnn = cnn.cuda()
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        cnn.share_memory()
     
        loss_func = nn.CrossEntropyLoss().cuda()

        def create_loss(LOSS):   
            if LOSS == 'CrossEntropyLoss':
                loss_func = nn.CrossEntropyLoss().cuda()
            if LOSS == 'NLLLoss':
                loss_func = nn.NLLLoss().cuda()
            else:
                loss_func = nn.MultiMarginLoss().cuda()
            return loss_func

        MM = float(str(SCI_MM))
        REGULARIZATION = float(str(SCI_REGULARIZATION))
        optimizer = str(SCI_optimizer)
        LR = float(str(SCI_LR))
        train_losses = []         # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1,2)

        cnn.apply(weights_reset)
    
        loss_type = create_loss(SCI_loss_type)
        
        from adamw import AdamW
        
        
        if SCI_optimizer == 'Adam':
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if SCI_optimizer == 'AMSGrad':
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if SCI_optimizer == 'AdamW':
            optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay = REGULARIZATION)            
        if SCI_optimizer == 'SGD':
            optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if SCI_optimizer == 'Adadelta':
            optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
        if SCI_optimizer == 'Adagrad':
            optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
    
        print('Optimization: ', optimizer)
        if optimizer == 'SGD':
            print('MM: ',SCI_SGD_MOMENTUM)
        print('Batch Normalization Momentum: ',SCI_BN_MOMENTUM)   
        print('Nodes: ', SCI_L_SECOND)         
        #print('LR: ', SCI_LR)         
        print('RELU: ', SCI_RELU)       
        print('BIAS: ', SCI_BIAS)   
        print('Loss Type: ', SCI_loss_type)   
        #print('REGULARIZATION: ', REGULARIZATION)    
        print('BATCH_SIZE: ', SCI_BATCH_SIZE)
        print('Dropout: ', SCI_DROPOUT)
    
        #SCI_BATCH_SIZE = 1
        # Data Loader for easy mini-batch return in training
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)
        train_loader = Data.DataLoader(dataset = train_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0, drop_last=True)
        validation_loader = Data.DataLoader(dataset = validation_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0, drop_last=True)    
        test_loader = Data.DataLoader(dataset = test_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0)
    
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)                # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())              # record training loss 
                loss.backward()                               # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.zero_grad()
                optimizer.step()                              # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()                 # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)            # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    ps = torch.exp(output)
                    equality = (validation_target[0].data == ps.max(dim=1)[1])
                    accuracy += equality.type(torch.FloatTensor).mean()      
               
            train_losses = []
            early_stopping(valid_loss, cnn)
       
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt', map_location="cuda:1"))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(CLASSES):
            TEST_RESULTS[0,i] = class_correct[i] / TESTED_ELEMENTS[i]
            print('Class: ',i,' accuracy: ', TEST_RESULTS[0,i])
            print('Class: ',i,' correct: ', class_correct[i])
        percent = (TEST_RESULTS[0,0]+TEST_RESULTS[0,1])/2
        print('Final percentage: ',percent)
    
        CreditCost = int((1 - TEST_RESULTS[0,0]) * TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0,1]) * TESTED_ELEMENTS[1] * 5)
    
        if TEST_RESULTS[0,0] == 0 or TEST_RESULTS[0,1] == 0 :
            CreditCost = CreditCost + 200
    
        print('Last epoch: ', epoch)
        print('Credit Cost: ',CreditCost)
    
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        torch.cuda.empty_cache()
        print()
        
        return CreditCost
    
    #   not working    #res_gp = gp_minimize(objective, dimensions=dimensions, n_calls=TRIALS, random_state=1, verbose=True, acq_func='gp_hedge', acq_optimizer='auto', n_jobs=1)
    #res_gp = forest_minimize(objective, dimensions=dimensions, base_estimator='RF', n_calls=TRIALS, n_random_starts=RANDOM_STARTS, acq_func='EI', x0=None, y0=None, random_state=None, verbose=True, callback=None, n_points=10000, xi=0.01, kappa=1.96, n_jobs=128)
    res_gp = gbrt_minimize(objective, dimensions=dimensions, base_estimator='ET', n_calls=TRIALS, n_random_starts=RANDOM_STARTS, acq_func='LCB', x0=None, y0=None, random_state=None, verbose=True, callback=None, n_points=100, xi=0.01, kappa=1.96, n_jobs=8)
    #res_gp = dummy_minimize(objective, dimensions=dimensions, n_calls=TRIALS, x0=None, y0=None, random_state=None, verbose=True, callback=None)      

    "Best score=%.4f" % res_gp.fun
    print("""Best parameters: - optimization=%d""" % (res_gp.x[0]))
  
    print(res_gp)
    plot_convergence(res_gp)
    #plot_evaluations(res_gp)
    #plot_objective(res_gp)
    

Iteration No: 1 started. Evaluating function at random point.
Optimization:  Adagrad (
Parameter Group 0
    initial_accumulator_value: 0
    lr: 0.09
    lr_decay: 0
    weight_decay: 0.3
)
Batch Normalization Momentum:  0
Nodes:  20
RELU:  False
BIAS:  False
Loss Type:  NLLLoss
BATCH_SIZE:  128
Dropout:  0.2
Class:  0  accuracy:  tensor(1.)
Class:  0  correct:  527.0
Class:  1  accuracy:  tensor(0.)
Class:  1  correct:  0.0
Final percentage:  tensor(0.5000)
Last epoch:  19999
Credit Cost:  835


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 214.0078
Function value obtained: 835.0000
Current minimum: 835.0000
Iteration No: 2 started. Evaluating function at random point.
Optimization:  Adadelta (
Parameter Group 0
    eps: 1e-06
    lr: 0.08
    rho: 0.9
    weight_decay: 0.0007
)
Batch Normalization Momentum:  0.5
Nodes:  24
RELU:  False
BIAS:  True
Loss Type:  MultiMarginLoss
BATCH_SIZE:  192
Dropout:  0.37
Class:  0  accuracy:  tensor(0.)
Class:  0  correct:  0

In [None]:
MaxCredit = -800

if OPTIMIZATION_PLUGIN == 'Bayesian' :
    from bayes_opt import BayesianOptimization
    
    #def black_box_function(x, y):
    def objective(SCI_RELU, SCI_BIAS, SCI_loss_type, SCI_optimizer, SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_LR, SCI_DROPOUT, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM):
        global device, MaxCredit  
        
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)                    # integer between 4 and 256
        SCI_MM = round(SCI_MM,3)                                # real with three decimals between (0.001, 0.999)
        SCI_REGULARIZATION = round(SCI_REGULARIZATION,3)        # real with three decimals between (0.001, 0.7)
        SCI_LR = round(SCI_LR,5)                                # real with five decimals between(1e-4, 7e-1)            
        SCI_DROPOUT = round(SCI_DROPOUT,2)                      # real with two decimals between (0, 0.4)
        SCI_L_SECOND = int(SCI_L_SECOND)                        # integer between 2 and 64
        SCI_EPOCHS = int(SCI_EPOCHS)                            # integer between (100, 500)
        SCI_BN_MOMENTUM = round(SCI_BN_MOMENTUM,2)              # real with two decimals between (0, 0.99)
        SCI_SGD_MOMENTUM = round(SCI_SGD_MOMENTUM,2)            # real with two decimals between (0, 0.99) 
        SCI_optimizer = int(SCI_optimizer)                      # integer between 1 and 4
        SCI_loss_type = int(SCI_loss_type)                      # integer between 1 and 3 ('CrossEntropyLoss', 'MultiMarginLoss','NLLLoss')
        if int(SCI_RELU) == 1 :                                 # integer between 1 and 2 ('True', 'False')
            SCI_RELU = True      
        else:
            SCI_RELU = False      
        if int(SCI_BIAS) == 1 :                                 # integer between 1 and 2 ('True', 'False')
            SCI_BIAS = True      
        else:
            SCI_BIAS = False  
               
        from cnn_model import CNN2
        cnn = CNN4(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, SCI_DROPOUT, CLASSES)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn) 
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        #next(cnn.parameters()).is_cuda
        #print(cnn)  # net architecture   
        #list(cnn.parameters()) 
        cnn.share_memory()
     
        loss_func = nn.CrossEntropyLoss().cuda()

        def create_loss(LOSS):   
            if LOSS == 1:
                loss_func = nn.CrossEntropyLoss().cuda()
            if LOSS == 2:
                loss_func = nn.NLLLoss().cuda()
            else:
                loss_func = nn.MultiMarginLoss().cuda()
            return loss_func

        MM = float(str(SCI_MM))
        REGULARIZATION = float(str(SCI_REGULARIZATION))
        #optimizer = str(SCI_optimizer)
        LR = float(str(SCI_LR))
        train_losses = []         # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1,2)

        cnn.apply(weights_reset)
    
        loss_type = create_loss(SCI_loss_type)
    
        from adamw import AdamW
        
        
        if SCI_optimizer == 1:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if SCI_optimizer == 2:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if SCI_optimizer == 3:
            optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay = REGULARIZATION)           
        if SCI_optimizer == 4:
            optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if SCI_optimizer == 5:
            optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
        if SCI_optimizer == 6:
            optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
    
    
        print('Optimization: ', optimizer)
        if optimizer == 'SGD':
            print('MM: ',SCI_SGD_MOMENTUM)
        print('Batch Normalization Momentum: ',SCI_BN_MOMENTUM)   
        print('Nodes: ', SCI_L_SECOND)         
        print('LR: ', SCI_LR)         
        print('RELU: ', SCI_RELU)       
        print('BIAS: ', SCI_BIAS)   
        print('Loss Type: ', SCI_loss_type)   
        print('REGULARIZATION: ', REGULARIZATION)    
        print('BATCH_SIZE: ', SCI_BATCH_SIZE)
        print('Dropout: ', SCI_DROPOUT)
    
        # Data Loader for easy mini-batch return in training
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)
        train_loader = Data.DataLoader(dataset = train_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0, drop_last=True)
        validation_loader = Data.DataLoader(dataset = validation_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0, drop_last=True)    
        test_loader = Data.DataLoader(dataset = test_dataset, batch_size = SCI_BATCH_SIZE, shuffle = False, num_workers = 0)
    
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)                # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())              # record training loss 
                loss.backward()                               # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.zero_grad()
                optimizer.step()                              # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()                 # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)            # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    ps = torch.exp(output)
                    equality = (validation_target[0].data == ps.max(dim=1)[1])
                    accuracy += equality.type(torch.FloatTensor).mean()      
               
            train_losses = []
            early_stopping(valid_loss, cnn)
        
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    #cnn = TheModelClass(*args, **kwargs)
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt', map_location="cuda:1"))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(CLASSES):
            TEST_RESULTS[0,i] = class_correct[i] / TESTED_ELEMENTS[i]
            print('Class: ',i,' accuracy: ', TEST_RESULTS[0,i])   
        percent = (TEST_RESULTS[0,0]+TEST_RESULTS[0,1])/2
        print('Final percentage: ',percent)
    
        CreditCost = int((1 - TEST_RESULTS[0,0]) * TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0,1]) * TESTED_ELEMENTS[1] * 5)
    
        print('Last epoch: ', epoch)
        print('Credit Cost: ',-CreditCost)
        #list(cnn.parameters())
    
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        print()
        
        if -CreditCost > MaxCredit : 
            MaxCredit = -CreditCost
        print('Best Score So Far: ',MaxCredit)    
        
        return -CreditCost
    
    
    optimizer = BayesianOptimization(
        f=objective,
        #pbounds=pbounds,
        pbounds={'SCI_RELU': (1,2.99), 'SCI_BIAS': (1,2.99), 'SCI_loss_type': (1, 3.99), 'SCI_optimizer': (1, 6.99),'SCI_LR': (0.00001, 0.01), 'SCI_MM': (0.001, 0.999), 'SCI_REGULARIZATION': (0.0001, 0.7), 'SCI_EPOCHS': (10000, 20000), 'SCI_BATCH_SIZE': (4, 256), 'SCI_DROPOUT': (0, 0.4), 'SCI_L_SECOND': (2, 64), 'SCI_BN_MOMENTUM': (0, 0.99), 'SCI_SGD_MOMENTUM': (0, 0.99)},
        verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        random_state=1,
    )
        

    optimizer.maximize(
        n_iter=1000, acq="ucb", kappa=0.1
    )
    
    
    print(optimizer.max)
    
    for i, res in enumerate(optimizer.res):
        print("Iteration {}: \n\t{}".format(i, res))

In [None]:
end.record()

#print('Minimum Credit Cost: ',Min_Credit_Cost)

print()
print('Total execution time (minutes): ',start.elapsed_time(end)/60000)

torch.cuda.empty_cache()

if GET_STATS:
    pr.disable()
    s = io.StringIO()
    sortby = SortKey.CUMULATIVE
    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
    ps.print_stats()
    print(s.getvalue())