In [1]:
# Get the imports out of the way first
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
# PyTorch: model definition and training utilities
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.datasets import load_boston
import pandas as pd

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split



# Dataset

Hyperparameter-optimisation library used later in this notebook (hyperopt): https://github.com/hyperopt/hyperopt

In [71]:
# Select the first CUDA GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): no visible cell moves the model or the batches onto `device`;
# confirm whether training is actually meant to run on the GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Boston_Dataset(Dataset):
    """Boston housing data exposed as a torch ``Dataset``.

    The table is rebuilt from ``load_boston()`` on every construction and
    split 70/30 with a fixed ``random_state``; ``train`` selects which side
    of that split this instance serves.

    Attributes:
        x_data: DataFrame holding the first 13 columns (the features).
        y_data: Series holding the ``PRICE`` column (the target).
    """

    def __init__(self, train=True):
        raw = load_boston()
        table = pd.DataFrame(raw['data'], columns=raw['feature_names'])
        table['PRICE'] = raw['target']

        features = table.iloc[:, 0:13]
        prices = table['PRICE']

        # train_test_split returns (X_train, X_test, y_train, y_test).
        parts = train_test_split(features, prices, test_size=0.3, random_state=0)
        if train:
            self.x_data, self.y_data = parts[0], parts[2]
        else:
            self.x_data, self.y_data = parts[1], parts[3]

    def __len__(self):
        """Number of rows in the selected split."""
        return self.x_data.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, price)`` tensors for the row at position ``idx``."""
        feature_row = self.x_data.iloc[idx]
        price = self.y_data.iloc[idx]
        return torch.tensor(feature_row), torch.tensor(price)

In [52]:
class Boston_Dataset(Dataset):
    """Boston housing data as a torch ``Dataset``, split into train and test parts.

    This class has the same name as the definition in the previous cell, so
    it replaces that one when the notebook is run top to bottom. It therefore
    has to honour ``train`` itself: otherwise ``Boston_Dataset(train=True)``
    and ``Boston_Dataset(train=False)`` would both expose every row and the
    reported "test" loss would be computed on training data.

    Args:
        train: ``True`` selects the 70% training part, ``False`` the 30%
            held-out part of the split.

    Attributes:
        x_data: DataFrame holding the first 13 columns (the features).
        y_data: Series holding the ``PRICE`` column (the target).
    """

    def __init__(self, train=True):
        boston = load_boston()
        boston_df = pd.DataFrame(boston['data'])
        boston_df.columns = boston['feature_names']
        boston_df['PRICE'] = boston['target']
        features = boston_df.iloc[:, 0:13]
        prices = boston_df['PRICE']

        # A fixed random_state makes the train and test instances see
        # complementary rows of the same split.
        X_train, X_test, y_train, y_test = train_test_split(
            features, prices, test_size=0.3, random_state=0
        )

        if train:
            self.x_data = X_train
            self.y_data = y_train
        else:
            self.x_data = X_test
            self.y_data = y_test

    def __len__(self):
        """Number of rows in the selected split."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``(features, price)`` tensors for the row at position ``idx``."""
        return torch.tensor(self.x_data.iloc[idx]), torch.tensor(self.y_data.iloc[idx])

In [72]:
# Training split and its mini-batch loader.
boston_traindataset = Boston_Dataset(train=True)
boston_trainloader = DataLoader(
    boston_traindataset,
    batch_size=5,
    shuffle=True,
    num_workers=4,
)

# Held-out split; loaded with the same batch settings as the training split.
boston_testdataset = Boston_Dataset(train=False)
boston_testloader = DataLoader(
    boston_testdataset,
    batch_size=5,
    shuffle=True,
    num_workers=4,
)

# Model Creation

In [54]:
# Step size passed as `lr` to every Adam optimizer created in this notebook.
learning_rate = 0.01

In [76]:
#Create the model
# Same device selection as in the dataset cell above.
# NOTE(review): `device` is not used by the model or training code in this cell.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Net(nn.Module):
    """Regression network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        n_feature: number of input features per sample.
        size_hidden: number of units in the hidden layer.
        n_output: number of values predicted per sample.
    """

    def __init__(self, n_feature = 13, size_hidden = 1, n_output=1):
        super().__init__()
        # Layers are created in the same order as before so that parameter
        # initialisation consumes the random stream identically.
        self.hidden = nn.Linear(in_features=n_feature, out_features=size_hidden)
        self.predict = nn.Linear(in_features=size_hidden, out_features=n_output)

    def forward(self, x):
        """Map a batch of feature rows to a batch of predictions."""
        activated = torch.relu(self.hidden(x))
        # No activation on the output: this is a plain linear regression head.
        return self.predict(activated)
    
# Width of the hidden layer for the stand-alone model built below.
size_hidden = 10
# Pass the width by keyword: the first positional parameter of Net is
# `n_feature`, so Net(size_hidden) would build a 10-input, 1-unit network.
net = Net(size_hidden=size_hidden)

#Adam is a specific flavor of gradient descent which is typically better
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
# Summed (not averaged) squared error for regression. `reduction='sum'` is the
# current spelling of the deprecated `size_average=False`.
criterion = torch.nn.MSELoss(reduction='sum')

def train_network(hidden, n_epochs=50):
    """Train a fresh ``Net`` with ``hidden`` hidden units and score it on the test loader.

    Args:
        hidden: number of hidden units. It is cast to ``int`` so that numpy
            integers or floats coming from a search routine are accepted.
        n_epochs: number of passes over ``boston_trainloader`` (default 50).

    Returns:
        The summed squared error over all test batches divided by the number
        of test batches (``len(boston_testloader)``).
    """
    hidden = int(hidden)
    net = Net(size_hidden=hidden)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    # reduction='sum' is the non-deprecated equivalent of size_average=False.
    criterion = torch.nn.MSELoss(reduction='sum')

    net.train()
    for _ in range(n_epochs):
        for inputs, labels in boston_trainloader:
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs.float())
            # Labels arrive as shape (batch,); the network outputs (batch, 1).
            loss = criterion(outputs, torch.unsqueeze(labels.float(), dim=1))
            loss.backward()
            optimizer.step()

    net.eval()
    test_loss = 0.0
    # Evaluation needs no gradients; skipping the autograd graph saves memory.
    with torch.no_grad():
        for inputs, labels in boston_testloader:
            outputs = net(inputs.float())
            loss = criterion(outputs, torch.unsqueeze(labels.float(), dim=1))
            test_loss += loss.item()
    avg_test_loss = test_loss / len(boston_testloader)
    return avg_test_loss

In [95]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

def vector_2d(array):
    """Copy ``array`` into a column vector of shape (n, 1), n being its element count."""
    values = np.array(array)
    return values.reshape(-1, 1)

def gaussian_process(x_train, y_train, x_test):
    """Fit a GP regressor on the observed points and predict over ``x_test``.

    Args:
        x_train: hyperparameter values evaluated so far.
        y_train: score observed for each entry of ``x_train``.
        x_test: candidate hyperparameter values to predict for.

    Returns:
        ``(y_mean, y_std)``: predictive mean and standard deviation, both as
        column vectors of shape ``(len(x_test), 1)``.
    """
    x_train = vector_2d(x_train)
    y_train = vector_2d(y_train)
    x_test = vector_2d(x_test)

    # Train gaussian process
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel, n_restarts_optimizer=100)
    gp.fit(x_train, y_train)

    # Get mean and standard deviation for each possible
    # number of hidden units
    y_mean, y_std = gp.predict(x_test, return_std=True)
    # Force both outputs into the same (n, 1) layout. Depending on the
    # estimator version the mean may come back as (n,) or (n, 1); if only
    # the std were reshaped, `y_mean - k * y_std` would broadcast to (n, n).
    y_mean = np.asarray(y_mean).reshape((-1, 1))
    y_std = np.asarray(y_std).reshape((-1, 1))

    return y_mean, y_std

def next_parameter_by_ei(y_min, y_mean, y_std, x_choices):
    """Pick the candidate whose optimistic prediction improves most on ``y_min``.

    The score for each candidate is ``y_min - (mean - 1.96 * std)``, i.e. how
    far the lower end of the 95% band lies below the best value seen so far,
    clipped at zero.

    Args:
        y_min: best (lowest) score observed so far.
        y_mean: predicted mean per candidate; any shape with one value per
            candidate, e.g. ``(n,)`` or ``(n, 1)``.
        y_std: predicted standard deviation per candidate, same element
            count as ``y_mean``.
        x_choices: candidate values, indexable by position.

    Returns:
        The element of ``x_choices`` with the largest score. When no
        candidate scores above zero the first element is returned, because
        ``argmax`` breaks ties by taking the first index.
    """
    # Flatten both inputs so that (n,) and (n, 1) arrays line up element-wise
    # instead of broadcasting to an (n, n) grid, which would let argmax
    # return an index beyond the end of x_choices.
    mean = np.ravel(y_mean)
    std = np.ravel(y_std)

    # Calculate expected improvement from 95% confidence interval
    optimistic_score = mean - 1.96 * std
    expected_improvement = np.maximum(y_min - optimistic_score, 0)

    max_index = expected_improvement.argmax()
    # Select next choice
    return x_choices[max_index]

import random

def hyperparam_selection(func, n_hidden_range, data, n_iter=20):
    """Search for the hidden-unit count that minimises ``func`` using a GP surrogate.

    Two values are drawn at random; every further candidate is proposed by
    fitting ``gaussian_process`` to all observations so far and passing its
    predictions to ``next_parameter_by_ei``.

    Args:
        func: callable that takes a hidden-unit count and returns a score;
            lower scores are treated as better.
        n_hidden_range: two-element sequence ``(min, max)``; both ends are
            included in the candidate set.
        data: accepted but not used anywhere in the body.
        n_iter: upper bound on the number of ``func`` evaluations (the first
            evaluation always runs).

    Returns:
        ``(parameters, scores)``: every evaluated value and its score, in
        evaluation order. These are the full histories, not just the best entry.
    """

    scores = []
    parameters = []

    min_n_hidden, max_n_hidden = n_hidden_range
    # All integer candidates in the inclusive range.
    n_hidden_choices = np.arange(min_n_hidden, max_n_hidden + 1)

    # To be able to perform gaussian process we need to
    # have at least 2 samples.
    n_hidden = random.randint(min_n_hidden, max_n_hidden)
    score = func(n_hidden)

    parameters.append(n_hidden)
    scores.append(score)

    # Second random draw; it is evaluated by the first loop iteration.
    n_hidden = random.randint(min_n_hidden, max_n_hidden)
    
    for iteration in range(2, n_iter + 1):
        # Evaluate the pending candidate (random on the first pass,
        # GP-proposed afterwards).
        score = func(n_hidden)

        parameters.append(n_hidden)
        scores.append(score)

        y_min = min(scores)
        # Refit the surrogate on every observation and predict all candidates.
        y_mean, y_std = gaussian_process(parameters, scores,
                                         n_hidden_choices)

        n_hidden = next_parameter_by_ei(y_min, y_mean, y_std,
                                        n_hidden_choices)

        if y_min == 0 or n_hidden in parameters:
            # Stop: either a score of exactly zero was reached or the
            # surrogate proposed a value that has already been evaluated.
            # The two progress prints below are skipped on this iteration.
            break
        print(f"Current Minimum: {y_min}")
        print(f"Suggested n_hidden: {n_hidden}")
        
    # Report the best evaluated value and its score.
    min_score_index = np.argmin(scores)
    print(f"Suggested Hidden: {parameters[min_score_index]}")
    print(f"Minimum Score: {min(scores)}")
    return parameters, scores

In [96]:
# Run the GP-guided search over 50..525 hidden units with at most 20 evaluations.
# NOTE: despite its name, `best_n_hidden` receives the full list of evaluated
# values (first element of the returned tuple), not a single best value; the
# best one is best_n_hidden[np.argmin(scores)].
best_n_hidden, scores = hyperparam_selection(
    train_network,
    n_hidden_range=[50, 525],
    data=boston_trainloader,
    n_iter=20,
)

Current Minimum: 135.3922591670867
Suggested n_hidden: 525
Current Minimum: 135.3922591670867
Suggested n_hidden: 50
Current Minimum: 135.3922591670867
Suggested n_hidden: 259
Current Minimum: 135.3922591670867
Suggested n_hidden: 424
Current Minimum: 135.3922591670867
Suggested n_hidden: 381
Current Minimum: 135.3922591670867
Suggested n_hidden: 326
Current Minimum: 123.80714474954912
Suggested n_hidden: 186
Current Minimum: 123.80714474954912
Suggested n_hidden: 469
Current Minimum: 123.80714474954912
Suggested n_hidden: 343
Current Minimum: 123.80714474954912
Suggested n_hidden: 300
Current Minimum: 123.80714474954912
Suggested n_hidden: 89
Current Minimum: 123.80714474954912
Suggested n_hidden: 221
Current Minimum: 123.80714474954912
Suggested n_hidden: 156
Current Minimum: 123.80714474954912
Suggested n_hidden: 497
Current Minimum: 123.80714474954912
Suggested n_hidden: 314
Current Minimum: 123.80714474954912
Suggested n_hidden: 285
Current Minimum: 117.28127110388971
Suggested n_

In [99]:
# Every hidden-unit count evaluated by the GP search, in evaluation order.
best_n_hidden

[128,
 366,
 525,
 50,
 259,
 424,
 381,
 326,
 186,
 469,
 343,
 300,
 89,
 221,
 156,
 497,
 314,
 285,
 446,
 111]

In [130]:
import plotly.express as px

# Bar chart: one bar per hidden-unit count tried by the GP search, with the
# resulting loss as bar height and as text label.
fig = px.bar(
    x=best_n_hidden,
    y=scores,
    text=scores,
    labels=dict(x='Hidden Layers', y='Losses'),
    title="Histogram of Gaussian Process",
)
fig.show()

In [77]:
# Grid search baseline: train one network per hidden-layer width from 50 to
# 525 in steps of 25 and keep the test loss of each run in `all_loss`.
all_loss = []
for n_units in range(50, 526, 25):
    print(f'hidden: {n_units}')
    grid_loss = train_network(n_units)
    print(f'loss: {grid_loss}')
    all_loss.append(grid_loss)

hidden: 50
loss: 154.96165955451227
hidden: 75
loss: 144.43469973533385
hidden: 100
loss: 128.2427533057428
hidden: 125
loss: 139.28066942768712
hidden: 150
loss: 114.28010519089237
hidden: 175
loss: 184.28412683548467
hidden: 200
loss: 137.14770575492614
hidden: 225
loss: 167.63943616805537
hidden: 250
loss: 180.72661707478184
hidden: 275
loss: 175.6809624087426
hidden: 300
loss: 194.09320240636026
hidden: 325
loss: 159.57766539050687
hidden: 350
loss: 192.81504042686956
hidden: 375
loss: 130.16389022334928
hidden: 400
loss: 244.06776834303332
hidden: 425
loss: 126.66135827956661
hidden: 450
loss: 167.5245672656644
hidden: 475
loss: 155.91540641169394
hidden: 500
loss: 147.51774464884113
hidden: 525
loss: 147.3401267143988


In [113]:
# Sanity check: one loss per grid point of range(50, 526, 25).
len(all_loss)

20

In [123]:
import plotly.express as px

# Grid-search losses plotted against hidden-layer width.
# NOTE(review): px.histogram aggregates `y` per bin rather than drawing raw
# values; with 100 bins over 20 distinct x values each bin presumably holds a
# single run. Confirm, or use px.bar.
fig = px.histogram(
    x=list(range(50, 526, 25)),
    y=all_loss,
    nbins=100,
    title='Histogram of Grid Search',
)
fig.show()

In [84]:
def uniform_int(name, lower, upper):
    """Build a hyperopt search dimension over whole numbers in [lower, upper].

    Args:
        name: label hyperopt uses for this dimension.
        lower: lower bound of the range.
        upper: upper bound of the range.

    Returns:
        The expression produced by ``hp.quniform`` with a step of 1.
    """
    # `quniform` returns:
    # round(uniform(low, high) / q) * q
    unit_step = 1
    return hp.quniform(name, lower, upper, q=unit_step)

#Create the model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Report the selected device; on a CUDA machine this prints a CUDA device.
# NOTE(review): the print is informational only, since nothing in this cell
# moves the model or data onto `device`.
print("Executing the model on :",device)
class Net(torch.nn.Module):
    """Feed-forward regressor: one ReLU hidden layer followed by a linear output.

    Args:
        n_feature: number of input features per sample.
        size_hidden: number of units in the hidden layer.
        n_output: number of values predicted per sample.
    """

    def __init__(self, n_feature = 13, size_hidden = 1, n_output=1):
        super().__init__()
        make_linear = torch.nn.Linear
        # Creation order (hidden, then predict) is kept so that parameter
        # initialisation draws from the random stream in the same sequence.
        self.hidden = make_linear(n_feature, size_hidden)
        self.predict = make_linear(size_hidden, n_output)

    def forward(self, x):
        """Map a batch of feature rows to a batch of predictions."""
        hidden_out = self.hidden(x)
        hidden_out = F.relu(hidden_out)
        # Linear output, no activation.
        return self.predict(hidden_out)

#Adam is a specific flavor of gradient descent which is typically better
# NOTE(review): `net` is not created in this cell; it is whatever model an
# earlier cell left in the kernel. train_network below builds its own model
# and optimizer, so these two module-level objects are not used by it.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
# Summed squared error for regression. `reduction='sum'` replaces the
# deprecated `size_average=False` and matches the criterion used inside
# train_network.
criterion = torch.nn.MSELoss(reduction='sum')

def train_network(hidden, n_epochs=50):
    """Train a fresh ``Net`` with ``hidden`` hidden units and score it on the test loader.

    Args:
        hidden: number of hidden units. It is cast to ``int`` because
            hyperopt's ``quniform`` hands over floats such as ``485.0``.
        n_epochs: number of passes over ``boston_trainloader`` (default 50).

    Returns:
        The summed squared error over all test batches divided by the number
        of test batches (``len(boston_testloader)``).
    """
    hidden = int(hidden)
    net = Net(size_hidden=hidden)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss(reduction='sum')  # this is for regression mean squared loss

    net.train()
    for _ in range(n_epochs):
        for inputs, labels in boston_trainloader:
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs.float())
            # Labels arrive as shape (batch,); the network outputs (batch, 1).
            loss = criterion(outputs, torch.unsqueeze(labels.float(), dim=1))
            loss.backward()
            optimizer.step()

    net.eval()
    test_loss = 0.0
    # Evaluation needs no gradients; skipping the autograd graph saves memory.
    with torch.no_grad():
        for inputs, labels in boston_testloader:
            outputs = net(inputs.float())
            loss = criterion(outputs, torch.unsqueeze(labels.float(), dim=1))
            test_loss += loss.item()
    avg_test_loss = test_loss / len(boston_testloader)

    return avg_test_loss

Executing the model on : cuda:0


In [85]:
import numpy as np
import hyperopt
from hyperopt import tpe, hp, fmin
from functools import partial

# Stores the information recorded for each trial,
# including the best trial found.
trials = hyperopt.Trials()

# TPE suggestion function with its tuning knobs fixed via functools.partial.
tpes = partial(
    hyperopt.tpe.suggest,
    # Sample 50 candidates and select the candidate that
    # has the highest Expected Improvement (EI)
    n_EI_candidates=50,

    # Use 15% of best observations to estimate next
    # set of parameters
    gamma=0.15,

    # First 20 trials are going to be random
    n_startup_jobs=20,
)

In [86]:
# Use the configured `tpes` partial from the previous cell rather than the
# bare `tpe.suggest`, so that its n_EI_candidates / gamma / n_startup_jobs
# settings actually take effect.
# NOTE(review): n_startup_jobs is 20 and max_evals is 20, so per the comment
# on `tpes` every trial here is still a random draw; raise max_evals if the
# TPE model itself is meant to propose candidates.
tpe_best_params = fmin(
    fn=train_network, # Objective Function to optimize
    trials=trials,
    space= uniform_int('hidden', 50, 500), # Hyperparameter's Search Space
    algo=tpes,
    max_evals=20 # Number of optimization attempts
)

100%|██████████| 20/20 [02:20<00:00,  7.02s/trial, best loss: 123.68026782620338]


({'hidden': [0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19]},
 {'hidden': [485.0,
   120.0,
   306.0,
   76.0,
   292.0,
   314.0,
   443.0,
   66.0,
   361.0,
   172.0,
   257.0,
   85.0,
   364.0,
   98.0,
   292.0,
   449.0,
   53.0,
   221.0,
   388.0,
   493.0]})

In [120]:
# Loss recorded for each hyperopt trial.
trials.losses()

[163.7189073870259,
 144.95191687922323,
 123.68026782620338,
 146.59237947771626,
 254.67261505126953,
 227.53975579046434,
 151.72535518292457,
 176.9843207943824,
 186.66594173062234,
 236.07007648098855,
 136.42733097845507,
 181.82928540629726,
 143.05796011032598,
 181.8982592551939,
 160.705294885943,
 133.41662230030184,
 139.54234609296245,
 187.85984506914693,
 148.586092951317,
 141.0262289354878]

In [118]:
# Sampled 'hidden' value of each trial (floats, as produced by quniform).
trials.idxs_vals[1]['hidden']

[485.0,
 120.0,
 306.0,
 76.0,
 292.0,
 314.0,
 443.0,
 66.0,
 361.0,
 172.0,
 257.0,
 85.0,
 364.0,
 98.0,
 292.0,
 449.0,
 53.0,
 221.0,
 388.0,
 493.0]

In [121]:
import plotly.express as px

# Losses of the hyperopt trials plotted against the sampled hidden-layer width.
fig = px.histogram(
    x=trials.idxs_vals[1]['hidden'],
    y=trials.losses(),
    nbins=100,
)
fig.show()

In [143]:
# Every hidden-unit count evaluated by the GP search (not only the best one).
best_n_hidden

[128,
 366,
 525,
 50,
 259,
 424,
 381,
 326,
 186,
 469,
 343,
 300,
 89,
 221,
 156,
 497,
 314,
 285,
 446,
 111]

In [146]:
# Hidden-unit count with the lowest score among the GP search evaluations.
best_n_hidden[np.argmin(scores)]

285

In [147]:
import plotly.express as px

# GP search results as a bar chart; the title carries the smallest loss
# (truncated to an integer) and the hidden-unit count that produced it.
fig = px.bar(
    x=best_n_hidden,
    y=scores,
    text=scores,
    labels=dict(x='Hidden Layers', y='Losses'),
    title=f"Histogram of Gaussian Process (Min Loss: {int(min(scores))}, Hidden Layer: {best_n_hidden[np.argmin(scores)]})",
)
fig.show()

In [154]:
import plotly.express as px

# Hyperopt trial results as a bar chart; the title carries the smallest loss
# (truncated to an integer) and the sampled width that produced it.
fig = px.bar(
    x=trials.idxs_vals[1]['hidden'],
    y=trials.losses(),
    text=trials.losses(),
    labels=dict(x='Hidden Layers', y='Losses'),
    title=f"Histogram of Tree Structured Parzen Window Estimation (Min Loss: {int(min(trials.losses()))}, Hidden Layer: {trials.idxs_vals[1]['hidden'][np.argmin(trials.losses())]})",
)
fig.show()

In [153]:
import plotly.express as px

# Grid-search results as a bar chart. The title reports the smallest loss and
# the hidden-layer width that produced it, so the index must be taken from
# `all_loss` (the grid-search results) and not from the GP search's `scores`.
fig = px.bar(
    x=list(range(50,526,25)), y=all_loss, title=f"Histogram of Grid Search (Min Loss: {int(min(all_loss))}, Hidden Layer: {list(range(50,526,25))[np.argmin(all_loss)]})",
    text = all_loss,
    labels = {'x':'Hidden Layers',
              'y': 'Losses'}
)
fig.show()

In [None]:
import plotly.express as px

# Same grid-search histogram as drawn earlier in the notebook.
fig = px.histogram(
    x=list(range(50, 526, 25)),
    y=all_loss,
    nbins=100,
    title='Histogram of Grid Search',
)
fig.show()