Getting the data

In [29]:
from sklearn.datasets import make_regression

from torch import Tensor
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data import random_split

from sklearn.model_selection import KFold

import matplotlib.pyplot as plt
import numpy as np

class my_data(Dataset):

    def __init__(self):
        X, y = make_regression(n_samples=2000, n_features=10, n_informative=6, n_targets=3, random_state=0)

        # z-scores for normalising
        self.X = (X - np.mean(X, axis=0))/np.std(X, axis=0)
        self.y = (y - np.mean(y, axis=0))/np.std(y, axis=0)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]
 
    def get_splits(self, n_test=0.3):
        size_for_test = round(n_test * len(self.X))
        size_for_training = len(self.X) - size_for_test
        return random_split(self, [size_for_training, size_for_test])


dataset = my_data()
train, test = dataset.get_splits()


Getting the model

In [30]:
# multi-output multi-layer perceptron (ANN)
class my_MLP():
  def __init__(self, X, y):

        ''' 
        for sample size n
        X: input data with d features to predict upon   (dimension: (n x d))
        y: target data with m outputs to predict        (dimension: (n x m))
        h: number of nodes in the hidden layers

        learning_rate (alpha): multiplies the gradient update
        momentum (mu): multiplies the gradient update carried from the preceding epoch

        input layer (size: d for d features)

        hidden layers (size: h, which is arbitrary):
        w1: weight  (dimension: (d x h))
        b1: bias    (dimension: (1 x h))
        w2: weight  (dimension: (h x h))
        b2: bias    (dimension: (1 x h))
        ... additional hidden layers follow w2 and b2
        output layer (size: m for m outputs):
        w3: weight  (dimension: (h x m))
        b3: bias    (dimension: (1 x m))

        forward pass:
        a1 = x . w1 + b1          (dimension: (n x h))
        z1 = ReLU(a1)=max(a1,0)   (dimension: (n x h))
        a2 = z1 . w2 + b2         (dimension: (n x h))
        z2 = ReLU(a2)=max(a2,0)   (dimension: (n x h))
        ... additional hidden layers follow a2 and z2
        a3 = z2 . w3 + b3         (dimension: (n x m))
        z3 = hTan(a3)=max(a3,0)   (dimension: (n x m))
        returns z3

        loss function: mean squared error (MSE)
        activation function: rectified linear unit (ReLU) and hyperbolic tangent (hTan) for the outer layer

        backward pass:
        deltaL = dl/dzL * dzL/daL
        for j in (L:-1:0):
          using the chain rule dl/dwj = dl/dzj * dzj/daj * daj/dwj, with dot products (.) and scalar/piecewise multiplication (*) respectively, gives
          wj -= alpha * deltaj . z[j-1]
          bj -= alpha * deltaj
          delta[j-1]=sigma_gradient(a[j-1])*wj . deltaj
        returns the adjusted weights and biases
        '''

        self.X, d = X, X.shape[1] # d=10 inputs
        self.y, m = y, y.shape[1] # m=3 outputs

        h = 11
        self.learning_rate = 0.01
        self.momentum = 0.5
        
        # the weights (w1,w2,w3) and the stored weights for the momentum are initialised randomly from Gaussian
        self.w1, self.w1_stored = np.random.randn(d, h), np.random.randn(d, h)
        self.w2, self.w2_stored = np.random.randn(h, h), np.random.randn(h, h)
        self.w3, self.w3_stored = np.random.randn(h, m), np.random.randn(h, m)

        # biases initialised as zero
        self.b1 = np.zeros((1, h))
        self.b2 = np.zeros((1, h))
        self.b3 = np.zeros((1, m))

  # rectified linear unit activation function
  def ReLU(self, r):
    return (r>0) * r

  # the gradient of the ReLU function
  def ReLU_gradient(self, r):
    return (r>0)

  # hyperbolic tangent activation function
  def hTan(self, r):
    return np.tanh(r)

  # the gradient of the hTan function
  def hTan_gradient(self, r):
    return 1 - (np.tanh(r) * np.tanh(r))

  # mean squared error loss function
  def MSE_loss(self, yhat, true_y):
    assert yhat.shape == true_y.shape
    return 0.5 * np.sum( np.multiply(yhat-true_y,yhat-true_y) ) / yhat.size

  # prediction
  def forward_pass(self):
      self.a1 = np.dot(self.X, self.w1) + self.b1
      self.z1 = self.ReLU(self.a1)

      self.a2 = np.dot(self.z1, self.w2) + self.b2
      self.z2 = self.ReLU(self.a2)

      self.a3 = np.dot(self.z2, self.w3) + self.b3
      self.z3 = self.hTan(self.a3)  # hTan
      # self.z3 = self.a3           # no activation function, i.e. linear(x)=x


  # gradient update
  def backward_pass(self):

      dl_dz3 = self.z3 - self.y                        # (n x m)      
      dz3_da3 = self.hTan_gradient(self.a3)            # (n x m) # hTan
      delta3 = np.multiply(dl_dz3, dz3_da3)            # (n x m)
      # delta3 = dl_dz3                                # (n x m) # no activation function, i.e. linear(x)=x (unstable)
      da3_dw3 = self.z2                                # (n x h)
      self.w3 -= self.learning_rate * ( np.dot(da3_dw3.T, delta3) + self.momentum * self.w3_stored)     # (h x m)
      self.w3_stored = np.dot(da3_dw3.T, delta3) + self.momentum * self.w3_stored
      self.b3 -= self.learning_rate * np.sum(delta3, axis=0)                                            # (1 x m)

      dl_dz2 = np.dot(delta3, self.w3.T)               # (n x h)
      dz2_da2 = self.ReLU_gradient(self.a2)            # (n x h)
      delta2 = np.multiply(dl_dz2, dz2_da2)            # (n x h)
      da2_dw2 = self.z1                                # (n x h)
      self.w2 -= self.learning_rate * (np.dot(da2_dw2.T, delta2) + self.momentum * self.w2_stored)      # (h x h)
      self.w2_stored = np.dot(da2_dw2.T, delta2) + self.momentum * self.w2_stored
      self.b2 -= self.learning_rate * np.sum(delta2, axis=0)                                            # (1 x h)


      dl_dz1 = np.dot(delta2, self.w2.T)                # (n x h)
      dz1_da1 = self.ReLU_gradient(self.a1)             # (n x h)
      delta1 = np.multiply(dl_dz1, dz1_da1)             # (n x h)
      da1_dw1 = self.X                                  # (n x d)
      self.w1 -= self.learning_rate * (np.dot(da1_dw1.T, delta1)  + self.momentum * self.w1_stored)     # (d x h)
      self.w1_stored = np.dot(da1_dw1.T, delta1)  + self.momentum * self.w1_stored
      self.b1 -= self.learning_rate * np.sum(delta1, axis=0)                                            # (1 x h)


  def run_prediction(self, data_X):
    self.X = data_X
    self.forward_pass()
    return self.z3



Running the model

In [31]:
def MSE(yhat, true_y):
  assert yhat.shape == true_y.shape
  return 0.5 * np.sum( np.multiply(yhat-true_y,yhat-true_y) ) / yhat.size

def model_evaluation():

  validation = []
  xv = KFold(n_splits=4, random_state=1, shuffle=True)

  for xv_trn, xv_val in xv.split(train):
    
    trn_dataloader = DataLoader(TensorDataset(Tensor(dataset.X[xv_trn]),Tensor(dataset.y[xv_trn])), batch_size=20, shuffle=True)

    epochs = 5
    predictions, true_ = [],[]

    model = my_MLP(dataset.X[train.indices],dataset.y[train.indices])
    
    for ep in range(epochs):
      for i, (inputs, targets) in enumerate(trn_dataloader):
        model.X = inputs.numpy()
        model.y = targets.numpy()
        model.forward_pass()
        model.backward_pass()

    for X_val, y_val in zip(dataset.X[xv_val], dataset.y[xv_val]):
      yhat = model.run_prediction(X_val).flatten()

      predictions.append(yhat)
      true_.append(y_val)
    
    predictions, true_ = np.vstack(predictions), np.vstack(true_)
    validation.append(MSE(predictions,true_))
  
  return validation

res = model_evaluation()
print("Cross-validation errors:", res, np.mean(res), np.std(res))


Cross-validation errors: [0.2596883703543092, 0.7354069771861644, 0.25745914708805545, 0.2841913437629887] 0.38418645959787945 0.20304830775401614


While using ReLU for the outer layer I could not make this work. Once I switched to hTan it seems to work. It is, however, still unclear why the linear activation function (no activation function effectively) creates numerical instability in np.multiply(). I will show the final loss using this implementation, but for the hyperparameter-tuning I resort to using keras Sequential() where the loss function of the different targets (multi-output setup) is the same.

A more robust approach would be the following.

In [32]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

def MSE(yhat, true_y):
  assert yhat.shape == true_y.shape
  return 0.5 * np.sum( np.multiply(yhat-true_y,yhat-true_y) ) / yhat.size

def keras_MLP(X, y, layers, h, epochs, batch_size, alpha, instances):

  model = Sequential()

  # input layer and first hidden layer
  model.add(Dense(units=h, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))
  # more hidden layers
  for iter in range(layers-1):
    model.add(Dense(units=h, kernel_initializer='normal', activation='relu'))
  # output layer
  model.add(Dense(y.shape[1], kernel_initializer='normal', activation=None))

  model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(learning_rate=alpha))

  count = int(X.shape[0] * instances) # how many data points to use from the available training data (fixed_instances=1 means use all)

  model.fit(X[0:count], y[0:count], batch_size = batch_size, epochs = epochs, verbose=0)
  return model


Hyperparameter tuning using cross-validation

In [33]:
# finds the optminal number of hidden layers, node length (neurons) and batching size
def parameter_tuning(layers, hidden_nodes, batch_sizes):

  validation = []

  k = 2
  xv = KFold(n_splits=k, random_state=1, shuffle=True)

  for xv_trn, xv_val in xv.split(train):
    
    X_trn, y_trn = dataset.X[xv_trn], dataset.y[xv_trn]
    X_val, y_val = dataset.X[xv_val], dataset.y[xv_val]

    fixed_epoch = 50
    fixed_instances = 1.0
    fixed_learning_rate = 0.01

    predictions = np.zeros((len(layers), len(hidden_nodes), batch_sizes.shape[0]))

    for layer_idx in range(len(layers)):
      for h_idx in range(len(hidden_nodes)):
        for b_idx in range(batch_sizes.shape[0]):
          
          print("Train with:", layers[layer_idx], hidden_nodes[h_idx], fixed_epoch, batch_sizes[b_idx], fixed_learning_rate, fixed_instances)
          model=keras_MLP(X_trn, y_trn, layers[layer_idx], hidden_nodes[h_idx], fixed_epoch, batch_sizes[b_idx], fixed_learning_rate, fixed_instances)

          pred, true_ = model.predict(X_val), y_val
          error = MSE(pred,true_)
          print("MSE:", error)

          predictions[layer_idx,h_idx,b_idx] = error
    
    validation.append(predictions)
  
  validation_table = np.mean(np.array(validation), axis=0)

  idx = np.unravel_index(np.argmin(validation_table), validation_table.shape)

  return validation_table, layers[idx[0]], hidden_nodes[idx[1]], batch_sizes[idx[2]]



layers = [i for i in range(1,6)]                          
hidden_nodes = [i for i in range(1,42,5)]                 
batch_sizes = np.linspace(5,X.shape[0]/3,10).astype(int)  

print(layers,hidden_nodes,batch_sizes)

validation_table, layer, h, batch = parameter_tuning(layers, hidden_nodes, batch_sizes)

print(layer, h, batch) # 2, 11 and 56

layers_error = np.mean(validation_table, axis=(1,2))
hidden_nodes_error = np.mean(validation_table, axis=(0,2))
batch_sizes_error = np.mean(validation_table, axis=(0,1))

# plt.plot(batch_sizes,batch_sizes_error)
# plt.ylabel('MSE')
# plt.title('Error for batch size')
# plt.savefig('batch size error.png')

# plt.close()
# plt.plot(layers,layers_error)
# plt.ylabel('MSE')
# plt.title('Error for number of hidden layers')
# plt.savefig('layer size error.png')

# plt.close()
# plt.plot(hidden_nodes,hidden_nodes_error)
# plt.ylabel('MSE')
# plt.title('Error for number of nodes (neurons)')
# plt.savefig('node size error.png')


[1, 2, 3, 4, 5] [1, 6, 11, 16, 21, 26, 31, 36, 41] [  5  56 107 158 210 261 312 364 415 466]
Train with: 1 1 50 5 0.01 1.0
MSE: 0.0924873890796804
Train with: 1 1 50 56 0.01 1.0
MSE: 0.09439062607685565
Train with: 1 1 50 107 0.01 1.0
MSE: 0.09713520379635393
Train with: 1 1 50 158 0.01 1.0
MSE: 0.10227433673135722
Train with: 1 1 50 210 0.01 1.0
MSE: 0.11021685225595484
Train with: 1 1 50 261 0.01 1.0
MSE: 0.11978643987226154
Train with: 1 1 50 312 0.01 1.0
MSE: 0.12320684585875272
Train with: 1 1 50 364 0.01 1.0


Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7f80467ef440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/iterator_ops.py", line 546, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1264, in delete_iterator
    _ctx, "DeleteIterator", name, handle, deleter)
KeyboardInterrupt: 


MSE: 0.13888080507932185
Train with: 1 1 50 415 0.01 1.0
MSE: 0.13177776000804853
Train with: 1 1 50 466 0.01 1.0
MSE: 0.13228504816142653
Train with: 1 6 50 5 0.01 1.0
MSE: 0.0003365751255549085
Train with: 1 6 50 56 0.01 1.0
MSE: 1.6879609737929462e-14
Train with: 1 6 50 107 0.01 1.0
MSE: 0.0003619032929706974
Train with: 1 6 50 158 0.01 1.0
MSE: 0.002556460970926512
Train with: 1 6 50 210 0.01 1.0
MSE: 0.003628603350226214
Train with: 1 6 50 261 0.01 1.0
MSE: 0.006700866753306402
Train with: 1 6 50 312 0.01 1.0
MSE: 0.007032247726902424
Train with: 1 6 50 364 0.01 1.0
MSE: 0.01661412574112119
Train with: 1 6 50 415 0.01 1.0
MSE: 0.021258940217067503
Train with: 1 6 50 466 0.01 1.0
MSE: 0.01788348130214713
Train with: 1 11 50 5 0.01 1.0
MSE: 0.0001836288209221145
Train with: 1 11 50 56 0.01 1.0


KeyboardInterrupt: ignored

In [34]:
# finds the optminal number of epochs and evaluates the model using bias-variance decomposition on the validation set
def bias_variance_evaluation(epochs, instances):

  validation, validation_trn = [], []

  k = 2
  xv = KFold(n_splits=k, random_state=1, shuffle=True)

  for xv_trn, xv_val in xv.split(train):
    
    X_trn, y_trn = dataset.X[xv_trn], dataset.y[xv_trn]
    X_val, y_val = dataset.X[xv_val], dataset.y[xv_val]

    # from parameter_tuning(layers, hidden_nodes, batch_sizes)
    fixed_layer = 2
    fixed_nodes = 11
    fixed_batch = 56
    fixed_learning_rate = 0.01

    predictions = np.zeros((len(epochs), len(instances)))
    predictions_trn = np.zeros((len(epochs), len(instances)))


    for epoch_idx in range(len(epochs)):
      for instance_idx in range(len(instances)):
          
          print("Train with:", fixed_layer, fixed_nodes, epochs[epoch_idx], fixed_batch, fixed_learning_rate, instances[instance_idx])
          model=keras_MLP(X, y, fixed_layer, fixed_nodes, epochs[epoch_idx], fixed_batch, fixed_learning_rate, instances[instance_idx])

          pred, true_ = model.predict(X_val), y_val
          error = MSE(pred,true_)
          print("MSE:", error)
          predictions[epoch_idx,instance_idx] = error

          pred_trn, true_trn = model.predict(X_trn), y_trn
          error_trn = MSE(pred_trn,true_trn)
          print("MSE:", error_trn)
          predictions_trn[epoch_idx,instance_idx] = error_trn

    validation.append(predictions)
    validation_trn.append(predictions_trn)    


  validation_table = np.mean(np.array(validation), axis=0)          # validation
  validation_table_trn = np.mean(np.array(validation_trn), axis=0)  # training

  idx = np.unravel_index(np.argmin(validation_table), validation_table.shape)

  return validation_table, validation_table_trn, epochs[idx[0]], instances[idx[1]]


epochs = [i for i in range(5,505,50)]     
instances = [i/10 for i in range(1,11)]

print(epochs, instances)

validation_table, validation_table_trn, epoch, instance = bias_variance_evaluation(epochs, instances)

print(epoch, instance) # 205 and 0.8



[5, 55, 105, 155, 205, 255, 305, 355, 405, 455] [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
Train with: 2 11 5 56 0.01 0.1
MSE: 0.34136823052715576
MSE: 0.3274394642219167
Train with: 2 11 5 56 0.01 0.2
MSE: 0.13488550073627947
MSE: 0.1269244999085841
Train with: 2 11 5 56 0.01 0.3
MSE: 0.09390671715498887
MSE: 0.08760532504309818
Train with: 2 11 5 56 0.01 0.4
MSE: 0.032767497633559364
MSE: 0.032428583848356334
Train with: 2 11 5 56 0.01 0.5
MSE: 0.02520998079605384
MSE: 0.02469337464352382
Train with: 2 11 5 56 0.01 0.6
MSE: 0.023589683598624595
MSE: 0.0229003205167667
Train with: 2 11 5 56 0.01 0.7
MSE: 0.015182146664723763
MSE: 0.0148338082113493
Train with: 2 11 5 56 0.01 0.8
MSE: 0.018301965349652012
MSE: 0.0177126196107089
Train with: 2 11 5 56 0.01 0.9
MSE: 0.017173481391766724
MSE: 0.01620782772995405
Train with: 2 11 5 56 0.01 1.0


KeyboardInterrupt: ignored

In [35]:
# finds the optminal number of epochs and evaluates the model using bias-variance decomposition on the test set
def bias_variance_evaluation_test_set(epochs, instances):

  validation, validation_trn = [], []

  k = 2
  xv = KFold(n_splits=k, random_state=1, shuffle=True)

  for xv_trn, xv_val in xv.split(train):
    
    X_trn, y_trn = dataset.X[xv_trn], dataset.y[xv_trn]
    X_val, y_val = dataset.X[xv_val], dataset.y[xv_val]
    X_tst, y_tst = dataset.X[test.indices], dataset.y[test.indices]

    # from parameter_tuning(layers, hidden_nodes, batch_sizes)
    fixed_layer = 2
    fixed_nodes = 11
    fixed_batch = 56
    fixed_learning_rate = 0.01

    predictions = np.zeros((len(epochs), len(instances)))
    predictions_trn = np.zeros((len(epochs), len(instances)))


    for epoch_idx in range(len(epochs)):
      for instance_idx in range(len(instances)):
          
          print("Train with:", fixed_layer, fixed_nodes, epochs[epoch_idx], fixed_batch, fixed_learning_rate, instances[instance_idx])

          model=keras_MLP(X, y, fixed_layer, fixed_nodes, epochs[epoch_idx], fixed_batch, fixed_learning_rate, instances[instance_idx])

          pred, true_ = model.predict(X_tst), y_tst
          error = MSE(pred,true_)
          print("MSE:", error)
          predictions[epoch_idx,instance_idx] = error

          pred_trn, true_trn = model.predict(X_trn), y_trn
          error_trn = MSE(pred_trn,true_trn)
          print("MSE:", error_trn)
          predictions_trn[epoch_idx,instance_idx] = error_trn


    validation.append(predictions )         #test
    validation_trn.append(predictions_trn)  #training


  validation_table = np.mean(np.array(validation), axis=0)
  validation_table_trn = np.mean(np.array(validation_trn), axis=0)

  idx = np.unravel_index(np.argmin(validation_table), validation_table.shape)

  return validation_table, validation_table_trn, epochs[idx[0]], instances[idx[1]]


epochs = [i for i in range(5,505,50)]     
instances = [i/10 for i in range(1,11)]

validation_table_tst, validation_table_trn, epoch, instance = bias_variance_evaluation_test_set(epochs, instances)

print(epoch, instance) # 55 and 1 (which is different from 205 and 0.8 earlier)

# using instances=0.8 only
epochs_error_trn = validation_table_trn[:,7] # for 0.8
epochs_error_trn = validation_table_tst[:,7]

# using epochs=205 only
instances_error_tst = validation_table_tst[4,:] # for 205
instances_error_trn = validation_table_trn[4,:]


plt.plot(epochs, epochs_error_tst, label='test')
plt.plot(epochs, epochs_error_trn, label='training')
plt.ylabel('MSE')
plt.legend()
plt.title('Error for epoch size')
plt.savefig('epoch size error test.png')

# plt.close()
# plt.plot(instances, instances_error_tst, label='test')
# plt.plot(instances, instances_error_trn, label='training')
# plt.ylabel('MSE')
# plt.legend()
# plt.title('Error for sample size')
# plt.savefig('sample size error test.png')




Train with: 2 11 5 56 0.01 0.1
MSE: 0.3197087217584346
MSE: 0.31856425220584395
Train with: 2 11 5 56 0.01 0.2
MSE: 0.12088488267311957
MSE: 0.12142022118199593
Train with: 2 11 5 56 0.01 0.3
MSE: 0.05781628004129744
MSE: 0.0553832455486037
Train with: 2 11 5 56 0.01 0.4
MSE: 0.09103430808336542
MSE: 0.09015837322443604
Train with: 2 11 5 56 0.01 0.5
MSE: 0.026460981052426377
MSE: 0.02540670640471068
Train with: 2 11 5 56 0.01 0.6
MSE: 0.021809154093018833
MSE: 0.02066126320987134
Train with: 2 11 5 56 0.01 0.7
MSE: 0.021547556041219478
MSE: 0.02082014355279075
Train with: 2 11 5 56 0.01 0.8
MSE: 0.010464554981685054
MSE: 0.010613499488487986
Train with: 2 11 5 56 0.01 0.9
MSE: 0.012443208216806625
MSE: 0.012167187857014445
Train with: 2 11 5 56 0.01 1.0


Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7f80467ef440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/iterator_ops.py", line 546, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1264, in delete_iterator
    _ctx, "DeleteIterator", name, handle, deleter)
KeyboardInterrupt: 


MSE: 0.006505822641258127
MSE: 0.005952251243698336
Train with: 2 11 55 56 0.01 0.1
MSE: 0.005411607007081888
MSE: 0.005503600736898596
Train with: 2 11 55 56 0.01 0.2
MSE: 0.0020834779959083565
MSE: 0.002023130521265229
Train with: 2 11 55 56 0.01 0.3
MSE: 0.00013845624596841834


Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7f80467ef440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/iterator_ops.py", line 546, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1264, in delete_iterator
    _ctx, "DeleteIterator", name, handle, deleter)
KeyboardInterrupt: 


MSE: 0.0001567141038260259
Train with: 2 11 55 56 0.01 0.4
MSE: 0.000160819507436753
MSE: 0.00016622776659735148
Train with: 2 11 55 56 0.01 0.5
MSE: 9.943216191411394e-06
MSE: 7.711185624892483e-06
Train with: 2 11 55 56 0.01 0.6
MSE: 2.6666334645708566e-05
MSE: 2.4709561946429826e-05
Train with: 2 11 55 56 0.01 0.7


KeyboardInterrupt: ignored

Loss on the test set

In [None]:
# optimal parameters
layer = 2
nodes = 11
batch = 56
learning_rate = 0.01
epochs = 205
X, y = dataset.X[train.indices], dataset.y[train.indices]

model=keras_MLP(X, y, layer, nodes, epochs, batch, learning_rate, 1.0)
predictions = model.predict(dataset.X[test.indices])
true_ = dataset.y[test.indices]
print("Final error on the test set:", MSE(predictions,true_))


trn_dataloader = DataLoader(TensorDataset(Tensor(X),Tensor(y)), batch_size=batch, shuffle=True)
model = my_MLP(X,y)

for ep in range(epochs):
  for i, (inputs, targets) in enumerate(trn_dataloader):
    model.X = inputs.numpy()
    model.y = targets.numpy()
    model.forward_pass()
    model.backward_pass()

predictions=[]
for X_tst in dataset.X[test.indices]:
  yhat = model.run_prediction(X_tst).flatten()
  predictions.append(yhat)

predictions = np.vstack(predictions)
print("Final error on the test set:", MSE(predictions,true_))

