CHAPTER 2:

CONFIGURE CAPACITY WITH:

1. NODES

2. LAYERS





SECTION 2.3.2


Change Model Capacity with NODES

--
--
--

In [None]:
# Study of Multilayer Perceptron learning curves given 
# different number of 
# nodes for multi-class classification

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt




# Prepare multiclass classification dataset


def create_dataset():
    """Build the blobs classification problem and split it in two halves.

    Draws 1000 samples with 100 features around 20 centers
    (cluster_std=2, random_state=2), one-hot encodes the class labels and
    uses the first 500 rows for training and the remaining rows for testing.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``.
    """
    split_at = 500
    features, labels = make_blobs(
        n_samples=1000,
        centers=20,
        n_features=100,
        cluster_std=2,
        random_state=2,
    )
    # one-hot encode the integer class labels
    targets = to_categorical(labels)
    # no shuffling here: the first `split_at` rows train, the rest test
    return (
        features[:split_at, :],
        targets[:split_at],
        features[split_at:, :],
        targets[split_at:],
    )




# fit model with given number of nodes , returns test set accuracy


def evaluate_model(n_nodes, train_X, train_y, test_X, test_y):
    """Train a one-hidden-layer MLP with ``n_nodes`` hidden units.

    The input width is read from ``train_X`` and the number of output
    classes from the second dimension of ``test_y``. The model is compiled
    with categorical cross-entropy and SGD (learning_rate=0.01,
    momentum=0.9) and fitted for 100 epochs on the training data.

    Returns:
        Tuple ``(history, test_acc)``: the object returned by ``model.fit``
        and the accuracy reported by ``model.evaluate`` on the test data.
    """
    # layer sizes are derived from the data
    n_input = train_X.shape[1]
    n_classes = test_y.shape[1]

    hidden = Dense(
        n_nodes,
        input_dim=n_input,
        activation='relu',
        kernel_initializer='he_uniform',
    )
    output = Dense(n_classes, activation='softmax')

    model = Sequential()
    for layer in (hidden, output):
        model.add(layer)

    model.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(learning_rate=0.01, momentum=0.9),
        metrics=['accuracy'],
    )

    history = model.fit(train_X, train_y, epochs=100, verbose=0)

    # evaluate() yields [loss, accuracy]; only the accuracy is returned
    test_acc = model.evaluate(test_X, test_y, verbose=0)[1]
    return history, test_acc




# prepare dataset


# build the train/test split once and reuse it for every model
train_X, train_y, test_X, test_y = create_dataset()

# train one model per hidden-layer width, report its test accuracy and add
# its training-loss curve to a shared figure
num_nodes = list(range(1, 8))
for n_nodes in num_nodes:
    history, result = evaluate_model(
        n_nodes, train_X, train_y, test_X, test_y)
    print('nodes=%d: %.3f' % (n_nodes, result))
    loss_curve = history.history['loss']
    plt.plot(loss_curve, label=str(n_nodes))

# show all curves together, labelled by node count
plt.legend()
plt.show()











--
--
--

SECTION 2.3.3  


Change Model Capacity with LAYERS

--
--
--

In [None]:
# study of MLP learning curves given different number of layers
# for multiclass classification


# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt


# prepare multiclass classification dataset


def create_dataset():
    """Return the blobs problem as ``(train_X, train_y, test_X, test_y)``.

    1000 samples with 100 features are drawn around 20 centers
    (cluster_std=2, random_state=2). Labels are one-hot encoded before the
    rows are cut into a 500-row training part and a test part holding the
    remaining rows.
    """
    n_train = 500

    data, classes = make_blobs(n_samples=1000, centers=20, n_features=100,
                               cluster_std=2, random_state=2)
    onehot = to_categorical(classes)

    # row ranges of the two partitions
    head, tail = slice(None, n_train), slice(n_train, None)
    return data[head, :], onehot[head], data[tail, :], onehot[tail]



# fit model with given number of layers, return test accuracy


def evaluate_model(n_layers, train_X, train_y, test_X, test_y):
    """Train an MLP with ``n_layers`` hidden layers and score it.

    Every hidden layer has 10 ReLU nodes with He-uniform initialization.
    The first hidden layer declares the input width taken from ``train_X``;
    ``n_layers - 1`` further hidden layers are stacked on top of it, followed
    by a softmax output layer sized from ``test_y``. The model is compiled
    with categorical cross-entropy and SGD (learning_rate=0.01,
    momentum=0.9) and fitted for 100 epochs.

    Args:
        n_layers: number of hidden layers. Values below 1 still produce one
            hidden layer, because the first hidden layer is always added.
        train_X, train_y: training inputs and one-hot targets.
        test_X, test_y: test inputs and one-hot targets.

    Returns:
        Tuple ``(history, test_acc)``: the object returned by ``model.fit``
        and the accuracy reported by ``model.evaluate`` on the test data.
    """
    # configure the model based on the data
    n_input, n_classes = train_X.shape[1], test_y.shape[1]

    model = Sequential()

    # first hidden layer, which also fixes the input dimension
    model.add(Dense(10, input_dim=n_input, activation='relu',
                    kernel_initializer='he_uniform'))

    # remaining hidden layers: this is what makes n_layers take effect
    for _ in range(1, n_layers):
        model.add(Dense(10, activation='relu',
                        kernel_initializer='he_uniform'))

    model.add(Dense(n_classes, activation='softmax'))

    # compile model
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt, metrics=['accuracy'])

    # fit model
    history = model.fit(train_X, train_y, epochs=100, verbose=0)

    # evaluate model on test set
    _, test_acc = model.evaluate(test_X, test_y, verbose=0)

    return history, test_acc



# get dataset

# build the dataset once; every model is trained on the same split
train_X, train_y, test_X, test_y = create_dataset()

# placeholder list; nothing in this cell appends to it
all_history = []

# evaluate one model per layer count and plot its learning curve
num_layers = [1, 2, 3, 4, 5]
for n_layers in num_layers:
    history, result = evaluate_model(
        n_layers, train_X, train_y, test_X, test_y)
    print('layers=%d: %.3f' % (n_layers, result))
    loss_curve = history.history['loss']
    plt.plot(loss_curve, label=str(n_layers))

# show all curves together, labelled by number of layers
plt.legend()
plt.show()









CHAPTER 3:

CONFIGURE GRADIENT PRECISION WITH BATCH SIZE

--
--
--

POINT 1:

The NUMBER OF EXAMPLES in the TRAINING DATASET used in the estimation of the ERROR GRADIENT is called the BATCH SIZE


POINT 2:

THREE different flavours of gradient descent are Batch, Stochastic and Minibatch Gradient Descent

--
--
--



3.3.2 Multi-Layer Perceptron with Batch Gradient Descent

--
--

This would be used to address a Multiclass Classification Problem.

--
--



In [None]:
# Multi-Layer Perceptron for the blobs problem, fit with BATCH gradient
# descent: the batch size equals the size of the training set.

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt

# generate 2D classification dataset with 3 classes
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2,
                  random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: one hidden layer of 50 nodes, softmax over the 3 classes
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# fit model; the whole training set forms a single batch
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=200, verbose=0,
                    batch_size=len(train_X))

# evaluate model
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

3.3.3 Multi-Layer Perceptron fit 

with Stochastic Gradient Descent

--

--



In [None]:
# Multi-Layer Perceptron for the blobs problem, fit with STOCHASTIC gradient
# descent: the batch size is a single training example.

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt

# generate 2D classification dataset with 3 classes
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2,
                  random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: one hidden layer of 50 nodes, softmax over the 3 classes
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# fit model; batch_size=1 means every batch holds one training example
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=200, verbose=0,
                    batch_size=1)

# evaluate model
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

The poor performance and the erratic changes to the model ABOVE (in 3.3.3, as seen in the resulting graphs) suggest that the LEARNING RATE used to update weights after each training example may be TOO LARGE, and that a SMALLER learning rate may make the LEARNING PROCESS more stable.

--

--

We can TEST this by re-running the model fit with stochastic gradient descent and a SMALLER LEARNING RATE. 

--

--

For example, we can reduce the learning rate from 0.01 to 0.001.

--

--

Let us see

--

--

In [None]:
# Multi-Layer Perceptron for the blobs problem, fit with STOCHASTIC gradient
# descent and a SMALLER LEARNING RATE (0.001 instead of 0.01).

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt

# generate 2D classification dataset with 3 classes
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2,
                  random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: one hidden layer of 50 nodes, softmax over the 3 classes
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# compile model with the reduced learning rate
opt = SGD(learning_rate=0.001, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# fit model; batch_size=1 means every batch holds one training example
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=200, verbose=0,
                    batch_size=1)

# evaluate model
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

3.3.4 Multi-Layer Perceptron fit with minibatch gradient descent

--

--

An ALTERNATIVE to using stochastic gradient descent and tuning the learning rate is to HOLD THE LEARNING RATE CONSTANT and CHANGE THE BATCH SIZE.

--

--

In [None]:
# Multi-Layer Perceptron for the blobs problem, fit with MINIBATCH gradient
# descent: the learning rate stays at 0.01 and the batch size is 32.

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt

# generate 2D classification dataset with 3 classes
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2,
                  random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: one hidden layer of 50 nodes, softmax over the 3 classes
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# fit model with minibatches of 32 examples
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=200, verbose=0,
                    batch_size=32)

# evaluate model
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

--

3.3.5 Effect of batch size on Model behaviour

--

--

--

In [None]:
# Multi-Layer Perceptron for the BLOBS problem 
# with minibatch gradient descent with VARIED BATCH SIZE

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt


# Prepare train and test dataset

def prepare_data():
    """Create the 3-class, 2-feature blobs problem split in two halves.

    1000 samples are generated (cluster_std=2, random_state=2), the labels
    are one-hot encoded, and the rows are divided into the first 500 for
    training and the remainder for testing.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``.
    """
    cut = 500
    points, labels = make_blobs(
        n_samples=1000,
        centers=3,
        n_features=2,
        cluster_std=2,
        random_state=2,
    )
    encoded = to_categorical(labels)
    # rows before `cut` form the training set, rows from `cut` on the test set
    return points[:cut, :], encoded[:cut], points[cut:, :], encoded[cut:]




# fit a model and plot learning curve

def fit_model(train_X, train_y, test_X, test_y, n_batch):
    """Fit the blobs MLP with batch size ``n_batch`` and plot its accuracy.

    A network with one 50-node ReLU hidden layer (2 inputs) and a 3-way
    softmax output is trained for 200 epochs with SGD (learning_rate=0.01,
    momentum=0.9), using the test data as validation data. The train and
    validation accuracy curves are drawn on the current matplotlib axes,
    titled with the batch size. Nothing is returned.
    """
    network = Sequential()
    network.add(Dense(50, input_dim=2, activation='relu',
                      kernel_initializer='he_uniform'))
    network.add(Dense(3, activation='softmax'))

    network.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(learning_rate=0.01, momentum=0.9),
        metrics=['accuracy'],
    )

    curves = network.fit(
        train_X,
        train_y,
        validation_data=(test_X, test_y),
        epochs=200,
        verbose=0,
        batch_size=n_batch,
    ).history

    # one line per data split on the active subplot
    for key, label in (('accuracy', 'train'), ('val_accuracy', 'test')):
        plt.plot(curves[key], label=label)

    plt.title('batch=' + str(n_batch), pad=-40)




# prepare dataset

# prepare the dataset once; every batch size is trained on the same split
train_X, train_y, test_X, test_y = prepare_data()

# batch sizes to compare, one subplot each
batch_sizes = [4, 8, 16, 32, 64, 128, 256, 450]

for i, n_batch in enumerate(batch_sizes):
    # subplot code 42k: a 4-row by 2-column grid, cell k counted from 1
    plot_number = 420 + (i + 1)
    plt.subplot(plot_number)

    # fit model and plot learning curves for this batch size
    fit_model(train_X, train_y, test_X, test_y, n_batch)

# show learning curves (plt.show must be called, not merely referenced)
plt.show()






--

--

4.2.1 

Mean Squared Error Loss

(MSE)

--

--

In [None]:
# Multi-Layer Perceptron for a regression problem, trained with the
# Mean Squared Error loss function

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt

# generate regression dataset: 1000 samples, 20 input features
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1,
                       random_state=1)

# standardize inputs and target; the target is reshaped to a column for the
# scaler and flattened back to 1D afterwards
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(y.reshape(len(y), 1))[:, 0]

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: 25-node hidden layer and a single linear output
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_squared_error', optimizer=opt)

# fit model
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# evaluate the model; no extra metrics are compiled, so evaluate() yields
# the loss (MSE) alone
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# plot loss during training
plt.title('Mean Squared Error Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()





--

--

4.2.2

Mean Squared Logarithmic Error Loss


--


--

In [None]:
# Multi-Layer Perceptron for a regression problem, trained with the
# Mean Squared Logarithmic Error loss function (MSE tracked as a metric)

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt

# generate regression dataset: 1000 samples, 20 input features
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1,
                       random_state=1)

# standardize inputs and target; the target is reshaped to a column for the
# scaler and flattened back to 1D afterwards
# NOTE(review): a standardized target contains negative values; confirm how
# the MSLE implementation treats them before relying on the loss values.
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(y.reshape(len(y), 1))[:, 0]

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: 25-node hidden layer and a single linear output
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# compile model: optimize MSLE, report MSE alongside it
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_squared_logarithmic_error', optimizer=opt,
              metrics=['mse'])

# fit model
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# evaluate the model; evaluate() yields [loss, mse] and only the MSE is kept
_, train_mse = model.evaluate(train_X, train_y, verbose=0)
_, test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# plot loss during training (upper panel)
plt.subplot(211)
plt.title('Mean Squared Logarithmic Error Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot mse during training (lower panel of the same figure)
plt.subplot(212)
plt.title('Mean Squared Error', pad=-20)
plt.plot(history.history['mse'], label='train')
plt.plot(history.history['val_mse'], label='test')
plt.legend()

# a single show() at the end keeps both panels in one figure
plt.show()





--


--


--


4.2.3 

Mean Absolute Error Loss


--

--

--

In [None]:
# Multi-Layer Perceptron for a regression problem, trained with the
# Mean Absolute Error loss function (MSE tracked as a metric)

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
# All Keras symbols come from tensorflow.keras so that the layers, the model
# and the optimizer belong to the same Keras implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt

# generate regression dataset: 1000 samples, 20 input features
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1,
                       random_state=1)

# standardize inputs and target; the target is reshaped to a column for the
# scaler and flattened back to 1D afterwards
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(y.reshape(len(y), 1))[:, 0]

# split into train and test: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: 25-node hidden layer and a single linear output
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# compile model: optimize MAE, report MSE alongside it
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_absolute_error', optimizer=opt,
              metrics=['mse'])

# fit model
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# evaluate the model; evaluate() yields [loss, mse] and only the MSE is kept
_, train_mse = model.evaluate(train_X, train_y, verbose=0)
_, test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# plot loss during training (upper panel)
plt.subplot(211)
plt.title('Mean Absolute Error Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot mse during training (lower panel of the same figure)
plt.subplot(212)
plt.title('Mean Squared Error', pad=-20)
plt.plot(history.history['mse'], label='train')
plt.plot(history.history['val_mse'], label='test')
plt.legend()

# a single show() at the end keeps both panels in one figure
plt.show()





--

--

--

4.3.

Binary Classification Loss Function Case Study

--

--

--

4.3.1 

Binary Cross-Entropy Loss

--

--
--

In [None]:
# Multi-Layer Perceptron for the circles 
# problem with cross-entropy loss

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt


# build the two-circles binary classification problem (1000 noisy samples)
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# first 500 rows are used for training, the remaining rows for testing
n_train = 500
train_X, train_y = X[:n_train, :], y[:n_train]
test_X, test_y = X[n_train:, :], y[n_train:]

# 50-node ReLU hidden layer feeding a single sigmoid output unit
model = Sequential([
    Dense(50, input_dim=2, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(1, activation='sigmoid'),
])

# binary cross-entropy optimized with SGD plus momentum
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# train for 200 epochs, tracking the test split as validation data
history = model.fit(train_X, train_y, validation_data=(test_X, test_y),
                    epochs=200, verbose=0)

# final accuracy on both splits; the loss value from evaluate() is dropped
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# upper panel: loss curves
plt.subplot(2, 1, 1)
plt.title('Binary Cross-Entropy Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# lower panel: accuracy curves
plt.subplot(2, 1, 2)
plt.title('Classification Accuracy', pad=-20)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

--

--

4.3.2 Hinge Loss

--

--

--


--


In [None]:
# Multi-Layer Perceptron for the circles 
# problem with hinge loss

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt
from numpy import where


# build the two-circles binary classification problem (1000 noisy samples)
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# relabel class 0 as -1 in place so that the targets are in {-1, 1}
y[y == 0] = -1

# first 500 rows are used for training, the remaining rows for testing
n_train = 500
train_X, train_y = X[:n_train, :], y[:n_train]
test_X, test_y = X[n_train:, :], y[n_train:]

# 50-node ReLU hidden layer; tanh output so predictions lie in [-1, 1]
model = Sequential([
    Dense(50, input_dim=2, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(1, activation='tanh'),
])

# hinge loss optimized with SGD plus momentum
# NOTE(review): the targets are -1/1; confirm that the 'accuracy' metric
# Keras resolves here compares against the sign of the tanh output rather
# than a 0.5 threshold, otherwise the reported accuracy is misleading.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='hinge', optimizer=opt, metrics=['accuracy'])

# train for 200 epochs, tracking the test split as validation data
history = model.fit(train_X, train_y, validation_data=(test_X, test_y),
                    epochs=200, verbose=0)

# final accuracy on both splits; the loss value from evaluate() is dropped
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# upper panel: loss curves
plt.subplot(2, 1, 1)
plt.title('Hinge Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# lower panel: accuracy curves
plt.subplot(2, 1, 2)
plt.title('Classification Accuracy', pad=-20)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()






--

--

--

--

4.3.3

Squared Hinge Loss

--

--

--

--

In [None]:
# Multi-Layer Perceptron for the circles 
# problem with Squared hinge loss

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt
from numpy import where


# two-circles binary classification data: 1000 samples with noise 0.1
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# map label 0 to -1 in place; the targets become {-1, 1}
zero_positions = where(y == 0)
y[zero_positions] = -1

# training half and test half, cut at row 500
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# network: 50 ReLU hidden nodes, then one tanh output node
model = Sequential()
for layer in (
    Dense(50, input_dim=2, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(1, activation='tanh'),
):
    model.add(layer)

# squared hinge loss with SGD plus momentum
# NOTE(review): the targets are -1/1; confirm that the 'accuracy' metric
# Keras resolves here compares against the sign of the tanh output rather
# than a 0.5 threshold, otherwise the reported accuracy is misleading.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='squared_hinge', optimizer=opt, metrics=['accuracy'])

# 200 training epochs, with the test half as validation data
history = model.fit(train_X, train_y, validation_data=(test_X, test_y),
                    epochs=200, verbose=0)

# accuracy after training, on each half
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# loss curves in the top panel
plt.subplot(2, 1, 1)
plt.title('Squared Hinge Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# accuracy curves in the bottom panel
plt.subplot(2, 1, 2)
plt.title('Classification Accuracy', pad=-20)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()





--

--

--

---

4.4.1

Multi-Class Cross-Entropy loss

---

--

--

--

In [None]:
# Multi-Layer Perceptron for the blobs multi-class classification problem
# with categorical cross-entropy loss (one-hot encoded targets)

# make_blobs is imported from the public sklearn.datasets namespace: the
# samples_generator submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt

# generate 2D classification dataset with 3 classes
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2,
                  random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test set: first 500 rows train, the rest test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model: one hidden layer of 50 nodes, softmax over the 3 classes
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# fit model
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# evaluate the model
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss during training
plt.subplot(211)
plt.title('Categorical Cross-Entropy Loss', pad=-20)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy during training
plt.subplot(212)
plt.title('Classification Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()





--

--

--

---

4.4.2 

Sparse Multiclass Cross-Entropy Loss

---

--

--

--

In [None]:
# Multi-Layer Perceptron for the blobs 
# multi-class classification problem with
# sparse cross-entropy loss

# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt


# Build the three-class, two-feature blobs problem. The integer labels are
# kept as they are; no one-hot encoding is applied in this variant.
X, y = make_blobs(n_samples=1000, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# The first 500 samples train the model, the remaining ones test it.
n_train = 500
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]

# One hidden ReLU layer of 50 nodes feeding a three-way softmax output.
model = Sequential([
    Dense(50, input_dim=2, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(3, activation='softmax'),
])

# SGD with momentum, minimizing sparse categorical cross-entropy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Train for 100 epochs, using the test half as validation data.
history = model.fit(train_X, train_y, epochs=100, verbose=0,
                    validation_data=(test_X, test_y))

# Report the final accuracy on both halves.
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Loss curves in the upper panel, accuracy curves in the lower one.
for position, heading, title_pad, train_key, test_key in (
        (211, 'Sparse Categorical Cross-Entropy Loss', -20,
         'loss', 'val_loss'),
        (212, 'Classification Accuracy', -40,
         'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(heading, pad=title_pad)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()





--

--

--

---

4.4.3

Kullback Leibler Divergence Loss

---

--

--

--

In [None]:
# Multi-Layer Perceptron for the blobs 
# multi-class classification problem with
# Kullback Leibler divergence loss

# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt


# Build the three-class, two-feature blobs problem.
X, y = make_blobs(n_samples=1000, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# One-hot encode the class labels.
y = to_categorical(y)

# The first 500 samples train the model, the remaining ones test it.
n_train = 500
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]

# One hidden ReLU layer of 50 nodes feeding a three-way softmax output.
model = Sequential([
    Dense(50, input_dim=2, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(3, activation='softmax'),
])

# SGD with momentum, minimizing the Kullback Leibler divergence.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='kullback_leibler_divergence', optimizer=opt,
              metrics=['accuracy'])

# Train for 100 epochs, using the test half as validation data.
history = model.fit(train_X, train_y, epochs=100, verbose=0,
                    validation_data=(test_X, test_y))

# Report the final accuracy on both halves.
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Loss curves in the upper panel, accuracy curves in the lower one.
for position, heading, title_pad, train_key, test_key in (
        (211, 'Kullback Leibler Divergence Loss', -20, 'loss', 'val_loss'),
        (212, 'Classification Accuracy', -40, 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(heading, pad=title_pad)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()





--

--

--

--

---

5.3.2

Effect of Learning Rate and Momentum

---

--

--

--

--

---

A. 

Learning Rate Dynamics

---

--

--

--

--

In [None]:
# Study on the effects of Learning Rate on Accuracy 
# for the blobs problem


# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt



# COPIED from 2.3.3


# prepare train and test dataset


def prepare_data():
    """Generate the three-class blobs problem and split it in half.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``. The first 500 of the
        1000 generated samples form the training set and the rest the test
        set; the labels are one-hot encoded.
    """
    features, labels = make_blobs(n_samples=1000, centers=3, n_features=2,
                                  cluster_std=2, random_state=2)
    targets = to_categorical(labels)

    split = 500
    return (features[:split], targets[:split],
            features[split:], targets[split:])




# fit a model and plot learning curve


def fit_model(train_X, train_y, test_X, test_y, lrate):
    """Train the blobs MLP with plain SGD at ``lrate`` and plot its accuracy.

    The per-epoch training and validation accuracy are drawn on the current
    matplotlib axes, which are titled with the learning rate used.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Inputs used as validation data while fitting.
        test_y: Targets used as validation data while fitting.
        lrate: Learning rate passed to the SGD optimizer.
    """
    network = Sequential([
        Dense(50, input_dim=2, activation='relu',
              kernel_initializer='he_uniform'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy',
                    optimizer=SGD(learning_rate=lrate),
                    metrics=['accuracy'])

    curves = network.fit(train_X, train_y, epochs=200, verbose=0,
                         validation_data=(test_X, test_y)).history

    plt.plot(curves['accuracy'], label='train')
    plt.plot(curves['val_accuracy'], label='test')
    plt.title('lrate=' + str(lrate), pad=-50)



# Build the train/test split once; every run below reuses it.
train_X, train_y, test_X, test_y = prepare_data()

# Learning rates to compare, one per cell of a 4x2 subplot grid.
learning_rates = [1E-0, 1E-1, 1E-2, 1E-3, 1E-4, 1E-5, 1E-6, 1E-7]

# Subplot codes 421..428 address the grid cells in order.
for plot_no, rate in enumerate(learning_rates, start=421):
    plt.subplot(plot_no)
    fit_model(train_X, train_y, test_X, test_y, rate)

# show learning curves
plt.show()





--

--

--

--

---

5.3.2

B.

Momentum Dynamics

---

--

--

--

--

In [None]:
# Study on the effects of Momentum on Accuracy 
# for the blobs problem


# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt



# COPIED from 5.3.2 A: Learning Rate Dynamics


# prepare train and test dataset


def prepare_data():
    """Create the blobs classification data and return a 500/500 split.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)`` where the labels have
        been one-hot encoded and the first 500 samples are the training set.
    """
    points, classes = make_blobs(n_samples=1000, centers=3, n_features=2,
                                 cluster_std=2, random_state=2)
    encoded = to_categorical(classes)

    boundary = 500
    train_part = (points[:boundary], encoded[:boundary])
    test_part = (points[boundary:], encoded[boundary:])
    return train_part[0], train_part[1], test_part[0], test_part[1]




# fit a model and plot learning curve


def fit_model(train_X, train_y, test_X, test_y, momentum):
    """Train the blobs MLP with a given SGD momentum and plot its accuracy.

    The per-epoch training and validation accuracy are drawn on the current
    matplotlib axes, which are titled with the momentum used for this run.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Inputs used as validation data while fitting.
        test_y: Targets used as validation data while fitting.
        momentum: Momentum value passed to the SGD optimizer.
    """
    # define model
    model = Sequential()
    model.add(Dense(50, input_dim=2, activation='relu',
                    kernel_initializer='he_uniform'))
    model.add(Dense(3, activation='softmax'))

    # compile model
    opt = SGD(learning_rate=0.01, momentum=momentum)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt, metrics=['accuracy'])

    # fit model
    history = model.fit(train_X, train_y,
                        validation_data=(test_X, test_y),
                        epochs=200,
                        verbose=0)

    # plot learning curves
    plt.plot(history.history['accuracy'], label='train')
    plt.plot(history.history['val_accuracy'], label='test')

    # Title the panel with this run's own momentum. The title used to print
    # the module-level ``momentums`` list behind an 'lrate=' prefix, so every
    # panel carried the same, wrong text and the function depended on a
    # global defined after it.
    plt.title('momentum=' + str(momentum), pad=-80)



# Build the train/test split once; every run below reuses it.
train_X, train_y, test_X, test_y = prepare_data()

# Momentum values to compare, one per cell of a 2x2 subplot grid.
momentums = [0.0, 0.5, 0.9, 0.99]

# Subplot codes 221..224 address the grid cells in order.
for plot_no, value in enumerate(momentums, start=221):
    plt.subplot(plot_no)
    fit_model(train_X, train_y, test_X, test_y, value)

# show learning curves
plt.show()





--

--

--

--

---

5.3.3

Effect of Learning Rate Schedules

---

--

--

--

---

A. 

Learning Rate Decay

(An Experiment)

---

--

--

--

--

In [None]:
# demonstrate the effect of 
# decay on the learning rate

from matplotlib import pyplot as plt




# learning rate decay

def decay_lrate(initial_lrate, decay, iteration):
    """Return the learning rate after ``iteration`` updates of decay.

    Computes ``initial_lrate * (1 / (1 + decay * iteration))``.

    Args:
        initial_lrate: Learning rate before any decay is applied.
        decay: Decay coefficient.
        iteration: Number of updates performed so far.

    Returns:
        The decayed learning rate.
    """
    shrink_factor = 1.0 / (1.0 + (decay * iteration))
    return initial_lrate * shrink_factor


# Decay coefficients to compare, all starting from the same learning rate.
decays = [1E-1, 1E-2, 1E-3, 1E-4]
lrate = 0.01
n_updates = 200

update_steps = range(n_updates)
for decay in decays:
    # Learning rate at every update for this decay coefficient.
    lrates = [decay_lrate(lrate, decay, step) for step in update_steps]
    plt.plot(lrates, label=str(decay))

plt.legend()
plt.show()





--

--

--

--

--

---

5.3.3

B.

page 107

Evaluate the same 4 decay values of   [1E-1, 1E-2, 1E-3, 1E-4]   and their effects on model accuracy

---

--

--

--

--

In [None]:
# study on the effects of decay rate on accuracy 
# for the blobs problem

# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt



# COPIED from 5.3.2 B: Momentum Dynamics


# prepare train and test dataset


def prepare_data():
    """Generate the blobs dataset and split it into train and test halves.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``; labels are one-hot
        encoded and the split point is after the first 500 samples.
    """
    inputs, labels = make_blobs(n_samples=1000, centers=3, n_features=2,
                                cluster_std=2, random_state=2)
    labels = to_categorical(labels)

    cut = 500
    train_inputs, test_inputs = inputs[:cut], inputs[cut:]
    train_labels, test_labels = labels[:cut], labels[cut:]
    return train_inputs, train_labels, test_inputs, test_labels




# fit a model and plot learning curve


def fit_model(train_X, train_y, test_X, test_y, decay):
    """Train the blobs MLP with a decaying SGD learning rate and plot accuracy.

    The per-epoch training and validation accuracy are drawn on the current
    matplotlib axes, which are titled with the decay value used.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Inputs used as validation data while fitting.
        test_y: Targets used as validation data while fitting.
        decay: Decay coefficient of the inverse-time learning rate schedule.
    """
    # Imported here so the cell's import list does not have to change.
    from tensorflow.keras.optimizers.schedules import InverseTimeDecay

    # define model
    model = Sequential()
    model.add(Dense(50, input_dim=2, activation='relu',
                    kernel_initializer='he_uniform'))
    model.add(Dense(3, activation='softmax'))

    # compile model
    # The optimizer-level ``decay`` keyword is rejected or ignored by current
    # Keras optimizers. An InverseTimeDecay schedule with decay_steps=1
    # applies the same rule per update: lr = 0.01 / (1 + decay * step).
    schedule = InverseTimeDecay(initial_learning_rate=0.01,
                                decay_steps=1,
                                decay_rate=decay)
    opt = SGD(learning_rate=schedule)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt, metrics=['accuracy'])

    # fit model
    history = model.fit(train_X, train_y,
                        validation_data=(test_X, test_y),
                        epochs=200,
                        verbose=0)

    # plot learning curves
    plt.plot(history.history['accuracy'], label='train')
    plt.plot(history.history['val_accuracy'], label='test')
    plt.title('decay=' + str(decay), pad=-80)



# Build the train/test split once; every run below reuses it.
train_X, train_y, test_X, test_y = prepare_data()

# Decay rates to compare, one per cell of a 2x2 subplot grid.
decay_rates = [1E-1, 1E-2, 1E-3, 1E-4]

# Subplot codes 221..224 address the grid cells in order.
for plot_no, rate in enumerate(decay_rates, start=221):
    plt.subplot(plot_no)
    fit_model(train_X, train_y, test_X, test_y, rate)

# show learning curves
plt.show()




--

--

--

---

5.3.3 

page 108, code on page 110

C. 

Drop Learning Rate on Plateau

---

--

--

--

--

In [None]:
# study on the effects of the 
# patience of the learning rate drop schedule
# on the blobs problem.


# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import backend
from matplotlib import pyplot as plt



# monitor the learning rate

class learning_rate_monitor(Callback):
    """Keras callback that records the optimizer's learning rate each epoch.

    After training, ``lrates`` holds one float per completed epoch.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh list of recorded learning rates.

        ``logs`` defaults to None instead of a shared mutable dict; it is
        not read here.
        """
        self.lrates = []

    def on_epoch_end(self, epoch, logs=None):
        """Append the optimizer's current learning rate to ``lrates``.

        ``epoch`` and ``logs`` are accepted for the callback interface but
        are not used.
        """
        optimizer = self.model.optimizer
        lrate = float(backend.get_value(optimizer.learning_rate))
        self.lrates.append(lrate)



# prepare train and test dataset


def prepare_data():
    """Generate the three-class blobs problem and split it in half.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``. The first 500 of the
        1000 generated samples form the training set and the rest the test
        set; the labels are one-hot encoded.
    """
    features, labels = make_blobs(n_samples=1000, centers=3, n_features=2,
                                  cluster_std=2, random_state=2)
    targets = to_categorical(labels)

    split = 500
    return (features[:split], targets[:split],
            features[split:], targets[split:])




# fit a model and plot learning curve


def fit_model(train_X, train_y, test_X, test_y, patience):
    """Train the blobs MLP with a reduce-on-plateau learning rate schedule.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Inputs used as validation data while fitting.
        test_y: Targets used as validation data while fitting.
        patience: ``patience`` value passed to ReduceLROnPlateau.

    Returns:
        Tuple ``(lrates, loss, accuracy)``: the learning rates recorded by
        the monitor callback, and the training loss and training accuracy
        histories returned by ``fit``.
    """
    network = Sequential([
        Dense(50, input_dim=2, activation='relu',
              kernel_initializer='he_uniform'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy',
                    optimizer=SGD(learning_rate=0.01),
                    metrics=['accuracy'])

    # The schedule callback watches validation loss; the monitor callback
    # records the learning rate at the end of every epoch.
    plateau_drop = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                     patience=patience, min_delta=1E-7)
    rate_log = learning_rate_monitor()

    record = network.fit(train_X, train_y, epochs=200, verbose=0,
                         validation_data=(test_X, test_y),
                         callbacks=[plateau_drop, rate_log]).history

    return rate_log.lrates, record['loss'], record['accuracy']
    


# create line plot for a series

def line_plots(patiences, series):
    """Plot one line per patience value on a 2x2 grid and show the figure.

    Args:
        patiences: Patience values, used for the subplot titles.
        series: Sequence indexed like ``patiences``; ``series[i]`` is the
            data plotted in the subplot for ``patiences[i]``.
    """
    for index, patience in enumerate(patiences):
        # Subplot codes 221, 222, ... address the grid cells in order.
        plt.subplot(221 + index)
        plt.plot(series[index])
        plt.title('patience=' + str(patience), pad=-80)

    plt.show()



# Build the train/test split once; every run below reuses it.
train_X, train_y, test_X, test_y = prepare_data()

# Patience values to compare; one model is trained per value.
patiences = [2, 5, 10, 15]
lr_list, loss_list, accuracy_list = [], [], []

for patience_value in patiences:
    # Collect the learning rate, loss and accuracy traces of this run.
    lr, loss, accuracy = fit_model(train_X, train_y, test_X, test_y,
                                   patience_value)
    lr_list.append(lr)
    loss_list.append(loss)
    accuracy_list.append(accuracy)

# One figure each for learning rates, loss and accuracy, in that order.
for traces in (lr_list, loss_list, accuracy_list):
    line_plots(patiences, traces)






--

--

--

--

---
5.3.4

page 115, code on page 116

Effect of Adaptive Learning Rates

---

--

--

--

--

In [None]:
# study of Stochastic Gradient Descent (SGD) 
# with Adaptive Learning Rates for the blobs problem

# make_blobs comes from the public sklearn.datasets package; the old
# sklearn.datasets.samples_generator module was deprecated in scikit-learn
# 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt




# prepare train and test dataset


def prepare_data():
    """Create the blobs classification data and return a 500/500 split.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)`` where the labels have
        been one-hot encoded and the first 500 samples are the training set.
    """
    inputs, labels = make_blobs(n_samples=1000, centers=3, n_features=2,
                                cluster_std=2, random_state=2)
    labels = to_categorical(labels)

    cut = 500
    train_inputs, test_inputs = inputs[:cut], inputs[cut:]
    train_labels, test_labels = labels[:cut], labels[cut:]
    return train_inputs, train_labels, test_inputs, test_labels




# fit a model and plot learning curve


def fit_model(train_X, train_y, test_X, test_y, optimizer):
    """Train the blobs MLP with the named optimizer and plot its accuracy.

    The per-epoch training and validation accuracy are drawn on the current
    matplotlib axes, which are titled with the optimizer name.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Inputs used as validation data while fitting.
        test_y: Targets used as validation data while fitting.
        optimizer: Optimizer passed to ``compile``; it is concatenated to a
            string for the title, so it must be a ``str``.
    """
    network = Sequential([
        Dense(50, input_dim=2, activation='relu',
              kernel_initializer='he_uniform'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer=optimizer,
                    metrics=['accuracy'])

    curves = network.fit(train_X, train_y, epochs=200, verbose=0,
                         validation_data=(test_X, test_y)).history

    plt.plot(curves['accuracy'], label='train')
    plt.plot(curves['val_accuracy'], label='test')
    plt.title('opt=' + optimizer, pad=-80)



# Build the train/test split once; every run below reuses it.
train_X, train_y, test_X, test_y = prepare_data()

# Optimizers to compare, one per cell of a 2x2 subplot grid.
optimizers = ['sgd', 'rmsprop', 'adagrad', 'adam']

# Subplot codes 221..224 address the grid cells in order.
for plot_no, optimizer_name in enumerate(optimizers, start=221):
    plt.subplot(plot_no)
    fit_model(train_X, train_y, test_X, test_y, optimizer_name)

# show learning curves
plt.show()




--

--

--

--

---

6.3.1 

Regression Predictive Modelling Problem

A.

Example of generating samples and plotting their distribution for the regression problem

---

--

--

--

--

In [None]:
# regression predictive modelling problem

from sklearn.datasets import make_regression
from matplotlib import pyplot as plt

# Generate the regression problem: 1000 samples with 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# Histograms of the first two input variables, stacked vertically.
for position, column in ((211, 0), (212, 1)):
    plt.subplot(position)
    plt.hist(X[:, column])
plt.show()

# Histogram of the target variable in its own figure.
plt.hist(y)
plt.show()



--

--

--

--

---

6.3.2

Multilayer Perceptron with Unscaled Data

---

--

--

--

--

In [None]:
# Multi-Layer Perceptron with unscaled data
# for the regression problem

from sklearn.datasets import make_regression
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate the regression problem: 1000 samples with 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# The first 500 samples train the model, the remaining ones test it.
n_train = 500
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]

# One hidden ReLU layer of 25 nodes feeding a single linear output.
model = Sequential([
    Dense(25, input_dim=20, activation='relu',
          kernel_initializer='he_uniform'),
    Dense(1, activation='linear'),
])

# SGD with momentum, minimizing mean squared error.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_squared_error', optimizer=opt)

# Train for 100 epochs, using the test half as validation data.
history = model.fit(train_X, train_y, epochs=100, verbose=0,
                    validation_data=(test_X, test_y))

# Report the final loss on both halves.
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# Plot the training and validation loss per epoch.
plt.title('Mean Squared Error')
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.show()




--

--

--

--

---

6.3.3

Multi-layer Perceptron with Scaled Output Variables

---

--

--

--

--

In [None]:
# Multi-Layer Perceptron with Scaled Outputs
# for the regression problem

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate regression dataset
X, y = make_regression(n_samples=1000,
                       n_features=20,
                       noise=0.1, random_state=1)

# split into train and test
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# Reshape 1D arrays into 2D column arrays before passing them to the scaler.
# Each array is reshaped by its own length: sizing test_y with
# len(train_y) only worked because both halves hold 500 samples.
train_y = train_y.reshape(len(train_y), 1)
test_y = test_y.reshape(len(test_y), 1)

# create scaler
scaler = StandardScaler()

# fit scaler on the training targets only
scaler.fit(train_y)

# transform training dataset
train_y = scaler.transform(train_y)

# transform test dataset with the statistics learned from the training set
test_y = scaler.transform(test_y)

# define model
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_squared_error',
              optimizer=opt)

# fit model
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# evaluate the model
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# plot loss during training
plt.title('Mean Squared Error Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()




--

--

--

--

---

6.3.4

Multi-Layer Perceptron with Scaled Input Variables

---

--

--

--

--

In [None]:
# compare scaling methods for Multi-Layer Perceptron
# inputs on regression problem

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt
from numpy import mean
from numpy import std



# Prepare dataset with input and output scalers,
# can be none

def get_dataset(input_scaler, output_scaler):
    """Generate the regression problem, split it and optionally scale it.

    Scalers are fitted on the training half only and then applied to both
    halves.

    Args:
        input_scaler: Object with ``fit``/``transform`` applied to the input
            features, or None to leave the inputs unscaled.
        output_scaler: Object with ``fit``/``transform`` applied to the
            targets, or None to leave the targets unscaled.

    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``. When an output scaler
        is given, the targets are returned as 2D column arrays.
    """
    # generate dataset
    X, y = make_regression(n_samples=1000,
                           n_features=20,
                           noise=0.1,
                           random_state=1)

    # split into train and test set
    n_train = 500
    train_X, test_X = X[:n_train, :], X[n_train:, :]
    train_y, test_y = y[:n_train], y[n_train:]

    # scale inputs
    if input_scaler is not None:
        input_scaler.fit(train_X)
        train_X = input_scaler.transform(train_X)
        test_X = input_scaler.transform(test_X)

    # scale outputs
    if output_scaler is not None:
        # Reshape 1D arrays into 2D column arrays. Each array is reshaped by
        # its own length: sizing test_y with len(train_y) only worked because
        # both halves hold 500 samples.
        train_y = train_y.reshape(len(train_y), 1)
        test_y = test_y.reshape(len(test_y), 1)

        output_scaler.fit(train_y)
        train_y = output_scaler.transform(train_y)
        test_y = output_scaler.transform(test_y)

    return train_X, train_y, test_X, test_y



# fit and evaluate mean squared error (mse)
# of model on test set

def evaluate_model(train_X, train_y, test_X, test_y):
    """Fit a fresh regression MLP and return its loss on the test set.

    Args:
        train_X: Training inputs.
        train_y: Training targets.
        test_X: Test inputs.
        test_y: Test targets.

    Returns:
        The value returned by ``model.evaluate`` on the test data; the model
        is compiled with mean squared error loss and no extra metrics.
    """
    network = Sequential([
        Dense(25, input_dim=20, activation='relu',
              kernel_initializer='he_uniform'),
        Dense(1, activation='linear'),
    ])
    network.compile(loss='mean_squared_error',
                    optimizer=SGD(learning_rate=0.01, momentum=0.9))

    network.fit(train_X, train_y, epochs=100, verbose=0)

    return network.evaluate(test_X, test_y, verbose=0)




# evaluate model multiple times with given input and output scalers

def repeated_evaluation(input_scaler, output_scaler, n_repeats=30):
    """Train and score the model ``n_repeats`` times on one prepared dataset.

    Args:
        input_scaler: Scaler for the inputs, or None; passed to get_dataset.
        output_scaler: Scaler for the targets, or None; passed to
            get_dataset.
        n_repeats: Number of independent fit/evaluate runs.

    Returns:
        List with the test score of every run, in run order. Each score is
        also printed as it is produced.
    """
    # The dataset is built once and shared by all repeats.
    dataset = get_dataset(input_scaler, output_scaler)

    results = []
    for _ in range(n_repeats):
        score = evaluate_model(*dataset)
        print('>%.3f' % score)
        results.append(score)

    return results



# Every configuration standardizes the target; only the input scaling varies.

# unscaled inputs
results_unscaled_inputs = repeated_evaluation(None,
                                              StandardScaler())

# normalized inputs
results_normalized_inputs = repeated_evaluation(MinMaxScaler(),
                                                StandardScaler())

# standardized inputs
results_standardized_inputs = repeated_evaluation(StandardScaler(),
                                                  StandardScaler())

# Summarize results as mean (standard deviation) of the test scores
print('Unscaled: %.3f (%.3f)' % (mean(results_unscaled_inputs),
                                 std(results_unscaled_inputs)))

print('Normalized: %.3f (%.3f)' % (mean(results_normalized_inputs),
                                   std(results_normalized_inputs)))

print('Standardized: %.3f (%.3f)' % (mean(results_standardized_inputs),
                                     std(results_standardized_inputs)))

# plot results
results = [results_unscaled_inputs,
           results_normalized_inputs,
           results_standardized_inputs]

labels = ['unscaled', 'normalized', 'standardized']

plt.boxplot(results, labels=labels)

# plt.show was previously referenced without parentheses, so it was never
# actually called.
plt.show()





--

--

--

--

---

7.3.2 

Multi-Layer Perceptron Model


--

Multi-Layer Perceptron with Tanh for the two circles Classification Problem

--

---

--

--

--

--

In [None]:
# Multi-Layer Perceptron with Tanh for the 
#two circles Classification Problem

from sklearn.datasets import make_circles
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.initializers import RandomUniform
from matplotlib import pyplot as plt



# Generate the two-circles binary classification problem.
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# Rescale every input feature into [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# The first 500 samples train the model, the remaining ones test it.
n_train = 500
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]

# One hidden tanh layer of 5 nodes and a sigmoid output; both layers draw
# their initial weights uniformly from [0, 1].
init = RandomUniform(minval=0, maxval=1)
model = Sequential([
    Dense(5, input_dim=2, activation='tanh', kernel_initializer=init),
    Dense(1, activation='sigmoid', kernel_initializer=init),
])

# SGD with momentum, minimizing binary cross-entropy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Train for 500 epochs, using the test half as validation data.
history = model.fit(train_X, train_y, epochs=500, verbose=0,
                    validation_data=(test_X, test_y))

# Report the final accuracy on both halves.
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Loss curves in the upper panel, accuracy curves in the lower one.
for position, heading, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()





--

--

--

--

---

7.3.3

Deeper MLP Model

--

Deeper MLP with tanh for the two circles classification problem

---

--

--

--

--

In [None]:
# Deeper Multi-Layer Perceptron with Tanh for the 
#two circles Classification Problem 

from sklearn.datasets import make_circles
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.initializers import RandomUniform
from matplotlib import pyplot as plt



# Generate the two-circles binary classification problem.
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# Rescale every input feature into [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# The first 500 samples train the model, the remaining ones test it.
n_train = 500
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]

# Five hidden tanh layers of 5 nodes each, followed by a sigmoid output;
# every layer draws its initial weights uniformly from [0, 1].
init = RandomUniform(minval=0, maxval=1)
model = Sequential()
model.add(Dense(5, input_dim=2, activation='tanh', kernel_initializer=init))
for _ in range(4):
    model.add(Dense(5, activation='tanh', kernel_initializer=init))
model.add(Dense(1, activation='sigmoid', kernel_initializer=init))

# SGD with momentum, minimizing binary cross-entropy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Train for 500 epochs, using the test half as validation data.
history = model.fit(train_X, train_y, epochs=500, verbose=0,
                    validation_data=(test_X, test_y))

# Report the final accuracy on both halves.
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Loss curves in the upper panel, accuracy curves in the lower one.
for position, heading, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()





--

--

--

--

---

7.3.4

Deeper MLP Model with ReLU

---

--

--

--

--

In [None]:
# Deeper Multi-Layer Perceptron with ReLU for the 
#two circles Classification Problem (5 hidden Layers)

from sklearn.datasets import make_circles
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Build the noisy two-circles dataset (1000 samples, fixed seed).
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# Rescale every input feature into the range [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

# The first 500 rows are used for training, the remaining rows for testing.
n_train = 500
train_X, train_y = X[:n_train, :], y[:n_train]
test_X, test_y = X[n_train:, :], y[n_train:]

# Stack five ReLU hidden layers of five nodes each (He-uniform weights),
# followed by a single sigmoid output node.
model = Sequential()
model.add(Dense(5, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
for _ in range(4):
    model.add(Dense(5, activation='relu',
                    kernel_initializer='he_uniform'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy optimised by SGD with momentum; track accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 500 epochs, scoring the test split after each epoch.
history = model.fit(train_X, train_y, epochs=500, verbose=0,
                    validation_data=(test_X, test_y))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()





--

--

--

--

---

8.3.1 

Regression Predictive Modelling Problem

---

--

--

--

--

In [None]:
# Visualize the distribution of the target variable

# Regression Predictive Modelling Problem

from sklearn.datasets import make_regression
from matplotlib import pyplot as plt

# Synthetic regression problem: 1000 samples, 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# Left panel: histogram of the target; right panel: box plot of the target.
for position, draw in ((121, plt.hist), (122, plt.boxplot)):
    plt.subplot(position)
    draw(y)

plt.show()


--

--

--

--

---

8.3.2 

Multi-Layer Perceptron with exploding gradients

---

--

--

--

--

In [None]:
# MLP with unscaled data for the regression problem

from sklearn.datasets import make_regression
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate the regression dataset: 1000 samples with 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# Split into train and test halves. Neither inputs nor target are scaled
# (this cell is the "unscaled data" example).
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# Define model: one hidden ReLU layer of 25 nodes and a linear output node.
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# Compile model. No 'accuracy' metric is attached: it is a classification
# metric and is meaningless for a real-valued target. With no extra
# metrics, model.evaluate() returns the MSE loss itself as a scalar.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='mean_squared_error', optimizer=opt)

# Fit model, scoring the test split after every epoch.
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# Evaluate the model: the returned value is the mean squared error.
# NOTE(review): the section heading calls this the exploding-gradients
# example, so very large or NaN values here are presumably intended.
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# Plot loss during training.
plt.title('Mean Squared Error')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

plt.show()





--

--

--

--

---

8.3.3 

Multi-Layer Perceptron with Gradient Norm Scaling

--

MLP with Unscaled data for the regression problem with gradient norm scaling

---

--

--

--

--

In [None]:
# MLP with Unscaled data for the regression problem 
# with gradient norm scaling

from sklearn.datasets import make_regression
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate the regression dataset: 1000 samples with 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# Split into train and test halves. Neither inputs nor target are scaled
# (this cell is the "unscaled data" example).
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# Define model: one hidden ReLU layer of 25 nodes and a linear output node.
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# Compile model with gradient norm scaling (clipnorm=1.0).
# No 'accuracy' metric is attached: it is a classification metric and is
# meaningless for a real-valued target. With no extra metrics,
# model.evaluate() returns the MSE loss itself as a scalar.
opt = SGD(learning_rate=0.01, momentum=0.9, clipnorm=1.0)
model.compile(loss='mean_squared_error', optimizer=opt)

# Fit model, scoring the test split after every epoch.
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# Evaluate the model: the returned value is the mean squared error.
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# Plot loss during training.
plt.title('Mean Squared Error')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

plt.show()





--

--

--

--

---

8.3.4 

Multi-Layer Perceptron with Gradient Value Clipping

--

MLP with Unscaled data for the regression problem with gradient clipping

---

--

--

--

--

In [None]:
# MLP with Unscaled data for the 
# regression problem with gradient clipping

from sklearn.datasets import make_regression
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate the regression dataset: 1000 samples with 20 input features.
X, y = make_regression(n_samples=1000, n_features=20,
                       noise=0.1, random_state=1)

# Split into train and test halves. Neither inputs nor target are scaled
# (this cell is the "unscaled data" example).
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# Define model: one hidden ReLU layer of 25 nodes and a linear output node.
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))

# Compile model with gradient value clipping (clipvalue=5.0).
# No 'accuracy' metric is attached: it is a classification metric and is
# meaningless for a real-valued target. With no extra metrics,
# model.evaluate() returns the MSE loss itself as a scalar.
opt = SGD(learning_rate=0.01, momentum=0.9, clipvalue=5.0)
model.compile(loss='mean_squared_error', optimizer=opt)

# Fit model, scoring the test split after every epoch.
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# Evaluate the model: the returned value is the mean squared error.
train_mse = model.evaluate(train_X, train_y, verbose=0)
test_mse = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))

# Plot loss during training.
plt.title('Mean Squared Error')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

plt.show()





--

--

--

--

---

9.3.2 

Multi-Layer Perceptron Model

--

MLP for the two circles problem

---

--

--

--

--

In [None]:
# MLP for the two circles problem

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Build the noisy two-circles dataset (1000 samples, fixed seed).
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# The first 500 rows are used for training, the remaining rows for testing.
n_train = 500
train_X, train_y = X[:n_train, :], y[:n_train]
test_X, test_y = X[n_train:, :], y[n_train:]

# One hidden ReLU layer of 50 nodes feeding a sigmoid output node.
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy optimised by SGD with momentum; track accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 100 epochs, scoring the test split after each epoch.
history = model.fit(train_X, train_y, epochs=100, verbose=0,
                    validation_data=(test_X, test_y))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()









--

--

--

--

---

9.3.2 

Multi-Layer Perceptron with Batch Normalization

--

A.

MLP for the two circles problem with batchnorm after activation function

---

--

--

--

--

In [None]:
# MLP for the two circles problem with 
# BatchNorm AFTER the activation Function

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Build the noisy two-circles dataset (1000 samples, fixed seed).
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# The first 500 rows are used for training, the remaining rows for testing.
n_train = 500
train_X, train_y = X[:n_train, :], y[:n_train]
test_X, test_y = X[n_train:, :], y[n_train:]

# Hidden ReLU layer -> batch normalisation -> sigmoid output, i.e. the
# normalisation is applied to the already-activated hidden outputs.
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
                kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy optimised by SGD with momentum; track accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 100 epochs, scoring the test split after each epoch.
history = model.fit(train_X, train_y, epochs=100, verbose=0,
                    validation_data=(test_X, test_y))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()








--

--

--

--

---

9.3.2 

Multi-Layer Perceptron with Batch Normalization

--

B.

MLP for the two circles problem with batchnorm BEFORE activation function

---

--

--

--

--

In [None]:
# MLP for the two circles problem with 
# BatchNorm BEFORE the activation Function

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt



# Generate 2D classification dataset (noisy two circles, fixed seed).
X, y = make_circles(n_samples=1000, noise=0.1, random_state=1)

# Split into train and test halves.
n_train = 500
train_X, test_X = X[:n_train, :], X[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# Define model. The hidden Dense layer is left WITHOUT an activation so
# that BatchNormalization really sees the pre-activation values; ReLU is
# then applied once, by the separate Activation layer. (Passing
# activation='relu' to Dense as well would normalise already-activated
# outputs and apply ReLU twice.)
model = Sequential()
model.add(Dense(50, input_dim=2, kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile model: binary cross-entropy, SGD with momentum, track accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy',
              optimizer=opt, metrics=['accuracy'])

# Fit model, scoring the test split after every epoch.
history = model.fit(train_X, train_y,
                    validation_data=(test_X, test_y),
                    epochs=100, verbose=0)

# Evaluate the model (evaluate() returns [loss, accuracy]; keep accuracy).
_, train_acc = model.evaluate(train_X, train_y, verbose=0)
_, test_acc = model.evaluate(test_X, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Plot loss learning curves.
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# Plot accuracy learning curves.
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()

plt.show()








--

--

--

--

---

10.2.2 

Supervised Greedy Layer-Wise Pre-Training
--

Supervised Greedy Layer-Wise Pre-Training for blobs Classification Problem

---

--

--

--

--

In [None]:
# Supervised Greedy Layer-Wise Pre-Training 
# for blobs Classification Problem

from sklearn.datasets import make_blobs
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt



# prepare train and test dataset


def prepare_data():
    """Create the three-class blobs dataset and split it in half.

    Returns:
        Tuple ``(train_X, test_X, train_y, test_y)``. The targets are
        one-hot encoded; the first 500 rows form the training split.
    """
    n_train = 500
    features, labels = make_blobs(n_samples=1000, centers=3, n_features=2,
                                  cluster_std=2, random_state=2)

    # One-hot encode the integer class labels.
    targets = to_categorical(labels)

    # First n_train rows go to training, the remainder to testing.
    return (features[:n_train, :], features[n_train:, :],
            targets[:n_train], targets[n_train:])



# define and fit the base model

def get_base_model(train_X, train_y):
    """Build, compile and train the initial one-hidden-layer classifier.

    Args:
        train_X: Training inputs with two features per row.
        train_y: One-hot encoded training targets with three classes.

    Returns:
        The fitted ``Sequential`` model.
    """
    hidden = Dense(10, input_dim=2, activation='relu',
                   kernel_initializer='he_uniform')
    output = Dense(3, activation='softmax')

    model = Sequential()
    for layer in (hidden, output):
        model.add(layer)

    # Categorical cross-entropy optimised by SGD with momentum.
    model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train silently for 100 epochs.
    model.fit(train_X, train_y, epochs=100, verbose=0)
    return model





# evaluate a fit model

def evaluate_model(model, train_X, test_X, train_y, test_y):
    """Score a fitted model on the train split, then on the test split.

    Args:
        model: Object whose ``evaluate(X, y, verbose=0)`` returns a
            two-item sequence; the second item is taken as the accuracy.
        train_X, test_X: Inputs of the two splits.
        train_y, test_y: Targets of the two splits.

    Returns:
        Tuple ``(train_acc, test_acc)``.
    """
    accuracies = []
    for inputs, targets in ((train_X, train_y), (test_X, test_y)):
        # The first item (the loss) is not needed here.
        _loss, accuracy = model.evaluate(inputs, targets, verbose=0)
        accuracies.append(accuracy)
    return accuracies[0], accuracies[1]




# add one new layer and retrain only the new layer

def add_layer(model, train_X, train_y):
    """Insert one new hidden layer beneath the output layer and retrain.

    The model is modified in place: the output layer is removed, every
    remaining layer is frozen, a new 10-node ReLU hidden layer is added,
    and the original output layer is put back on top. Because the output
    layer is removed before the freezing loop runs, it stays trainable
    together with the new hidden layer.

    Args:
        model: Compiled ``Sequential`` model to deepen.
        train_X: Training inputs.
        train_y: One-hot encoded training targets.

    Returns:
        None. The model is updated in place.
    """
    # Remember the current output layer, then detach it.
    output_layer = model.layers[-1]
    model.pop()

    # Mark all remaining (already trained) layers as non-trainable.
    for layer in model.layers:
        layer.trainable = False

    # Add a new hidden layer, then re-attach the output layer.
    model.add(Dense(10, activation='relu',
                    kernel_initializer='he_uniform'))
    model.add(output_layer)

    # Recompile before training. Keras only picks up changes to
    # `trainable` on the next compile(), and the previous optimizer was
    # built for the old set of variables. The settings mirror those used
    # in get_base_model.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt, metrics=['accuracy'])

    # Fit model.
    model.fit(train_X, train_y, epochs=100, verbose=0)





# Load the train/test split and train the initial model.
train_X, test_X, train_y, test_y = prepare_data()
model = get_base_model(train_X, train_y)

# scores maps the number of layers in the model to (train_acc, test_acc).
scores = {}

# Score the base model first.
train_acc, test_acc = evaluate_model(model, train_X, test_X,
                                     train_y, test_y)
print('> layers=%d, train=%.3f, test=%.3f' % (len(model.layers), train_acc, test_acc))
scores[len(model.layers)] = (train_acc, test_acc)

# Deepen the model one layer at a time, re-scoring after each addition.
n_layers = 10
for i in range(n_layers):
    add_layer(model, train_X, train_y)
    train_acc, test_acc = evaluate_model(model, train_X, test_X,
                                         train_y, test_y)
    print('> layers=%d, train=%.3f, test=%.3f' % (len(model.layers), train_acc, test_acc))
    scores[len(model.layers)] = (train_acc, test_acc)

# Plot accuracy against the number of layers for both splits.
layer_counts = list(scores.keys())
plt.plot(layer_counts, [train for train, _test in scores.values()],
         label='train', marker='.')
plt.plot(layer_counts, [test for _train, test in scores.values()],
         label='test', marker='.')
plt.legend()
plt.show()



--


--

--

---

NOTE

From this point on, copy the code for each example from Jason Brownlee's website, paste it into a cell here, and run it.

---

--

--

--



--

--

--

--

---

13.3.2 

Overfit Multi-Layer Perceptron Model
--

Overfit Multi-Layer Perceptron for the moons dataset

---

--

--

--

--

In [None]:
# overfit mlp for the moons dataset

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt



# Build a small noisy two-moons dataset (100 samples, fixed seed).
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# One hidden ReLU layer of 500 nodes feeding a sigmoid output node.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()







--

--

--

--

---

13.3.3 

Multi-Layer Perceptron Model with Weight Regularisation
--

Multi-Layer Perceptron Model with Weight Regularisation for the moons dataset

---

--

--

--

--

In [None]:
# mlp with weight regularization for the moons dataset

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt


# Build a small noisy two-moons dataset (100 samples, fixed seed).
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# Hidden ReLU layer of 500 nodes with an L2 penalty (0.001) on its
# kernel weights, feeding a sigmoid output node.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu',
                kernel_regularizer=l2(0.001)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()



--

--

---

13.3.4 

Grid Search Regularization Hyperparameter
--

Grid Search Regularization values for moons dataset

---

--

--

--

--

In [None]:
# grid search regularization values for moons dataset

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from matplotlib import pyplot



# Build a small noisy two-moons dataset (100 samples, fixed seed).
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# L2 coefficients to try, with one accuracy entry collected per value.
values = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
all_train, all_test = [], []

for param in values:
    # Fresh model for each coefficient: 500-node ReLU hidden layer with an
    # L2 kernel penalty, followed by a sigmoid output node.
    model = Sequential()
    model.add(Dense(500, input_dim=2, activation='relu',
                    kernel_regularizer=l2(param)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train silently for 4000 epochs.
    model.fit(trainX, trainy, epochs=4000, verbose=0)

    # Accuracy on both splits (the loss value is discarded).
    _, train_acc = model.evaluate(trainX, trainy, verbose=0)
    _, test_acc = model.evaluate(testX, testy, verbose=0)
    print('Param: %f, Train: %.3f, Test: %.3f' % (param, train_acc, test_acc))

    all_train.append(train_acc)
    all_test.append(test_acc)

# Accuracy against the coefficient, on a logarithmic x axis.
for series, label in ((all_train, 'train'), (all_test, 'test')):
    pyplot.semilogx(values, series, label=label, marker='o')
pyplot.legend()
pyplot.show()





--

--

---

14.3.2 

Overfit Multi-Layer Perceptron
--

MLP overfit on the two-circles dataset

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt




# Build a small noisy two-circles dataset (100 samples, fixed seed).
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# One hidden ReLU layer of 500 nodes feeding a sigmoid output node.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, epochs=4000, verbose=0,
                    validation_data=(testX, testy))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()



--

--

---

14.3.3 

Overfit Multi-Layer Perceptron with

Activation Regularization
--

MLP overfit on the two-circles dataset with activation regularisation BEFORE activation

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset
# with ACTIVATION REGULARIZATION before ACTIVATION

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1
from tensorflow.keras.layers import Activation
from matplotlib import pyplot as plt




# Build a small noisy two-circles dataset (100 samples, fixed seed).
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# The hidden layer is linear and carries the L1 activity penalty, so the
# penalty is computed on its output before the separate ReLU layer runs.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='linear',
                activity_regularizer=l1(0.0001)))
model.add(Activation('relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, epochs=4000, verbose=0,
                    validation_data=(testX, testy))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()



--

--

---

14.3.3 B

Overfit Multi-Layer Perceptron with

Activation Regularization
--

MLP overfit on the two-circles dataset with activation regularisation AFTER activation

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset
# with ACTIVATION REGULARIZATION after ACTIVATION

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1
from tensorflow.keras.layers import Activation
from matplotlib import pyplot as plt




# Build a small noisy two-circles dataset (100 samples, fixed seed).
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# The hidden layer applies ReLU itself and carries the L1 activity
# penalty, so the penalty is computed on the activated output.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu',
                activity_regularizer=l1(0.0001)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, epochs=4000, verbose=0,
                    validation_data=(testX, testy))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()



--

--

---

15.3.2

Overfit Multi-Layer Perceptron 
--

MLP overfit on the moons dataset 

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt



# Build a small noisy two-moons dataset (100 samples, fixed seed).
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# Only the first 30 rows are used for training; the rest is the test split.
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# One hidden ReLU layer of 500 nodes feeding a sigmoid output node.
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train silently for 4000 epochs, scoring the test split after each epoch.
history = model.fit(trainX, trainy, epochs=4000, verbose=0,
                    validation_data=(testX, testy))

# Final accuracy on both splits (the loss value is discarded).
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Learning curves: loss in the upper panel, accuracy in the lower panel.
for position, title, train_key, test_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()

plt.show()



--

--

---

15.3.3

Overfit Multi-Layer Perceptron 

with Weight Constraint
--

MLP overfit on the moons dataset with a unit norm constraint

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset with a unit norm constraint

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.constraints import unit_norm
from matplotlib import pyplot as plt



# two-moons data: 100 noisy points, seeded so every run sees the same samples
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# 500-unit ReLU hidden layer whose kernel is subject to a unit_norm constraint
model = Sequential([
    Dense(500, input_dim=2, activation='relu', kernel_constraint=unit_norm()),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()



--

--

---

16.3.2

Overfit Multi-Layer Perceptron 


--

MLP overfit on the two circles dataset 


---
Please CHECK OUT THIS POWERFUL DROP OUT EXAMPLE

https://machinelearningmastery.com/dropout-regularization-deep-learning-models-keras/

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt



# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()



--

--

---

16.3.3

Multi-Layer Perceptron with Dropout Regularization


--

MLP with DROP OUT on the two circles dataset 


---
Please CHECK OUT THIS POWERFUL DROP OUT EXAMPLE

https://machinelearningmastery.com/dropout-regularization-deep-learning-models-keras/

---

--

--

--

--

In [None]:
# mlp with dropout on the two circles dataset

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from matplotlib import pyplot as plt



# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# 500-unit ReLU hidden layer followed by a Dropout layer with rate 0.4
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()



--

--

---

17.3.2

Overfit Multi-Layer Perceptron
--

MLP overfit of the two circles dataset
---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset

from sklearn.datasets import make_circles
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt



# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--

--

---

17.3.3

Multi-Layer Perceptron with input layer noise

--

MLP overfit of the two circles dataset with input noise

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset with input noise

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GaussianNoise
from matplotlib import pyplot as plt




# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# a GaussianNoise(0.01) layer sits on the 2-feature input, ahead of the hidden layer
model = Sequential([
    GaussianNoise(0.01, input_shape=(2,)),
    Dense(500, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--

--

---

17.3.4

Multi-Layer Perceptron with Hidden Layer Noise

--

MLP overfit of the two circles dataset with Hidden Layer Noise

---

--

--

--

--

In [None]:
# mlp overfit on the two circles dataset with 
# hidden layer noise

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import GaussianNoise
from matplotlib import pyplot as plt




# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# the hidden Dense layer has no activation of its own: GaussianNoise(0.1) is
# inserted on its linear output and the ReLU is applied afterwards
model = Sequential([
    Dense(500, input_dim=2),
    GaussianNoise(0.1),
    Activation('relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--

--

---

17.3.4 B

Alternative

--

MLP Overfit on the two circles dataset with hidden layer noise (alternative)

---

--

--

--

In [None]:
# mlp overfit on the two circles dataset with 
# hidden layer noise (alternative)

from sklearn.datasets import make_circles
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GaussianNoise
from matplotlib import pyplot as plt




# two-circles data: 100 noisy points, seeded so every run sees the same samples
X, y = make_circles(n_samples=100, noise=0.1, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# alternative placement: GaussianNoise(0.1) follows the ReLU-activated hidden layer
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    GaussianNoise(0.1),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--

--

---

18.3.2

Overfit Multi-Layer Perceptron 

--

MLP overfit on the moons dataset 

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset

from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt



# two-moons data: 100 noisy points, seeded so every run sees the same samples
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 4000 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--

--

---

18.3.3

Overfit MLP with Early Stopping

--

MLP overfit on the moons dataset with SIMPLE EARLY STOPPING

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset with ''SIMPLE'' early stopping



# NOTE: unlike pages 358-359 of Better Deep Learning, a patience of
# 1 (negligible, almost zero) is used here to obtain similar results.





from sklearn.datasets import make_moons
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from matplotlib import pyplot as plt




# two-moons data: 100 noisy points, seeded so every run sees the same samples
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# simple early stopping: watch validation loss (lower is better) with a patience of 1
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=1)

# up to 4000 silent epochs; the callback may end training sooner
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0, callbacks=[es])

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()










--

--

---

18.3.3

B. 

Overfit MLP with Early Stopping

--

MLP overfit on the moons dataset with ...PATIENT... EARLY STOPPING

FIT IN THE CHANGES (add the patience argument)

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset with patient early stopping

from sklearn.datasets import make_moons
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from matplotlib import pyplot as plt




# two-moons data: 100 noisy points, seeded so every run sees the same samples
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# patient early stopping: watch validation loss (lower is better) with a patience of 200
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=200)

# up to 4000 silent epochs; the callback may end training sooner
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0, callbacks=[es])

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()






--

--

---

18.3.3

C. 

Overfit MLP with Early Stopping

--

MLP overfit on the moons dataset with ...PATIENT... EARLY STOPPING and MODEL CHECKPOINTING

---

--

--

--

--

In [None]:
# mlp overfit on the moons dataset with 
# patient early stopping and model checkpointing

from sklearn.datasets import make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model



# two-moons data: 100 noisy points, seeded so every run sees the same samples
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# the first 30 rows train the model; the remaining 70 are held out
n_train = 30
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# one wide hidden layer (500 ReLU units) feeding a sigmoid output
model = Sequential([
    Dense(500, input_dim=2, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# patient early stopping: watch validation loss (lower is better) with a patience of 200
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=200)

# checkpoint: write best_model.h5 only when validation accuracy reaches a new maximum
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max',
                     verbose=1, save_best_only=True)

# up to 4000 silent epochs with both callbacks attached
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=4000, verbose=0, callbacks=[es, mc])

# score the checkpointed model from disk rather than the in-memory `model`
saved_model = load_model('best_model.h5')
_, train_acc = saved_model.evaluate(trainX, trainy, verbose=0)
_, test_acc = saved_model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))




--

--

---

20.3.2
 

MLP Model for Multiclass Classification

--

fit high variance MLP on blobs classification problem

---

--

--

--

--

In [None]:
# fit high variance mlp on blobs classification problem

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot as plt



# three-blob data: 500 seeded 2-feature points, labels one-hot encoded
X, y = make_blobs(n_samples=500, centers=3, n_features=2,
                  cluster_std=2, random_state=2)
y = to_categorical(y)

# the first 30% of the rows train the model; the rest are held out
n_train = int(0.3 * X.shape[0])
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# small network: 15 ReLU units feeding a 3-way softmax
model = Sequential([
    Dense(15, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 200 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=200, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()




--
 
--

--

--

---
20.3.3

High Variance MLP Model 

--

demonstrate high variance of MLP on blobs classification problem

---

--

--

--

--

In [None]:
# demonstrate high variance of mlp model on 
# blobs classification problem

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from numpy import mean
from numpy import std
from matplotlib import pyplot



# fit and evaluate a neural net model on the dataset

def evaluate_model(trainX, trainy, testX, testy):
	"""Train a fresh MLP on the training split and return its test accuracy.

	A new network (2 inputs, 15 ReLU units, 3-way softmax) is built on every
	call, fitted silently for 200 epochs with Adam, and scored on
	(testX, testy).
	"""
	network = Sequential([
		Dense(15, input_dim=2, activation='relu'),
		Dense(3, activation='softmax'),
	])
	network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	network.fit(trainX, trainy, epochs=200, verbose=0)
	# evaluate yields loss first and accuracy second; only accuracy is returned
	return network.evaluate(testX, testy, verbose=0)[1]




# three-blob data: 500 seeded 2-feature points, labels one-hot encoded
X, y = make_blobs(n_samples=500, centers=3, n_features=2,
                  cluster_std=2, random_state=2)
y = to_categorical(y)

# the first 30% of the rows train the model; the rest are held out
n_train = int(0.3 * X.shape[0])
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# fit and score the same architecture 30 times on the same split
n_repeats = 30
scores = []
for _ in range(n_repeats):
    score = evaluate_model(trainX, trainy, testX, testy)
    print('> %.3f' % score)
    scores.append(score)

# spread of test accuracy across the repeats
print('Scores Mean: %.3f, Standard Deviation: %.3f' % (mean(scores), std(scores)))

# show the distribution twice: as a 10-bin histogram, then as a box plot
pyplot.hist(scores, bins=10)
pyplot.show()
pyplot.boxplot(scores)
pyplot.show()



--
 
--

--

--

---
20.3.4

Model Averaging Ensemble 

--

Model Averaging Ensemble and a study of ensemble size on model accuracy

---

--

--

--

--

In [None]:
# model averaging ensemble and a study of ensemble 
# size on test accuracy

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy
from numpy import array
from numpy import argmax
from sklearn.metrics import accuracy_score
from matplotlib import pyplot



# fit model on dataset

def fit_model(trainX, trainy):
	"""Build, train and return one ensemble member.

	The network has 2 inputs, 15 ReLU units and a 3-way softmax output; it is
	fitted silently for 200 epochs with Adam on (trainX, trainy).
	"""
	member = Sequential([
		Dense(15, input_dim=2, activation='relu'),
		Dense(3, activation='softmax'),
	])
	member.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	member.fit(trainX, trainy, epochs=200, verbose=0)
	return member




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
	"""Return the class index chosen by the whole ensemble for each row of testX.

	Every member's predict(testX) output is stacked along a new leading axis,
	the outputs are added together across members, and the class with the
	largest total is picked for each row.
	"""
	stacked = array([member.predict(testX) for member in members])
	# axis 0 indexes the members; after summing it, axis 1 indexes the classes
	totals = numpy.sum(stacked, axis=0)
	return argmax(totals, axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
	"""Return the accuracy of an ensemble made of the first n_members models.

	The size of the selected subset is printed before predictions are made.
	"""
	chosen = members[:n_members]
	print(len(chosen))
	predicted = ensemble_predictions(chosen, testX)
	return accuracy_score(testy, predicted)
 



# three-blob data: 500 seeded 2-feature points
X, y = make_blobs(n_samples=500, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# the first 30% of the rows train the models; the rest are held out
n_train = int(0.3 * X.shape[0])
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]
# only the training labels are one-hot encoded; testy keeps its integer
# class labels because it is compared with argmax predictions
trainy = to_categorical(trainy)

# train 20 independent members on the same training split
n_members = 20
members = [fit_model(trainX, trainy) for _ in range(n_members)]

# score ensembles built from the first 1, 2, ..., n_members models
scores = []
for i in range(1, n_members + 1):
    score = evaluate_n_members(members, i, testX, testy)
    print('> %.3f' % score)
    scores.append(score)

# accuracy as a function of ensemble size
x_axis = list(range(1, n_members + 1))
pyplot.plot(x_axis, scores)
pyplot.show()



--

--

---

20.3.4

Model Averaging Ensemble

--

Finally, we can update the repeated evaluation experiment to use an ensemble of five models instead of a single model and compare the distribution of scores.

The complete example of a repeatedly evaluated five-member ensemble on the blobs dataset is listed below.

---

--

--

In [None]:
# repeated evaluation of model averaging 
# ensemble on blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy
from numpy import array
from numpy import argmax
from numpy import mean
from numpy import std
from sklearn.metrics import accuracy_score



# fit model on dataset

def fit_model(trainX, trainy):
	"""Build, train and return one ensemble member.

	The network has 2 inputs, 15 ReLU units and a 3-way softmax output; it is
	fitted silently for 200 epochs with Adam on (trainX, trainy).
	"""
	member = Sequential([
		Dense(15, input_dim=2, activation='relu'),
		Dense(3, activation='softmax'),
	])
	member.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	member.fit(trainX, trainy, epochs=200, verbose=0)
	return member




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
	"""Return the class index chosen by the whole ensemble for each row of testX.

	Every member's predict(testX) output is stacked along a new leading axis,
	the outputs are added together across members, and the class with the
	largest total is picked for each row.
	"""
	stacked = array([member.predict(testX) for member in members])
	# axis 0 indexes the members; after summing it, axis 1 indexes the classes
	totals = numpy.sum(stacked, axis=0)
	return argmax(totals, axis=1)




# evaluate ensemble model

def evaluate_members(members, testX, testy):
	"""Return the accuracy of the combined ensemble prediction against testy."""
	predicted = ensemble_predictions(members, testX)
	return accuracy_score(testy, predicted)
 



# three-blob data: 500 seeded 2-feature points
X, y = make_blobs(n_samples=500, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# the first 30% of the rows train the models; the rest are held out
n_train = int(0.3 * X.shape[0])
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]
# only the training labels are one-hot encoded; testy keeps its integer
# class labels because it is compared with argmax predictions
trainy = to_categorical(trainy)

# 30 repeats, each training a brand-new five-member ensemble
n_repeats = 30
n_members = 5
scores = []
for _ in range(n_repeats):
    members = [fit_model(trainX, trainy) for _ in range(n_members)]
    score = evaluate_members(members, testX, testy)
    print('> %.3f' % score)
    scores.append(score)

# spread of ensemble accuracy across the repeats
print('Scores Mean: %.3f, Standard Deviation: %.3f' % (mean(scores), std(scores)))





--
 
--

--

--

---
21.2.2

MLP Model 

--

Develop an MLP for blobs dataset

---

--

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot as plt


# three-blob data: 1100 seeded 2-feature points, labels one-hot encoded
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)
y = to_categorical(y)

# only the first 100 rows train the model; the other 1000 are held out
n_train = 100
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]
print(trainX.shape, testX.shape)

# 25 ReLU units feeding a 3-way softmax
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train silently for 500 epochs, scoring the held-out rows after each one
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=500, verbose=0)

# final accuracy on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower panel
for panel, heading, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(panel)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()
plt.show()





--
 
--

--

--

---
21.2.3

Model Averaging Ensemble

--

Model Averaging Ensemble for the blobs dataset

---

--

--

--

--

In [None]:
# model averaging ensemble for the blobs dataset

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax



# fit model on dataset

def fit_model(trainX, trainy):
	"""Build, train and return one ensemble member.

	trainy is one-hot encoded here with to_categorical before fitting. The
	network has 2 inputs, 25 ReLU units and a 3-way softmax output, and is
	fitted silently for 500 epochs with Adam.
	"""
	encoded_labels = to_categorical(trainy)
	member = Sequential([
		Dense(25, input_dim=2, activation='relu'),
		Dense(3, activation='softmax'),
	])
	member.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	member.fit(trainX, encoded_labels, epochs=500, verbose=0)
	return member




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    Each model in ``members`` predicts on ``testX``; the outputs are stacked,
    added together across models, and the argmax along axis 1 of that sum is
    returned.
    """
    # one prediction array per member, stacked along a new leading axis
    stacked = array([member.predict(testX) for member in members])
    # collapse the member axis by summation, then pick the largest column per row
    return argmax(numpy.sum(stacked, axis=0), axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Return the accuracy of an ensemble built from the first ``n_members`` models.

    The leading slice of ``members`` predicts on ``testX`` through
    ``ensemble_predictions`` and the result is scored against ``testy`` with
    ``accuracy_score``.
    """
    predicted = ensemble_predictions(members[:n_members], testX)
    return accuracy_score(testy, predicted)
 



# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# split into train and test: the first n_train rows train, the rest are held out
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# fit all models
n_members = 10
members = [fit_model(trainX, trainy) for _ in range(n_members)]

# one-hot targets for the standalone Keras evaluation; they do not change
# between iterations, so encode them once instead of inside the loop
testy_enc = to_categorical(testy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = list(), list()
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(testX, testy_enc, verbose=0)
    # summarize this step
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# summarize average accuracy of a single final model
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members:
# standalone scores as unconnected markers, ensemble scores as a line
x_axis = list(range(1, len(members)+1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()




--
 
--

--

--

---
21.2.4

Grid Search Weighted Average Ensemble

--

Grid Search for Coefficients in a Weighted Average Ensemble for the blobs problem

---

--

--

--

--

In [None]:
# grid search for coefficients in a weighted average 
# ensemble for the blobs problem

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
from numpy import array
from numpy import argmax
from numpy import tensordot
from numpy.linalg import norm
from itertools import product



# fit model on dataset

def fit_model(trainX, trainy):
    """Build, train and return one MLP ensemble member.

    Targets are one-hot encoded with ``to_categorical``. The model has a
    25-unit ReLU hidden layer over 2 inputs and a 3-unit softmax output, and
    is fitted for 500 epochs (adam, categorical cross-entropy) without
    progress output.
    """
    net = Sequential([
        Dense(25, input_dim=2, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    net.fit(trainX, to_categorical(trainy), epochs=500, verbose=0)
    return net




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, weights, testX):
    """Return the class index chosen by a weighted sum of member predictions.

    Every model in ``members`` predicts on ``testX``. The stacked outputs are
    contracted with ``weights`` over the member axis (axis 0 of both), and the
    argmax along axis 1 of the weighted sum is returned.
    """
    # one prediction array per member, stacked along a new leading axis
    stacked = array([member.predict(testX) for member in members])
    # weight each member's output and add them together
    weighted = tensordot(stacked, weights, axes=((0), (0)))
    return argmax(weighted, axis=1)




# evaluate the accuracy of a weighted ensemble of all members

def evaluate_ensemble(members, weights, testX, testy):
    """Return the accuracy of the weighted ensemble on ``testX`` / ``testy``.

    Predictions come from ``ensemble_predictions`` and are scored with
    ``accuracy_score``.
    """
    predicted = ensemble_predictions(members, weights, testX)
    return accuracy_score(testy, predicted)
 


# normalize a vector to have unit norm


def normalize(weights):
    """Scale ``weights`` so that its L1 norm is 1.

    A vector whose L1 norm is zero cannot be scaled and is returned
    unchanged; otherwise the result is ``weights`` divided by its L1 norm.
    """
    l1_total = norm(weights, 1)
    # dividing by a zero norm is meaningless, so hand the input back untouched
    return weights if l1_total == 0.0 else weights / l1_total




# grid search weights

def grid_search(members, testX, testy):
    """Exhaustively search a coarse grid for the best ensemble weights.

    Every combination of the values 0.0, 0.1, ..., 1.0 (one value per member)
    is L1-normalized and scored by accuracy against ``testy``. Combinations
    whose entries are all identical are skipped. Each improvement is printed.

    Returns the best weight vector found, as a list. When no combination
    scores above 0.0 (for instance with a single member, where every
    combination is skipped) equal weights are returned; an empty ``members``
    yields an empty list.
    """
    if not members:
        return []
    # define weights to consider
    w = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    # Member predictions do not depend on the weights, so run the models once
    # rather than once per grid point (there are len(w) ** len(members) points).
    yhats = array([model.predict(testX) for model in members])
    best_score, best_weights = 0.0, None
    # iterate all possible combinations (cartesian product)
    for weights in product(w, repeat=len(members)):
        # skip if all weights are equal
        if len(set(weights)) == 1:
            continue
        # hack, normalize weight vector
        weights = normalize(weights)
        # weighted sum across members, argmax across classes, then accuracy
        summed = tensordot(yhats, weights, axes=((0), (0)))
        score = accuracy_score(testy, argmax(summed, axis=1))
        if score > best_score:
            best_score, best_weights = score, weights
            print('>%s %.3f' % (best_weights, best_score))
    if best_weights is None:
        # nothing beat the initial score; fall back to a plain average
        return [1.0 / len(members)] * len(members)
    return list(best_weights)
 




# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# split into train and test: the first n_train rows train, the rest are held out
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# fit all models
n_members = 5
members = [fit_model(trainX, trainy) for _ in range(n_members)]

# evaluate each single model on the test set (Keras needs one-hot targets)
testy_enc = to_categorical(testy)
for i, member in enumerate(members):
    _, test_acc = member.evaluate(testX, testy_enc, verbose=0)
    print('Model %d: %.3f' % (i+1, test_acc))

# evaluate averaging ensemble (equal weights)
weights = [1.0/n_members] * n_members
score = evaluate_ensemble(members, weights, testX, testy)
print('Equal Weights Score: %.3f' % score)

# grid search weights, then re-score the ensemble with the winning vector
weights = grid_search(members, testX, testy)
score = evaluate_ensemble(members, weights, testX, testy)
print('Grid Search Weights: %s, Score: %.3f' % (weights, score))




--

--

--

---
21.2.5

Weighted Average MLP Ensemble

--

Global Optimization to find coefficients for weighted ensemble on blobs problem.

---

--

--

--

--

In [None]:
# global optimization to find coefficients 
# for weighted ensemble on blobs problem

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
from numpy import array
from numpy import argmax
from numpy import tensordot
from numpy.linalg import norm
from scipy.optimize import differential_evolution




# fit model on dataset

def fit_model(trainX, trainy):
    """Fit one MLP on the training data and return it.

    ``trainy`` is one-hot encoded with ``to_categorical``. The model stacks a
    25-unit ReLU layer (2 inputs) and a 3-unit softmax layer, and is trained
    for 500 epochs with adam on categorical cross-entropy, silently.
    """
    net = Sequential([
        Dense(25, input_dim=2, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    net.fit(trainX, to_categorical(trainy), epochs=500, verbose=0)
    return net




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, weights, testX):
    """Return the class index chosen by a weighted sum of member predictions.

    The predictions of all ``members`` on ``testX`` are stacked, contracted
    with ``weights`` over the member axis (axis 0 of both), and reduced with
    an argmax along axis 1.
    """
    per_member = array([member.predict(testX) for member in members])
    # weighted sum across ensemble members
    combined = tensordot(per_member, weights, axes=((0), (0)))
    # index of the largest combined value in each row
    return argmax(combined, axis=1)



# evaluate the accuracy of a weighted ensemble of all members

def evaluate_ensemble(members, weights, testX, testy):
    """Score the weighted ensemble: accuracy of its predictions against ``testy``.

    Predictions are produced by ``ensemble_predictions`` and compared with
    ``accuracy_score``.
    """
    predicted = ensemble_predictions(members, weights, testX)
    return accuracy_score(testy, predicted)
 



# normalize a vector to have unit norm

def normalize(weights):
    """Return ``weights`` divided by its L1 norm.

    When the L1 norm is zero the input is returned as-is, since there is
    nothing to scale.
    """
    l1_total = norm(weights, 1)
    if l1_total != 0.0:
        return weights / l1_total
    # all-zero vector: leave it alone
    return weights




# loss function for optimization process, designed to be minimized

def loss_function(weights, members, testX, testy):
    """Return the error rate (1 - accuracy) of the ensemble for ``weights``.

    The candidate weights are passed through ``normalize`` before the
    ensemble is scored with ``evaluate_ensemble``, giving a quantity that is
    to be minimized.
    """
    accuracy = evaluate_ensemble(members, normalize(weights), testX, testy)
    return 1.0 - accuracy
 



# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# split into train and test: the first n_train rows train, the rest are held out
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# fit all models
n_members = 5
members = [fit_model(trainX, trainy) for _ in range(n_members)]

# evaluate each single model on the test set (Keras needs one-hot targets)
testy_enc = to_categorical(testy)
for i, member in enumerate(members):
    _, test_acc = member.evaluate(testX, testy_enc, verbose=0)
    print('Model %d: %.3f' % (i+1, test_acc))

# evaluate averaging ensemble (equal weights)
weights = [1.0/n_members] * n_members
score = evaluate_ensemble(members, weights, testX, testy)
print('Equal Weights Score: %.3f' % score)

# each weight is searched within [0, 1]
bound_w = [(0.0, 1.0)] * n_members

# extra arguments forwarded to the loss function after the weight vector
search_arg = (members, testX, testy)

# global optimization of ensemble weights
result = differential_evolution(loss_function, bound_w, args=search_arg, maxiter=1000, tol=1e-7)

# get the chosen weights, normalized the same way the loss function saw them
weights = normalize(result['x'])
print('Optimized Weights: %s' % weights)

# evaluate chosen weights
score = evaluate_ensemble(members, weights, testX, testy)
print('Optimized Weights Score: %.3f' % score)




--

--

--

---
22.2.2

Single MLP Model

--

develop an MLP for blobs dataset

---

--

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot as plt



# generate 2d classification dataset
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=2, random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: the first 90% of rows train, the rest test
n_train = int(0.9 * X.shape[0])
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]

# define model: 50-unit ReLU hidden layer, 3-unit softmax output
model = Sequential([
    Dense(50, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit model, passing the test split as validation data so that
# per-epoch validation loss/accuracy are recorded in the history
history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=50, verbose=0)

# evaluate the model on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot learning curves: loss in the upper panel, accuracy in the lower one
panels = (
    (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
    (212, 'Accuracy', 'accuracy', 'val_accuracy'),
)
for position, heading, train_key, test_key in panels:
    plt.subplot(position)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()
plt.show()




--

--

--

---
22.2.3

Random Splits Ensemble

--

Random Splits MLP Ensemble on blobs dataset

---

--

--

--

--

In [None]:
# random-splits mlp ensemble on blobs dataset

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax




# evaluate a single mlp model

def evaluate_model(trainX, trainy, testX, testy):
    """Fit an MLP on the training split and measure it on the test split.

    The model is a 50-unit ReLU hidden layer (2 inputs) followed by a softmax
    layer, trained for 50 epochs with adam on categorical cross-entropy.

    Returns a ``(model, test_acc)`` tuple: the fitted model and the accuracy
    reported by ``model.evaluate`` on the test split.
    """
    # number of classes; also the width of the softmax layer below
    n_classes = 3
    # Pin the one-hot width. Left to infer it from the largest label present,
    # to_categorical would emit too few columns for a split that happens to
    # lack the highest class, and the model would reject the target shape.
    trainy_enc = to_categorical(trainy, num_classes=n_classes)
    testy_enc = to_categorical(testy, num_classes=n_classes)
    # define model
    model = Sequential()
    model.add(Dense(50, input_dim=2, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy_enc, epochs=50, verbose=0)
    # evaluate the model (loss is discarded, accuracy is kept)
    _, test_acc = model.evaluate(testX, testy_enc, verbose=0)
    return model, test_acc




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    The predictions of each model in ``members`` on ``testX`` are stacked,
    summed over the member axis, and reduced with an argmax along axis 1.
    """
    per_member = array([member.predict(testX) for member in members])
    # sum across ensemble members
    totals = numpy.sum(per_member, axis=0)
    # index of the largest total in each row
    return argmax(totals, axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Return the accuracy of an ensemble built from the first ``n_members`` models.

    The leading slice of ``members`` predicts on ``testX`` through
    ``ensemble_predictions``; the result is scored with ``accuracy_score``.
    """
    predicted = ensemble_predictions(members[:n_members], testX)
    return accuracy_score(testy, predicted)
 



# generate 2d classification dataset
dataX, datay = make_blobs(n_samples=55000, centers=3, n_features=2, cluster_std=2, random_state=2)

# the first 5000 rows are used for model building, the rest are held out
X, newX = dataX[:5000, :], dataX[5000:, :]
y, newy = datay[:5000], datay[5000:]

# multiple train-test splits
n_splits = 10
scores, members = list(), list()
for _ in range(n_splits):
    # split data (a fresh random 90/10 split each time)
    trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.10)
    # fit a model on this split and score it on the split's test part
    model, test_acc = evaluate_model(trainX, trainy, testX, testy)
    print('>%.3f' % test_acc)
    scores.append(test_acc)
    members.append(model)

# summarize expected performance
print('Estimated Accuracy %.3f (%.3f)' % (mean(scores), std(scores)))

# one-hot targets for the standalone Keras evaluation; the hold out labels
# never change, so encode them once instead of on every loop iteration
newy_enc = to_categorical(newy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = list(), list()
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, newX, newy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(newX, newy_enc, verbose=0)
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# summarize average accuracy of a single model on the hold out set
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members:
# standalone scores as unconnected markers, ensemble scores as a line
x_axis = list(range(1, n_splits+1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()





--

--

--

---
22.2.4

Cross-Validation Ensemble

--

Cross-Validation MLP Ensemble on blobs dataset

---

--

--

--

--

In [None]:
# cross-validation mlp ensemble on blobs dataset

from sklearn.datasets import make_blobs
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax



# evaluate a single mlp model

def evaluate_model(trainX, trainy, testX, testy):
    """Fit an MLP on the training fold and measure it on the test fold.

    The model is a 50-unit ReLU hidden layer (2 inputs) followed by a softmax
    layer, trained for 50 epochs with adam on categorical cross-entropy.

    Returns a ``(model, test_acc)`` tuple: the fitted model and the accuracy
    reported by ``model.evaluate`` on the test fold.
    """
    # number of classes; also the width of the softmax layer below
    n_classes = 3
    # Pin the one-hot width. Left to infer it from the largest label present,
    # to_categorical would emit too few columns for a fold that happens to
    # lack the highest class, and the model would reject the target shape.
    trainy_enc = to_categorical(trainy, num_classes=n_classes)
    testy_enc = to_categorical(testy, num_classes=n_classes)
    # define model
    model = Sequential()
    model.add(Dense(50, input_dim=2, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy_enc, epochs=50, verbose=0)
    # evaluate the model (loss is discarded, accuracy is kept)
    _, test_acc = model.evaluate(testX, testy_enc, verbose=0)
    return model, test_acc





# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    Each model in ``members`` predicts on ``testX``; the stacked outputs are
    summed over the member axis and the argmax along axis 1 is returned.
    """
    stacked = array([member.predict(testX) for member in members])
    # add the members' outputs together, then pick the largest column per row
    return argmax(numpy.sum(stacked, axis=0), axis=1)





# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Score the ensemble formed by the first ``n_members`` entries of ``members``.

    Predictions come from ``ensemble_predictions`` on ``testX`` and are
    compared with ``testy`` using ``accuracy_score``.
    """
    leading = members[:n_members]
    return accuracy_score(testy, ensemble_predictions(leading, testX))
 



# generate 2d classification dataset
dataX, datay = make_blobs(n_samples=55000, centers=3, n_features=2, cluster_std=2, random_state=2)

# the first 5000 rows are used for model building, the rest are held out
X, newX = dataX[:5000, :], dataX[5000:, :]
y, newy = datay[:5000], datay[5000:]

# prepare the k-fold cross-validation configuration.
# shuffle and random_state are passed by keyword: current scikit-learn
# releases reject them as positional arguments.
n_folds = 10
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=1)

# cross validation estimation of performance
scores, members = list(), list()
for train_ix, test_ix in kfold.split(X):
    # select samples
    trainX, trainy = X[train_ix], y[train_ix]
    testX, testy = X[test_ix], y[test_ix]
    # fit a model on this fold and score it on the fold's test part
    model, test_acc = evaluate_model(trainX, trainy, testX, testy)
    print('>%.3f' % test_acc)
    scores.append(test_acc)
    members.append(model)

# summarize expected performance
print('Estimated Accuracy %.3f (%.3f)' % (mean(scores), std(scores)))

# one-hot targets for the standalone Keras evaluation; the hold out labels
# never change, so encode them once instead of on every loop iteration
newy_enc = to_categorical(newy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = list(), list()
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, newX, newy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(newX, newy_enc, verbose=0)
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# summarize average accuracy of a single model on the hold out set
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members:
# standalone scores as unconnected markers, ensemble scores as a line
x_axis = list(range(1, n_folds+1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()




--

--

--

---
22.2.5

Bagging Ensemble

--

Bagging MLP Ensemble on blobs dataset

---

--

--

--

In [None]:
# bagging mlp ensemble on blobs dataset
from sklearn.datasets import make_blobs
from sklearn.utils import resample
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax




# evaluate a single mlp model

def evaluate_model(trainX, trainy, testX, testy):
    """Fit an MLP on the training sample and measure it on the test sample.

    The model is a 50-unit ReLU hidden layer (2 inputs) followed by a softmax
    layer, trained for 50 epochs with adam on categorical cross-entropy.

    Returns a ``(model, test_acc)`` tuple: the fitted model and the accuracy
    reported by ``model.evaluate`` on the test sample.
    """
    # number of classes; also the width of the softmax layer below
    n_classes = 3
    # Pin the one-hot width. Left to infer it from the largest label present,
    # to_categorical would emit too few columns for a sample that happens to
    # lack the highest class, and the model would reject the target shape.
    trainy_enc = to_categorical(trainy, num_classes=n_classes)
    testy_enc = to_categorical(testy, num_classes=n_classes)
    # define model
    model = Sequential()
    model.add(Dense(50, input_dim=2, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit model
    model.fit(trainX, trainy_enc, epochs=50, verbose=0)
    # evaluate the model (loss is discarded, accuracy is kept)
    _, test_acc = model.evaluate(testX, testy_enc, verbose=0)
    return model, test_acc




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    The outputs of each model in ``members`` on ``testX`` are stacked, summed
    over the member axis, and reduced with an argmax along axis 1.
    """
    per_member = array([member.predict(testX) for member in members])
    totals = numpy.sum(per_member, axis=0)
    return argmax(totals, axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Return the accuracy of an ensemble built from the first ``n_members`` models.

    The leading slice of ``members`` predicts on ``testX`` through
    ``ensemble_predictions``; the result is scored with ``accuracy_score``.
    """
    predicted = ensemble_predictions(members[:n_members], testX)
    return accuracy_score(testy, predicted)
 



# generate 2d classification dataset
dataX, datay = make_blobs(n_samples=55000, centers=3, n_features=2, cluster_std=2, random_state=2)

# the first 5000 rows are used for model building, the rest are held out
X, newX = dataX[:5000, :], dataX[5000:, :]
y, newy = datay[:5000], datay[5000:]

# multiple train-test splits
n_splits = 10
scores, members = list(), list()
# every row index of X; identical for each bootstrap, so built once
ix = list(range(len(X)))
for _ in range(n_splits):
    # draw the training indexes with replacement
    train_ix = resample(ix, replace=True, n_samples=4500)
    # rows never drawn form the test set; a set makes each membership
    # check constant-time instead of a scan over the 4500 drawn indexes
    drawn = set(train_ix)
    test_ix = [x for x in ix if x not in drawn]
    # select data
    trainX, trainy = X[train_ix], y[train_ix]
    testX, testy = X[test_ix], y[test_ix]
    # fit a model on the bootstrap sample and score it on the unseen rows
    model, test_acc = evaluate_model(trainX, trainy, testX, testy)
    print('>%.3f' % test_acc)
    scores.append(test_acc)
    members.append(model)

# summarize expected performance
print('Estimated Accuracy %.3f (%.3f)' % (mean(scores), std(scores)))

# one-hot targets for the standalone Keras evaluation; the hold out labels
# never change, so encode them once instead of on every loop iteration
newy_enc = to_categorical(newy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = list(), list()
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, newX, newy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(newX, newy_enc, verbose=0)
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# summarize average accuracy of a single model on the hold out set
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members:
# standalone scores as unconnected markers, ensemble scores as a line
x_axis = list(range(1, n_splits+1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()




--

--

--

---
23.2.2

MLP Model

--

Develop an MLP model for blobs dataset

---

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot as plt



# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: the first n_train rows train, the rest test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# define model: 25-unit ReLU hidden layer, 3-unit softmax output
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit model, passing the test split as validation data so that
# per-epoch validation loss/accuracy are recorded in the history
history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=1000, verbose=0)

# evaluate the model on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot learning curves: loss in the upper panel, accuracy in the lower one
panels = (
    (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
    (212, 'Accuracy', 'accuracy', 'val_accuracy'),
)
for position, heading, train_key, test_key in panels:
    plt.subplot(position)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()
plt.show()




--

--

--

---
23.2.3

Save Horizontal Models

--

save horizontal voting ensemble members during training

---

--

--

--

In [None]:
# save horizontal voting ensemble members during training

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from os import makedirs



# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: the first n_train rows train, the rest test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# define model: 25-unit ReLU hidden layer, 3-unit softmax output
model = Sequential()
model.add(Dense(25, input_dim=2, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# create directory for models; exist_ok keeps a re-run of this cell from
# failing with FileExistsError when the directory is already there
makedirs('models', exist_ok=True)

# fit model one epoch at a time so that a snapshot can be written
# after each of the final epochs
n_epochs, n_save_after = 1000, 950
for i in range(n_epochs):
    # fit model for a single epoch
    model.fit(trainX, trainy, epochs=1, verbose=0)
    # save a snapshot once the epoch index reaches n_save_after
    if i >= n_save_after:
        model.save('models/model_' + str(i) + '.h5')
  



--

--

--

---
23.2.3

Make Horizontal Ensemble Models

--

load models and make predictions using a horizontal voting ensemble

---

--

--

--

In [None]:
# load models and make predictions using 
# a horizontal voting ensemble

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax



# load models from file

def load_all_models(n_start, n_end):
    """Load the saved models for epochs ``n_start`` up to, not including, ``n_end``.

    Each model is read from ``models/model_<epoch>.h5`` with ``load_model``
    and a confirmation line is printed after every load. Returns the models
    as a list in ascending epoch order.
    """
    loaded = []
    for epoch in range(n_start, n_end):
        # file written for this epoch
        path = 'models/model_' + str(epoch) + '.h5'
        loaded.append(load_model(path))
        print('>loaded %s' % path)
    return loaded




# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    Each model in ``members`` predicts on ``testX``; the stacked outputs are
    summed over the member axis and the argmax along axis 1 is returned.
    """
    stacked = array([member.predict(testX) for member in members])
    # collapse the member axis by summation, then pick the largest column per row
    return argmax(numpy.sum(stacked, axis=0), axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Score the ensemble formed by the first ``n_members`` entries of ``members``.

    Predictions come from ``ensemble_predictions`` on ``testX`` and are
    compared with ``testy`` using ``accuracy_score``.
    """
    leading = members[:n_members]
    return accuracy_score(testy, ensemble_predictions(leading, testX))
 




# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# split into train and test: the first n_train rows train, the rest are held out
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# load models in order
members = load_all_models(950, 1000)
print('Loaded %d models' % len(members))

# reverse loaded models so we build
# the ensemble with the last models first
members = list(reversed(members))

# one-hot targets for the standalone Keras evaluation; they do not change
# between iterations, so encode them once instead of inside the loop
testy_enc = to_categorical(testy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = list(), list()
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(testX, testy_enc, verbose=0)
    # summarize this step
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    single_scores.append(single_score)
    ensemble_scores.append(ensemble_score)

# summarize average accuracy of a single final model
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members:
# standalone scores as unconnected markers, ensemble scores as a line
x_axis = list(range(1, len(members)+1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()



--

--

--

---
24.2.2

Multi-Layer Perceptron Model

--

develop an MLP for blobs dataset

---

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from matplotlib import pyplot as plt




# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test: the first n_train rows train, the rest test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]

# define model: 25-unit ReLU hidden layer, 3-unit softmax output
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])

# stochastic gradient descent with momentum
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# fit model, passing the test split as validation data so that
# per-epoch validation loss/accuracy are recorded in the history
history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=200, verbose=0)

# evaluate the model on both splits (evaluate returns loss first, then accuracy)
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot learning curves: loss in the upper panel, accuracy in the lower one
panels = (
    (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
    (212, 'Accuracy', 'accuracy', 'val_accuracy'),
)
for position, heading, train_key, test_key in panels:
    plt.subplot(position)
    plt.title(heading, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[test_key], label='test')
    plt.legend()
plt.show()




--

--

--

---
24.2.3

Cosine Annealing Learning Rate

--

MLP with Cosine Annealing Learning Rate Schedule on blobs dataset

---

--

--

--

In [None]:
# mlp with cosine annealing learning 
# rate schedule on blobs problem

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend
from math import pi
from math import cos
from math import floor
from matplotlib import pyplot as plt




# define custom learning rate schedule

class CosineAnnealingLearningRateSchedule(Callback):
    """Callback that applies a cyclic cosine-annealed learning rate.

    At the start of every epoch the learning rate of the attached model's
    optimizer is overwritten with the value returned by
    ``cosine_annealing`` and that value is appended to ``self.lrates``.

    Args:
        n_epochs: total number of training epochs.
        n_cycles: number of annealing cycles the epochs are divided into.
        lrate_max: learning rate used at the first epoch of each cycle.
        verbose: accepted for interface compatibility; not used.
    """

    def __init__(self, n_epochs, n_cycles, lrate_max, verbose=0):
        # run the base-class constructor before adding our own state
        super().__init__()
        self.epochs = n_epochs
        self.cycles = n_cycles
        self.lr_max = lrate_max
        # learning rate applied at each epoch, in order
        self.lrates = []

    def cosine_annealing(self, epoch, n_epochs, n_cycles, lrate_max):
        """Return the learning rate for ``epoch`` (0-based).

        Within a cycle the value follows half a cosine wave, starting at
        ``lrate_max`` on the cycle's first epoch and decreasing from there.
        """
        # clamp to one epoch per cycle: with n_cycles > n_epochs the floor
        # would be 0 and the modulo/division below would raise
        epochs_per_cycle = max(1, floor(n_epochs / n_cycles))
        cos_inner = (pi * (epoch % epochs_per_cycle)) / epochs_per_cycle
        return lrate_max / 2 * (cos(cos_inner) + 1)

    def on_epoch_begin(self, epoch, logs=None):
        """Compute, apply and record the learning rate for this epoch."""
        lr = self.cosine_annealing(epoch, self.epochs, self.cycles, self.lr_max)
        # same attribute name the optimizers in this file are configured with
        backend.set_value(self.model.optimizer.learning_rate, lr)
        self.lrates.append(lr)




# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows for validation
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]

# MLP with a single 25-unit hidden layer; the optimizer's learning rate is
# overwritten by the callback at the start of every epoch
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
opt = SGD(momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# schedule: 400 epochs in cycles of 50 epochs, peak learning rate 0.01
n_epochs = 400
n_cycles = n_epochs / 50
ca = CosineAnnealingLearningRateSchedule(n_epochs, n_cycles, 0.01)

# train with the schedule attached
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=n_epochs, verbose=0, callbacks=[ca])

# final accuracy on both splits
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning rate that was applied at each epoch
plt.plot(ca.lrates)
plt.show()

# learning curves: loss in the upper panel, accuracy in the lower one
for position, title, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()

plt.show()




--

--

--

---
24.2.4

MLP Snapshot Ensemble

--

A.

Save Snapshot Models During Training

---

--

--

--

In [None]:
# example of saving models for a snapshot ensemble

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend
from math import pi
from math import cos
from math import floor



# snapshot ensemble with custom learning rate schedule

class SnapshotEnsemble(Callback):
    """Cosine-annealing callback that saves the model at the end of each cycle.

    At the start of every epoch the optimizer's learning rate is set from
    ``cosine_annealing``; whenever an epoch completes a cycle the model is
    written to ``snapshot_model_<k>.h5`` where ``k`` is the 1-based cycle
    number.

    Args:
        n_epochs: total number of training epochs.
        n_cycles: number of annealing cycles (and therefore snapshots).
        lrate_max: learning rate used at the first epoch of each cycle.
        verbose: accepted for interface compatibility; not used.
    """

    def __init__(self, n_epochs, n_cycles, lrate_max, verbose=0):
        # run the base-class constructor before adding our own state
        super().__init__()
        self.epochs = n_epochs
        self.cycles = n_cycles
        self.lr_max = lrate_max
        # learning rate applied at each epoch, in order
        self.lrates = []

    def _epochs_per_cycle(self, n_epochs, n_cycles):
        """Return the whole number of epochs in one cycle, never below 1."""
        # clamping avoids a zero cycle length (and the resulting
        # ZeroDivisionError) when n_cycles exceeds n_epochs
        return max(1, floor(n_epochs / n_cycles))

    def cosine_annealing(self, epoch, n_epochs, n_cycles, lrate_max):
        """Return the learning rate for ``epoch`` (0-based)."""
        epochs_per_cycle = self._epochs_per_cycle(n_epochs, n_cycles)
        cos_inner = (pi * (epoch % epochs_per_cycle)) / epochs_per_cycle
        return lrate_max / 2 * (cos(cos_inner) + 1)

    def on_epoch_begin(self, epoch, logs=None):
        """Compute, apply and record the learning rate for this epoch."""
        lr = self.cosine_annealing(epoch, self.epochs, self.cycles, self.lr_max)
        # same attribute name the optimizers in this file are configured with
        backend.set_value(self.model.optimizer.learning_rate, lr)
        self.lrates.append(lr)

    def on_epoch_end(self, epoch, logs=None):
        """Save a snapshot when ``epoch`` is the last epoch of a cycle."""
        epochs_per_cycle = self._epochs_per_cycle(self.epochs, self.cycles)
        # no special case for epoch 0: with one-epoch cycles the first epoch
        # also ends a cycle and snapshot 1 must be written
        if (epoch + 1) % epochs_per_cycle == 0:
            filename = "snapshot_model_%d.h5" % ((epoch + 1) // epochs_per_cycle)
            self.model.save(filename)
            print('>saved snapshot %s, epoch %d' % (filename, epoch))




# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows for validation
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]

# MLP with a single 50-unit hidden layer
model = Sequential([
    Dense(50, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
opt = SGD(momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# snapshot callback: 500 epochs in cycles of 50 epochs, peak learning rate 0.01
n_epochs = 500
n_cycles = n_epochs / 50
ca = SnapshotEnsemble(n_epochs, n_cycles, 0.01)

# train; the callback saves a model file at the end of every cycle
model.fit(trainX, trainy, validation_data=(testX, testy),
          epochs=n_epochs, verbose=0, callbacks=[ca])




--

--

--

---
24.2.4

MLP Snapshot Ensemble

--

B.

Load Models and Make a Snapshot Ensemble Prediction

---

--

--

--

In [None]:
# load models and make a snapshot ensemble prediction

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import mean
from numpy import std
import numpy
from numpy import array
from numpy import argmax

# load models from file

def load_all_models(n_models):
    """Load ``snapshot_model_1.h5`` .. ``snapshot_model_<n_models>.h5``.

    A line is printed after each file is loaded. The models are returned as
    a list in ascending file-number order.
    """
    loaded = []
    for number in range(1, n_models + 1):
        filename = 'snapshot_model_' + str(number) + '.h5'
        loaded.append(load_model(filename))
        print('>loaded %s' % filename)
    return loaded

# make an ensemble prediction for multi-class classification

def ensemble_predictions(members, testX):
    """Return the class index chosen by summing every member's predictions.

    Each member's ``predict(testX)`` output is stacked into one array, the
    stack is summed over the member axis, and the argmax over axis 1 of the
    summed scores is returned.
    """
    per_member = array([member.predict(testX) for member in members])
    class_totals = per_member.sum(axis=0)
    return argmax(class_totals, axis=1)




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Return the accuracy of an ensemble made of the first ``n_members`` models.

    The leading ``n_members`` entries of ``members`` predict ``testX`` through
    ``ensemble_predictions`` and the result is scored against ``testy`` with
    ``accuracy_score``.
    """
    predicted = ensemble_predictions(members[:n_members], testX)
    return accuracy_score(testy, predicted)
 



# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# split into train and test (labels are kept as integer class ids here)
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# load models in order
members = load_all_models(10)
print('Loaded %d models' % len(members))

# reverse loaded models so we build the
# ensemble with the last models first
members = list(reversed(members))

# one-hot targets for the standalone evaluate() calls; they do not change
# between iterations, so encode them once instead of inside the loop
testy_enc = to_categorical(testy)

# evaluate different numbers of ensembles on hold out set
single_scores, ensemble_scores = [], []
for i, member in enumerate(members, start=1):
    # accuracy of the ensemble built from the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)
    # accuracy of the i'th model on its own
    _, single_score = member.evaluate(testX, testy_enc, verbose=0)
    # summarize this step
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# summarize average accuracy of a single final model
print('Accuracy %.3f (%.3f)' % (mean(single_scores), std(single_scores)))

# plot score vs number of ensemble members
x_axis = list(range(1, len(members) + 1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()



--

--

--

---
25.2.2

MLP Model

--

develop an MLP for blobs dataset

https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot as plt



# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows held out
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]
print(trainX.shape, testX.shape)

# MLP with a single 25-unit hidden layer, optimised with Adam
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# train, tracking the held-out rows as validation data
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=500, verbose=0)

# final accuracy on both splits
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# learning curves: loss in the upper panel, accuracy in the lower one
for position, title, train_key, val_key in (
        (211, 'Cross-Entropy Loss', 'loss', 'val_loss'),
        (212, 'Accuracy', 'accuracy', 'val_accuracy')):
    plt.subplot(position)
    plt.title(title, pad=-40)
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.legend()

plt.show()




--

--

--

---
25.2.3 

Train and Save Sub-Models

--

example of saving sub-models for later use in a stacking ensemble

https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# example of saving sub-models for 
# later use in a stacking ensemble

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from os import makedirs



# fit model on dataset

def fit_model(trainX, trainy):
    """Build, compile and train one sub-model, then return it.

    The network has a 25-unit ReLU hidden layer over 2 inputs and a 3-way
    softmax output; it is trained for 500 epochs with Adam and categorical
    cross-entropy.
    """
    network = Sequential([
        Dense(25, input_dim=2, activation='relu'),
        Dense(3, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
    network.fit(trainX, trainy, epochs=500, verbose=0)
    return network




# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one hot encode output variable
y = to_categorical(y)

# split into train and test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# create directory for models; exist_ok lets the cell be re-run without
# failing because the directory is already there
makedirs('blend_models', exist_ok=True)

# fit and save models
n_members = 5
for i in range(n_members):
    # every member is trained on the same rows
    model = fit_model(trainX, trainy)
    # files are numbered from 1
    filename = 'blend_models/model_' + str(i + 1) + '.h5'
    model.save(filename)
    print('>Saved %s' % filename)
 


--

--

--

---
25.2.4 

Separate Stacking Model

--

stacked generalization with linear meta model on blobs dataset

https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# stacked generalization with linear meta 
# model on blobs dataset

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from numpy import dstack



# load models from file

def load_all_models(n_models):
    """Load ``blend_models/model_1.h5`` .. ``blend_models/model_<n_models>.h5``.

    A line is printed after each file is loaded. The models are returned as
    a list in ascending file-number order.
    """
    loaded = []
    for number in range(1, n_models + 1):
        filename = 'blend_models/model_' + str(number) + '.h5'
        loaded.append(load_model(filename))
        print('>loaded %s' % filename)
    return loaded




# create stacked model input dataset 
# as outputs from the ensemble

def stacked_dataset(members, inputX):
    """Build the meta-learner input from the members' predictions.

    Args:
        members: non-empty sequence of models exposing
            ``predict(inputX, verbose=0)`` that returns a 2-D array of
            per-class scores.
        inputX: input rows handed unchanged to every member.

    Returns:
        A 2-D array with one row per input row and
        ``n_classes * n_members`` columns.

    Raises:
        ValueError: if ``members`` is empty.
    """
    predictions = [model.predict(inputX, verbose=0) for model in members]
    if not predictions:
        raise ValueError('stacked_dataset requires at least one member')

    # stack along a new trailing axis -> [rows, probabilities, members];
    # stacking the whole list at once always yields a 3-D array, so a single
    # member is handled like any other count
    stackX = dstack(predictions)

    # flatten predictions to [rows, probabilities x members]
    return stackX.reshape((stackX.shape[0],
                           stackX.shape[1] * stackX.shape[2]))



# fit a model based on the outputs from the ensemble members

def fit_stacked_model(members, inputX, inputy):
    """Fit and return a ``LogisticRegression`` meta-model.

    The features are the members' predictions for ``inputX`` as produced by
    ``stacked_dataset``; the targets are ``inputy``.
    """
    features = stacked_dataset(members, inputX)
    meta_model = LogisticRegression()
    meta_model.fit(features, inputy)
    return meta_model




# make a prediction with the stacked model

def stacked_prediction(members, model, inputX):
    """Return ``model``'s prediction for the members' outputs on ``inputX``.

    The members' predictions are assembled by ``stacked_dataset`` and passed
    to ``model.predict``.
    """
    return model.predict(stacked_dataset(members, inputX))




# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# split into train and test (labels are kept as integer class ids here)
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape, testX.shape)

# load all models
n_members = 5
members = load_all_models(n_members)
print('Loaded %d models' % len(members))

# one-hot targets for the sub-models' evaluate() calls; they do not change
# between iterations, so encode them once instead of inside the loop
testy_enc = to_categorical(testy)

# evaluate standalone models on test dataset
for model in members:
    _, acc = model.evaluate(testX, testy_enc, verbose=0)
    print('Model Accuracy: %.3f' % acc)

# fit stacked model using the ensemble
# NOTE(review): the meta-model is fitted on testX/testy and scored on the
# same rows below, so the printed accuracy is not a held-out estimate
model = fit_stacked_model(members, testX, testy)

# evaluate model on test set
yhat = stacked_prediction(members, model, testX)
acc = accuracy_score(testy, yhat)
print('Stacked Test Accuracy: %.3f' % acc)




--

--

--

---
25.2.5 

Integrated Stacking Model

--

stacked generalization with neural net meta model on blobs dataset

https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# stacked generalization with neural 
# net meta model on blobs dataset

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from keras.layers.merge import concatenate
from numpy import argmax



# load models from file

def load_all_models(n_models):
    """Load ``blend_models/model_1.h5`` .. ``blend_models/model_<n_models>.h5``.

    Prints one line per loaded file and returns the models as a list, lowest
    file number first.
    """
    filenames = ['blend_models/model_' + str(number) + '.h5'
                 for number in range(1, n_models + 1)]
    loaded = []
    for filename in filenames:
        loaded.append(load_model(filename))
        print('>loaded %s' % filename)
    return loaded




# define stacked model from multiple member input models

def define_stacked_model(members):
    """Combine the member models into one compiled multi-input model.

    Every layer of every member is marked non-trainable and renamed with an
    ``ensemble_<k>_`` prefix (``k`` is the member's 1-based position). The
    members' outputs are concatenated and fed through a 10-unit ReLU layer
    and a 3-way softmax. A diagram of the model is written to
    ``model_graph.png`` before the model is compiled and returned.
    """
    for position, member in enumerate(members, start=1):
        # per-member prefix keeps layer names unique across the merged graph
        prefix = 'ensemble_' + str(position) + '_'
        for layer in member.layers:
            layer.trainable = False
            layer._name = prefix + layer.name

    # one input head and one output per member
    ensemble_visible = [member.input for member in members]
    ensemble_outputs = [member.output for member in members]

    # meta-learner on top of the concatenated member outputs
    merge = concatenate(ensemble_outputs)
    hidden = Dense(10, activation='relu')(merge)
    output = Dense(3, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)

    # save a diagram of the combined graph
    plot_model(model, show_shapes=True, to_file='model_graph.png')

    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model




# fit a stacked model

def fit_stacked_model(model, inputX, inputy):
    """Train the stacked model for 300 epochs on ``inputX`` / ``inputy``.

    ``inputX`` is repeated once per entry of ``model.input`` and ``inputy`` is
    one-hot encoded with ``to_categorical`` before fitting. Returns ``None``.
    """
    replicated = [inputX] * len(model.input)
    targets = to_categorical(inputy)
    model.fit(replicated, targets, epochs=300, verbose=0)
 



# make a prediction with a stacked model

def predict_stacked_model(model, inputX):
    """Return ``model.predict`` for ``inputX`` repeated once per model input."""
    replicated = [inputX] * len(model.input)
    return model.predict(replicated, verbose=0)
 



# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# first n_train rows for training, remaining rows held out
# (labels are kept as integer class ids here)
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]
print(trainX.shape, testX.shape)

# load the saved sub-models
n_members = 5
members = load_all_models(n_members)
print('Loaded %d models' % len(members))

# wire the sub-models into a single multi-input model
stacked_model = define_stacked_model(members)

# train the stacked model on the held-out rows
fit_stacked_model(stacked_model, testX, testy)

# predict the same rows, reduce the scores to class ids and score them
yhat = predict_stacked_model(stacked_model, testX)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(testy, yhat)
print('Stacked Test Accuracy: %.3f' % acc)




--

--

--

---
26.2.2 

Multi-Layer Perceptron Model

--

develop an MLP for blobs dataset

https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/


Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/
---

--

--

--

In [None]:
# develop an mlp for blobs dataset

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot



# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows held out
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]
print(trainX.shape, testX.shape)

# MLP with a single 25-unit hidden layer, optimised with Adam
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# train, tracking the held-out rows as validation data
history = model.fit(trainX, trainy, validation_data=(testX, testy),
                    epochs=500, verbose=0)

# final accuracy on both splits
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))




# This cell imports matplotlib as `pyplot` (not `plt`), so the plotting calls
# use that name; `plt` is only defined if an earlier cell happened to run.

# plot loss learning curves (upper panel)
pyplot.subplot(211)
pyplot.title('Cross-Entropy Loss', pad=-40)
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()

# plot accuracy learning curves (lower panel)
pyplot.subplot(212)
pyplot.title('Accuracy', pad=-40)
pyplot.plot(history.history['accuracy'], label='train')
pyplot.plot(history.history['val_accuracy'], label='test')
pyplot.legend()

pyplot.show()





--

--

--

---
26.2.3 

Save Multiple Models to File

--

Save Models to File towards the end of a training run

https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/

Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# save models to file toward the end of a training run

from sklearn.datasets import make_blobs
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense



# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows held out
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]

# MLP with a single 25-unit hidden layer, optimised with Adam
model = Sequential([
    Dense(25, input_dim=2, activation='relu'),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# train one epoch at a time so the model can be written to disk between epochs
n_epochs = 500
n_save_after = 490
for i in range(n_epochs):
    model.fit(trainX, trainy, epochs=1, verbose=0)
    # nothing is saved before iteration n_save_after
    if i < n_save_after:
        continue
    model.save('model_' + str(i) + '.h5')
  

  


--

--

--

---
26.2.4 

New Model with Average Model Weights

--

Average the weights of multiple loaded models

https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/

Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# average the weights of multiple loaded models

from tensorflow.keras.models import load_model
from tensorflow.keras.models import clone_model
from numpy import average
from numpy import array




# load models from file

def load_all_models(n_start, n_end):
    """Load ``model_<n_start>.h5`` up to, but excluding, ``model_<n_end>.h5``.

    A line is printed after each file is loaded. The models are returned as
    a list in ascending epoch order.
    """
    filenames = ['model_' + str(epoch) + '.h5'
                 for epoch in range(n_start, n_end)]
    loaded = []
    for filename in filenames:
        loaded.append(load_model(filename))
        print('>loaded %s' % filename)
    return loaded




# create a model from the weights of multiple models

def model_weight_ensemble(members, weights):
    """Return a new model whose parameters are a weighted average of the members'.

    Args:
        members: sequence of models exposing ``get_weights()``; the first
            one is also the template passed to ``clone_model``.
        weights: one averaging coefficient per member, forwarded to
            ``numpy.average``.

    Returns:
        A clone of ``members[0]`` carrying the averaged parameters, compiled
        with categorical cross-entropy, Adam and an accuracy metric.
    """
    # read every member's parameter list once; calling get_weights() inside
    # the per-array loop would repeat the full read for each array
    member_weights = [model.get_weights() for model in members]

    # zip(*...) groups the i-th parameter array of all members together
    avg_model_weights = [
        average(array(tensors), axis=0, weights=weights)
        for tensors in zip(*member_weights)
    ]

    # create a new model with the same structure and install the averages
    model = clone_model(members[0])
    model.set_weights(avg_model_weights)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    return model





# read the models saved for iterations 490..499
members = load_all_models(490, 500)
print('Loaded %d models' % len(members))

# every member contributes equally to the average
n_models = len(members)
weights = [1 / n_models for _ in range(n_models)]

# build the model that carries the averaged parameters
model = model_weight_ensemble(members, weights)

# print its layer summary
model.summary()

--

--

--

---
26.2.5 

Predicting with an average model weight ensemble

--

Average of model weights on blobs problem

https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/

Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# average of model weights on blobs problem

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.models import clone_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import average
from numpy import array




# load models from file

def load_all_models(n_start, n_end):
    """Load ``model_<n_start>.h5`` up to, but excluding, ``model_<n_end>.h5``.

    Prints one line per loaded file and returns the models as a list, lowest
    epoch first.
    """
    loaded = []
    epoch = n_start
    while epoch < n_end:
        filename = 'model_' + str(epoch) + '.h5'
        loaded.append(load_model(filename))
        print('>loaded %s' % filename)
        epoch += 1
    return loaded





# # create a model from the weights of multiple models

def model_weight_ensemble(members, weights):
    """Return a new model whose parameters are a weighted average of the members'.

    Args:
        members: sequence of models exposing ``get_weights()``; the first
            one is also the template passed to ``clone_model``.
        weights: one averaging coefficient per member, forwarded to
            ``numpy.average``.

    Returns:
        A clone of ``members[0]`` carrying the averaged parameters, compiled
        with categorical cross-entropy, Adam and an accuracy metric.
    """
    # read every member's parameter list once; calling get_weights() inside
    # the per-array loop would repeat the full read for each array
    member_weights = [model.get_weights() for model in members]

    # zip(*...) groups the i-th parameter array of all members together
    avg_model_weights = [
        average(array(tensors), axis=0, weights=weights)
        for tensors in zip(*member_weights)
    ]

    # create a new model with the same structure and install the averages
    model = clone_model(members[0])
    model.set_weights(avg_model_weights)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Return the test accuracy of an equally weighted parameter average.

    The first ``n_members`` models are combined by ``model_weight_ensemble``
    with a weight of ``1/n_members`` each, and the resulting model is
    evaluated on ``testX`` / ``testy``.
    """
    chosen = members[:n_members]
    equal_weights = [1.0 / n_members for _ in range(n_members)]
    averaged = model_weight_ensemble(chosen, equal_weights)
    scores = averaged.evaluate(testX, testy, verbose=0)
    # evaluate() yields (loss, accuracy); only the accuracy is returned
    _, test_acc = scores
    return test_acc




# three-class blobs problem with two input features
X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# one-hot encode the class labels
y = to_categorical(y)

# first n_train rows for training, remaining rows held out
n_train = 100
trainX = X[:n_train, :]
testX = X[n_train:, :]
trainy = y[:n_train]
testy = y[n_train:]

# read the models saved for iterations 490..499
members = load_all_models(490, 500)
print('Loaded %d models' % len(members))

# latest model first, so ensembles grow backwards in time
members = list(reversed(members))

# score ensembles of 1..len(members) models alongside each single model
single_scores = []
ensemble_scores = []
for i, member in enumerate(members, start=1):
    # averaged model built from the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)
    # the i'th member on its own
    _, single_score = member.evaluate(testX, testy, verbose=0)
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))
    ensemble_scores.append(ensemble_score)
    single_scores.append(single_score)

# accuracy against the number of ensemble members
x_axis = list(range(1, len(members) + 1))
pyplot.plot(x_axis, single_scores, marker='o', linestyle='None')
pyplot.plot(x_axis, ensemble_scores, marker='o')
pyplot.show()




--

--

--

---
26.2.6 

Linearly and Exponentially Decreasing Weighted Average

--

linearly decreasing weighted average of models on blobs dataset

https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/

Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# linearly decreasing weighted average 
# of models on blobs problem

from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.models import clone_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import average
from numpy import array




# load models from file

def load_all_models(n_start, n_end):
    """Load ``model_<n_start>.h5`` up to, but excluding, ``model_<n_end>.h5``.

    A line is printed after each file is loaded; the list of loaded models
    is returned in ascending epoch order.
    """
    loaded = []
    for epoch in range(n_start, n_end):
        filename = 'model_' + str(epoch) + '.h5'
        member = load_model(filename)
        loaded += [member]
        print('>loaded %s' % filename)
    return loaded




# create a model from the weights of multiple models

def model_weight_ensemble(members, weights):
    """Build a new model whose weights are a weighted average of the members' weights.

    Args:
        members: Non-empty sequence of models exposing ``get_weights()``.
            The first member is also the template handed to ``clone_model``.
            Assumes all members share one architecture so that their weight
            arrays line up position by position -- TODO confirm with callers.
        weights: One averaging weight per member, passed to ``numpy.average``
            (which divides by the sum of the weights, so they need not
            sum to 1).

    Returns:
        A clone of ``members[0]`` carrying the averaged weights, compiled
        with categorical cross-entropy loss, the 'adam' optimizer and an
        accuracy metric.

    Raises:
        IndexError: If ``members`` is empty.
    """
    # Fetch each member's weights exactly once. get_weights() returns the
    # model's complete list of weight arrays, so calling it again for every
    # array position (as the loop used to) repeated the same work.
    member_weights = [member.get_weights() for member in members]

    # number of weight arrays per model that need to be averaged
    n_arrays = len(member_weights[0])

    # create a set of average model weights, one array per position
    avg_model_weights = list()

    for position in range(n_arrays):
        # stack this weight array from every member along a new leading axis
        stacked = array([per_model[position] for per_model in member_weights])

        # weighted average across members (axis 0)
        avg_model_weights.append(average(stacked, axis=0, weights=weights))

    # create a new model with the same structure as the first member
    model = clone_model(members[0])

    # set the averaged weights in the new model
    model.set_weights(avg_model_weights)

    # compile so that callers can run evaluate() on the returned model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])

    return model




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy):
    """Score a weight-averaged ensemble built from the first ``n_members`` models.

    The selected members receive linearly decreasing weights in list order:
    ``n/n, (n-1)/n, ..., 1/n`` for ``n = n_members``, so the first member
    contributes most.

    Args:
        members: Sequence of models; only ``members[:n_members]`` is used.
        n_members: How many leading members to combine.
        testX: Inputs passed to the combined model's ``evaluate``.
        testy: Targets passed to the combined model's ``evaluate``.

    Returns:
        The second of the two values returned by ``evaluate`` on the
        combined model.
    """
    # linearly decreasing weights: 1.0 for the first member down to 1/n_members
    linear_weights = [(n_members - rank) / n_members for rank in range(n_members)]

    # leading members that take part in this ensemble
    chosen = members[:n_members]

    # single model holding the weighted average of the chosen members' weights
    averaged_model = model_weight_ensemble(chosen, linear_weights)

    # evaluate on the supplied data; the first returned value is discarded
    _, test_acc = averaged_model.evaluate(testX, testy, verbose=0)

    return test_acc




# build the 2D, three-class blobs dataset

X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# convert the class labels to one-hot vectors

y = to_categorical(y)

# the first n_train rows form the training split; the rest are held out

n_train = 100
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# load the saved models, in the order load_all_models returns them

members = load_all_models(490, 500)

print('Loaded %d models' % len(members))

# flip the order so ensembles are built starting from the last loaded model

members = members[::-1]

# score ensembles of every size on the hold-out set, together with the
# i'th member on its own

single_scores, ensemble_scores = [], []

for i in range(1, len(members) + 1):

    # ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)

    # the i'th member evaluated standalone
    _, single_score = members[i - 1].evaluate(testX, testy, verbose=0)

    # report this step, then record both scores
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))

    ensemble_scores.append(ensemble_score)

    single_scores.append(single_score)

# plot score vs number of ensemble members: standalone scores as
# unconnected markers, ensemble scores as markers joined by a line

x_axis = list(range(1, len(members) + 1))

pyplot.plot(x_axis, single_scores, linestyle='None', marker='o')

pyplot.plot(x_axis, ensemble_scores, marker='o')

pyplot.show()




--

--

--

---
26.2.6 

Linearly and Exponentially Decreasing Weighted Average

--
B.

We can also experiment with an exponential decay of the contribution of models. This requires that a decay constant (alpha) is specified. The example below weights the i-th most recent model by exp(-i/alpha) with alpha set to 2; a larger alpha makes the weights fall off more slowly.

--

exponentially decaying weighted average of models on blobs problem

--
https://machinelearningmastery.com/polyak-neural-network-model-weight-ensemble/

Also look at:

https://machinelearningmastery.com/weighted-average-ensemble-for-deep-learning-neural-networks/

---

--

--

--

In [None]:
# exponentially decreasing weighted average of models on blobs problem
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.models import clone_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from matplotlib import pyplot
from numpy import average
from numpy import array
from math import exp

# read the saved per-epoch models back from disk

def load_all_models(n_start, n_end):
    """Return the models stored for epochs in the half-open range ``[n_start, n_end)``.

    For every epoch number in the range, the file ``model_<epoch>.h5`` is
    loaded with ``load_model`` and a progress line is printed.

    Args:
        n_start: First epoch number to load (inclusive).
        n_end: Epoch number at which to stop (exclusive).

    Returns:
        The loaded models as a list, ordered by ascending epoch number;
        an empty list if the range contains no epochs.
    """
    models = []

    for epoch_number in range(n_start, n_end):
        # name of the file holding this epoch's saved model
        model_file = 'model_' + str(epoch_number) + '.h5'

        models.append(load_model(model_file))

        print('>loaded %s' % model_file)

    return models




# create a model from the weights of multiple models

def model_weight_ensemble(members, weights):
    """Combine several models into one by weighted-averaging their weights.

    Args:
        members: Non-empty sequence of models exposing ``get_weights()``.
            ``members[0]`` doubles as the template given to ``clone_model``.
            Assumes every member has the same architecture, so weight arrays
            at the same position are comparable -- TODO confirm with callers.
        weights: One averaging weight per member, forwarded to
            ``numpy.average`` (which normalises by the sum of the weights).

    Returns:
        A clone of ``members[0]`` with the averaged weights set, compiled
        with categorical cross-entropy loss, the 'adam' optimizer and an
        accuracy metric.

    Raises:
        IndexError: If ``members`` is empty.
    """
    # Read each member's weights a single time. get_weights() hands back the
    # whole list of weight arrays, so the earlier per-position calls inside
    # the loop repeated identical work for every array.
    weights_per_member = [member.get_weights() for member in members]

    # how many weight arrays each model holds
    n_arrays = len(weights_per_member[0])

    # create a set of average model weights, one array per position
    avg_model_weights = list()

    for position in range(n_arrays):
        # gather this weight array from every member along a new leading axis
        stacked = array([member_arrays[position]
                         for member_arrays in weights_per_member])

        # weighted average across members (axis 0)
        avg_model_weights.append(average(stacked, axis=0, weights=weights))

    # create a new model with the same structure as the first member
    model = clone_model(members[0])

    # set the averaged weights in the new model
    model.set_weights(avg_model_weights)

    # compile so that callers can run evaluate() on the returned model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model




# evaluate a specific number of members in an ensemble

def evaluate_n_members(members, n_members, testX, testy, alpha=2.0):
    """Score a weight-averaged ensemble with exponentially decreasing member weights.

    The first ``n_members`` models are combined with weights
    ``exp(-i / alpha)`` for ``i = 1 .. n_members`` in list order, so the
    first member contributes most.

    Args:
        members: Sequence of models; only ``members[:n_members]`` is used.
        n_members: How many leading members to combine.
        testX: Inputs passed to the combined model's ``evaluate``.
        testy: Targets passed to the combined model's ``evaluate``.
        alpha: Decay constant of the weights. Defaults to 2.0, the value
            that used to be hard-coded here. Larger values make the weights
            fall off more slowly. Must be non-zero.

    Returns:
        The second of the two values returned by ``evaluate`` on the
        combined model.

    Raises:
        ZeroDivisionError: If ``alpha`` is 0.
    """
    # select a subset of members
    subset = members[:n_members]

    # prepare an array of exponentially decreasing weights
    weights = [exp(-i / alpha) for i in range(1, n_members + 1)]

    # create a new model with the weighted average of all model weights
    model = model_weight_ensemble(subset, weights)

    # evaluate on the supplied data; the first returned value is discarded
    _, test_acc = model.evaluate(testX, testy, verbose=0)

    return test_acc




# build the 2D, three-class blobs dataset

X, y = make_blobs(n_samples=1100, centers=3, n_features=2,
                  cluster_std=2, random_state=2)

# convert the class labels to one-hot vectors

y = to_categorical(y)

# the first n_train rows form the training split; the rest are held out

n_train = 100
trainX, trainy = X[:n_train, :], y[:n_train]
testX, testy = X[n_train:, :], y[n_train:]

# load the saved models, in the order load_all_models returns them

members = load_all_models(490, 500)

print('Loaded %d models' % len(members))

# flip the order so ensembles are built starting from the last loaded model

members = members[::-1]

# score ensembles of every size on the hold-out set, together with the
# i'th member on its own

single_scores, ensemble_scores = [], []

for i in range(1, len(members) + 1):

    # ensemble made of the first i members
    ensemble_score = evaluate_n_members(members, i, testX, testy)

    # the i'th member evaluated standalone
    _, single_score = members[i - 1].evaluate(testX, testy, verbose=0)

    # report this step, then record both scores
    print('> %d: single=%.3f, ensemble=%.3f' % (i, single_score, ensemble_score))

    ensemble_scores.append(ensemble_score)

    single_scores.append(single_score)

# plot score vs number of ensemble members: standalone scores as
# unconnected markers, ensemble scores as markers joined by a line

x_axis = list(range(1, len(members) + 1))

pyplot.plot(x_axis, single_scores, linestyle='None', marker='o')

pyplot.plot(x_axis, ensemble_scores, marker='o')

pyplot.show()

