In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.datasets import mnist #to import our dataset
from tensorflow.keras.models import Sequential, Model # imports our type of network
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, BatchNormalization # imports our layers we want to use

from tensorflow.python.keras.losses import categorical_crossentropy #loss function
from tensorflow.keras.optimizers.legacy import Adam, SGD #optimisers
from tensorflow.keras.utils import to_categorical #some function for data preparation

from tensorflow.keras.callbacks import ModelCheckpoint #checkpoints used to keep track of best model


In [21]:
batch_size = 128
num_classes = 10
epochs = 20

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()


x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28)
60000 train samples
10000 test samples


In [22]:
input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='mse',optimizer=Adam(), metrics=['accuracy'])
model.summary()

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 28, 28)]          0         
                                                                 
 flatten_9 (Flatten)         (None, 784)               0         
                                                                 
 dense_27 (Dense)            (None, 200)               157000    
                                                                 
 dense_28 (Dense)            (None, 200)               40200     
                                                                 
 dense_29 (Dense)            (None, 10)                2010      
                                                                 
Total params: 199210 (778.16 KB)
Trainable params: 199210 (778.16 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
history1=model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [24]:
# Let's add some regularisation:

from tensorflow.keras.regularizers import l1, l2

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='mse',optimizer=opt, metrics=['accuracy'])
model.summary()

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 28, 28)]          0         
                                                                 
 flatten_10 (Flatten)        (None, 784)               0         
                                                                 
 dense_30 (Dense)            (None, 200)               157000    
                                                                 
 dense_31 (Dense)            (None, 200)               40200     
                                                                 
 dense_32 (Dense)            (None, 10)                2010      
                                                                 
Total params: 199210 (778.16 KB)
Trainable params: 199210 (778.16 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
# Dropout

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dropout(rate=dropout_rate)(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_dropout= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_dropout.compile(loss='mse',optimizer=opt, metrics=['accuracy'])
model_dropout.summary()


Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 28, 28)]          0         
                                                                 
 flatten_11 (Flatten)        (None, 784)               0         
                                                                 
 dense_33 (Dense)            (None, 200)               157000    
                                                                 
 dropout_6 (Dropout)         (None, 200)               0         
                                                                 
 dense_34 (Dense)            (None, 200)               40200     
                                                                 
 dropout_7 (Dropout)         (None, 200)               0         
                                                                 
 dense_35 (Dense)            (None, 10)                201

In [26]:
# Batch-Normalisation

dropout_rate = 0.002

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(200, activation='relu')(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_batch_normalisation= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_batch_normalisation.compile(loss='mse',optimizer=opt, metrics=['accuracy'])
model_batch_normalisation.summary()

Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 28, 28)]          0         
                                                                 
 flatten_12 (Flatten)        (None, 784)               0         
                                                                 
 dense_36 (Dense)            (None, 200)               157000    
                                                                 
 batch_normalization_4 (Bat  (None, 200)               800       
 chNormalization)                                                
                                                                 
 dense_37 (Dense)            (None, 200)               40200     
                                                                 
 dropout_8 (Dropout)         (None, 200)               0         
                                                          

In [27]:
history4=model_batch_normalisation.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
# Saving a model

# Save the model
#model_batch_normalisation.save('mnist_model_batch_normalisation.keras')


# Load the model
#loaded_model = tf.keras.models.load_model('mnist_model_batch_normalisation.keras')
#print("Model loaded from 'mnist_model_batch_normalisation.keras")

# Evaluate the loaded model
#loss, accuracy = loaded_model.evaluate(x_test, y_test)

In [31]:
# keeping the best model on the test/validation data.

# Define a ModelCheckpoint callback
#checkpoint = ModelCheckpoint('best_model.keras', 
#                             monitor='val_accuracy',  # You can monitor 'val_loss' or any other metric
#                             save_best_only=True, 
 #                            mode='max',  # Use 'max' if monitoring accuracy; 'min' if monitoring loss
#                             verbose=1)

#history4=model_batch_normalisation.fit(x_train, y_train,
 #         batch_size=batch_size,
 #         epochs=epochs,
 #         verbose=1,
 #         callbacks=[checkpoint],
  #        validation_data=(x_test, y_test))

#best_model = tf.keras.models.load_model('best_model.keras')

In [36]:
import optuna

# Optuna objective function
def objective(trial):
    # Suggest hyperparameters
    num_layers = trial.suggest_int('num_layers', 1, 3)
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    units = trial.suggest_int('units', 32, 128)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    
    # Build the model
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    
    for _ in range(num_layers):
        model.add(Dense(units=units, activation=activation))
        model.add(Dropout(rate=dropout_rate))
    
    model.add(Dense(10, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Train the model
    history = model.fit(x_train, y_train, 
                        validation_split=0.2,
                        epochs=5,  # To keep it quick for demonstration; you can increase it
                        batch_size=128, 
                        verbose=0)
    
    # Evaluate the model
    score = model.evaluate(x_test, y_test, verbose=0)
    accuracy = score[1]
    return accuracy

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)  # Set the number of trials

# Print the best trial
print('Best trial:')
print(f' Value: {study.best_trial.value}')
print(' Params: ')
for key, value in study.best_trial.params.items():
    print(f'    {key}: {value}')

[I 2024-11-18 16:36:55,195] A new study created in memory with name: no-name-6459e751-3d93-462e-be3e-9e83ab6a99c7
[I 2024-11-18 16:36:57,594] Trial 0 finished with value: 0.9355000257492065 and parameters: {'num_layers': 1, 'activation': 'relu', 'dropout_rate': 0.3794008514995079, 'units': 77, 'learning_rate': 0.00025401499644562805}. Best is trial 0 with value: 0.9355000257492065.
[I 2024-11-18 16:37:00,843] Trial 1 finished with value: 0.8758999705314636 and parameters: {'num_layers': 3, 'activation': 'sigmoid', 'dropout_rate': 0.08375632201903516, 'units': 97, 'learning_rate': 0.00016597222285836603}. Best is trial 0 with value: 0.9355000257492065.
[I 2024-11-18 16:37:03,754] Trial 2 finished with value: 0.5436999797821045 and parameters: {'num_layers': 3, 'activation': 'sigmoid', 'dropout_rate': 0.26771297084120765, 'units': 78, 'learning_rate': 3.996994844620452e-05}. Best is trial 0 with value: 0.9355000257492065.
[I 2024-11-18 16:37:06,481] Trial 3 finished with value: 0.8906999

[W 2024-11-18 16:37:32,462] Trial 13 failed with value None.


InvalidArgumentError: Graph execution error:

Detected at node Adam/mul_15 defined at (most recent call last):
  File "/Users/conkey/anaconda3/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/Users/conkey/anaconda3/lib/python3.10/runpy.py", line 86, in _run_code

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Users/conkey/anaconda3/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/Users/conkey/anaconda3/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/Users/conkey/anaconda3/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/var/folders/yk/83x2smm96b9_trlwmwsyv1g00000gn/T/ipykernel_55045/3145014312.py", line 41, in <module>

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/optuna/study/study.py", line 475, in optimize

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/optuna/study/_optimize.py", line 63, in _optimize

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/optuna/study/_optimize.py", line 160, in _optimize_sequential

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial

  File "/var/folders/yk/83x2smm96b9_trlwmwsyv1g00000gn/T/ipykernel_55045/3145014312.py", line 28, in objective

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1783, in fit

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1377, in train_function

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1360, in step_function

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1349, in run_step

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1130, in train_step

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 544, in minimize

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1223, in apply_gradients

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 652, in apply_gradients

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1253, in _internal_apply_gradients

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1345, in _distributed_apply_gradients_fn

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1342, in apply_grad_to_update_var

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 241, in _update_step

  File "/Users/conkey/anaconda3/lib/python3.10/site-packages/keras/src/optimizers/adam.py", line 204, in update_step

Incompatible shapes: [10] vs. [0]
	 [[{{node Adam/mul_15}}]] [Op:__inference_train_function_726029]