In [None]:
from keras.datasets import cifar10

# Fetch CIFAR-10 as (images, labels) pairs for the train and test portions
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Report the array dimensions: images first, then the output labels
for caption, array in (
    ('Training images shape:', x_train),
    ('Test images shape:', x_test),
    ('Training labels shape:', y_train),
    ('Test labels shape:', y_test),
):
    print(caption, array.shape)


In [None]:
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split

# Fetch CIFAR-10; the full training portion is kept under *_full names
(x_train_full, y_train_full), (x_test, y_test) = cifar10.load_data()

# Hold out 15% of the training portion for validation.
# The fixed random_state makes the split repeatable across runs.
val_ratio = 0.15
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full,
    y_train_full,
    test_size=val_ratio,
    random_state=42,
)

# Report how many samples ended up in each subset
for caption, subset in (
    ('Training samples:', x_train),
    ('Validation samples:', x_val),
    ('Test samples:', x_test),
):
    print(caption, len(subset))


In [None]:
# it could be:
# Compile with 'sparse_categorical_crossentropy' as the loss, tracking accuracy.
# NOTE(review): no `model` is defined in any cell above this one, so on a fresh
# kernel this line raises NameError -- presumably an illustrative snippet meant
# to be applied to a model built later; confirm or move it below a model definition.

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
#To use categorical_crossentropy:

from keras.utils import to_categorical

# Reload CIFAR-10 so this cell starts from the labels as delivered by load_data()
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# One-hot encode both label arrays with a fixed number of classes
num_classes = 10
y_train_onehot, y_test_onehot = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)


In [None]:
#A more complete code now is:

import tensorflow.keras as keras
import numpy as np

# Load the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Convert the labels to one-hot encoded format
y_train_onehot = keras.utils.to_categorical(y_train, num_classes=10)
y_test_onehot = keras.utils.to_categorical(y_test, num_classes=10)

# Flatten every image into one 3072-value vector and scale the pixel values by
# 1/255 as float32, matching the preprocessing used by the later training cells.
# Feeding unscaled pixel values to the dense layers makes training unstable.
x_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255.0
x_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255.0

# Define the model architecture: flatten -> two hidden layers -> 10-way softmax.
# The Flatten layer is a no-op on the already flat input and only declares the
# expected input shape.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(3072,)))
model.add(keras.layers.Dense(units=256, activation='relu'))
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))

# Compile the model; categorical_crossentropy matches the one-hot labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data.
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation scores are test-set scores.
model.fit(x_train, y_train_onehot, batch_size=128, epochs=10, validation_data=(x_test, y_test_onehot))


I define a simple neural network architecture with one flatten layer, two hidden layers, and an output layer that has 10 neurons,
one for each class, using categorical cross-entropy as the loss function, and train
the model on the one-hot encoded output labels (y_train_onehot) using the fit() method.
When we one-hot encode the output labels, their dimensions change from (n_samples, 1)
to (n_samples, n_classes), where n_samples is the number of samples in the dataset and n_classes is the number of classes.
In the case of CIFAR-10, the dimensions of the output labels change from (n_samples, 1) to (n_samples, 10).


In [None]:
# make a network with five hidden layers to predict the outputs of this dataset.

import tensorflow as tf
from tensorflow import keras
import numpy as np

# Load the dataset and preprocess it: scale pixels by 1/255, one-hot encode labels
# NOTE(review): this cell loads MNIST although the surrounding cells work on
# CIFAR-10 -- confirm which dataset is intended here.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

# Define the neural network architecture: flatten, FIVE hidden Dense layers of
# decreasing width, then a 10-way softmax output. (The earlier version had only
# four hidden layers despite the stated goal of five.)
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

# Compile the model; categorical_crossentropy matches the one-hot labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data
model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))

# Evaluate the performance of the model on the testing data
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)


In [None]:
#Optimize the hyperparameters of the built model

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch
from keras.layers import Flatten
from tensorflow.keras.optimizers import Adam

# Fetch CIFAR-10 for the hyperparameter search
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# One-hot encode the train and test labels (10 classes)
y_train_onehot, y_test_onehot = [
    keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
]

# Define the model-building function
def build_model(hp):
    """Build and compile a fully connected classifier for the tuner.

    The tuner calls this once per trial. The search space is:
      * 'num_layers'    -- number of hidden Dense layers, 1 to 5
      * 'units_<i>'     -- width of hidden layer i, 32 to 512 in steps of 32
      * 'learning_rate' -- Adam learning rate, one of 1e-2, 1e-3, 1e-4

    Args:
        hp: hyperparameter object supplied by the tuner.

    Returns:
        A compiled keras.Sequential model with a 10-way softmax output.
    """
    # Only names imported in this cell (`keras`, `layers`) are used, so the
    # function works on a fresh kernel.
    model = keras.Sequential()
    model.add(layers.Flatten(input_shape=(32, 32, 3)))

    # Add hidden layers; both the depth and each layer's width are tuned
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=512, step=32), activation='relu'))

    model.add(layers.Dense(units=10, activation='softmax'))

    # Compile the model; categorical_crossentropy matches the one-hot labels
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    return model

# Instantiate the tuner and perform the search
# The tuner ranks trials by validation accuracy and runs 3 trials, each
# executed once; it is configured with the 'my_dir' directory and the
# 'cifar10_tuning' project name.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=3,
    executions_per_trial=1,
    directory='my_dir',
    project_name='cifar10_tuning')

# NOTE(review): the test split is passed as validation data, so the objective
# is measured on the test set -- confirm this is intended.
tuner.search(x=x_train, y=y_train_onehot, epochs=10, validation_data=(x_test, y_test_onehot))

# Print the best model's summary
tuner.results_summary()
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()


Please note that I intentionally reduced max_trials to 3 so that the code is runnable in a reasonable time; I know it should be a higher number.


In [None]:
# Train with the same process on MNIST, as it is an easier dataset

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Fetch MNIST as (images, labels) pairs
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale pixel values by 1/255 as float32
X_train, X_test = (
    images.astype('float32') / 255.0 for images in (X_train, X_test)
)

# One-hot encode the labels
num_classes = 10
y_train, y_test = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

# Assemble the network layer by layer: flatten -> one hidden layer -> softmax
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Configure training; categorical_crossentropy matches the one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs, holding out 10% of the training arrays for validation
model.fit(X_train, y_train, epochs=10, validation_split=0.1)

# Define a function to evaluate the model on accuracy, F1 score, and ROC AUC score
def evaluate_model(model, X, y):
    """Score `model` on inputs `X` against one-hot labels `y`.

    Args:
        model: object whose predict(X) returns per-class scores, one row per sample.
        X: input samples.
        y: one-hot encoded true labels, one row per sample.

    Returns:
        Tuple (acc, f1, roc_auc): accuracy, weighted F1 score, and weighted ROC AUC.
    """
    # Run inference once and reuse the scores for both the hard-label metrics
    # and ROC AUC (the previous version called predict() twice on the same X).
    y_pred_prob = model.predict(X)
    y_pred_labels = np.argmax(y_pred_prob, axis=1)
    y_true_labels = np.argmax(y, axis=1)
    acc = accuracy_score(y_true_labels, y_pred_labels)
    f1 = f1_score(y_true_labels, y_pred_labels, average='weighted')
    roc_auc = roc_auc_score(y, y_pred_prob, average='weighted', multi_class='ovo')
    return acc, f1, roc_auc

# Evaluate the model on the train, validation, and test data.
# model.fit() above was called with validation_split=0.1; Keras takes that
# hold-out from the END of the arrays (before shuffling), so the same split is
# rebuilt here. Previously the "validation" metrics were computed on the test
# set and merely duplicated the test row.
VAL_FRACTION = 0.1  # must match validation_split in the model.fit() call
split_at = int(len(X_train) * (1 - VAL_FRACTION))
X_fit, y_fit = X_train[:split_at], y_train[:split_at]
X_val, y_val = X_train[split_at:], y_train[split_at:]

train_acc, train_f1, train_roc_auc = evaluate_model(model, X_fit, y_fit)
val_acc, val_f1, val_roc_auc = evaluate_model(model, X_val, y_val)
test_acc, test_f1, test_roc_auc = evaluate_model(model, X_test, y_test)

print('Train Accuracy:', train_acc)
print('Train F1 Score:', train_f1)
print('Train ROC AUC Score:', train_roc_auc)
print('Validation Accuracy:', val_acc)
print('Validation F1 Score:', val_f1)
print('Validation ROC AUC Score:', val_roc_auc)
print('Test Accuracy:', test_acc)
print('Test F1 Score:', test_f1)
print('Test ROC AUC Score:', test_roc_auc)


In [None]:
import pandas as pd

# Collect the scores: one column per metric, one row per data split
metric_names = ('Accuracy', 'F1 Score', 'ROC AUC Score')
metric_columns = (
    [train_acc, val_acc, test_acc],
    [train_f1, val_f1, test_f1],
    [train_roc_auc, val_roc_auc, test_roc_auc],
)
metrics_dict = dict(zip(metric_names, metric_columns))

# Turn the dictionary into a table indexed by split name
df_metrics = pd.DataFrame(metrics_dict, index=['Train', 'Validation', 'Test'])

# Show the table: once as plain text, once as the cell's rich output
print(df_metrics)
df_metrics

In [None]:
# Now compute the accuracy, F1 score, and ROC AUC on CIFAR-10:

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Fetch CIFAR-10 as (images, labels) pairs
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Scale pixel values by 1/255 as float32
X_train, X_test = (
    images.astype('float32') / 255.0 for images in (X_train, X_test)
)

# One-hot encode the labels
num_classes = 10
y_train, y_test = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

# Assemble the CNN layer by layer: three conv + max-pool stages with 32, 64 and
# 128 filters, followed by a dense classifier head
model = Sequential()
model.add(Conv2D(32, (3,3), activation='relu', padding='same', input_shape=(32,32,3)))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
model.add(MaxPooling2D((2,2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Configure training; categorical_crossentropy matches the one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs, holding out 10% of the training arrays for validation
model.fit(X_train, y_train, epochs=10, validation_split=0.1)

# Scoring helper: accuracy, weighted F1 and macro ROC AUC, plus the label arrays
def evaluate_model(model, X_test, y_test):
    """Score `model` on `X_test` against one-hot labels `y_test`.

    Returns:
        Tuple (acc, f1, roc_auc, y_true_labels, y_pred_labels), where the two
        label arrays are the argmax of `y_test` and of the model's predictions.
    """
    probabilities = model.predict(X_test)
    predicted = np.argmax(probabilities, axis=1)
    actual = np.argmax(y_test, axis=1)
    return (
        accuracy_score(actual, predicted),
        f1_score(actual, predicted, average='weighted'),
        roc_auc_score(y_test, probabilities, average='macro', multi_class='ovo'),
        actual,
        predicted,
    )

# Score the trained network on the test split and report the three metrics
acc, f1, roc_auc, y_true_labels, y_pred_labels = evaluate_model(model, X_test, y_test)
print(f"Accuracy: {acc:.4f}, F1 score: {f1:.4f}, ROC AUC score: {roc_auc:.4f}")


As it was difficult to run this code on my computer because of a lack of space, I ran it on Colab and put the results here:

Accuracy: 0.7203

F1 score: 0.7182 

ROC AUC score: 0.9584

In [None]:
#Wide and deep model:

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

# Load the CIFAR-10 dataset
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Normalize the pixel values (cast to float32, then scale by 1/255)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode the target variables
num_classes = 10
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# Define the wide and deep network architecture (functional API).
# Both branches read the same flattened image:
#   wide branch: a single Dense(128) layer
#   deep branch: three stacked Dense layers (256 -> 128 -> 64)
# Their outputs are concatenated and fed to the softmax output layer.
input_layer = Input(shape=(32, 32, 3))
flatten_layer = Flatten()(input_layer)
wide_layer = Dense(128, activation='relu')(flatten_layer)
deep_layer_1 = Dense(256, activation='relu')(flatten_layer)
deep_layer_2 = Dense(128, activation='relu')(deep_layer_1)
deep_layer_3 = Dense(64, activation='relu')(deep_layer_2)
concat_layer = Concatenate()([wide_layer, deep_layer_3])
output_layer = Dense(num_classes, activation='softmax')(concat_layer)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model; categorical_crossentropy matches the one-hot labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model for 10 epochs, holding out 10% of the training arrays for validation
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate the model
y_pred_probs = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_probs, axis=1)
y_true_labels = np.argmax(y_test, axis=1)
acc = accuracy_score(y_true_labels, y_pred_labels)
f1 = f1_score(y_true_labels, y_pred_labels, average='weighted')
# ROC AUC must be computed from the predicted class probabilities, not from the
# hard argmax labels. The one-hot targets and the probability matrix are passed
# exactly as in the CNN cell's evaluate_model(), so the two scores are comparable.
roc_auc = roc_auc_score(y_test, y_pred_probs, average='macro', multi_class='ovo')
print("Wide and deep network accuracy: {:.3f}".format(acc))
print("Wide and deep network F1 score: {:.3f}".format(f1))
print("Wide and deep network ROC AUC score: {:.3f}".format(roc_auc))


As it was difficult to run this code on my computer because of a lack of space, I ran it on Colab and put the results here:

Wide and deep network accuracy: 0.694

Wide and deep network F1 score: 0.692

Wide and deep network ROC AUC score: 0.825

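In comparison to the previous step's convolutional network (accuracy 0.7203, F1 score 0.7182, ROC AUC 0.9584), the wide and deep network obtains slightly lower accuracy and F1 score and a significantly lower ROC AUC score. However, because the accuracy and F1 differences between the two models are minor, it is feasible that their performance on those two metrics is equivalent. Note that the ROC AUC values are only comparable if both are computed from predicted class probabilities rather than from hard label predictions.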

In [None]:
!pip install sklearn_genetic


In [None]:
#optimize the parameters of the previously defined model using the sklearn_genetic library:
import sklearn_genetic
from sklearn_genetic import GASearchCV
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
  

# Define the function to create the Keras model
def create_model(optimizer='adam', activation='relu', dropout_rate=0.2):
    """Build and compile a one-hidden-layer dense classifier for 32x32x3 inputs.

    Args:
        optimizer: optimizer name or instance passed to model.compile().
        activation: activation function of the hidden Dense layer.
        dropout_rate: rate of the Dropout layer that follows the hidden layer.

    Returns:
        A compiled Sequential model with a `num_classes`-way softmax output
        (`num_classes` is the notebook-level constant).
    """
    # Local imports keep the function independent of which earlier cells have
    # run; in particular, Dropout is not imported by any other cell.
    from tensorflow.keras.layers import Dense, Dropout, Flatten
    from tensorflow.keras.models import Sequential

    model = Sequential([
        Flatten(input_shape=(32, 32, 3)),
        Dense(128, activation=activation),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# GASearchCV describes its search space with objects from sklearn_genetic.space
from sklearn_genetic.space import Categorical

# Wrap the Keras model with the scikit-learn API
keras_clf = KerasClassifier(build_fn=create_model, epochs=10, batch_size=64, verbose=0)

# Define the parameter space to be searched (each entry is a discrete choice)
params = {
    'optimizer': Categorical(['adam', 'sgd']),
    'activation': Categorical(['relu', 'sigmoid']),
    'dropout_rate': Categorical([0.2, 0.4, 0.6])
}

# Create the genetic algorithm search object.
# GASearchCV's keyword names are `param_grid`, `population_size` and `generations`.
ga_search = GASearchCV(estimator=keras_clf, cv=StratifiedKFold(), scoring='accuracy', population_size=10,
                       generations=5, param_grid=params)

# StratifiedKFold and the 'accuracy' scorer need 1-D integer class labels, so
# convert the one-hot targets back to class indices before fitting.
y_train_labels = np.argmax(y_train, axis=1)

# Fit the genetic algorithm search object to the data
ga_search.fit(X_train, y_train_labels)

# Print the best parameters found
print("Best parameters found: ", ga_search.best_params_)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.callbacks import EarlyStopping
from kerastuner.tuners import RandomSearch
import time

# Fetch CIFAR-10, scale the pixels by 1/255, and one-hot encode the labels
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = (
    keras.utils.to_categorical(labels) for labels in (y_train, y_test)
)

# Model factory handed to the tuner: a two-stage CNN with a tunable dense head
def build_model(hp):
    """Build and compile a CNN whose sizes are drawn from the tuner's search space.

    Hyperparameters queried from `hp`, in this order: 'conv1_filters',
    'conv1_kernel', 'conv2_filters', 'conv2_kernel', 'dense1_units',
    'dropout1' and 'learning_rate'.

    Returns:
        A compiled keras.Sequential model with a 10-way softmax output.
    """
    first_stage = [
        layers.Conv2D(
            filters=hp.Int('conv1_filters', min_value=32, max_value=256, step=32),
            kernel_size=hp.Choice('conv1_kernel', values=[3, 5]),
            activation='relu',
            input_shape=(32, 32, 3),
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    second_stage = [
        layers.Conv2D(
            filters=hp.Int('conv2_filters', min_value=32, max_value=256, step=32),
            kernel_size=hp.Choice('conv2_kernel', values=[3, 5]),
            activation='relu',
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(
            units=hp.Int('dense1_units', min_value=32, max_value=512, step=32),
            activation='relu',
        ),
        layers.Dropout(rate=hp.Float('dropout1', min_value=0.0, max_value=0.5, step=0.1)),
        layers.Dense(10, activation='softmax'),
    ]
    network = keras.Sequential(first_stage + second_stage + classifier_head)

    # The learning rate is sampled on a log scale between 1e-4 and 1e-2
    tuned_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    network.compile(
        optimizer=keras.optimizers.Adam(tuned_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Create a Keras tuner object and define the search space
# The tuner ranks trials by validation accuracy and runs 5 trials, each
# executed once; it is configured with the 'tuner_dir' directory and the
# 'cifar10_cnn' project name.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='tuner_dir',
    project_name='cifar10_cnn')

# Define the early stopping callback (monitors validation loss, patience of 3 epochs)
early_stop = EarlyStopping(monitor='val_loss', patience=3)

# Start the hyperparameter search, timing it with wall-clock timestamps
# NOTE(review): the test split is passed as validation data, so the tuning
# objective and the final "test accuracy" below use the same data -- confirm
# this is intended.
start_time = time.time()
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test), callbacks=[early_stop])
end_time = time.time()

# Print the best results: evaluate the top-ranked model on the test split
best_model = tuner.get_best_models(num_models=1)[0]
test_loss, test_acc = best_model.evaluate(x_test, y_test)
print('Best model:')
best_model.summary()
print('Test accuracy:', test_acc)

# Print the elapsed time of the search itself
elapsed_time = end_time - start_time
print('Elapsed time:', elapsed_time, 'seconds')


In [None]:
#after running the above code, we obtained the best hyperparameters and rewrite the code:
#Best learning rate: 0.0008793685539613403
#Best conv1_filters: 64
#Best conv1_kernel: 5
#Best conv2_filters: 32
#Best conv2_kernel: 3
#Best dense1_units: 480
#Best dropout1: 0.4
    
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10

# Load the dataset and preprocess it: scale pixels by 1/255, one-hot encode labels
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

# Define the neural network architecture with the best hyperparameters listed
# at the top of this cell
model = keras.Sequential()
model.add(layers.Conv2D(filters=64, kernel_size=5, activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(units=480, activation='relu'))
model.add(layers.Dropout(rate=0.4))
model.add(layers.Dense(10, activation='softmax'))
# Use the tuned learning rate; the previous hard-coded 0.0001 did not match the
# best value reported by the search.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0008793685539613403),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit for 100 epochs, using the test split as validation data
history = model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_test, y_test),
)

# Score on the test split with evaluate_model() from the earlier CNN cell
acc, f1, roc_auc, y_true_labels, y_pred_labels = evaluate_model(model, x_test, y_test)
print(f"Accuracy: {acc:.4f}, F1 score: {f1:.4f}, ROC AUC score: {roc_auc:.4f}")
