# Optimization of Neural Network Hyperparameters using PSO and Grid Search

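In this notebook, we compare Particle Swarm Optimization (PSO) with Grid Search for tuning the hyperparameters (learning rate, dropout rate, L2 penalty) and architecture (number and width of hidden layers) of a fully connected neural network. To keep runtime and memory low, every candidate is trained for a single epoch on a small subset of CIFAR-10 (2,000 training images) and scored on 500 test images.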

In [1]:
import numpy as np
import time
import psutil  # For monitoring memory usage
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import ParameterGrid
from pyswarm import pso

# Fix the random seeds so that weight initialisation, dropout masks and the PSO
# swarm (pyswarm draws from NumPy's global random state) are repeatable on
# Restart & Run All, as far as the TensorFlow backend allows.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Memory budget in bytes: a snapshot of the RAM available when this cell runs.
# The search functions compare their per-candidate estimate against this value.
max_memory_bytes = psutil.virtual_memory().available

# Set maximum data size
max_train_samples = 2000
max_test_samples = 500

# Load CIFAR-10 data (downloaded by Keras on first use)
(x_train_full, y_train_full), (x_test_full, y_test_full) = cifar10.load_data()

# Use only a portion of the data for acceleration and reduced memory consumption,
# and scale the pixel values into [0, 1] as float32.
x_train = x_train_full[:max_train_samples].astype('float32') / 255.0
x_test = x_test_full[:max_test_samples].astype('float32') / 255.0

# Convert the integer class labels of the same subsets to one-hot vectors (10 classes)
y_train = to_categorical(y_train_full[:max_train_samples], 10)
y_test = to_categorical(y_test_full[:max_test_samples], 10)

In [2]:
# Function to create a model
def create_model(num_neurons, learning_rate, dropout_rate, l2_rate, num_layers):
    """Build and compile a fully connected softmax classifier for 32x32x3 inputs.

    The network flattens the input, stacks ``num_layers`` blocks of
    Dense (ReLU, L2-regularised kernel) followed by Dropout, and ends in a
    10-unit softmax layer.

    Args:
        num_neurons: Units in each hidden Dense layer; truncated with ``int()``.
        learning_rate: Learning rate handed to the Adam optimizer.
        dropout_rate: Rate handed to every Dropout layer.
        l2_rate: Factor handed to ``l2`` for each hidden layer's kernel regularizer.
        num_layers: Number of hidden Dense+Dropout blocks; truncated with ``int()``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    net = Sequential()
    net.add(Flatten(input_shape=(32, 32, 3)))

    # Optimizers may propose fractional sizes, hence the int() truncation.
    depth = int(num_layers)
    for _ in range(depth):
        hidden = Dense(int(num_neurons), activation='relu', kernel_regularizer=l2(l2_rate))
        net.add(hidden)
        net.add(Dropout(dropout_rate))

    net.add(Dense(10, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    net.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# Function to evaluate the model
def evaluate_model(num_neurons, learning_rate, dropout_rate, l2_rate, num_layers):
    """Train a freshly built model for one epoch and return its accuracy.

    The model is built by ``create_model`` from the given hyperparameters,
    fitted on the module-level ``x_train``/``y_train`` (1 epoch, batch size 32)
    and scored on the module-level ``x_test``/``y_test``.

    Args:
        num_neurons: Units per hidden layer.
        learning_rate: Adam learning rate.
        dropout_rate: Dropout rate after each hidden layer.
        l2_rate: L2 regularisation factor for the hidden layers.
        num_layers: Number of hidden layers.

    Returns:
        The accuracy reported by ``model.evaluate`` on ``x_test``/``y_test``.
    """
    model = create_model(num_neurons, learning_rate, dropout_rate, l2_rate, num_layers)
    try:
        # No validation_data here: the same subset is scored by evaluate() right
        # below, so validating during fit would only repeat that pass.
        model.fit(x_train, y_train, epochs=1, batch_size=32, verbose=0)
        _, accuracy = model.evaluate(x_test, y_test, verbose=0)
    finally:
        # Release Keras' global state even when training fails, so a long
        # search does not accumulate one stale model per failed candidate.
        tf.keras.backend.clear_session()
    return accuracy

In [3]:
# Function for Grid Search considering memory constraint
def grid_search(params_grid):
    """Exhaustively evaluate a hyperparameter grid and return the best point.

    Every combination produced by ``ParameterGrid(params_grid)`` is scored with
    ``evaluate_model``. Combinations whose rough memory estimate
    (``num_neurons * num_layers * 4 * max_train_samples``) exceeds
    ``max_memory_bytes`` are skipped without being trained.

    Args:
        params_grid: Mapping with the keys ``num_neurons``, ``learning_rate``,
            ``dropout_rate``, ``l2_rate`` and ``num_layers``, each holding the
            list of values to try.

    Returns:
        ``(best_params, best_accuracy)``. ``best_params`` is the parameter dict
        with the highest accuracy (first one wins on ties). If no combination
        was evaluated the result is ``(None, 0.0)``.
    """
    best_params = None
    best_accuracy = 0.0
    for params in ParameterGrid(params_grid):
        memory_estimate = params['num_neurons'] * params['num_layers'] * 4 * max_train_samples
        if memory_estimate > max_memory_bytes:
            continue  # Skip parameters if exceeding available memory
        accuracy = evaluate_model(
            params['num_neurons'],
            params['learning_rate'],
            params['dropout_rate'],
            params['l2_rate'],
            params['num_layers'],
        )
        # Always record the first evaluated candidate: with a strict ">" against
        # the initial 0.0, a run in which every model scores 0.0 would otherwise
        # report no parameters at all.
        if best_params is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = params
    return best_params, best_accuracy

# Function for PSO
def pso_objective(params):
    """Objective for the PSO run: negative accuracy of one candidate.

    Args:
        params: Sequence ``(num_neurons, learning_rate, dropout_rate, l2_rate,
            num_layers)`` as proposed by the swarm. Values may be fractional.

    Returns:
        ``-accuracy`` from ``evaluate_model`` for feasible candidates (the
        objective is minimised, so lower is better), or the penalty ``1.0``
        when the rough memory estimate exceeds ``max_memory_bytes``.
    """
    num_neurons, learning_rate, dropout_rate, l2_rate, num_layers = params
    memory_estimate = num_neurons * num_layers * 4 * max_train_samples
    if memory_estimate > max_memory_bytes:
        # Feasible scores lie in [-1, 0]. The penalty must be strictly worse
        # than all of them; returning -1 here would rank an untrained,
        # infeasible candidate as a perfect (100 % accuracy) solution.
        return 1.0
    accuracy = evaluate_model(num_neurons, learning_rate, dropout_rate, l2_rate, num_layers)
    return -accuracy  # PSO minimizes the objective function, so use negative accuracy

In [4]:
# Define the parameter grid for Grid Search
# (3 * 3 * 3 * 2 * 3 = 162 combinations, each trained for one epoch)
params_grid = {
    'num_neurons': [64, 128, 256],
    'learning_rate': [0.001, 0.01, 0.1],
    'dropout_rate': [0.2, 0.4, 0.6],
    'l2_rate': [0.01, 0.001],
    'num_layers': [1, 2, 3]
}

# Run Grid Search.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
best_params_grid, best_accuracy_grid = grid_search(params_grid)
grid_search_time = time.perf_counter() - start_time
print(f"Grid Search took {grid_search_time:.3f} seconds.")
print(f"Best parameters: {best_params_grid}")
print(f"Best accuracy: {best_accuracy_grid:.4f}")

In [5]:
# Define the lower and upper bounds for PSO.
# Order: num_neurons, learning_rate, dropout_rate, l2_rate, num_layers
# (the same ranges as the grid above).
lb = [64, 0.001, 0.2, 0.001, 1]  # Lower bounds
ub = [256, 0.1, 0.6, 0.01, 3]   # Upper bounds

# Run PSO.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
best_params_pso, best_score_pso = pso(pso_objective, lb, ub, swarmsize=10, maxiter=5)
pso_time = time.perf_counter() - start_time
print(f"PSO took {pso_time:.3f} seconds.")
print(f"Best parameters: {best_params_pso}")
print(f"Best accuracy: {-best_score_pso:.4f}")  # Convert back to positive accuracy

In [6]:
# Visualization of results: accuracy as bars on the left axis,
# elapsed time as a line on a second (right) axis.
labels = ['Grid Search', 'PSO']
times = [grid_search_time, pso_time]
accuracies = [best_accuracy_grid, -best_score_pso]  # PSO score is -accuracy; flip the sign back

accuracy_color, time_color = 'tab:blue', 'tab:red'

fig, ax1 = plt.subplots()
ax1.bar(labels, accuracies, color=accuracy_color)
ax1.set_xlabel('Method')
ax1.set_ylabel('Accuracy', color=accuracy_color)
ax1.tick_params(axis='y', labelcolor=accuracy_color)

# Second y-axis sharing the same x-axis, used for the timings
ax2 = ax1.twinx()
ax2.plot(labels, times, color=time_color, marker='o')
ax2.set_ylabel('Time (s)', color=time_color)
ax2.tick_params(axis='y', labelcolor=time_color)

plt.title('Comparison of Grid Search and PSO')
fig.tight_layout()
plt.show()