EVOLUTIONARY

In [2]:
import tensorflow as tf
import numpy as np
import sklearn
import keras

versions = {
    "tensorflow": tf.__version__,
    "numpy": np.__version__,
    "scikit-learn": sklearn.__version__,
    "keras": keras.__version__,
}

versions

{'tensorflow': '2.16.1',
 'numpy': '1.26.4',
 'scikit-learn': '1.1.2',
 'keras': '3.1.1'}

In [1]:
import random
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, Reshape
from keras.optimizers import Adam, SGD
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

# Load and preprocess dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()  #loading mnist dataset 28X28 images of handwritten 0 to 9
x_train = x_train.astype('float32') / 255   #scaled pixels from 0 to 1
x_test = x_test.astype('float32') / 255

x_train, _, y_train, _ = train_test_split(x_train, y_train, train_size=5000)   # _ to excluse the remaining data as we only consider 5k. 
                                                                                # It holds rest of not needed data
y_train = to_categorical(y_train, 10)  # one hot encode, each label is converted into a vector of size 10
y_test = to_categorical(y_test, 10)   #

def build_model(params):
    model = Sequential()   # linear model
    if params['layer_type'] == 'conv':
        model.add(Reshape((28, 28, 1), input_shape=(28, 28)))
        model.add(Conv2D(params['layers'][0], (3, 3), activation=params['activation'], padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2)))    #to reduce the spatial dimensions of the data
        for units in params['layers'][1:]:
            model.add(Conv2D(units, (3, 3), activation=params['activation'], padding='same'))
            model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
    elif params['layer_type'] == 'lstm':
        model.add(LSTM(params['layers'][0], activation=params['activation'], input_shape=(28, 28), return_sequences=True))
        for units in params['layers'][1:-1]:
            model.add(LSTM(units, activation=params['activation'], return_sequences=True))
        model.add(LSTM(params['layers'][-1], activation=params['activation']))
    else:  # Dense model
        model.add(Flatten(input_shape=(28, 28)))
        for units in params['layers']:
            model.add(Dense(units, activation=params['activation']))
    model.add(Dense(10, activation='softmax'))
    optimizer = Adam() if params['optimizer'] == 'adam' else SGD()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_population(pop_size):
    population = []
    for _ in range(pop_size):
        params = {
            'num_layers': random.choice([2, 3, 4]),
            'layers': [random.choice([32, 64, 128]) for _ in range(random.choice([2, 3, 4]))],
            'activation': random.choice(['relu', 'tanh']),
            'optimizer': random.choice(['adam', 'sgd']),
            'batch_size': random.choice([32, 64]),
            'epochs': random.choice([5, 10]),
            'layer_type': random.choice(['dense', 'lstm', 'conv'])
        }
        population.append(params)
    return population

def fitness(model, x_train, y_train, x_test, y_test, batch_size, epochs):
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
    _, accuracy = model.evaluate(x_test, y_test, verbose=0)
    return accuracy

def selection(population, fitness_scores):
    selected = np.random.choice(len(population), size=len(population)//2, p=fitness_scores/fitness_scores.sum()) # fitness_scores/fitness_scores.sum() normalizes the fitness scores to sum to 1 
    return [population[i] for i in selected]

def crossover(parent1, parent2):
    child = parent1.copy()
    for key in parent1.keys():
        if random.random() < 0.5:  #there's a 50% chance of choosing the corresponding value from parent2 rather than parent1
            child[key] = parent2[key]
    return child

def mutation(params, mutation_rate):
    if random.random() < mutation_rate:
        params['layers'][random.randint(0, len(params['layers'])-1)] = random.choice([32, 64, 128])
        params['activation'] = random.choice(['relu', 'tanh'])
        params['optimizer'] = random.choice(['adam', 'sgd'])
        params['batch_size'] = random.choice([32, 64])
        params['epochs'] = random.choice([5, 10])
        params['layer_type'] = random.choice(['dense', 'lstm', 'conv'])
    return params

# Run evolutionary algorithm
population_size = 4 
mutation_rate = 0.1 
generations = 4

population = create_population(population_size)

# Track initial population and their accuracies
print("Initial Population Architectures and Accuracies:")
initial_scores = []
for params in population:
    model = build_model(params)
    score = fitness(model, x_train, y_train, x_test, y_test, params['batch_size'], params['epochs'])
    initial_scores.append(score)
    print(f"Architecture: {params}, Accuracy: {score * 100:.2f}%")

# Select the initial parents explicitly and print them
initial_parents = selection(population, np.array(initial_scores))
print("\nInitial Parents for Generation 1:")
for parent in initial_parents:
    print(f"Architecture: {parent}")

# Evolutionary process
for generation in range(generations):
    print(f"\nGeneration {generation+1}/{generations}")
    fitness_scores = []
    for params in population:
        model = build_model(params)
        score = fitness(model, x_train, y_train, x_test, y_test, params['batch_size'], params['epochs'])
        fitness_scores.append(score)
    fitness_scores = np.array(fitness_scores)
    
    print("Current Generation Architectures and Accuracies:")
    for i, (params, score) in enumerate(zip(population, fitness_scores)):
        print(f"Architecture: {params}, Accuracy: {score * 100:.2f}%")
    
    selected_parents = selection(population, fitness_scores)
    next_generation = []
    
    while len(next_generation) < population_size:
        parent1, parent2 = random.sample(selected_parents, 2)
        child = crossover(parent1, parent2)
        child = mutation(child, mutation_rate)
        next_generation.append(child)
    
    population = next_generation

# Final model
best_model_params = population[np.argmax(fitness_scores)]
best_model = build_model(best_model_params)
final_accuracy = best_model.evaluate(x_test, y_test, verbose=0)[1]
print(f"\nFinal Model Architecture: {best_model_params}")
print(f"Final Model Accuracy on Test Set: {final_accuracy * 100:.2f}%")


Initial Population Architectures and Accuracies:


  super().__init__(**kwargs)


Architecture: {'num_layers': 3, 'layers': [32, 32, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'conv'}, Accuracy: 96.95%


  super().__init__(**kwargs)


Architecture: {'num_layers': 4, 'layers': [32, 32, 32], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'lstm'}, Accuracy: 80.27%
Architecture: {'num_layers': 2, 'layers': [32, 128, 128, 64], 'activation': 'tanh', 'optimizer': 'adam', 'batch_size': 32, 'epochs': 5, 'layer_type': 'lstm'}, Accuracy: 92.30%
Architecture: {'num_layers': 2, 'layers': [64, 64, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'conv'}, Accuracy: 95.94%

Initial Parents for Generation 1:
Architecture: {'num_layers': 3, 'layers': [32, 32, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'conv'}
Architecture: {'num_layers': 2, 'layers': [32, 128, 128, 64], 'activation': 'tanh', 'optimizer': 'adam', 'batch_size': 32, 'epochs': 5, 'layer_type': 'lstm'}

Generation 1/4
Current Generation Architectures and Accuracies:
Architecture: {'num_layers': 3, 'layers': [32, 32, 128], 'activation': '

  super().__init__(**kwargs)


Current Generation Architectures and Accuracies:
Architecture: {'num_layers': 4, 'layers': [128, 32, 32], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'lstm'}, Accuracy: 87.34%
Architecture: {'num_layers': 4, 'layers': [128, 32, 32], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'lstm'}, Accuracy: 86.18%
Architecture: {'num_layers': 4, 'layers': [32, 32, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 10, 'layer_type': 'dense'}, Accuracy: 92.55%
Architecture: {'num_layers': 3, 'layers': [32, 32, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'conv'}, Accuracy: 97.17%

Final Model Architecture: {'num_layers': 4, 'layers': [32, 32, 128], 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 10, 'layer_type': 'dense'}
Final Model Accuracy on Test Set: 10.23%


RANDOM

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
import random

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# One-hot encode labels
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# Hyperparameter search space
param_grid = {
    'num_layers': [2, 3, 4],
    'units': [32, 64, 128],
    'activation': ['relu', 'tanh'],
    'optimizer': ['adam', 'sgd'],
    'batch_size': [32, 64],
    'epochs': [5, 10],
    'layer_type': ['dense', 'lstm', 'conv']
}

# Function to randomly sample hyperparameters from the grid
def random_sample(param_grid):
    params = {key: random.choice(value) for key, value in param_grid.items()}
    return params

# Create a model with varying layers (Dense, LSTM, Conv)
def create_model(num_layers, units, activation, optimizer, layer_type):
    model = keras.Sequential()

    if layer_type == 'conv':
        # Ensure input shape includes the channel dimension
        model.add(keras.layers.Reshape((28, 28, 1), input_shape=(28, 28)))  
        
        for _ in range(num_layers):
            model.add(keras.layers.Conv2D(units, kernel_size=(3, 3), activation=activation, padding='same'))
            model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model.add(keras.layers.Flatten())  # Flatten before dense layers
    elif layer_type == 'lstm':
        model.add(keras.layers.Reshape((28, 28), input_shape=(28, 28)))  # Ensure correct shape for LSTM
        for _ in range(num_layers):
            model.add(keras.layers.LSTM(units, activation=activation, return_sequences=True))
        model.add(keras.layers.LSTM(units, activation=activation))  # Final LSTM layer without return_sequences
    elif layer_type == 'dense':
        model.add(keras.layers.Flatten(input_shape=(28, 28)))  # Flatten for fully connected layers
        for _ in range(num_layers):
            model.add(keras.layers.Dense(units, activation=activation))

    model.add(keras.layers.Dense(10, activation='softmax'))  # Output layer for classification

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Random search
best_accuracy = 0
best_model = None
best_params = None
model_counter = 1  # Initialize model counter
n_iter = 5  # Number of random samples

for _ in range(n_iter):  # Loop for random search iterations
    params = random_sample(param_grid)
    print(f"Evaluating Model {model_counter} with params: {params}")
    
    model = create_model(params['num_layers'], params['units'], params['activation'], params['optimizer'], params['layer_type'])
    history = model.fit(x_train, y_train, epochs=params['epochs'], batch_size=params['batch_size'], validation_data=(x_test, y_test), verbose=0)

    val_accuracy = history.history['val_accuracy'][-1]
    print(f"Validation Accuracy: {val_accuracy}")
    
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_model = model
        best_params = params

    model_counter += 1  # Increment model counter

# Output the best model and parameters
print(f"Best Accuracy: {best_accuracy}")
print(f"Best Params: {best_params}")


Evaluating Model 1 with params: {'num_layers': 2, 'units': 32, 'activation': 'tanh', 'optimizer': 'sgd', 'batch_size': 64, 'epochs': 5, 'layer_type': 'conv'}


  super().__init__(**kwargs)


Validation Accuracy: 0.9473000168800354
Evaluating Model 2 with params: {'num_layers': 2, 'units': 64, 'activation': 'tanh', 'optimizer': 'sgd', 'batch_size': 64, 'epochs': 5, 'layer_type': 'dense'}


  super().__init__(**kwargs)


Validation Accuracy: 0.9254000186920166
Evaluating Model 3 with params: {'num_layers': 4, 'units': 64, 'activation': 'tanh', 'optimizer': 'sgd', 'batch_size': 32, 'epochs': 5, 'layer_type': 'dense'}
Validation Accuracy: 0.9527000188827515
Evaluating Model 4 with params: {'num_layers': 2, 'units': 32, 'activation': 'relu', 'optimizer': 'adam', 'batch_size': 32, 'epochs': 5, 'layer_type': 'dense'}
Validation Accuracy: 0.9678000211715698
Evaluating Model 5 with params: {'num_layers': 4, 'units': 128, 'activation': 'tanh', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'dense'}
Validation Accuracy: 0.972000002861023
Best Accuracy: 0.972000002861023
Best Params: {'num_layers': 4, 'units': 128, 'activation': 'tanh', 'optimizer': 'adam', 'batch_size': 64, 'epochs': 5, 'layer_type': 'dense'}
