In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization


In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("crawford/emnist")

print("Path to dataset files:", path)

In [None]:
emnist = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-train.csv")
emnist_test = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-test.csv")

In [None]:
train_labels = emnist.iloc[:, 0].values  # First column contains labels
train_images = emnist.iloc[:, 1:].values  # Remaining columns contain pixel values

# Extract labels and images for test data
test_labels = emnist_test.iloc[:, 0].values  # First column contains labels
test_images = emnist_test.iloc[:, 1:].values  # Remaining columns contain pixel values

In [None]:
print("Unique labels in training data:", np.unique(train_labels))
print("Unique labels in test data:", np.unique(test_labels))


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Normalize pixel values (0 to 1 range)
train_images, test_images = train_images / 255.0, test_images / 255.0

# **✅ Reshape the images to (28, 28, 1) instead of (784, 1, 1)**
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

num_classes = np.max(train_labels) + 1  # This will be 47

# Convert labels to one-hot encoding
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

num_classes = 47  # Number of character classes

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),  # Prevent overfitting
    Dense(num_classes, activation='softmax')  # Ensure 47 output classes
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
model.fit(train_images, train_labels, epochs=30, batch_size=32, validation_data=(test_images, test_labels))


Epoch 1/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 7ms/step - accuracy: 0.5930 - loss: 1.4003 - val_accuracy: 0.8114 - val_loss: 0.5590
Epoch 2/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 5ms/step - accuracy: 0.7978 - loss: 0.5959 - val_accuracy: 0.8302 - val_loss: 0.4862
Epoch 3/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 5ms/step - accuracy: 0.8204 - loss: 0.5186 - val_accuracy: 0.8288 - val_loss: 0.4873
Epoch 4/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step - accuracy: 0.8328 - loss: 0.4779 - val_accuracy: 0.8488 - val_loss: 0.4404
Epoch 5/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - accuracy: 0.8429 - loss: 0.4442 - val_accuracy: 0.8500 - val_loss: 0.4368
Epoch 6/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - accuracy: 0.8459 - loss: 0.4234 - val_accuracy: 0.8579 - val_loss: 0.4237
Epoch 7/30

<keras.src.callbacks.history.History at 0x7972f8c391d0>

In [None]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test Accuracy: {test_acc:.4f}")

[1m588/588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8531 - loss: 0.5172
Test Accuracy: 0.8557


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

from tensorflow.keras.utils import to_categorical

# Define the Wolf Optimization Algorithm (WOA)
def woa_optimize(iterations=10, wolves=5):
    best_lr = None
    best_filters = None
    best_accuracy = 0

    learning_rates = np.random.uniform(0.0001, 0.01, wolves)
    filters = np.random.choice([32, 64, 128], wolves)

    for i in range(iterations):
        for j in range(wolves):
            model = Sequential([
                Conv2D(filters[j], (3, 3), activation='relu', input_shape=(28, 28, 1)),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Conv2D(64, (3, 3), activation='relu'),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Flatten(),
                Dense(256, activation='relu'),
                Dropout(0.5),
                Dense(num_classes, activation='softmax')
            ])

            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rates[j]),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            model.fit(train_images, train_labels, epochs=3, batch_size=32, verbose=0)
            _, accuracy = model.evaluate(test_images, test_labels, verbose=0)

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_lr = learning_rates[j]
                best_filters = filters[j]

    print(f'Best Learning Rate: {best_lr}, Best Filters: {best_filters}, Best Accuracy: {best_accuracy}')
    return best_lr, best_filters

# Run WOA optimization
best_lr, best_filters = woa_optimize()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best Learning Rate: 0.0007554813929312193, Best Filters: 64, Best Accuracy: 0.8732379674911499


In [None]:
def create_model(params):
    model = Sequential([
        Conv2D(params['filters'], (3, 3), activation='relu', input_shape=(28, 28, 1)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(params['dropout']),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Train model with best WOA parameters
best_woa_params = {'filters': 64, 'dropout': 0.5, 'learning_rate': 0.0007554813929312193}
best_woa_model = create_model(best_woa_params)
best_woa_model.fit(train_images, train_labels, epochs=30, batch_size=32, verbose=1)


Epoch 1/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 5ms/step - accuracy: 0.6148 - loss: 1.3771
Epoch 2/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.8081 - loss: 0.5789
Epoch 3/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.8348 - loss: 0.4888
Epoch 4/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.8496 - loss: 0.4316
Epoch 5/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.8604 - loss: 0.3895
Epoch 6/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.8677 - loss: 0.3637
Epoch 7/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - accuracy: 0.8747 - loss: 0.3422
Epoch 8/30
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.8835 - loss: 0.3184
Epoch 9/30
[1m3

In [None]:
# Evaluate final model
test_loss, test_acc = best_woa_model.evaluate(test_images, test_labels, batch_size=1024)

print(f"Test Accuracy: {test_acc:.4f}")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.8631 - loss: 0.7093
Test Accuracy: 0.8636


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

# Load dataset from Kaggle CSV files
train_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-train.csv")
test_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-test.csv")

# Extract labels and pixel values
train_labels = train_data.iloc[:, 0].values
test_labels = test_data.iloc[:, 0].values
train_images = train_data.iloc[:, 1:].values
test_images = test_data.iloc[:, 1:].values

# Normalize pixel values
train_images = train_images / 255.0
test_images = test_images / 255.0

# Convert labels to one-hot encoding
num_classes = len(np.unique(train_labels))  # Should be 47 for EMNIST Balanced
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Define Ant Colony Optimization (ACO) for Feature Selection
def aco_feature_selection(num_ants=10, iterations=5, num_features=784, alpha=1, beta=1, evaporation=0.5):
    pheromone = np.ones(num_features)  # Initialize pheromone levels for all features
    best_features = None
    best_accuracy = 0

    for _ in range(iterations):
        for ant in range(num_ants):
            selected_features = np.random.choice(range(num_features), size=784, replace=False)


            # Create new dataset with selected features
            train_subset = train_images[:, selected_features]
            test_subset = test_images[:, selected_features]

            # Reshape for CNN compatibility
            train_subset = train_subset.reshape(-1, 28, 28, 1)
            test_subset = test_subset.reshape(-1, 28, 28, 1)

            # Define CNN model
            model = Sequential([
                Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Conv2D(64, (3, 3), activation='relu'),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Flatten(),
                Dense(256, activation='relu'),
                Dropout(0.5),
                Dense(num_classes, activation='softmax')
            ])

            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            model.fit(train_subset, train_labels, epochs=3, batch_size=32, verbose=0)
            _, accuracy = model.evaluate(test_subset, test_labels, verbose=0)

            # Update pheromone levels
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_features = selected_features
            pheromone[selected_features] *= (1 - evaporation) + accuracy  # Update pheromone levels

    print(f'Best Feature Set Selected: {len(best_features)} features')
    print(f'Best Accuracy: {best_accuracy}')
    return best_features

# Run ACO for feature selection
best_features = aco_feature_selection()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best Feature Set Selected: 784 features
Best Accuracy: 0.7928613424301147


In [None]:
def create_model():
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Create and train the model
model = create_model()



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
import random

# Load dataset from Kaggle CSV files
train_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-train.csv")
test_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-test.csv")

# Extract labels and pixel values
train_labels = train_data.iloc[:, 0].values
test_labels = test_data.iloc[:, 0].values
train_images = train_data.iloc[:, 1:].values
test_images = test_data.iloc[:, 1:].values

# Normalize pixel values
train_images = train_images / 255.0
test_images = test_images / 255.0

# Reshape images to (28, 28, 1)
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding
num_classes = len(np.unique(train_labels))  # Should be 47 for EMNIST Balanced
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Define Bee Algorithm for CNN Architecture Optimization
def bee_algorithm(num_bees=10, iterations=5):
    best_model = None
    best_accuracy = 0
    best_hyperparams = {}

    for _ in range(iterations):
        for bee in range(num_bees):
            # Randomly choose hyperparameters
            num_filters = np.random.choice([32, 64, 128])
            dropout_rate = np.random.uniform(0.3, 0.6)
            learning_rate = np.random.uniform(0.0001, 0.01)

            # Define CNN model
            model = Sequential([
                Conv2D(num_filters, (3, 3), activation='relu', input_shape=(28, 28, 1)),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Conv2D(64, (3, 3), activation='relu'),
                BatchNormalization(),
                MaxPooling2D(2, 2),
                Flatten(),
                Dense(256, activation='relu'),
                Dropout(dropout_rate),
                Dense(num_classes, activation='softmax')
            ])

            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            model.fit(train_images, train_labels, epochs=3, batch_size=32, verbose=0)
            _, accuracy = model.evaluate(test_images, test_labels, verbose=0)

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model
                best_hyperparams = {'filters': num_filters, 'dropout': dropout_rate, 'learning_rate': learning_rate}

    print(f'Best Hyperparameters: {best_hyperparams}')
    print(f'Best Accuracy: {best_accuracy}')
    return best_model, best_hyperparams

# Run Bee Algorithm for CNN optimization
best_model, best_hyperparams = bee_algorithm()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
import random

# Load dataset from Kaggle CSV files
train_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-train.csv")
test_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-test.csv")

# Extract labels and pixel values
train_labels = train_data.iloc[:, 0].values
test_labels = test_data.iloc[:, 0].values
train_images = train_data.iloc[:, 1:].values
test_images = test_data.iloc[:, 1:].values

# Normalize pixel values
train_images = train_images / 255.0
test_images = test_images / 255.0

# Reshape images to (28, 28, 1)
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding
num_classes = len(np.unique(train_labels))  # Should be 47 for EMNIST Balanced
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Define Genetic Algorithm (GA) for Hyperparameter Optimization
def create_model(params):
    model = Sequential([
        Conv2D(params['filters'], (3, 3), activation='relu', input_shape=(28, 28, 1)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(params['dropout']),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Genetic Algorithm for Optimization
def genetic_algorithm(pop_size=10, generations=5, mutation_rate=0.2):
    population = [{'filters': random.choice([32, 64, 128]),
                   'dropout': random.uniform(0.3, 0.6),
                   'learning_rate': random.uniform(0.0001, 0.01)} for _ in range(pop_size)]

    for generation in range(generations):
        scores = []
        for individual in population:
            model = create_model(individual)
            model.fit(train_images, train_labels, epochs=3, batch_size=32, verbose=0)
            _, accuracy = model.evaluate(test_images, test_labels, verbose=0)
            scores.append((individual, accuracy))

        scores.sort(key=lambda x: x[1], reverse=True)
        print(f'Generation {generation + 1} - Best Accuracy: {scores[0][1]}')

        # Select top individuals
        new_population = [x[0] for x in scores[:pop_size // 2]]

        # Crossover
        for _ in range(pop_size // 2):
            parent1, parent2 = random.sample(new_population, 2)
            child = {key: random.choice([parent1[key], parent2[key]]) for key in parent1}

            # Mutation
            if random.random() < mutation_rate:
                child['filters'] = random.choice([32, 64, 128])
                child['dropout'] = random.uniform(0.3, 0.6)
                child['learning_rate'] = random.uniform(0.0001, 0.01)

            new_population.append(child)

        population = new_population

    best_params = scores[0][0]
    print(f'Best Hyperparameters: {best_params}')
    return best_params

# Run GA optimization
best_params = genetic_algorithm()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Generation 1 - Best Accuracy: 0.8398318886756897
Generation 2 - Best Accuracy: 0.8608436584472656
Generation 3 - Best Accuracy: 0.8623862862586975
Generation 4 - Best Accuracy: 0.8621734976768494
Generation 5 - Best Accuracy: 0.8628118634223938
Best Hyperparameters: {'filters': 64, 'dropout': 0.38660366275446795, 'learning_rate': 0.0002248029413911395}


In [None]:
emnist.shape

(112799, 785)

In [None]:
emnist_test.shape

(18799, 785)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
import random

# Load dataset from Kaggle CSV files
train_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-train.csv")
test_data = pd.read_csv("/root/.cache/kagglehub/datasets/crawford/emnist/versions/3/emnist-balanced-test.csv")

# Extract labels and pixel values
train_labels = train_data.iloc[:, 0].values
test_labels = test_data.iloc[:, 0].values
train_images = train_data.iloc[:, 1:].values
test_images = test_data.iloc[:, 1:].values

# Normalize pixel values
train_images = train_images / 255.0
test_images = test_images / 255.0

# Reshape images to (28, 28, 1)
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding
num_classes = len(np.unique(train_labels))  # Should be 47 for EMNIST Balanced
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Define Genetic Algorithm (GA) for Hyperparameter Optimization
def create_model():
    model = Sequential([
        Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.38660366275446795),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002248029413911395),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model



In [None]:
best_model = create_model()
best_model.fit(train_images, train_labels, epochs=15, batch_size=32, validation_data=(train_images, train_labels))

# Evaluate final model
test_loss, test_acc = best_model.evaluate(test_images, test_labels)
print(f"Test Accuracy: {test_acc:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 7ms/step - accuracy: 0.6218 - loss: 1.3458 - val_accuracy: 0.8671 - val_loss: 0.4001
Epoch 2/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 6ms/step - accuracy: 0.8248 - loss: 0.5294 - val_accuracy: 0.8846 - val_loss: 0.3286
Epoch 3/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 6ms/step - accuracy: 0.8541 - loss: 0.4268 - val_accuracy: 0.9021 - val_loss: 0.2690
Epoch 4/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - accuracy: 0.8690 - loss: 0.3669 - val_accuracy: 0.9163 - val_loss: 0.2324
Epoch 5/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 6ms/step - accuracy: 0.8859 - loss: 0.3167 - val_accuracy: 0.9226 - val_loss: 0.2085
Epoch 6/15
[1m3525/3525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 6ms/step - accuracy: 0.8944 - loss: 0.2879 - val_accuracy: 0.9330 - val_loss: 0.1797
Epoch 7/15