# Import Libraries

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time
import os
from sklearn.metrics import confusion_matrix

# Check for GPU availability

In [2]:
# Detect GPUs and, when present, let TensorFlow grow GPU memory on demand
# instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Running on CPU.")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the error instead of aborting the notebook.
        print(e)
    else:
        print("GPU is available and memory growth is enabled.")

No GPU found. Running on CPU.


# Disable eager execution for TensorFlow v1 compatibility

- Required in order to use TensorFlow 1.x-style graphs and sessions.

In [3]:
# Disable eager execution so the rest of the notebook can build TF1-style
# graphs (placeholders, initializable iterators) and run them through
# tf.compat.v1.Session, as train_and_evaluate does.
tf.compat.v1.disable_eager_execution()




# Load MNIST dataset

In [4]:
# Load MNIST through TensorFlow Datasets.
# `mnist` is indexed by split name later on (mnist['train'], mnist['test']);
# as_supervised=True makes each element an (image, label) pair, which is the
# signature preprocess() expects. `info` holds the dataset metadata.
mnist, info = tfds.load('mnist', with_info=True, as_supervised=True)

# Create results directory

In [5]:
# Create the root results directory. train_and_evaluate writes one subfolder
# per configuration under it, and the summary CSV is saved here as well.
folder = "results_lr0.01"
# exist_ok=True keeps the cell re-runnable when the directory already exists.
os.makedirs(folder, exist_ok=True)

# Define the activation and preprocessing functions

In [6]:
def activation_function(x):
    """Apply the hidden-layer non-linearity (ReLU) to tensor ``x``."""
    activated = tf.nn.relu(x)
    return activated

def preprocess(image, label):
    """Turn one (image, label) example into model-ready tensors.

    The image is flattened to a 784-element vector and scaled by 1/255 as
    float32; the label is one-hot encoded over 10 classes.
    """
    flat_pixels = tf.reshape(image, [784])
    scaled_pixels = tf.cast(flat_pixels, tf.float32) / 255.0
    one_hot_label = tf.one_hot(label, 10)
    return scaled_pixels, one_hot_label

# Define the plot_and_save_results function

In [7]:
def plot_and_save_results(loss_history, accuracy_history, cm, filename):
    """Save a three-panel summary figure for one training run.

    Panels, left to right: per-epoch loss curve, per-epoch accuracy curve and
    the confusion matrix heatmap. The figure is written to ``filename`` and
    then closed, so nothing is displayed inline.

    Args:
        loss_history: Sequence of loss values, one per epoch.
        accuracy_history: Sequence of accuracy values, one per epoch.
        cm: Integer confusion matrix (rendered with 0-9 tick labels).
        filename: Output path passed to ``savefig``.
    """
    fig, (loss_ax, acc_ax, cm_ax) = plt.subplots(1, 3, figsize=(18, 5))

    # Left panel: training loss per epoch.
    loss_ax.plot(loss_history, label='Loss', color='blue')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title('Loss Curve')
    loss_ax.legend()

    # Middle panel: training accuracy per epoch.
    acc_ax.plot(accuracy_history, label='Accuracy', color='green')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_title('Accuracy Curve')
    acc_ax.legend()

    # Right panel: confusion matrix with integer cell annotations.
    digit_labels = range(10)
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=digit_labels,
        yticklabels=digit_labels,
        ax=cm_ax,
    )
    cm_ax.set_xlabel('Predicted Label')
    cm_ax.set_ylabel('True Label')
    cm_ax.set_title('Confusion Matrix')

    # Write the combined image to disk and release the figure.
    fig.savefig(filename)
    plt.close(fig)

# Define the train_and_evaluate function

In [8]:
def train_and_evaluate(hidden_layers, learning_rate):
    """Train a ReLU MLP on MNIST in TF1 graph mode and evaluate it on the test split.

    Relies on notebook-level globals that must be defined before the call:
    ``mnist`` (dataset splits), ``batch_size``, ``epochs`` and ``folder``
    (root output directory).

    Args:
        hidden_layers: Iterable of hidden-layer widths, e.g. ``(160, 100)``.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A tuple ``(loss_history, accuracy_history, test_acc, cm,
        execution_time, plot_filename)`` where the two histories hold one
        mean value per epoch, ``test_acc`` is the fraction of correctly
        classified test samples, ``cm`` is the 10x10 confusion matrix,
        ``execution_time`` is wall-clock seconds for the whole call and
        ``plot_filename`` is the path of the saved summary figure.
    """
    start_time = time.time()

    # Cache the preprocessed examples *before* shuffling and batching. Caching
    # after shuffle() would freeze the first epoch's order and batch
    # composition and replay it on every later epoch.
    train_data = (
        mnist['train']
        .map(preprocess)
        .cache()
        .shuffle(60000)
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
    # The test split is iterated exactly once, so caching it buys nothing.
    test_data = (
        mnist['test']
        .map(preprocess)
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    iterator = tf.compat.v1.data.make_initializable_iterator(train_data)
    next_element = iterator.get_next()
    test_iterator = tf.compat.v1.data.make_initializable_iterator(test_data)
    next_test_element = test_iterator.get_next()

    X = tf.compat.v1.placeholder(tf.float32, [None, 784])
    Y = tf.compat.v1.placeholder(tf.float32, [None, 10])

    # NOTE(review): the unit-variance normal initialisation is kept as-is so
    # that results stay comparable with earlier runs.
    weights = {}
    biases = {}
    prev_size = 784
    layer = X

    for i, size in enumerate(hidden_layers):
        weights[f'h{i+1}'] = tf.Variable(tf.random.normal([prev_size, size]))
        biases[f'b{i+1}'] = tf.Variable(tf.random.normal([size]))
        layer = activation_function(tf.add(tf.matmul(layer, weights[f'h{i+1}']), biases[f'b{i+1}']))
        prev_size = size

    weights['out'] = tf.Variable(tf.random.normal([prev_size, 10]))
    biases['out'] = tf.Variable(tf.random.normal([10]))
    logits = tf.add(tf.matmul(layer, weights['out']), biases['out'])

    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Build the prediction op once. Creating tf.argmax(...) inside the
    # evaluation loop would add a new node to the graph for every test batch.
    predictions = tf.argmax(logits, 1)
    correct_pred = tf.equal(predictions, tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.compat.v1.global_variables_initializer()
    loss_history = []
    accuracy_history = []

    with tf.compat.v1.Session() as sess:
        sess.run(init)
        sess.run(iterator.initializer)
        for epoch in range(epochs):
            epoch_loss = []
            epoch_acc = []
            try:
                while True:
                    batch_x, batch_y = sess.run(next_element)
                    _, loss, acc = sess.run([train_op, loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
                    epoch_loss.append(loss)
                    epoch_acc.append(acc)
            except tf.errors.OutOfRangeError:
                # End of epoch: rewind the iterator (which also reshuffles).
                sess.run(iterator.initializer)

            # Compute and store average loss/accuracy per epoch
            loss_history.append(np.mean(epoch_loss))
            accuracy_history.append(np.mean(epoch_acc))

        print(f"Completed: Layers={hidden_layers}, LR={learning_rate}")

        # Evaluate on the test split, collecting per-sample labels and predictions.
        sess.run(test_iterator.initializer)
        y_true, y_pred = [], []
        while True:
            try:
                test_images, test_labels = sess.run(next_test_element)
            except tf.errors.OutOfRangeError:
                break
            preds = sess.run(predictions, feed_dict={X: test_images})
            y_true.extend(np.argmax(test_labels, axis=1))
            y_pred.extend(preds)

        # Per-sample accuracy: unlike averaging per-batch accuracies, this is
        # not biased when the final batch is smaller than the others.
        test_acc = float(np.mean(np.asarray(y_true) == np.asarray(y_pred)))

        # Fix the label set so the matrix is always 10x10 and lines up with
        # the 0-9 tick labels used by plot_and_save_results.
        cm = confusion_matrix(y_true, y_pred, labels=list(range(10)))
        execution_time = time.time() - start_time

        # Ensure the directory exists
        subfolder = f"{folder}/relu_{hidden_layers}_{learning_rate}"
        os.makedirs(subfolder, exist_ok=True)

        plot_filename = f"{subfolder}/results_{hidden_layers}_{learning_rate}.png"

        # Save combined loss, accuracy, and confusion matrix plot
        plot_and_save_results(loss_history, accuracy_history, cm, plot_filename)

        return loss_history, accuracy_history, test_acc, cm, execution_time, plot_filename

# Define Parameters

In [9]:
# Parameters (read as globals by train_and_evaluate)
batch_size = 10 # mini-batch size for both the training and test pipelines
epochs = 50  # number of full passes over the training split per configuration
# Not referenced by any other cell visible in this notebook; activation_function is hard-wired to ReLU.
activations = ['relu']
# Each tuple lists the widths of the two hidden layers, in order.
hidden_layer_sizes_double_layer = [(160,100), (100,160), (100,100), (100,60), (60,60)]
# Wider sweep kept for reference; only 0.01 is active, matching the "results_lr0.01" folder name.
# learning_rates = [1, 0.1 , 0.01, 0.001]
learning_rates = [0.01]

# Loop through all defined configurations

In [10]:
# Train one model per (hidden layer sizes, learning rate) combination and
# collect a summary row for each run.
results_double_layer = []
for hidden_sizes in hidden_layer_sizes_double_layer:
    for lr in learning_rates:
        outcome = train_and_evaluate(hidden_sizes, lr)
        loss_hist, acc_hist, test_acc, cm, exec_time, plot_file = outcome
        # Keep only the last-epoch training metrics alongside the test accuracy.
        row = [hidden_sizes, lr, loss_hist[-1], acc_hist[-1], test_acc, exec_time]
        results_double_layer.append(row)

columns = [
    "Hidden Layers",
    "Learning Rate",
    "Final Loss",
    "Final Accuracy",
    "Test Accuracy",
    "Execution Time",
]
df_double_layer = pd.DataFrame(results_double_layer, columns=columns)

# Persist the summary table next to the per-run plots.
df_double_layer.to_csv(f"{folder}/training_results.csv", index=False)

print("Results saved to training_results.csv")













Completed: Layers=(160, 100), LR=0.01
Completed: Layers=(100, 160), LR=0.01
Completed: Layers=(100, 100), LR=0.01
Completed: Layers=(100, 60), LR=0.01
Completed: Layers=(60, 60), LR=0.01
Results saved to training_results.csv
