In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Load the MNIST splits as (image, label) pairs; preprocessing is applied
# further down through `preprocess`.
ds, info = tfds.load("mnist", as_supervised=True, with_info=True)
train_dataset, test_dataset = ds["train"], ds["test"]

# Hyperparameters for this run
BATCH_SIZE = 10
LEARNING_RATE = 0.1
EPOCHS = 50
HIDDEN_UNITS = 256

# Fix the global TensorFlow seed before any random op (weight initialisation,
# dataset shuffling) is created, so re-running this cell starts from the same
# random state instead of a fresh one every time.
SEED = 42
tf.random.set_seed(SEED)

# Per-example preprocessing applied through Dataset.map
def preprocess(image, label):
    """Turn one raw (image, label) pair into a model-ready pair.

    Args:
        image: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in [0, 255] -- TODO confirm).
        label: integer class id.

    Returns:
        (flat_pixels, one_hot_label): the image flattened to a 1-D vector
        (784 values for MNIST, matching `input_dim`) and the label as a
        depth-10 one-hot vector.
    """
    scaled_pixels = tf.cast(image, tf.float32) / 255.0
    flat_pixels = tf.reshape(scaled_pixels, [-1])
    one_hot_label = tf.one_hot(label, depth=10)
    return flat_pixels, one_hot_label

# Input pipeline: preprocess every example, shuffle the training split with a
# 10,000-element buffer, then group examples into batches of BATCH_SIZE.
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(preprocess)
test_dataset = test_dataset.batch(BATCH_SIZE)

# Layer sizes: flattened image in, one logit per class out
input_dim = 784
output_dim = 10

# Trainable parameters: weights drawn from a normal distribution with
# stddev 0.1, biases start at zero.
W1 = tf.Variable(tf.random.normal(shape=[input_dim, HIDDEN_UNITS], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[HIDDEN_UNITS]))
W2 = tf.Variable(tf.random.normal(shape=[HIDDEN_UNITS, output_dim], stddev=0.1))
b2 = tf.Variable(tf.zeros(shape=[output_dim]))

# Forward pass of the single-hidden-layer network
def model(x):
    """Compute class logits for a batch of flattened images.

    Args:
        x: float tensor that can be matrix-multiplied with W1, i.e. its last
            dimension equals `input_dim`.

    Returns:
        Unnormalised logits (no softmax applied) with `output_dim` columns.
    """
    pre_activation = tf.matmul(x, W1) + b1
    activations = tf.nn.relu(pre_activation)
    return tf.matmul(activations, W2) + b2

# Loss: softmax cross-entropy averaged over the batch
def compute_loss(logits, labels):
    """Return the mean softmax cross-entropy between logits and one-hot labels.

    Args:
        logits: unnormalised class scores, one row per example.
        labels: one-hot targets with the same shape as `logits`.

    Returns:
        Scalar tensor: the per-example cross-entropy averaged over the batch.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(per_example_loss)

# Accuracy over a whole batched dataset
def compute_accuracy(dataset):
    """Return the fraction of examples in `dataset` that `model` classifies correctly.

    Args:
        dataset: iterable of (x, y) batches, where y holds one-hot labels.

    Returns:
        Number of examples whose arg-max logit equals the arg-max of the
        label, divided by the total number of examples seen.

    Raises:
        ZeroDivisionError: if `dataset` yields no batches.
    """
    correct, total = 0, 0
    for x, y in dataset:
        predicted_class = tf.argmax(model(x), axis=1)
        true_class = tf.argmax(y, axis=1)
        hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
        correct += tf.reduce_sum(hits).numpy()
        # The last batch may be smaller, so count rows per batch explicitly.
        total += x.shape[0]
    return correct / total

# Optimizer: stochastic gradient descent with the step size taken from
# LEARNING_RATE; no momentum argument is passed.
optimizer = tf.optimizers.SGD(learning_rate=LEARNING_RATE)

# One optimisation step on a single batch
def train_step(x, y):
    """Run forward and backward passes on one batch and update the weights.

    Args:
        x: batch of flattened images.
        y: matching batch of one-hot labels.

    Returns:
        The batch loss tensor computed before the update was applied.
    """
    trainable = [W1, b1, W2, b2]
    # The tape records the forward pass so gradients w.r.t. the variables can
    # be taken afterwards.
    with tf.GradientTape() as tape:
        loss = compute_loss(model(x), y)
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return loss

# Training loop.
# NOTE: the "Loss" printed per epoch is the SUM of the per-batch mean losses
# over all batches in the epoch, not an average, so its magnitude scales with
# the number of batches.
for epoch in range(EPOCHS):
    total_loss = 0
    for x_batch, y_batch in train_dataset:
        batch_loss = train_step(x_batch, y_batch)
        total_loss += batch_loss.numpy()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Evaluation: accuracy of the trained weights on both splits
train_acc = compute_accuracy(train_dataset)
test_acc = compute_accuracy(test_dataset)

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")


Epoch 1, Loss: 1236.5002
Epoch 2, Loss: 526.6897
Epoch 3, Loss: 353.7360
Epoch 4, Loss: 253.7513
Epoch 5, Loss: 185.6968
Epoch 6, Loss: 132.8225
Epoch 7, Loss: 87.4903
Epoch 8, Loss: 62.7368
Epoch 9, Loss: 33.7113
Epoch 10, Loss: 26.2955
Epoch 11, Loss: 15.6682
Epoch 12, Loss: 10.4945
Epoch 13, Loss: 8.2548
Epoch 14, Loss: 6.6649
Epoch 15, Loss: 5.8708
Epoch 16, Loss: 5.2598
Epoch 17, Loss: 4.7984
Epoch 18, Loss: 4.3838
Epoch 19, Loss: 3.9996
Epoch 20, Loss: 3.7404
Epoch 21, Loss: 3.5173
Epoch 22, Loss: 3.2855
Epoch 23, Loss: 3.1075
Epoch 24, Loss: 2.9654
Epoch 25, Loss: 2.8098
Epoch 26, Loss: 2.6599
Epoch 27, Loss: 2.5424
Epoch 28, Loss: 2.4251
Epoch 29, Loss: 2.3370
Epoch 30, Loss: 2.2296
Epoch 31, Loss: 2.1379
Epoch 32, Loss: 2.0657
Epoch 33, Loss: 1.9849
Epoch 34, Loss: 1.9239
Epoch 35, Loss: 1.8466
Epoch 36, Loss: 1.7988
Epoch 37, Loss: 1.7408
Epoch 38, Loss: 1.6834
Epoch 39, Loss: 1.6330
Epoch 40, Loss: 1.5833
Epoch 41, Loss: 1.5393
Epoch 42, Loss: 1.4980
Epoch 43, Loss: 1.4573
E

In [4]:
#First Combination : Hidden Layers: (160, 100), Learning Rate: 0.001

import tensorflow as tf
import tensorflow_datasets as tfds

# Load the MNIST splits as (image, label) pairs
ds, info = tfds.load("mnist", as_supervised=True, with_info=True)
train_dataset, test_dataset = ds["train"], ds["test"]

# Run configuration
BATCH_SIZE = 10
EPOCHS = 50

# Fix the global TensorFlow seed before any random op (weight initialisation,
# dataset shuffling) is created, so re-running this cell starts from the same
# random state instead of a fresh one every time.
SEED = 42
tf.random.set_seed(SEED)

def preprocess(image, label):
    """Scale and flatten one image and one-hot encode its label.

    Args:
        image: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in [0, 255] -- TODO confirm).
        label: integer class id.

    Returns:
        Tuple of the flattened float32 image and a depth-10 one-hot label.
    """
    as_float = tf.cast(image, tf.float32)
    flattened = tf.reshape(as_float / 255.0, [-1])
    return flattened, tf.one_hot(label, depth=10)

# Input pipeline: map -> shuffle (training split only) -> batch
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(preprocess)
test_dataset = test_dataset.batch(BATCH_SIZE)

# Network dimensions: 784 inputs -> 160 -> 100 -> 10 outputs
input_dim = 784
output_dim = 10

# Weights drawn from a normal distribution with stddev 0.1; biases start at zero
W1 = tf.Variable(initial_value=tf.random.normal([input_dim, 160], stddev=0.1))
b1 = tf.Variable(initial_value=tf.zeros([160]))
W2 = tf.Variable(initial_value=tf.random.normal([160, 100], stddev=0.1))
b2 = tf.Variable(initial_value=tf.zeros([100]))
W3 = tf.Variable(initial_value=tf.random.normal([100, output_dim], stddev=0.1))
b3 = tf.Variable(initial_value=tf.zeros([output_dim]))

def model(x):
    """Forward pass of the two-hidden-layer network; returns raw logits.

    Args:
        x: float tensor whose last dimension equals `input_dim`.

    Returns:
        Unnormalised logits with `output_dim` columns.
    """
    activations = x
    # Both hidden layers are affine transforms followed by ReLU.
    for weights, bias in ((W1, b1), (W2, b2)):
        activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
    # Output layer is affine only: softmax is applied inside the loss.
    return tf.matmul(activations, W3) + b3

def compute_loss(logits, labels):
    """Mean softmax cross-entropy of `logits` against one-hot `labels`."""
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(cross_entropy)

def compute_accuracy(dataset):
    """Fraction of examples in `dataset` whose predicted class matches the label.

    Args:
        dataset: iterable of (x, y) batches with one-hot labels in y.

    Returns:
        Correct predictions divided by the number of examples seen.

    Raises:
        ZeroDivisionError: if `dataset` yields no batches.
    """
    correct, total = 0, 0
    for x, y in dataset:
        matches = tf.equal(tf.argmax(model(x), axis=1), tf.argmax(y, axis=1))
        batch_correct = tf.reduce_sum(tf.cast(matches, tf.float32))
        correct += batch_correct.numpy()
        total += x.shape[0]
    return correct / total

# Optimizer: stochastic gradient descent with learning rate 0.001 and no
# momentum argument.
optimizer = tf.optimizers.SGD(learning_rate=0.001)

def train_step(x, y):
    """Apply one optimizer update computed from the batch (x, y).

    Args:
        x: batch of flattened images.
        y: matching batch of one-hot labels.

    Returns:
        The batch loss tensor computed before the update was applied.
    """
    parameters = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        loss = compute_loss(model(x), y)
    grads = tape.gradient(loss, parameters)
    optimizer.apply_gradients(zip(grads, parameters))
    return loss

# Training loop.
# NOTE: the "Loss" printed per epoch is the SUM of the per-batch mean losses
# over the epoch, not an average.
for epoch in range(EPOCHS):
    total_loss = 0
    for x_batch, y_batch in train_dataset:
        batch_loss = train_step(x_batch, y_batch)
        total_loss += batch_loss.numpy()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Evaluation on both splits once training has finished
train_acc = compute_accuracy(train_dataset)
test_acc = compute_accuracy(test_dataset)

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")


Epoch 1, Loss: 6629.4944
Epoch 2, Loss: 2703.2928
Epoch 3, Loss: 2173.2896
Epoch 4, Loss: 1928.7537
Epoch 5, Loss: 1768.4554
Epoch 6, Loss: 1647.1029
Epoch 7, Loss: 1548.5870
Epoch 8, Loss: 1463.8393
Epoch 9, Loss: 1390.2822
Epoch 10, Loss: 1325.1940
Epoch 11, Loss: 1265.1961
Epoch 12, Loss: 1213.1845
Epoch 13, Loss: 1163.2986
Epoch 14, Loss: 1118.5109
Epoch 15, Loss: 1076.2018
Epoch 16, Loss: 1037.7070
Epoch 17, Loss: 1002.8625
Epoch 18, Loss: 969.1000
Epoch 19, Loss: 937.2247
Epoch 20, Loss: 908.4850
Epoch 21, Loss: 879.9602
Epoch 22, Loss: 853.3430
Epoch 23, Loss: 828.6375
Epoch 24, Loss: 805.7184
Epoch 25, Loss: 783.0736
Epoch 26, Loss: 761.7456
Epoch 27, Loss: 741.3450
Epoch 28, Loss: 721.4231
Epoch 29, Loss: 704.4133
Epoch 30, Loss: 686.5747
Epoch 31, Loss: 669.6128
Epoch 32, Loss: 653.5264
Epoch 33, Loss: 637.9632
Epoch 34, Loss: 622.6029
Epoch 35, Loss: 608.6524
Epoch 36, Loss: 594.3446
Epoch 37, Loss: 581.8305
Epoch 38, Loss: 568.5124
Epoch 39, Loss: 555.9770
Epoch 40, Loss: 5

In [5]:
#Second Combination : Hidden Layers: (100, 100), Learning Rate: 0.001, Momentum: 0.9

import tensorflow as tf
import tensorflow_datasets as tfds

# Load the MNIST splits as (image, label) pairs; preprocessing is applied
# further down through `preprocess`.
dataset, info = tfds.load("mnist", as_supervised=True, with_info=True)
train_dataset, test_dataset = dataset["train"], dataset["test"]

BATCH_SIZE = 10

# Fix the global TensorFlow seed before any random op (weight initialisation,
# dataset shuffling) is created, so re-running this cell starts from the same
# random state instead of a fresh one every time.
SEED = 42
tf.random.set_seed(SEED)

def preprocess(image, label):
    """Prepare one (image, label) example for the dense network.

    Args:
        image: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in [0, 255] -- TODO confirm).
        label: integer class id.

    Returns:
        (features, target): the image as a flat float32 vector and the label
        as a depth-10 one-hot vector.
    """
    normalized = tf.cast(image, tf.float32) / 255.0
    features = tf.reshape(normalized, [-1])
    target = tf.one_hot(label, depth=10)
    return features, target

# Input pipeline: map -> shuffle (training split only) -> batch
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(preprocess)
test_dataset = test_dataset.batch(BATCH_SIZE)

# Layer widths of the 784 -> 100 -> 100 -> 10 network
input_dim = 784
hidden_dim1 = 100
hidden_dim2 = 100
output_dim = 10

# Trainable parameters: normal(stddev=0.1) weights, zero biases
W1 = tf.Variable(tf.random.normal(shape=[input_dim, hidden_dim1], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[hidden_dim1]))
W2 = tf.Variable(tf.random.normal(shape=[hidden_dim1, hidden_dim2], stddev=0.1))
b2 = tf.Variable(tf.zeros(shape=[hidden_dim2]))
W3 = tf.Variable(tf.random.normal(shape=[hidden_dim2, output_dim], stddev=0.1))
b3 = tf.Variable(tf.zeros(shape=[output_dim]))

# Forward pass: two ReLU hidden layers followed by a linear output layer
def model(x):
    """Return raw class logits for a batch of flattened images.

    Args:
        x: float tensor whose last dimension equals `input_dim`.

    Returns:
        Unnormalised logits with `output_dim` columns.
    """
    first_hidden = tf.nn.relu(tf.matmul(x, W1) + b1)
    second_hidden = tf.nn.relu(tf.matmul(first_hidden, W2) + b2)
    return tf.matmul(second_hidden, W3) + b3

# Loss: batch-averaged softmax cross-entropy
def compute_loss(logits, labels):
    """Mean softmax cross-entropy of `logits` against one-hot `labels`."""
    example_losses = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(example_losses)

# Accuracy over a whole batched dataset
def compute_accuracy(dataset):
    """Fraction of examples in `dataset` that `model` classifies correctly.

    Args:
        dataset: iterable of (images, labels) batches with one-hot labels.

    Returns:
        Correct predictions divided by the number of examples seen.

    Raises:
        ZeroDivisionError: if `dataset` yields no batches.
    """
    hit_count, seen = 0, 0
    for batch_images, batch_labels in dataset:
        predicted = tf.argmax(model(batch_images), axis=1)
        expected = tf.argmax(batch_labels, axis=1)
        is_hit = tf.cast(tf.equal(predicted, expected), tf.float32)
        hit_count += tf.reduce_sum(is_hit).numpy()
        seen += batch_images.shape[0]
    return hit_count / seen

# Optimizer: SGD with learning rate 0.001 and momentum 0.9.
# NOTE(review): the In [4] run builds SGD without a momentum argument, so this
# run differs from it in more than the hidden-layer sizes.
optimizer = tf.optimizers.SGD(learning_rate=0.001, momentum=0.9)

# One optimisation step on a single batch
def train_step(images, labels):
    """Run forward and backward passes on one batch and update all six variables.

    Args:
        images: batch of flattened images.
        labels: matching batch of one-hot labels.

    Returns:
        The batch loss tensor computed before the update was applied.
    """
    variables = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        loss = compute_loss(model(images), labels)
    optimizer.apply_gradients(zip(tape.gradient(loss, variables), variables))
    return loss

# Training loop.
# NOTE: the "Loss" printed per epoch is the SUM of the per-batch mean losses
# over the epoch, not an average.
epochs = 50
for epoch in range(epochs):
    total_loss = 0.0
    for images, labels in train_dataset:
        loss = train_step(images, labels)
        total_loss = total_loss + loss.numpy()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Accuracy of the final weights on both splits
train_accuracy = compute_accuracy(train_dataset)
test_accuracy = compute_accuracy(test_dataset)

print(f"Final Training Accuracy: {train_accuracy:.4f}")
print(f"Final Test Accuracy: {test_accuracy:.4f}")


Epoch 1, Loss: 2689.8076
Epoch 2, Loss: 1293.6915
Epoch 3, Loss: 960.7744
Epoch 4, Loss: 771.6429
Epoch 5, Loss: 651.0601
Epoch 6, Loss: 556.9665
Epoch 7, Loss: 491.0601
Epoch 8, Loss: 428.8547
Epoch 9, Loss: 385.7844
Epoch 10, Loss: 346.3165
Epoch 11, Loss: 310.9530
Epoch 12, Loss: 276.7634
Epoch 13, Loss: 253.1478
Epoch 14, Loss: 225.5344
Epoch 15, Loss: 205.9539
Epoch 16, Loss: 187.3332
Epoch 17, Loss: 167.7503
Epoch 18, Loss: 149.6699
Epoch 19, Loss: 137.9636
Epoch 20, Loss: 124.3720
Epoch 21, Loss: 110.6074
Epoch 22, Loss: 100.1208
Epoch 23, Loss: 88.6851
Epoch 24, Loss: 80.7181
Epoch 25, Loss: 72.9712
Epoch 26, Loss: 64.9328
Epoch 27, Loss: 57.0403
Epoch 28, Loss: 51.9564
Epoch 29, Loss: 46.0276
Epoch 30, Loss: 40.5365
Epoch 31, Loss: 37.7831
Epoch 32, Loss: 33.8392
Epoch 33, Loss: 30.7200
Epoch 34, Loss: 27.5514
Epoch 35, Loss: 24.8358
Epoch 36, Loss: 22.6352
Epoch 37, Loss: 21.7637
Epoch 38, Loss: 19.4568
Epoch 39, Loss: 18.1118
Epoch 40, Loss: 16.3238
Epoch 41, Loss: 14.9880
E

In [6]:
#Third Combination : Hidden Layers: (100, 100), Learning Rate: 0.1, Momentum: 0.9

import tensorflow as tf
import tensorflow_datasets as tfds

# Load the MNIST splits as (image, label) pairs; preprocessing is applied
# further down through `preprocess`.
dataset, info = tfds.load("mnist", as_supervised=True, with_info=True)
train_dataset, test_dataset = dataset["train"], dataset["test"]

BATCH_SIZE = 10

# Fix the global TensorFlow seed before any random op (weight initialisation,
# dataset shuffling) is created, so re-running this cell starts from the same
# random state instead of a fresh one every time.
SEED = 42
tf.random.set_seed(SEED)

def preprocess(image, label):
    """Normalise and flatten an image and one-hot encode its label.

    Args:
        image: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in [0, 255] -- TODO confirm).
        label: integer class id.

    Returns:
        Tuple of the flat float32 image vector and a depth-10 one-hot label.
    """
    unit_range = tf.cast(image, tf.float32) / 255.0
    return tf.reshape(unit_range, [-1]), tf.one_hot(label, depth=10)

# Input pipeline: map -> shuffle (training split only) -> batch
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(preprocess)
test_dataset = test_dataset.batch(BATCH_SIZE)

# Layer widths of the 784 -> 100 -> 100 -> 10 network
input_dim = 784
hidden_dim1 = 100
hidden_dim2 = 100
output_dim = 10

# Trainable parameters: normal(stddev=0.1) weights, zero biases
W1 = tf.Variable(initial_value=tf.random.normal([input_dim, hidden_dim1], stddev=0.1))
b1 = tf.Variable(initial_value=tf.zeros([hidden_dim1]))
W2 = tf.Variable(initial_value=tf.random.normal([hidden_dim1, hidden_dim2], stddev=0.1))
b2 = tf.Variable(initial_value=tf.zeros([hidden_dim2]))
W3 = tf.Variable(initial_value=tf.random.normal([hidden_dim2, output_dim], stddev=0.1))
b3 = tf.Variable(initial_value=tf.zeros([output_dim]))

# Forward pass: two ReLU hidden layers followed by a linear output layer
def model(x):
    """Return raw class logits for a batch of flattened images.

    Args:
        x: float tensor whose last dimension equals `input_dim`.

    Returns:
        Unnormalised logits with `output_dim` columns.
    """
    h1_pre = tf.matmul(x, W1) + b1
    h1 = tf.nn.relu(h1_pre)
    h2_pre = tf.matmul(h1, W2) + b2
    h2 = tf.nn.relu(h2_pre)
    return tf.matmul(h2, W3) + b3

# Loss: batch-averaged softmax cross-entropy
def compute_loss(logits, labels):
    """Mean softmax cross-entropy of `logits` against one-hot `labels`."""
    losses = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(losses)

# Accuracy over a whole batched dataset
def compute_accuracy(dataset):
    """Fraction of examples in `dataset` that `model` classifies correctly.

    Args:
        dataset: iterable of (images, labels) batches with one-hot labels.

    Returns:
        Correct predictions divided by the number of examples seen.

    Raises:
        ZeroDivisionError: if `dataset` yields no batches.
    """
    n_correct, n_examples = 0, 0
    for inputs, targets in dataset:
        agreement = tf.equal(tf.argmax(model(inputs), axis=1), tf.argmax(targets, axis=1))
        n_correct += tf.reduce_sum(tf.cast(agreement, tf.float32)).numpy()
        n_examples += inputs.shape[0]
    return n_correct / n_examples

# Optimizer: SGD with learning rate 0.1 and momentum 0.9.
# NOTE(review): In [1] also uses learning rate 0.1 but without a momentum
# argument (and with a different architecture), so momentum is an extra
# variable in this run.
optimizer = tf.optimizers.SGD(learning_rate=0.1, momentum=0.9)

# One optimisation step on a single batch
def train_step(images, labels):
    """Run forward and backward passes on one batch and update all six variables.

    Args:
        images: batch of flattened images.
        labels: matching batch of one-hot labels.

    Returns:
        The batch loss tensor computed before the update was applied.
    """
    weights_and_biases = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        batch_logits = model(images)
        loss = compute_loss(batch_logits, labels)
    grads = tape.gradient(loss, weights_and_biases)
    optimizer.apply_gradients(zip(grads, weights_and_biases))
    return loss

# Training loop.
# NOTE: the "Loss" printed per epoch is the SUM of the per-batch mean losses
# over the epoch, not an average.
epochs = 50
for epoch in range(epochs):
    total_loss = 0.0
    for images, labels in train_dataset:
        loss = train_step(images, labels)
        total_loss = total_loss + loss.numpy()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Evaluation of the final weights on both splits
train_accuracy = compute_accuracy(train_dataset)
test_accuracy = compute_accuracy(test_dataset)

print(f"Final Training Accuracy: {train_accuracy:.4f}")
print(f"Final Test Accuracy: {test_accuracy:.4f}")


Epoch 1, Loss: 12708.3774
Epoch 2, Loss: 13936.1695
Epoch 3, Loss: 13943.1357
Epoch 4, Loss: 13946.1079
Epoch 5, Loss: 13943.6817
Epoch 6, Loss: 13942.0946
Epoch 7, Loss: 13944.3285
Epoch 8, Loss: 13939.1488
Epoch 9, Loss: 13942.9906
Epoch 10, Loss: 13944.3337
Epoch 11, Loss: 13947.6421
Epoch 12, Loss: 13939.2160
Epoch 13, Loss: 13948.8333
Epoch 14, Loss: 13942.3898
Epoch 15, Loss: 13947.9921
Epoch 16, Loss: 13952.3817
Epoch 17, Loss: 13942.8294
Epoch 18, Loss: 13945.2337
Epoch 19, Loss: 13945.3121
Epoch 20, Loss: 13943.2357
Epoch 21, Loss: 13947.4088
Epoch 22, Loss: 13939.5406
Epoch 23, Loss: 13939.1945
Epoch 24, Loss: 13941.3369
Epoch 25, Loss: 13937.5437
Epoch 26, Loss: 13940.6041
Epoch 27, Loss: 13948.4225
Epoch 28, Loss: 13946.8216
Epoch 29, Loss: 13940.1649
Epoch 30, Loss: 13943.2302
Epoch 31, Loss: 13939.6182
Epoch 32, Loss: 13943.6881
Epoch 33, Loss: 13943.9917
Epoch 34, Loss: 13945.4014
Epoch 35, Loss: 13940.7708
Epoch 36, Loss: 13938.9744
Epoch 37, Loss: 13944.9024
Epoch 38, 

In [7]:
#Fourth Combination : Hidden Layers: (100, 100), Learning Rate: 1, Momentum: 0.9

import tensorflow as tf
import tensorflow_datasets as tfds

# Load the MNIST splits as (image, label) pairs; preprocessing is applied
# further down through `preprocess`.
dataset, info = tfds.load("mnist", as_supervised=True, with_info=True)
train_dataset, test_dataset = dataset["train"], dataset["test"]

BATCH_SIZE = 10

# Fix the global TensorFlow seed before any random op (weight initialisation,
# dataset shuffling) is created, so re-running this cell starts from the same
# random state instead of a fresh one every time.
SEED = 42
tf.random.set_seed(SEED)

def preprocess(image, label):
    """Convert one raw example into (flat float image, one-hot label).

    Args:
        image: image tensor; cast to float32 and divided by 255
            (assumes raw pixel values in [0, 255] -- TODO confirm).
        label: integer class id.

    Returns:
        Tuple of the image reshaped to a 1-D float32 vector and the label as
        a depth-10 one-hot vector.
    """
    float_image = tf.cast(image, tf.float32)
    scaled_image = float_image / 255.0
    flat_image = tf.reshape(scaled_image, [-1])
    encoded_label = tf.one_hot(label, depth=10)
    return flat_image, encoded_label

# Input pipeline: map -> shuffle (training split only) -> batch
train_dataset = train_dataset.map(preprocess)
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(preprocess)
test_dataset = test_dataset.batch(BATCH_SIZE)

# Layer widths of the 784 -> 100 -> 100 -> 10 network
input_dim = 784
hidden_dim1 = 100
hidden_dim2 = 100
output_dim = 10

# Trainable parameters: normal(stddev=0.1) weights, zero biases
W1 = tf.Variable(tf.random.normal(shape=[input_dim, hidden_dim1], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[hidden_dim1]))
W2 = tf.Variable(tf.random.normal(shape=[hidden_dim1, hidden_dim2], stddev=0.1))
b2 = tf.Variable(tf.zeros(shape=[hidden_dim2]))
W3 = tf.Variable(tf.random.normal(shape=[hidden_dim2, output_dim], stddev=0.1))
b3 = tf.Variable(tf.zeros(shape=[output_dim]))

# Forward pass: two ReLU hidden layers followed by a linear output layer
def model(x):
    """Return raw class logits for a batch of flattened images.

    Args:
        x: float tensor whose last dimension equals `input_dim`.

    Returns:
        Unnormalised logits with `output_dim` columns.
    """
    layer1_out = tf.nn.relu(tf.matmul(x, W1) + b1)
    layer2_out = tf.nn.relu(tf.matmul(layer1_out, W2) + b2)
    # No activation on the output layer: softmax is applied inside the loss.
    return tf.matmul(layer2_out, W3) + b3

# Loss: batch-averaged softmax cross-entropy
def compute_loss(logits, labels):
    """Mean softmax cross-entropy of `logits` against one-hot `labels`."""
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(xent)

# Accuracy over a whole batched dataset
def compute_accuracy(dataset):
    """Fraction of examples in `dataset` that `model` classifies correctly.

    Args:
        dataset: iterable of (images, labels) batches with one-hot labels.

    Returns:
        Correct predictions divided by the number of examples seen.

    Raises:
        ZeroDivisionError: if `dataset` yields no batches.
    """
    matched, counted = 0, 0
    for image_batch, label_batch in dataset:
        guess = tf.argmax(model(image_batch), axis=1)
        truth = tf.argmax(label_batch, axis=1)
        batch_matched = tf.reduce_sum(tf.cast(tf.equal(guess, truth), tf.float32))
        matched += batch_matched.numpy()
        counted += image_batch.shape[0]
    return matched / counted

# Optimizer: SGD with learning rate 1.0 and momentum 0.9.
# NOTE(review): momentum is set here in addition to the learning rate, so it
# is part of this run's configuration.
optimizer = tf.optimizers.SGD(learning_rate=1.0, momentum=0.9)

# One optimisation step on a single batch
def train_step(images, labels):
    """Run forward and backward passes on one batch and update all six variables.

    Args:
        images: batch of flattened images.
        labels: matching batch of one-hot labels.

    Returns:
        The batch loss tensor computed before the update was applied.
    """
    model_vars = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        loss = compute_loss(model(images), labels)
    grads_and_vars = zip(tape.gradient(loss, model_vars), model_vars)
    optimizer.apply_gradients(grads_and_vars)
    return loss

# Training loop.
# NOTE: the "Loss" printed per epoch is the SUM of the per-batch mean losses
# over the epoch, not an average.
epochs = 50
for epoch in range(epochs):
    total_loss = 0.0
    for images, labels in train_dataset:
        loss = train_step(images, labels)
        total_loss = total_loss + loss.numpy()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Evaluation of the final weights on both splits
train_accuracy = compute_accuracy(train_dataset)
test_accuracy = compute_accuracy(test_dataset)

print(f"Final Training Accuracy: {train_accuracy:.4f}")
print(f"Final Test Accuracy: {test_accuracy:.4f}")


Epoch 1, Loss: 15323.1669
Epoch 2, Loss: 15234.9803
Epoch 3, Loss: 15468.4656
Epoch 4, Loss: 15249.9448
Epoch 5, Loss: 15296.8790
Epoch 6, Loss: 15310.8297
Epoch 7, Loss: 15340.0094
Epoch 8, Loss: 15282.5170
Epoch 9, Loss: 15314.9356
Epoch 10, Loss: 15287.7777
Epoch 11, Loss: 15294.5677
Epoch 12, Loss: 15284.1191
Epoch 13, Loss: 15289.7431
Epoch 14, Loss: 15322.3182
Epoch 15, Loss: 15294.1694
Epoch 16, Loss: 15268.8878
Epoch 17, Loss: 15317.5412
Epoch 18, Loss: 15266.9840
Epoch 19, Loss: 15318.1845
Epoch 20, Loss: 15298.5966
Epoch 21, Loss: 15294.5437
Epoch 22, Loss: 15307.0999
Epoch 23, Loss: 15267.2593
Epoch 24, Loss: 15300.7331
Epoch 25, Loss: 15249.0122
Epoch 26, Loss: 15260.6246
Epoch 27, Loss: 15329.3653
Epoch 28, Loss: 15272.5815
Epoch 29, Loss: 15304.8840
Epoch 30, Loss: 15296.4084
Epoch 31, Loss: 15303.9543
Epoch 32, Loss: 15275.1298
Epoch 33, Loss: 15326.1797
Epoch 34, Loss: 15278.3619
Epoch 35, Loss: 15317.1415
Epoch 36, Loss: 15290.5596
Epoch 37, Loss: 15328.3029
Epoch 38, 