implementing a binary logistic regression model using TensorFlow for classifying handwritten digits from the MNIST dataset focusing on the digit "1" versus others and this model includes multiple hidden layers (ReLU activations) and uses the Adam optimizer for updating weights during training and evaluating the model's accuracy after each epoch and on the test set.

CUSTOM ANN CODE ON MNIST DATASET

In [1]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

import numpy as np
class LogisticRegression:
    def __init__(self, input_dim, h1, h2, h3, learning_rate=0.01):
        self.w1 = tf.Variable(tf.random.normal([input_dim, h1], stddev=0.1, dtype=tf.float32))
        self.w2 = tf.Variable(tf.random.normal([h1, h2], stddev=0.1, dtype=tf.float32))
        self.w3 = tf.Variable(tf.random.normal([h2, h3], stddev=0.1, dtype=tf.float32))
        self.w4 = tf.Variable(tf.random.normal([h3, 1], stddev=0.1, dtype=tf.float32))

        self.b1 = tf.Variable(tf.zeros([h1]))
        self.b2 = tf.Variable(tf.zeros([h2]))
        self.b3 = tf.Variable(tf.zeros([h3]))
        self.b4 = tf.Variable(tf.zeros([1]))

        self.learning_rate = learning_rate
        self.optimizer = tf.optimizers.Adam(learning_rate)

    def predict(self, x):
        hidden_output = tf.nn.relu(tf.matmul(x, self.w1) + self.b1)
        hidden_output2 = tf.nn.relu(tf.matmul(hidden_output, self.w2) + self.b2)
        hidden_output3 = tf.nn.relu(tf.matmul(hidden_output2, self.w3) + self.b3)
        logits = tf.matmul(hidden_output3, self.w4) + self.b4
        return tf.nn.sigmoid(logits)

    def loss(self, x, y):
        y_pred = self.predict(x)
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_pred, labels=y))

    def train(self, x, y):
        with tf.GradientTape() as tape:
            loss_value = self.loss(x, y)

        grads = tape.gradient(loss_value, [self.w1, self.w2, self.w3, self.w4, self.b1, self.b2, self.b3, self.b4])
        self.optimizer.apply_gradients(zip(grads, [self.w1, self.w2, self.w3, self.w4, self.b1, self.b2, self.b3, self.b4]))

    def evaluate(self, x, y):
        y_pred = self.predict(x)
        y_pred_class = tf.round(y_pred)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(y_pred_class, y), tf.float32))
        return accuracy.numpy()

x_train_flat = x_train.reshape(-1, 28 * 28).astype(np.float32)
x_test_flat = x_test.reshape(-1, 28 * 28).astype(np.float32)

y_train_binary = (y_train == 1).astype(np.float32)
y_test_binary = (y_test == 1).astype(np.float32)

input_dim = 28 * 28
h1, h2, h3 = 128, 64, 32
model = LogisticRegression(input_dim, h1, h2, h3)

epochs = 5
for epoch in range(epochs):
    for i in range(0, len(x_train_flat), 32):
        x_batch = x_train_flat[i:i + 32]
        y_batch = y_train_binary[i:i + 32].reshape(-1, 1)
        model.train(x_batch, y_batch)
    train_accuracy = model.evaluate(x_train_flat, y_train_binary.reshape(-1, 1))
    print(f'Epoch {epoch + 1}, Training Accuracy: {train_accuracy:.4f}')

test_accuracy = model.evaluate(x_test_flat, y_test_binary.reshape(-1, 1))
print(f'Test Accuracy: {test_accuracy:.4f}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1, Training Accuracy: 0.8876
Epoch 2, Training Accuracy: 0.8876
Epoch 3, Training Accuracy: 0.8876
Epoch 4, Training Accuracy: 0.8876
Epoch 5, Training Accuracy: 0.8876
Test Accuracy: 0.8865


USING KERAS

In [2]:
import tensorflow as tf
import numpy as np

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train_flat = x_train.reshape(-1, 28 * 28).astype(np.float32)
x_test_flat = x_test.reshape(-1, 28 * 28).astype(np.float32)
y_train_binary = (y_train == 1).astype(np.float32)
y_test_binary = (y_test == 1).astype(np.float32)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(28 * 28,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

epochs = 10
model.fit(x_train_flat, y_train_binary, epochs=epochs, batch_size=32)

test_loss, test_accuracy = model.evaluate(x_test_flat, y_test_binary)
print(f'Test Accuracy: {test_accuracy:.4f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9823 - loss: 0.0478
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9960 - loss: 0.0115
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9978 - loss: 0.0075
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9983 - loss: 0.0051
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0040
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9986 - loss: 0.0035
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9985 - loss: 0.0037
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9993 - loss: 0.0018
Epoch 9/10
[1m1875/1