<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_TensorFlow/blob/main/05MultilayerPerceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

In [2]:
def relu(X):
  """Rectified linear unit applied element-wise.

  Args:
    X: tensor (or tensor-like value) to rectify.

  Returns:
    The element-wise maximum of `X` and 0.
  """
  # tf.maximum is the top-level alias of tf.math.maximum.
  return tf.maximum(X, 0)

In [3]:
class MultilayerPerceptron(tf.keras.Model):
  """MLP with a single hidden layer whose parameters are declared by hand.

  The forward pass is: flatten -> affine -> ReLU -> affine. No softmax is
  applied, so the returned values are unnormalized scores.
  """

  def __init__(self, num_inputs, num_outputs, num_hiddens, sigma = 0.01):
    """Create the weights and biases of both affine layers.

    Args:
      num_inputs: number of features per example after flattening.
      num_outputs: number of output scores per example.
      num_hiddens: width of the hidden layer.
      sigma: standard deviation of the normal distribution used to
        initialize the two weight matrices (biases start at zero).
    """
    super().__init__()
    # Remembered so that `call` can flatten its input to this width.
    self.num_inputs = num_inputs

    # Hidden layer parameters: (num_inputs, num_hiddens) matrix plus bias.
    self.w1 = self.add_weight(
        shape=(num_inputs, num_hiddens),
        initializer=tf.random_normal_initializer(stddev=sigma),
        trainable=True,
    )
    self.b1 = self.add_weight(
        shape=(num_hiddens,),
        initializer=tf.zeros_initializer(),
        trainable=True,
    )

    # Output layer parameters: (num_hiddens, num_outputs) matrix plus bias.
    self.w2 = self.add_weight(
        shape=(num_hiddens, num_outputs),
        initializer=tf.random_normal_initializer(stddev=sigma),
        trainable=True,
    )
    self.b2 = self.add_weight(
        shape=(num_outputs,),
        initializer=tf.zeros_initializer(),
        trainable=True,
    )

  def call(self, X):
    """Compute the output scores for a batch of inputs.

    Args:
      X: input batch; it is reshaped to (-1, num_inputs) before use.

    Returns:
      Tensor of shape (batch, num_outputs) holding the raw scores.
    """
    flat_inputs = tf.reshape(X, (-1, self.num_inputs))
    hidden_pre_activation = tf.matmul(flat_inputs, self.w1) + self.b1
    hidden = relu(hidden_pre_activation)
    scores = tf.matmul(hidden, self.w2) + self.b2
    return scores

In [4]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Preprocess data
# Flatten every image to a 784-element vector and scale the pixel values to [0, 1].
X_train = tf.cast(tf.reshape(X_train, (-1, 784)), tf.float32) / 255.0
X_test = tf.cast(tf.reshape(X_test, (-1, 784)), tf.float32) / 255.0

# Integer class labels, cast to int64 for the sparse cross-entropy loss and accuracy metric.
y_train = tf.cast(y_train, tf.int64)
y_test = tf.cast(y_test, tf.int64)

# Hyperparameters shared by every training run in this notebook.
batch_size = 256
lr = 0.1
num_epochs = 10

# Input pipelines.
# The shuffle buffer spans the whole training set: a buffer smaller than the
# dataset only mixes examples within a sliding window, so batches would stay
# correlated with the on-disk order. The data already lives in memory, so the
# larger buffer costs one extra copy of the training tensors.
# prefetch() overlaps preparation of the next batch with the current train step.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=X_train.shape[0])
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
# NOTE: the "validation" data is the Fashion-MNIST test split; it is not shuffled.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [5]:
# Stochastic gradient descent using the learning rate `lr` defined with the other hyperparameters.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

# The model returns raw scores (no softmax), hence from_logits=True;
# the "sparse" variant takes integer class labels rather than one-hot vectors.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [6]:
# 784 matches the flattened image width produced during preprocessing;
# one hidden layer of 256 units feeds 10 output scores.
model = MultilayerPerceptron(
    num_inputs=784,
    num_hiddens=256,
    num_outputs=10,
)

In [7]:
# Register the loss, optimizer and an accuracy metric with the Keras training loop.
model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [8]:
# Train for `num_epochs` epochs; validation metrics are reported after each epoch.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=num_epochs,
)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.5288 - loss: 1.4644 - val_accuracy: 0.7329 - val_loss: 0.7131
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.7762 - loss: 0.6299 - val_accuracy: 0.7853 - val_loss: 0.6013
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8160 - loss: 0.5291 - val_accuracy: 0.8025 - val_loss: 0.5542
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8313 - loss: 0.4810 - val_accuracy: 0.8262 - val_loss: 0.4942
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8394 - loss: 0.4559 - val_accuracy: 0.8334 - val_loss: 0.4743
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8476 - loss: 0.4360 - val_accuracy: 0.8253 - val_loss: 0.4942
Epoch 7/10
[1m235/235[0m

In [9]:
# evaluate() yields the loss followed by the compiled 'accuracy' metric.
# NOTE: `val_dataset` is built from the Fashion-MNIST test split.
val_loss, val_acc = model.evaluate(
    val_dataset,
    verbose=0,  # no progress bar; only the summary line below is printed
)
print(f"Final validation accuracy: {val_acc:.4f}")

Final validation accuracy: 0.8482


In [10]:
class MLP(tf.keras.Model):
    """Single-hidden-layer MLP assembled from built-in Keras layers.

    The stack is Flatten -> Dense(num_hiddens, ReLU) -> Dense(num_outputs).
    The last layer has no activation, so the model returns raw scores.
    """

    def __init__(self, num_outputs, num_hiddens):
        """Build the layer stack.

        Args:
            num_outputs: number of units in the final Dense layer.
            num_hiddens: number of units in the hidden Dense layer.
        """
        super().__init__()
        # Flatten makes an explicit tf.reshape of the input in `call` unnecessary.
        flatten = tf.keras.layers.Flatten()
        hidden = tf.keras.layers.Dense(num_hiddens, activation='relu')
        output = tf.keras.layers.Dense(num_outputs)
        self.net = tf.keras.models.Sequential([flatten, hidden, output])

    def call(self, X):
        """Run `X` through the sequential stack and return its output."""
        return self.net(X)

In [11]:
# Fresh optimizer and loss objects for the second model
# (presumably so it does not share optimizer state with the first one).
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

# The final Dense layer has no activation, so the loss must treat outputs as logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [12]:
# Same layer sizes as the hand-written model: 256 hidden units, 10 output scores.
model_sequential = MLP(
    num_hiddens=256,
    num_outputs=10,
)

In [13]:
# Register the loss, optimizer and an accuracy metric with the Keras training loop.
model_sequential.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [14]:
# Train the Keras-layer model on the same data and schedule as the first model.
# NOTE: this rebinds `history`, replacing the first model's training history.
history = model_sequential.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=num_epochs,
)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.6814 - loss: 0.9638 - val_accuracy: 0.7626 - val_loss: 0.6541
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.8147 - loss: 0.5343 - val_accuracy: 0.8170 - val_loss: 0.5336
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8355 - loss: 0.4722 - val_accuracy: 0.8405 - val_loss: 0.4597
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8476 - loss: 0.4347 - val_accuracy: 0.8341 - val_loss: 0.4680
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.8537 - loss: 0.4185 - val_accuracy: 0.8431 - val_loss: 0.4445
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8588 - loss: 0.3980 - val_accuracy: 0.8459 - val_loss: 0.4313
Epoch 7/10
[1m235/235[0m 

In [15]:
# evaluate() yields the loss followed by the compiled 'accuracy' metric.
# NOTE: this rebinds `val_loss` / `val_acc` from the first model's evaluation.
val_loss, val_acc = model_sequential.evaluate(
    val_dataset,
    verbose=0,  # no progress bar; only the summary line below is printed
)
print(f"Final validation accuracy: {val_acc:.4f}")

Final validation accuracy: 0.8518
