In [71]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models, losses, optimizers

# Load dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Normalize
x_train = tf.cast(x_train, tf.float32) / 255.0
x_test = tf.cast(x_test, tf.float32) / 255.0

# Flatten
x_train = tf.reshape(x_train, (-1, 784))
x_test = tf.reshape(x_test, (-1, 784))

# Convert labels
y_train = tf.cast(y_train, tf.int64)
y_test = tf.cast(y_test, tf.int64)


In [2]:
images = tf.cast(x_train, tf.float32) / 255.0
images = tf.expand_dims(images, axis=1)
images.shape

TensorShape([60000, 1, 784])

In [3]:
batch_size = 32
batch_images = images[:batch_size]

print(batch_images.shape)

(32, 1, 784)


In [4]:
flattened_images = tf.reshape(batch_images, (batch_size, -1))

print( flattened_images.shape)


(32, 784)


In [5]:
input_features = 784
output_neurons = 10  # Fashion-MNIST has 10 classes

W = tf.Variable(tf.random.normal((output_neurons, input_features)))
B = tf.Variable(tf.random.normal((output_neurons,)))

In [6]:
Y = tf.matmul(flattened_images, tf.transpose(W)) + B
Y.shape

TensorShape([32, 10])

In [7]:
with tf.GradientTape() as tape:
    
    Y = tf.matmul(flattened_images, tf.transpose(W)) + B
    
    # Dummy loss
    loss = tf.reduce_mean(Y)

# Compute gradients
grad_W, grad_B = tape.gradient(loss, [W, B])

print("Gradient Shape (W):", grad_W.shape)
print("Gradient Shape (B):", grad_B.shape)


Gradient Shape (W): (10, 784)
Gradient Shape (B): (10,)


In [62]:
import tensorflow as tf

class SimpleANN(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        
        self.fc1 = tf.keras.layers.Dense(256, activation='relu')
        self.fc2 = tf.keras.layers.Dense(128, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.out = tf.keras.layers.Dense(10)  # logits
    
    def call(self, x, training=False):
        
        x = self.fc1(x)
        x = self.fc2(x)
        
        return self.out(x)


In [9]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model = SimpleANN()

learning_rate = 0.001
epochs = 5
batch_size = 32


In [12]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.shuffle(60000).batch(batch_size)

for epoch in range(epochs):
    
    total_loss = 0
    batches = 0
    
    for X_batch, y_batch in dataset:
        
        with tf.GradientTape() as tape:
            
            # Use model() instead of model.call()
            predictions = model(X_batch, training=True)
            
            loss = loss_fn(y_batch, predictions)
        
        # ðŸ”¥ Get ALL trainable variables automatically
        grads = tape.gradient(loss, model.trainable_variables)
        
        # ðŸ”¥ Manual Gradient Descent
        for param, grad in zip(model.trainable_variables, grads):
            param.assign_sub(learning_rate * grad)
        
        total_loss += loss.numpy()
        batches += 1
    
    print(f"Epoch {epoch+1}, Loss: {total_loss/batches:.4f}")


Epoch 1, Loss: 1.7903
Epoch 2, Loss: 1.2053
Epoch 3, Loss: 1.0087
Epoch 4, Loss: 0.9150
Epoch 5, Loss: 0.8470


In [13]:
total_loss = 0
total_correct = 0
total_samples = 0
    

predicted_classes = tf.argmax(predictions, axis=1)
        
correct = tf.equal(predicted_classes, y_batch)
total_correct += tf.reduce_sum(tf.cast(correct, tf.int32))
total_samples += y_batch.shape[0]

total_loss += loss.numpy()
epoch_accuracy = total_correct / total_samples
print(f"Accuracy: {epoch_accuracy:.4f}")

Accuracy: 0.6875


In [63]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train[:100], y_train[:100])
).batch(16)

val_ds = tf.data.Dataset.from_tensor_slices((x_test[:1000],y_test[:1000])).batch(16)

In [64]:
model = SimpleANN()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


In [65]:
EPOCHS = 30
LR = 0.01

train_acc_history = []
val_acc_history = []
train_loss_history = []
val_loss_history = []


In [66]:
def compute_accuracy(model, dataset):
    
    correct = 0
    total = 0
    
    for images, labels in dataset:
        
        logits = model(images, training=False)  # VERY important!
        preds = tf.argmax(logits, axis=1, output_type=tf.int64)
        
        correct += tf.reduce_sum(tf.cast(preds == labels, tf.int32))
        total += labels.shape[0]
    
    return (correct / total).numpy()


In [67]:
for epoch in range(EPOCHS):

    total_loss = 0
    batches = 0

    
    for images, labels in train_ds:
        
        with tf.GradientTape() as tape:
            
            logits = model(images, training=True)
            loss = loss_fn(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)

        for param, grad in zip(model.trainable_variables, grads):
            param.assign_sub(LR * grad)

        total_loss += loss.numpy()
        batches += 1

    avg_train_loss = total_loss / batches
    
    
    val_loss_total = 0
    val_batches = 0
    
    for images, labels in val_ds:
        
        logits = model(images, training=False)
        loss = loss_fn(labels, logits)
        
        val_loss_total += loss.numpy()
        val_batches += 1

    avg_val_loss = val_loss_total / val_batches 
    
    train_acc = compute_accuracy(model, train_ds)
    val_acc = compute_accuracy(model, val_ds)
    
    train_loss_history.append(avg_train_loss)
    val_loss_history.append(avg_val_loss)
    
    train_acc_history.append(train_acc)
    val_acc_history.append(val_acc)

    
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")



Epoch 1/30
Train Loss: 2.3131 | Train Acc: 0.1500
Val Loss: 2.1634 | Val Acc: 0.1930

Epoch 2/30
Train Loss: 2.0688 | Train Acc: 0.2600
Val Loss: 2.0491 | Val Acc: 0.2440

Epoch 3/30
Train Loss: 1.9025 | Train Acc: 0.3900
Val Loss: 1.9509 | Val Acc: 0.3440

Epoch 4/30
Train Loss: 1.7695 | Train Acc: 0.5100
Val Loss: 1.8600 | Val Acc: 0.4050

Epoch 5/30
Train Loss: 1.6555 | Train Acc: 0.5400
Val Loss: 1.7772 | Val Acc: 0.4330

Epoch 6/30
Train Loss: 1.5532 | Train Acc: 0.5900
Val Loss: 1.7016 | Val Acc: 0.4790

Epoch 7/30
Train Loss: 1.4603 | Train Acc: 0.6900
Val Loss: 1.6357 | Val Acc: 0.4960

Epoch 8/30
Train Loss: 1.3794 | Train Acc: 0.7300
Val Loss: 1.5755 | Val Acc: 0.5230

Epoch 9/30
Train Loss: 1.3059 | Train Acc: 0.7500
Val Loss: 1.5214 | Val Acc: 0.5470

Epoch 10/30
Train Loss: 1.2392 | Train Acc: 0.7700
Val Loss: 1.4699 | Val Acc: 0.5700

Epoch 11/30
Train Loss: 1.1766 | Train Acc: 0.7900
Val Loss: 1.4230 | Val Acc: 0.5900

Epoch 12/30
Train Loss: 1.1212 | Train Acc: 0.8100


In [68]:
 for images, labels in train_ds:
    print(labels[:10])
    break


tf.Tensor([9 0 0 3 0 2 7 2 5 5], shape=(10,), dtype=int64)


In [69]:
class SimpleCNN(tf.keras.Model):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = layers.Conv2D(32, 3, activation='relu')
        self.pool1 = layers.MaxPooling2D()
        self.conv2 = layers.Conv2D(64, 3, activation='relu')
        self.pool2 = layers.MaxPooling2D()
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(128, activation='relu')
        self.dropout = layers.Dropout(0.3)
        self.fc2 = layers.Dense(10)  # logits

    def call(self, x, training=False):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x, training=training)
        return self.fc2(x)
     

In [72]:
EPOCHS = 5
LR = 0.001

model_cnn = SimpleCNN()
loss_fn = losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = optimizers.Adam(learning_rate=LR)
train_acc_history = []
val_acc_history = []
train_loss_history = []
val_loss_history = []



In [75]:
vall_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

In [76]:
for epoch in range(EPOCHS):

    total_loss = 0
    batches = 0

    
    for images, labels in dataset:
        
        with tf.GradientTape() as tape:
            
            logits = model(images, training=True)
            loss = loss_fn(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)

        for param, grad in zip(model.trainable_variables, grads):
            param.assign_sub(LR * grad)

        total_loss += loss.numpy()
        batches += 1

    avg_train_loss = total_loss / batches
    
    
    val_loss_total = 0
    val_batches = 0
    
    for images, labels in vall_ds:
        
        logits = model(images, training=False)
        loss = loss_fn(labels, logits)
        
        val_loss_total += loss.numpy()
        val_batches += 1

    avg_val_loss = val_loss_total / val_batches 
    
    train_acc = compute_accuracy(model, dataset)
    val_acc = compute_accuracy(model, vall_ds)
    
    train_loss_history.append(avg_train_loss)
    val_loss_history.append(avg_val_loss)
    
    train_acc_history.append(train_acc)
    val_acc_history.append(val_acc)

    
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")



Epoch 1/5
Train Loss: 0.8305 | Train Acc: 0.7605
Val Loss: 0.7548 | Val Acc: 0.7497

Epoch 2/5
Train Loss: 0.6921 | Train Acc: 0.7855
Val Loss: 0.6681 | Val Acc: 0.7755

Epoch 3/5
Train Loss: 0.6259 | Train Acc: 0.8015
Val Loss: 0.6203 | Val Acc: 0.7925

Epoch 4/5
Train Loss: 0.5842 | Train Acc: 0.8133
Val Loss: 0.5858 | Val Acc: 0.8023

Epoch 5/5
Train Loss: 0.5549 | Train Acc: 0.8204
Val Loss: 0.5628 | Val Acc: 0.8102
