In [2]:
import torch
from torchvision import datasets , transforms

In [3]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [4]:
train_data = datasets.FashionMNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True
)
test_data = datasets.FashionMNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True
)

100%|██████████| 26.4M/26.4M [00:01<00:00, 18.2MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 268kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 5.06MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 30.6MB/s]


In [5]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_data , batch_size = 64 , shuffle = True )
test_dataloader = DataLoader(test_data  , shuffle = False  , batch_size = 64)

In [6]:
# image , label = (next(iter(train_dataloader)))
images, labels = next(iter(train_dataloader))
print(images.shape)   # torch.Size([32, 1, 28, 28])
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [7]:
from torch import nn
class ANN(nn.Module) :
  def __init__(self , input_size ):

    super(ANN , self).__init__()

    self.model = nn.Sequential(

      nn.Linear(in_features=input_size , out_features=64) ,
      nn.BatchNorm1d(64) ,
      nn.ReLU() ,
      nn.Dropout(p = 0.3) ,


      nn.Linear(in_features=64 , out_features=128) ,
      nn.BatchNorm1d(128) ,
      nn.ReLU() ,
      nn.Dropout(p = 0.3) ,

      nn.Linear(in_features=128 , out_features=64) ,
      nn.BatchNorm1d(64) ,
      nn.ReLU() ,
      nn.Dropout(p = 0.3) ,

      nn.Linear(in_features=64 , out_features=10)

    )

  def forward(self , x) :

    x = x.view(x.size(0), -1)   # flatten from [B, 1, 28, 28] → [B, 784]
    return self.model(x)



In [8]:
model = ANN(input_size= 28 * 28 )

In [9]:
import torch
epochs = 10
lr = 0.01
loss_fn = nn.CrossEntropyLoss()
Adam_optim = torch.optim.Adam(model.parameters() , lr = lr )
SGD_optim = torch.optim.SGD(model.parameters() , lr = lr )
RMSProp_optim = torch.optim.RMSprop(model.parameters() , lr = lr )

In [12]:
print("TRAINING WITH ADAM OPTIMIZER")
for epoch in range(epochs):
    model.train()
    Adam_train_loss = 0.0
    Adam_test_loss = 0.0
    for X_batch, y_batch in train_dataloader:
        outputs = model(X_batch)
        loss = loss_fn(outputs, y_batch)
        Adam_optim.zero_grad()
        loss.backward()
        Adam_optim.step()
        Adam_train_loss += loss.item()

    model.eval()
    with torch.inference_mode():
        for X_batch, y_batch in test_dataloader:
            outputs = model(X_batch)
            loss = loss_fn(outputs, y_batch)
            Adam_test_loss += loss.item()

    avg_train_loss = Adam_train_loss / len(train_dataloader)
    avg_test_loss = Adam_test_loss / len(test_dataloader)
    print(f"Epoch: {epoch} , Train_loss: {avg_train_loss:.4f} , Test_loss: {avg_test_loss:.4f}")



print("\nTRAINING WITH RMSprop OPTIMIZER")
for epoch in range(epochs):
    model.train()
    RMS_train_loss = 0.0
    RMS_test_loss = 0.0
    for X_batch, y_batch in train_dataloader:
        outputs = model(X_batch)
        loss = loss_fn(outputs, y_batch)
        RMSProp_optim.zero_grad()
        loss.backward()
        RMSProp_optim.step()
        RMS_train_loss += loss.item()

    model.eval()
    with torch.inference_mode():
        for X_batch, y_batch in test_dataloader:
            outputs = model(X_batch)
            loss = loss_fn(outputs, y_batch)
            RMS_test_loss += loss.item()

    avg_train_loss = RMS_train_loss / len(train_dataloader)
    avg_test_loss = RMS_test_loss / len(test_dataloader)
    print(f"Epoch: {epoch} , Train_loss: {avg_train_loss:.4f} , Test_loss: {avg_test_loss:.4f}")



print("\nTRAINING WITH SGD OPTIMIZER")
for epoch in range(epochs):
    model.train()
    SGD_train_loss = 0.0
    SGD_test_loss = 0.0
    for X_batch, y_batch in train_dataloader:
        outputs = model(X_batch)
        loss = loss_fn(outputs, y_batch)
        SGD_optim.zero_grad()
        loss.backward()
        SGD_optim.step()
        SGD_train_loss += loss.item()

    model.eval()
    with torch.inference_mode():
        for X_batch, y_batch in test_dataloader:
            outputs = model(X_batch)
            loss = loss_fn(outputs, y_batch)
            SGD_test_loss += loss.item()

    avg_train_loss = SGD_train_loss / len(train_dataloader)
    avg_test_loss = SGD_test_loss / len(test_dataloader)
    print(f"Epoch: {epoch} , Train_loss: {avg_train_loss:.4f} , Test_loss: {avg_test_loss:.4f}")



TRAINING WITH ADAM OPTIMIZER
Epoch: 0 , Train_loss: 0.4121 , Test_loss: 0.3644
Epoch: 1 , Train_loss: 0.4075 , Test_loss: 0.3696
Epoch: 2 , Train_loss: 0.4066 , Test_loss: 0.3541
Epoch: 3 , Train_loss: 0.3941 , Test_loss: 0.3590
Epoch: 4 , Train_loss: 0.3944 , Test_loss: 0.3609
Epoch: 5 , Train_loss: 0.3867 , Test_loss: 0.3541
Epoch: 6 , Train_loss: 0.3813 , Test_loss: 0.3515
Epoch: 7 , Train_loss: 0.3829 , Test_loss: 0.3380
Epoch: 8 , Train_loss: 0.3754 , Test_loss: 0.3434
Epoch: 9 , Train_loss: 0.3715 , Test_loss: 0.3446

TRAINING WITH RMSprop OPTIMIZER
Epoch: 0 , Train_loss: 0.3836 , Test_loss: 0.3490
Epoch: 1 , Train_loss: 0.3723 , Test_loss: 0.3447
Epoch: 2 , Train_loss: 0.3659 , Test_loss: 0.3418
Epoch: 3 , Train_loss: 0.3631 , Test_loss: 0.3380
Epoch: 4 , Train_loss: 0.3609 , Test_loss: 0.3401
Epoch: 5 , Train_loss: 0.3586 , Test_loss: 0.3406
Epoch: 6 , Train_loss: 0.3593 , Test_loss: 0.3378
Epoch: 7 , Train_loss: 0.3538 , Test_loss: 0.3376
Epoch: 8 , Train_loss: 0.3535 , Test_l

* `SGD` performed the best overall — its test loss is lowest and stable by the end.
* `Adam` showed fastest convergence, but plateaued early around 0.34–0.35 test loss.
* `RMSprop` sits between Adam and SGD — moderate speed and stability.