In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
train_dataset=datasets.FashionMNIST(
    './data',train=True,download=False,transform=ToTensor()
)
test_dataset=datasets.FashionMNIST(
    './data',train=False,download=False,transform=ToTensor()
)

In [3]:
batch_size=64
train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

In [4]:
for X, y in train_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [5]:
x=[1,2,3,4]
ans=[i**2 if i%2==0 else i for i in x]
ans

[1, 4, 3, 16]

In [6]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using {device} device")

Using cpu device


In [7]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten=nn.Flatten()
        self.linear_stack=nn.Sequential(
            nn.Linear(28*28,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10)
        )
    def forward(self,x):
        x=self.flatten(x)#->[64,28*28]
        x=self.linear_stack(x)#->[64,10]
        return x

In [8]:
nn_model = Model().to(device)
print(nn_model)


Model(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [9]:
nn_model.parameters

<bound method Module.parameters of Model(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)>

In [10]:
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(nn_model.parameters(),lr=0.003)

In [11]:
for batch,(data,target) in enumerate(train_loader):
    X, y = data.to(device), target.to(device)
    if batch % 100 == 0:
        print(batch,(batch + 1) * len(X),len(X))

0 64 64
100 6464 64
200 12864 64
300 19264 64
400 25664 64
500 32064 64
600 38464 64
700 44864 64
800 51264 64
900 57664 64


In [12]:
def training_loop(model,train_loader):
    size=len(train_loader.dataset)
    model.train()
    running_loss=0
    for batch,(data,target) in enumerate(train_loader):
        X, y = data.to(device), target.to(device)
        optimizer.zero_grad()
        output=model(data)
        loss=criterion(output,y)
        running_loss=loss
        
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss = loss.item()
            running_loss+=loss
            print(f"loss: {running_loss/100:>7f}")

In [13]:
def evaluation(model,test_loader):
    model.eval()
    num_batches=len(test_loader)
    size=len(test_loader.dataset)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data,tar in test_loader:
            img,tar=data.to(device),tar.to(device)
            output=nn_model(img)

            correct+=(output.argmax(1)==tar).type(torch.float).sum().item()
            test_loss+=criterion(output,tar).item()

    correct/=size
    test_loss/=num_batches
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [14]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    training_loop(nn_model,train_loader)
    evaluation(nn_model,test_loader)
print("Done!")

Epoch 1
-------------------------------
loss: 0.046302
loss: 0.045263
loss: 0.044385
loss: 0.043640
loss: 0.042709
loss: 0.041282
loss: 0.039390
loss: 0.036677
loss: 0.036076
loss: 0.032331
Test Error: 
 Accuracy: 59.4%, Avg loss: 1.579214 

Epoch 2
-------------------------------
loss: 0.030645
loss: 0.029764
loss: 0.029112
loss: 0.026650
loss: 0.024622
loss: 0.024720
loss: 0.024380
loss: 0.022289
loss: 0.021303
loss: 0.021664
Test Error: 
 Accuracy: 66.2%, Avg loss: 1.001348 

Epoch 3
-------------------------------
loss: 0.018482
loss: 0.016219
loss: 0.019030
loss: 0.016446
loss: 0.020312
loss: 0.018464
loss: 0.014621
loss: 0.016406
loss: 0.015059
loss: 0.017541
Test Error: 
 Accuracy: 68.3%, Avg loss: 0.828362 

Epoch 4
-------------------------------
loss: 0.017061
loss: 0.015882
loss: 0.014189
loss: 0.016206
loss: 0.016012
loss: 0.014179
loss: 0.017628
loss: 0.014901
loss: 0.013993
loss: 0.014891
Test Error: 
 Accuracy: 73.2%, Avg loss: 0.743117 

Epoch 5
------------------------

In [17]:
torch.save(nn_model.state_dict(),"basic_model.pth")
print("Saved PyTorch Model State to basic_model.pth")

Saved PyTorch Model State to basic_model.pth


In [19]:
nn_model_new=Model().to(device)
nn_model_new.load_state_dict(torch.load("basic_model.pth"))

<All keys matched successfully>

In [20]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]


In [23]:
test_dataset[0][0].shape,test_dataset[0][1].shape

torch.Size([1, 28, 28])

In [None]:
x_new,y_new=test_loader[0][0],test_dataset[0][1]

In [25]:
nn_model_new.eval()
x, y = test_dataset[0][0], test_dataset[0][1]
with torch.no_grad():
    x = x.to(device)
    pred = nn_model_new(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
