In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim


In [2]:
transform=transforms.ToTensor()

In [3]:
train_data=datasets.MNIST(root='./data',download=True,transform=transform)
test_data=datasets.MNIST(root='./data',download=True,transform=transform)

In [4]:
train_loader=DataLoader(train_data,shuffle=True,batch_size=256)
test_loader=DataLoader(test_data,shuffle=True,batch_size=256)

In [5]:
images,labels=next(iter(train_loader))

In [6]:
images.shape,labels.shape

(torch.Size([256, 1, 28, 28]), torch.Size([256]))

In [16]:
model=nn.Sequential(
    nn.Flatten(),
    nn.Linear(784,128),
    nn.ReLU(),
    nn.Linear(128,10)
)

In [17]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=0.001)

In [18]:
device="cuda" if torch.cuda.is_available() else "cpu"

In [19]:
device

'cuda'

In [20]:
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=128, bias=True)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=10, bias=True)
)

In [12]:
device

'cuda'

In [61]:
for epochs in range(12):
    model.train()
    for images,labels in train_loader:
        images,labels=images.to(device),labels.to(device)
        optimizer.zero_grad()
        outputs=model(images)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
    
    model.eval()
    correct,total=0,0
    with torch.no_grad():
        for images,labels in test_loader:
            images,labels=images.to(device),labels.to(device)
            outputs=model(images)
            _,predicted=torch.max(outputs,1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    accuracy=correct/total
    print("->For Epoch ",epochs+1)
    print(f"         Total accuracy is :{accuracy}")

->For Epoch  1
         Total accuracy is :0.9934166666666666


KeyboardInterrupt: 

In [53]:
images,labels=next(iter(train_loader))
images[0]

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [65]:
cnn=nn.Sequential(
    nn.Conv2d(1,32,3),#1X28x28->26x26x32
    nn.ReLU(),
    nn.MaxPool2d(2,2),#13x13X32
#since you arent specifying stride or padding the formulae is input size-kernel size+1
    nn.Conv2d(32,64,3),#11x11X64
    nn.ReLU(),
    nn.MaxPool2d(2,2),#5x5X64
    nn.Flatten(),
    nn.Linear(64*5*5,128),
    nn.ReLU(),
    nn.Linear(128,10)
)
cnn=cnn.to(device)

In [None]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(cnn.parameters(),lr=0.001)# error bcoz you chose the wrong parameters

In [48]:
device

'cuda'

In [67]:
for epochs in range(12):
    cnn.train()
    for images,labels in train_loader:
        images,labels=images.to(device),labels.to(device)
        optimizer.zero_grad()
        outputs=cnn(images)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
    
    cnn.eval()
    correct,total=0,0
    with torch.no_grad():
        for images,labels in test_loader:
            images,labels=images.to(device),labels.to(device)
            outputs=cnn(images)
            _,predicted=torch.max(outputs,1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
    accuracy=correct/total
    print("->For Epoch ",epochs+1)
    print(f"         Total accuracy is :{accuracy}")

->For Epoch  1
         Total accuracy is :0.9709666666666666
->For Epoch  2
         Total accuracy is :0.9847
->For Epoch  3
         Total accuracy is :0.9855166666666667
->For Epoch  4
         Total accuracy is :0.9892666666666666
->For Epoch  5
         Total accuracy is :0.9923833333333333
->For Epoch  6
         Total accuracy is :0.9934833333333334
->For Epoch  7
         Total accuracy is :0.9935666666666667
->For Epoch  8
         Total accuracy is :0.9958
->For Epoch  9
         Total accuracy is :0.9917
->For Epoch  10
         Total accuracy is :0.9962333333333333
->For Epoch  11
         Total accuracy is :0.9966166666666667
->For Epoch  12
         Total accuracy is :0.9979166666666667
