In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Preprocessing applied to every image: ToTensor converts it to a tensor, then
# Normalize shifts/scales the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST train and test splits, stored under ./data (download=True fetches them if needed).
train_set = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_set = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Training batches are reshuffled; the test split is read in a fixed order
# with larger batches.
train_loader = DataLoader(train_set, shuffle=True, batch_size=64)
test_loader = DataLoader(test_set, shuffle=False, batch_size=1000)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for 28x28 images.

    Two convolution -> ReLU -> 2x2 max-pool stages feed a two-layer
    fully connected head that returns one raw score (logit) per class.

    Args:
        in_channels: Number of channels in the input images (1 for grayscale).
        num_classes: Number of output classes, i.e. the size of the last layer.

    The defaults reproduce the original fixed architecture, so ``CNN()`` keeps
    the same layers, parameter count and ``state_dict`` keys.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        self.fc_block = nn.Sequential(
            nn.Flatten(),
            # padding=1 keeps the spatial size through each 3x3 convolution and
            # each pooling layer halves it, so a 28x28 input arrives here as
            # 64 feature maps of 7x7.
            nn.Linear(64 * 7 * 7, 128),  # fully connected layer
            nn.ReLU(),
            nn.Linear(128, num_classes),  # one output score per class
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, in_channels, 28, 28).
        """
        x = self.conv_block(x)
        x = self.fc_block(x)

        return x
            

`conv_block` stacks two stages, each made of a convolution, a ReLU activation, and max pooling.

In [None]:
# Build the network and place its parameters on the selected device.
model = CNN()
model = model.to(device)

# Report how many parameters will be updated during training.
trainable_params = [p for p in model.parameters() if p.requires_grad]
print(sum(p.numel() for p in trainable_params))

# Cross-entropy loss computed from the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam optimiser over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the model, printing the mean batch loss and training accuracy per epoch.
num_epochs = 5  # used for both the loop bound and the progress message

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        # The model was moved to `device`, so each batch has to be moved there
        # as well; otherwise the forward pass fails on a CUDA machine. When the
        # device is the CPU this leaves the tensors where they are.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Loss: {running_loss/len(train_loader):.4f}, "
          f"Train Accuracy: {100 * correct / total:.2f}%")
    

In [None]:
# Measure classification accuracy on the test split.
model.eval()  # put the module in evaluation mode
correct, total = 0, 0

# No gradients are needed for evaluation, so disable gradient tracking.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)

        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")