In [None]:
import torch

from PIL import Image

from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

import torch.nn as nn
import torch.functional as F
import torch.optim as optim

In [13]:
# Select the compute backend in priority order: CUDA, then Apple MPS, then CPU.
# The MPS check is only evaluated when CUDA is unavailable.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

print(device)

cpu


In [2]:
# Build the MNIST train and test splits under ./data (download=True), each with
# its own ToTensor() transform. The train split is constructed first.
train_data, test_data = (
    datasets.MNIST(root='data', train=is_train, transform=ToTensor(), download=True)
    for is_train in (True, False)
)

In [7]:
# Display the shape of the test split's underlying `data` tensor.
test_data.data.shape

torch.Size([10000, 28, 28])

#### Data Loader

In [None]:
# Batched loaders keyed by split name.
# The dataset *objects* must be passed here: passing the strings 'train_data' /
# 'test_data' makes DataLoader iterate over the characters of the string, so
# batches would never unpack into (data, target).
data_loader = {
    'train': DataLoader(train_data, batch_size=100, shuffle=True, num_workers=1),
    'test': DataLoader(test_data, batch_size=100, shuffle=True, num_workers=1),
}

## <center>CNN Class</center>

In [None]:
class Net(nn.Module):
    """Small CNN mapping 1x28x28 MNIST images to 10 class logits.

    Three 3x3 convolutions (padding=1, so spatial size is preserved) are each
    followed by ReLU and a 2x2 max-pool, then two fully connected layers.
    ``forward`` returns raw logits; no softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # kernel_size=3 would shrink each side by 2 px; padding=1 keeps the
        # spatial size and preserves features at the image border.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)  # halves height and width

        # Channel-wise dropout for the 4-D conv feature maps.
        self.dropout = nn.Dropout2d(0.25)
        # Element-wise dropout for the flattened 2-D activations. Dropout2d on
        # a 2-D input is deprecated and warns, so the dense layer gets its own
        # module. Dropout has no parameters, so state_dict keys are unchanged.
        self.fc_dropout = nn.Dropout(0.25)

        self.fc1 = nn.Linear(64 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for MNIST digits

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Input batch; the layer sizes assume shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) with unnormalized class scores.
        """
        # relu is taken from nn.functional: the notebook's `F` alias points at
        # torch.functional, which has no `relu`.
        relu = nn.functional.relu

        x = self.pool(relu(self.conv1(x)))  # -> 16 x 14 x 14
        x = self.pool(relu(self.conv2(x)))  # -> 32 x 7 x 7
        x = self.pool(relu(self.conv3(x)))  # -> 64 x 3 x 3

        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # flatten to (N, 64 * 3 * 3)

        x = relu(self.fc1(x))
        x = self.fc_dropout(x)
        x = self.fc2(x)

        return x
    

### Creating the Actual Model & Training Loop

In [20]:
# Instantiate the network and move its parameters to the selected device.
model = Net()
model = model.to(device)

# Adam over all model parameters with learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# CrossEntropyLoss expects raw logits (softmax is built in), so the model's
# forward pass must not apply softmax itself.
loss_func = nn.CrossEntropyLoss()

# ------TRAINING LOOP------- :D
def train(num_epochs):
    """Train the module-level ``model`` on ``data_loader['train']``.

    Each epoch iterates once over the training loader, optimizing with the
    module-level ``optimizer`` and ``loss_func`` on ``device``, then prints a
    single summary line containing the mean batch loss for that epoch.

    Args:
        num_epochs: Number of full passes over the training loader.
    """
    model.train()  # enable training-mode behavior (dropout active)

    train_loader = data_loader['train']

    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)

            # Cross-entropy between predicted logits and integer class labels.
            loss = loss_func(output, target)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Report once per epoch, not once per batch, and report the mean batch
        # loss rather than the running sum so epochs are comparable.
        # max(..., 1) avoids a division by zero on an empty loader.
        avg_loss = total_loss / max(num_batches, 1)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")


#### <center>TRAINING THE MODEL</center>

In [None]:
# Run the training loop for 10 epochs.
train(num_epochs=10)

#### Save the trained model

In [None]:
# Save only the model's state_dict (not the whole module object) to 'model.pth'.
torch.save(model.state_dict(), 'model.pth')