## Imports

In [178]:
import torch

In [179]:
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim

## Create Fully Connected Network

In [180]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes): ## 28*28 image size
        super(NN,self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)
        
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
        

In [181]:
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, input_channels=1, num_classes=10):
        super(CNN, self).__init__()

        # Define the convolutional layers
        self.convolution = nn.Sequential(
            nn.Conv2d(in_channels=input_channels, out_channels=8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2)),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        # Define the classifier (fully connected layers)
        self.classifier = nn.Sequential(
            nn.Linear(16 * 7 * 7, num_classes),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.convolution(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor for the fully connected layer
        x = self.classifier(x)
        return x

# Example usage
x = torch.rand(64, 1, 28, 28)  # 64 images, 1 channel, 28x28 pixels
model = CNN()
output = model(x)

In [182]:
output.shape

torch.Size([64, 10])

In [183]:
modelx = NN(784,10)
x = torch.rand(64,784)
print(modelx(x).shape)


torch.Size([64, 10])


## set device

In [184]:
device = torch.device('mps')
# device = ('mps' if torch.mps.is_available() else "cpu")

## Hyperparameters

In [185]:
input_channels = 1
input_size = 784
num_classes = 10
lr = 0.001
epochs = 5
batch_size = 64

In [186]:
train_dataset = datasets.MNIST(root = 'datasets/', train = True, transform = transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset = train_dataset, batch_size=batch_size, shuffle=True)

In [187]:
test_dataset = datasets.MNIST(root = 'datasets/', train = False, transform = transforms.ToTensor(), download=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=batch_size, shuffle=True)

## Initialize Network

In [188]:
model = CNN().to(device)

In [189]:
model.parameters

<bound method Module.parameters of CNN(
  (convolution): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=784, out_features=10, bias=True)
    (1): Softmax(dim=1)
  )
)>

In [190]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr)

## Train Network

In [191]:
for epoch in range(epochs):
    for idx ,(data,target) in enumerate(train_loader):
        data = data.to(device=device)
        target = target.to(device=device)
        
    print(data.shape)

torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])
torch.Size([32, 1, 28, 28])


In [192]:
print(data.reshape(data.shape[0],-1))
print(data.reshape(data.shape[0],-1).shape)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='mps:0')
torch.Size([32, 784])


In [193]:
from tqdm import tqdm

In [194]:
for  epoch in range(epochs):
    num_correct = 0
    num_samples = 0
    running_loss = 0.0
    for idx,(data,target) in tqdm(enumerate(train_loader)):
        data = data.to(device = device)
        target = target.to(device=device)
        
        scores =model(data)
        loss= criterion(scores,target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _,predictions = scores.max(1) 
        num_correct = num_correct +  (predictions == target).sum()
        num_samples = num_samples + predictions.size(0)
            
    print(f"Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float (num_samples) * 100:.2f}")
        
    running_loss += loss.item()
    epoch_loss = running_loss / len(data)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

938it [00:05, 166.72it/s]


Got 52175/60000 with accuracy 86.96
Epoch 1, Loss: 0.0459


938it [00:05, 173.73it/s]


Got 57568/60000 with accuracy 95.95
Epoch 2, Loss: 0.0474


938it [00:05, 171.66it/s]


Got 58165/60000 with accuracy 96.94
Epoch 3, Loss: 0.0457


938it [00:05, 171.11it/s]


Got 58452/60000 with accuracy 97.42
Epoch 4, Loss: 0.0466


938it [00:05, 166.82it/s]

Got 58670/60000 with accuracy 97.78
Epoch 5, Loss: 0.0497





In [202]:
from torchmetrics import Accuracy

In [207]:
for epoch in range(epochs):
    loop = tqdm(enumerate(train_loader), total=len(train_loader))
    for idx,(data, targets) in loop:
        data = data.to(device = device)
        targets = targets.to(device = device)
        
        # Forward
        scores = model(data)
        loss = criterion(scores,targets)
        
        #Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        #Update progress bar
        # Specify the task type explicitly, e.g., "multiclass" or "binary"
        accuracy = Accuracy(task="multiclass", num_classes=num_classes).to(device=device)  # Adjust `task` and `num_classes`

        # Compute accuracy for your scores and targets
        acc_value = accuracy(scores, targets)

        # Update the progress bar
        loop.set_postfix(loss=loss.item(), accuracy=acc_value.item())
        loop.set_description(f"Epoch : [{epoch}/{epochs}]")


Epoch : [0/5]: 100%|██████████| 938/938 [00:13<00:00, 71.82it/s, accuracy=0.938, loss=1.51]
Epoch : [1/5]: 100%|██████████| 938/938 [00:12<00:00, 72.84it/s, accuracy=0.969, loss=1.48]
Epoch : [2/5]: 100%|██████████| 938/938 [00:12<00:00, 73.36it/s, accuracy=1, loss=1.46]    
Epoch : [3/5]: 100%|██████████| 938/938 [00:12<00:00, 73.82it/s, accuracy=1, loss=1.46]    
Epoch : [4/5]: 100%|██████████| 938/938 [00:13<00:00, 72.02it/s, accuracy=1, loss=1.47]    


In [208]:
len(data)

32

In [209]:
print((model.classifier[0].weight).shape)

torch.Size([10, 784])


## Check Accuracy

In [210]:
def check_accuracy(loader,model):
    num_correct = 0
    num_samples = 0
    
    model.eval()
    
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            # x = x.reshape(x.shape[0],-1)
            scores = model(x)
            _,predictions = scores.max(1) 
            num_correct = num_correct +  (predictions == y).sum()
            num_samples = num_samples + predictions.size(0)
            
        print(f"Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float (num_samples) * 100:.2f}")
    

In [211]:
predictions.size()

torch.Size([32])

In [212]:
check_accuracy(train_loader, model)

Got 59281/60000 with accuracy 98.80


In [213]:
check_accuracy(test_loader, model)

Got 9836/10000 with accuracy 98.36


In [201]:
scores.shape

torch.Size([32, 10])