# Data Collection and Preparation, Building ANNs – Linear Layers and Non-Linear Activation Functions

## Imports & Configuration

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's default random number generator with a fixed value.
torch.manual_seed(42)

<torch._C.Generator at 0x7f8d403472d0>

In [None]:
# Select CUDA when PyTorch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
  device=torch.device("cuda")
else:
  device=torch.device("cpu")
print("Using device: ", device)

Using device:  cpu


## Data Collection
We use MNIST handwritten digits (28×28 grayscale images, 10 classes).

In [None]:
# Per-image preprocessing pipeline: convert to a tensor, then normalize the
# single channel with the given mean and standard deviation.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean and
# standard deviation -- confirm against the dataset statistics.
transform=transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,),std=(0.3081,)),
    ]
)

In [None]:
# MNIST training split stored under ./data (download enabled), with
# `transform` applied to each image.
train_dataset=datasets.MNIST("./data",train=True,download=True,transform=transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 145MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 29.2MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 106MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.09MB/s]


In [None]:
# MNIST test split stored under ./data (download enabled), with the same
# `transform` as the training split.
test_dataset=datasets.MNIST("./data",train=False,download=True,transform=transform)

## Data Preparation

In [None]:
# 90% of the samples (rounded down) are kept for training; whatever remains
# becomes the validation split, so the two sizes always sum to the total.
total_samples=len(train_dataset)
train_size=int(0.9*total_samples)
val_size=total_samples-train_size

In [None]:
# Randomly partition the loaded training data into training and validation
# subsets of the sizes computed above.
# NOTE: `train_dataset` is rebound to the training subset here, so the size
# computation (which reads len(train_dataset)) must not be re-run after this
# cell without reloading the data first.
train_dataset,val_dataset=random_split(train_dataset,[train_size,val_size])

In [None]:
# Batches of 64 for every split. Only the training loader shuffles; the
# validation and test loaders are built with shuffle=False.
train_loader=DataLoader(train_dataset,batch_size=64,shuffle=True)
val_loader,test_loader=(
    DataLoader(split,batch_size=64,shuffle=False)
    for split in (val_dataset,test_dataset)
)

## ANN Model Definition

In [None]:
class ANN(nn.Module):
  """Fully connected classifier: flatten, three Linear+ReLU stages, linear output.

  Args:
    in_features: Number of values per sample after flattening
      (default ``28*28``).
    hidden_sizes: Widths of the three hidden layers, in order
      (default ``(256, 128, 64)``).
    num_classes: Number of output scores per sample (default ``10``).

  Raises:
    ValueError: If ``hidden_sizes`` does not contain exactly three widths.
  """

  def __init__(self,in_features=28*28,hidden_sizes=(256,128,64),num_classes=10):
    super().__init__()
    if len(hidden_sizes)!=3:
      raise ValueError(
          f"hidden_sizes must contain exactly 3 widths, got {len(hidden_sizes)}"
      )
    width1,width2,width3=hidden_sizes
    # Attribute names and construction order are kept fixed so that printed
    # structure, state_dict keys and seeded default initialisation stay the
    # same when the default arguments are used.
    self.flatten=nn.Flatten()
    self.fc1=nn.Linear(in_features,width1)
    self.relu1=nn.ReLU()
    self.fc2=nn.Linear(width1,width2)
    self.relu2=nn.ReLU()
    self.fc3=nn.Linear(width2,width3)
    self.relu3=nn.ReLU()
    self.output=nn.Linear(width3,num_classes)

  def forward(self,x):
    """Return the output layer's raw scores for a batch ``x``.

    ``x`` is flattened from dimension 1 onwards before the first linear
    layer. No softmax is applied to the returned scores.
    """
    x=self.flatten(x)
    x=self.relu1(self.fc1(x))
    x=self.relu2(self.fc2(x))
    x=self.relu3(self.fc3(x))
    return self.output(x)

## Model Initialization

In [None]:
# Instantiate the network with its default layer sizes and move its
# parameters to the selected device.
model=ANN().to(device)

In [None]:
# Show the module's layer-by-layer structure.
print(model)

ANN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (relu3): ReLU()
  (output): Linear(in_features=64, out_features=10, bias=True)
)


## Loss Function & Optimizer

In [None]:
# Cross-entropy loss on the model's output scores, optimised with Adam over
# all model parameters at a learning rate of 1e-3.
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(params=model.parameters(),lr=1e-3)

## Training Loop

In [None]:
def train_one_epoch(model,dataloader,*,loss_fn=None,opt=None,target_device=None):
  """Run one optimisation pass over ``dataloader`` and report its metrics.

  Args:
    model: Module to train; it is put into training mode.
    dataloader: Iterable yielding ``(images, labels)`` batches.
    loss_fn: Loss callable. Defaults to the notebook-level ``criterion``.
      It is assumed to return the mean loss over the batch.
    opt: Optimizer stepped after every batch. Defaults to the notebook-level
      ``optimizer``.
    target_device: Device the batches are moved to. Defaults to the
      notebook-level ``device``.

  Returns:
    ``(loss, accuracy)`` where ``loss`` is the mean loss per sample and
    ``accuracy`` is the fraction of samples whose arg-max prediction equals
    the label. Both are ``nan`` when the loader yields no samples.
  """
  # Fall back to the notebook globals only when no override is supplied.
  loss_fn=criterion if loss_fn is None else loss_fn
  opt=optimizer if opt is None else opt
  target_device=device if target_device is None else target_device

  model.train()
  loss_sum=0.0
  correct=0
  total=0
  for images,labels in dataloader:
    images=images.to(target_device)
    labels=labels.to(target_device)
    opt.zero_grad()
    outputs=model(images)
    loss=loss_fn(outputs,labels)
    loss.backward()
    opt.step()
    batch_size=labels.size(0)
    # Weight the batch-mean loss by the batch size so that a smaller final
    # batch does not count as much as a full one.
    loss_sum+=loss.item()*batch_size
    correct+=(outputs.argmax(dim=1)==labels).sum().item()
    total+=batch_size
  if total==0:
    # Nothing was processed: the metrics are undefined, not zero.
    return float("nan"),float("nan")
  return loss_sum/total, correct/total

## Evaluation Loop (Validation & Test)

In [None]:
def evaluate(model,dataloader,*,loss_fn=None,target_device=None):
  """Measure loss and accuracy of ``model`` on ``dataloader`` without training.

  Args:
    model: Module to evaluate; it is put into evaluation mode and left there.
    dataloader: Iterable yielding ``(images, labels)`` batches.
    loss_fn: Loss callable. Defaults to the notebook-level ``criterion``.
      It is assumed to return the mean loss over the batch.
    target_device: Device the batches are moved to. Defaults to the
      notebook-level ``device``.

  Returns:
    ``(loss, accuracy)`` where ``loss`` is the mean loss per sample and
    ``accuracy`` is the fraction of samples whose arg-max prediction equals
    the label. Both are ``nan`` when the loader yields no samples.
  """
  # Fall back to the notebook globals only when no override is supplied.
  loss_fn=criterion if loss_fn is None else loss_fn
  target_device=device if target_device is None else target_device

  model.eval()
  loss_sum=0.0
  correct=0
  total=0
  # Gradients are not needed for measurement, so tracking is disabled.
  with torch.no_grad():
    for images,labels in dataloader:
      images=images.to(target_device)
      labels=labels.to(target_device)
      outputs=model(images)
      batch_size=labels.size(0)
      # Weight the batch-mean loss by the batch size so that a smaller final
      # batch does not count as much as a full one.
      loss_sum+=loss_fn(outputs,labels).item()*batch_size
      correct+=(outputs.argmax(dim=1)==labels).sum().item()
      total+=batch_size
  if total==0:
    # Nothing was processed: the metrics are undefined, not zero.
    return float("nan"),float("nan")
  return loss_sum/total, correct/total

## Full Training Process

In [None]:
# Train for a fixed number of epochs, measuring the validation split after
# each training pass and printing one summary line per epoch.
epochs=10
for epoch in range(epochs):
  train_loss,train_acc=train_one_epoch(model,train_loader)
  val_loss,val_acc=evaluate(model,val_loader)
  summary=(
      f"Epoch [{epoch+1}/{epochs}] | "
      f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
      f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}"
  )
  print(summary)

Epoch [1/10] | Train Loss: 0.1249, Train Acc: 0.9622 | Val Loss: 0.1203, Val Acc: 0.9645
Epoch [2/10] | Train Loss: 0.0835, Train Acc: 0.9734 | Val Loss: 0.1041, Val Acc: 0.9690
Epoch [3/10] | Train Loss: 0.0641, Train Acc: 0.9796 | Val Loss: 0.0891, Val Acc: 0.9727
Epoch [4/10] | Train Loss: 0.0512, Train Acc: 0.9833 | Val Loss: 0.0879, Val Acc: 0.9747
Epoch [5/10] | Train Loss: 0.0410, Train Acc: 0.9869 | Val Loss: 0.0827, Val Acc: 0.9772
Epoch [6/10] | Train Loss: 0.0344, Train Acc: 0.9892 | Val Loss: 0.0946, Val Acc: 0.9740
Epoch [7/10] | Train Loss: 0.0318, Train Acc: 0.9895 | Val Loss: 0.1010, Val Acc: 0.9748
Epoch [8/10] | Train Loss: 0.0279, Train Acc: 0.9906 | Val Loss: 0.1025, Val Acc: 0.9758
Epoch [9/10] | Train Loss: 0.0234, Train Acc: 0.9921 | Val Loss: 0.1158, Val Acc: 0.9720
Epoch [10/10] | Train Loss: 0.0226, Train Acc: 0.9926 | Val Loss: 0.1089, Val Acc: 0.9755


## Test Evaluation

In [None]:
# Evaluate once on the test loader; only the accuracy is printed.
test_metrics=evaluate(model,test_loader)
test_loss,test_acc=test_metrics
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.9747
