In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split

from typing import Dict
from torch.jit.annotations import TensorType

# To-Do's

- [ ] Add an option to store the gradient norm of each layer (each layer's norm stored separately)
- [x] Change the linear layers to: CNN + 1 linear layer
- [x] Make a deep model (5 layers), and train it to perfection (99% or higher train accuracy)
- [x] Save the model (we’ll call this the “ground model”) (if time permits, create 5 ground models)
- [ ] Then, create 10 models per noise level (pick 10 noise levels, ranging from totally destroyed to basically no impact), also looping over which layer receives the noise → this turns into 500 models. Make them noisy and measure all of the metrics (robustness, generalization.1, and try generalization.2)
- [ ] Can experiment with Grad-CAM (interesting, but not the most important)


# Download ``MNIST`` Dataset

Download the ``MNIST`` training set, split it into training (80%), validation (10%), and test (10%) subsets, and create a dataloader for each subset.

In [2]:
# Preprocessing pipeline: ToTensor converts each image to a float tensor in [0, 1];
# Normalize then applies (x - 0.5) / 0.5 to the single grayscale channel.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([to_tensor, normalize])

# MNIST training split, stored under (and downloaded to, if missing) the
# current working directory.
dataset = torchvision.datasets.MNIST(
    root=os.getcwd(),
    train=True,
    transform=transform,
    download=True,
)

In [3]:
# Split the dataset into train (80%), val (10%), and test (10%) sets.
# A seeded generator makes the split reproducible across kernel restarts, so a
# saved model is always evaluated on the same held-out examples.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
# The test set takes the remainder so the three sizes always sum to len(dataset).
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [4]:
# Shared dataloader settings.
BATCH_SIZE = 64
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 0 (load batches in the main process) rather than passing None on.
NUM_WORKERS = os.cpu_count() or 0

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# Define Neural Network and ``LightningModule``

In [5]:
# Define the LightningModule
class ConvNet(pl.LightningModule):
    """CNN classifier: four conv/ReLU/max-pool stages followed by a two-layer linear head.

    Every max-pool halves the spatial size (rounding down), so a 1x28x28 input
    shrinks 28 -> 14 -> 7 -> 3 -> 1 and the flattened feature vector has exactly
    the 256 entries the first linear layer expects. The head outputs 10 logits.

    Args:
        lr: Learning rate handed to the Adam optimizer. It is captured by
            ``save_hyperparameters()`` and read back from ``self.hparams``.
    """

    def __init__(self, lr: float = 0.001):
        super().__init__()
        # Captures the __init__ arguments (here: lr) into self.hparams.
        self.save_hyperparameters()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # NOTE:
        #   Try training with 32 all the way - see if model can still be 99% good
        #   Use a smaller model, smallest non-trivial model
        #   Reduce number of linear layers
        #   Start profiling (draw on piece of paper)
        #   Look for number of weights in each model
        #   Get model training up to 100%
        #
        # Want to avoid: did not scale step sizes,
        # Want to find non-trivial conclusions

        self.fc_layer = nn.Sequential(
            # nn.Linear(256 * 3 * 3, 128),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.conv_layers(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        return self.fc_layer(x)

    def configure_optimizers(self):
        """Adam over all parameters, using the learning rate given at construction."""
        return optim.Adam(self.parameters(), lr=self.hparams.lr)

    def _loss_and_accuracy(self, batch):
        """Run one (inputs, targets) batch; return (cross-entropy loss, accuracy)."""
        inputs, targets = batch
        outputs = self(inputs)
        loss = nn.functional.cross_entropy(outputs, targets)
        preds = torch.argmax(outputs, dim=1)
        acc = (preds == targets).float().mean()
        return loss, acc

    # Define the training step method
    def training_step(self, batch, batch_idx):
        """Log training loss and accuracy for one batch; return the loss to optimize."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('train_loss', loss)
        # Train accuracy is the target metric in the notes (>= 99%), so surface it.
        self.log('train_acc', acc, prog_bar=True)
        return loss

    # Define the validation step method
    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('val_loss', loss, prog_bar=True)  # Logging the validation loss
        self.log('val_acc', acc, prog_bar=True)  # Logging the validation accuracy

    # Define the test step method
    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('test_loss', loss)  # Logging the test loss
        self.log('test_acc', acc, prog_bar=True)  # Logging the test accuracy

# Model Training, Validation, and Testing

In [6]:
# Seed the Python, NumPy, and PyTorch RNGs so that weight initialisation and
# batch shuffling start from a fixed state on every fresh run.
pl.seed_everything(42, workers=True)

# Initialize the model and the Lightning Trainer.
# accelerator='auto' lets Lightning choose the available backend (MPS, CUDA, or
# CPU) instead of failing on machines without MPS; adjust max_epochs and
# devices to your environment.
model = ConvNet()
trainer = pl.Trainer(accelerator='auto', max_epochs=10, devices=1)

# Train the model using PyTorch Lightning
trainer.fit(model, train_loader, val_loader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name        | Type       | Params
-------------------------------------------
0 | conv_layers | Sequential | 387 K 
1 | fc_layer    | Sequential | 34.2 K
-------------------------------------------
422 K     Trainable params
0         Non-trainable params
422 K     Total params
1.688     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [7]:
# Run the test loop over the held-out test loader; the returned list of metric
# dictionaries is displayed as the cell output.
test_results = trainer.test(model, dataloaders=test_loader)
test_results

Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.04213758185505867, 'test_acc': 0.9900000095367432}]

In [8]:
# Persist the trained model. Passing the module itself (rather than its
# state_dict) makes torch.save pickle the whole object.
GROUND_MODEL_PATH = 'model.1.pt'
torch.save(model, GROUND_MODEL_PATH)

In [9]:
def add_noise_to_model(model: nn.Module, noise_factor: float = 0.01, inplace: bool = True) -> nn.Module:
    """Add zero-mean Gaussian noise to every parameter of ``model``.

    Each parameter tensor ``p`` is updated as ``p += noise_factor * N(0, 1)``,
    with the noise drawn independently per element.

    Args:
        model: Any module exposing ``parameters()`` (a LightningModule qualifies).
        noise_factor: Standard deviation of the added noise.
        inplace: If True (default), perturb ``model`` itself. If False, leave
            ``model`` untouched and perturb a deep copy instead.
            NOTE(review): deep-copying a module that is still attached to a
            Trainer may copy more than the weights - confirm this is acceptable.

    Returns:
        The perturbed module: ``model`` itself when ``inplace`` is True,
        otherwise the noisy deep copy.
    """
    if not inplace:
        import copy  # local import keeps this cell self-contained

        model = copy.deepcopy(model)

    # no_grad: the perturbation must not be recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.add_(torch.randn_like(param) * noise_factor)
    return model