In [11]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torchviz import make_dot

from typing import Dict, Union
from torch.jit.annotations import TensorType

# To-Do's

- [] Add option to store gradient norm of each layer, stored separately
- [X] Change linear layers to: CNN + 1 linear layer
- [X] Make deep model (5 layers), and train it to perfection (up to 99% or higher train accuracy)
- [X] Save the model (we’ll call this the “ground model”) (if time, create 5 ground models)
- [] Then, create 10 models per noise level (pick 10 noise levels, between totally destroyed and basically no impact) (also loop through which layer)→ turns into 500 models. Make them noisy, measure all the things above (robustness, generalization.1, try generalization.2)
- [] Can experiment with gradcam (interesting but not most important)


Before Thr.

- [X] Add option to store gradient norm of each layer, stored separately
- [X] Create table, row -> model, col -> specs (grad norm, layerwise norm, specify train/test accuracy), list number of tunable parameters for each model.
- [X] Add norms of total and/or per layer parameters to the table.
- [] GradCam (wishlist or next step)


# Download ``MNIST`` Dataset

Download ``MNIST`` dataset, define training (80%), validation (10%), and test (10%) data sizes, and create the dataloaders.

In [2]:
# MNIST dataset and dataloaders
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
dataset = torchvision.datasets.MNIST(root=os.getcwd(), train=True, transform=transform, download=True)

In [3]:
# Split the dataset into train, val, and test sets
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

In [4]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=os.cpu_count())
val_loader = DataLoader(val_dataset, batch_size=64, num_workers=os.cpu_count())
test_loader = DataLoader(test_dataset, batch_size=64, num_workers=os.cpu_count())

# Define Neural Network and ``LightningModule``

In [2]:
# Define the LightningModule
class ConvNet(pl.LightningModule):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.save_hyperparameters()
        self.conv_layers = nn.Sequential(
            # # Model 1
            # nn.Conv2d(1, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 64, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(64, 128, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(128, 256, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2)
            # 
            # # Model 2
            # nn.Conv2d(1, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2)
            # 
            # # Model 3
            # nn.Conv2d(1, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # 
            # # Model 4
            # nn.Conv2d(1, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # nn.Conv2d(32, 32, kernel_size=3, padding=1),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            # 
            # Model 5
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # NOTE: 
        #   [X] Try training with 32 all the way - see if model can still be 99% good
        #   [?] Use a smaller model, smallest non-trivial model
        #   [X] Reduce number of linear layers
        #   [X] Start profiling (draw on piece of paper)
        #   [] Look for number of weights in each model
        #   Get model training up to 100% 
        #   
        #   NOTE: Need atleast 2 linear layers - 
        #   NOTE: Find how to store/see: learning rate, validation error
        # 
        # Want to avoid: did not scale step sizes, 
        # Want to find non-trivial conclusions

        
        self.fc_layer = nn.Sequential(
            # # Model 1
            # nn.Linear(256, 128),
            # nn.ReLU(),
            # nn.Linear(128, 10),
            #
            # # Model 2
            # nn.Linear(32, 10),
            #
            # # Model 3
            # nn.Linear(32 * 3 * 3, 10),
            # # Model 4
            # nn.Linear(32 * 7 * 7, 10),
            #
            # Model 5
            nn.Linear(14 * 14 * 32, 10)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        # x = x.view(x.size(0), x.size(1))  # Flatten the tensor along the channel dimension
        x = self.fc_layer(x)
        return x
    
    def configure_optimizers(self):
        # Lower lr from 0.001
        # Try SGD
        # GOAL: Get to 100% training error
        return optim.Adam(self.parameters(), lr=0.001)
    
    # Define the training step method
    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        self.log('train_loss', loss)
        return loss
    
    # Define the validation step method
    def validation_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        self.log('val_loss', loss, prog_bar=True)  # Logging the validation loss
    
    # Define the test step method
    def test_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        self.log('test_loss', loss)  # Logging the test loss
        preds = torch.argmax(outputs, dim=1)
        acc = (preds == targets).float().mean()
        self.log('test_acc', acc, prog_bar=True)  # Logging the test accuracy

# Model Training, Validation, and Testing

In [3]:
# Initialize the Lightning Trainer
model = ConvNet()
trainer = pl.Trainer(accelerator='mps',max_epochs=10,devices=1)  # Set max_epochs and gpus according to your environment

# Train the model using PyTorch Lightning
trainer.fit(model, train_loader, val_loader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
def count_trainable_parameters(model: pl.LightningModule) -> int:
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params

In [8]:
# Model 1 (6 layers, 4 Conv, 2 Linear)
# 
# Evaluate the model on the test data
trainer.test(model, dataloaders=test_loader)
torch.save(model,'model.1.pt')
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 422,026


In [13]:
# Use trainer.test with the test_loader [Train Error]
# Model 1 (6 layers, 4 Conv, 2 Linear)
# 
# Evaluate the model on the test data
# 
# NOTE: Stick with model.2
model = torch.load('model.2.pt')
trainer.test(model, dataloaders=train_loader)
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 28,394


In [8]:
# Model 2 (5 layers, 4 Conv, 1 Linear)
# 
# Evaluate the model on the test data
trainer.test(model, dataloaders=test_loader)
torch.save(model,'model.2.pt')
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 28,394


In [8]:
# Model 3 (4 layers, 3 Conv, 1 Linear)
# 
# Evaluate the model on the test data
trainer.test(model, dataloaders=test_loader)
torch.save(model,'model.3.pt')
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 21,706


In [9]:
# Model 4 (3 layers, 2 Conv, 1 Linear)
# 
# Evaluate the model on the test data
trainer.test(model, dataloaders=test_loader)
torch.save(model,'model.4.pt')
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 25,258


In [8]:
# Model 5 (2 layers, 1 Conv, 1 Linear)
# 
# Evaluate the model on the test data
trainer.test(model, dataloaders=test_loader)
torch.save(model,'model.5.pt')
print(f"Number of trainable parameters (weights): {count_trainable_parameters(model=model):,}")

Testing: 0it [00:00, ?it/s]

Number of trainable parameters (weights): 63,050


In [9]:
def add_noise_to_model(model: pl.LightningModule, noise_factor: float = 0.01) -> None:
    # TODO: Return new updated noise model
    # 
    # Will need to make deep copy of model
    # Create new reference variable to deep copied
    #   model
    # NOTE: noise_factor, treat in log-way, logE-8 -> 1
    # 
    # Want to see model learning, just not learning well
    # Need to know the norms of parameters
    with torch.no_grad():
        for param in model.parameters():
            noise = torch.randn_like(param) * noise_factor
            param.add_(noise)

In [9]:
# def save_and_viz_pl_model(model: pl.LightningModule, filename: str) -> None:
#     # Create a dummy input with the same shape as expected input during training
#     dummy_input = torch.randn(1, 1, 28, 28)

#     # Generate the visualization of the model architecture
#     dot = make_dot(model(dummy_input))
#     params = dict(model.named_parameters())

#     # Save the visualization as an image
#     dot.format = 'png'
#     dot.render(filename, cleanup=True)

In [10]:
# save_and_viz_pl_model(model=model,
#                       # filename='model.1.4_ConvLayers.2_LinLayer',
#                       # filename='model.2.4_ConvLayers.1_LinLayer',
#                       # filename='model.3.3_ConvLayers.1_LinLayer',
#                       # filename='model.4.2_ConvLayers.1_LinLayer',
#                       filename='model.5.1_ConvLayers.1_LinLayer',
#                       )

In [5]:
torch.load('model.5.pt')

ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)

In [9]:
model

ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)

In [10]:
model.state_dict()

OrderedDict([('conv_layers.0.weight',
              tensor([[[[-0.3202, -0.1778,  0.0620],
                        [ 0.2234,  0.0629,  0.1850],
                        [-0.1793, -0.2067, -0.0119]]],
              
              
                      [[[-0.0726, -0.0674, -0.2367],
                        [-0.0923, -0.2490,  0.1725],
                        [-0.2860, -0.0074,  0.1166]]],
              
              
                      [[[-0.0516,  0.3071,  0.2535],
                        [ 0.0649,  0.2776, -0.0090],
                        [-0.2020, -0.1665, -0.2772]]],
              
              
                      [[[-0.1691,  0.1573,  0.1057],
                        [-0.2105, -0.1876, -0.1378],
                        [-0.0705, -0.1706, -0.1636]]],
              
              
                      [[[-0.2299,  0.1070, -0.3222],
                        [-0.1409, -0.2790,  0.1478],
                        [ 0.2875, -0.0482,  0.0777]]],
              
              
       

In [28]:
def _save_and_viz_pl_model(model: Union[nn.Module,pl.LightningModule], filename: str) -> None:
    """Helper function to visualize and plot the model architecture.

    Args:
        model: Input pytorch (lightning) model.
        filename: Output filename (no file extension).
    """
    # Create a dummy input with the same shape as expected input during training
    dummy_input = torch.randn(1, 1, 28, 28)

    # Generate the visualization of the model architecture
    dot = make_dot(model(dummy_input))
    # params = dict(model.named_parameters())

    # Save the visualization as an image
    dot.format = 'png'
    dot.render(filename, cleanup=True)
    
    return None

In [29]:
def save_pt_model(model: Union[nn.Module,pl.LightningModule], filename: str) -> None:
    """Saves pytorch (lightning) model, and creates visualization of model architecture.

    Args:
        model: Input pytorch (lightning) model.
        filename: Output filename.
    """
    # TODO: Save metadata file for the model.
    # Check filename
    filename: str
    ext: str
    
    if ('pt' or 'pth') in filename:
        filename, ext = os.path.splitext(filename)
    else:
        ext: str = ".pt"
    
    # Save model (and model state)
    torch.save(model.state_dict(), f"{filename}{ext}")

    # Save image of model architecture
    _save_and_viz_pl_model(model=model, filename=filename)

    return None

In [64]:
def load_pt_model(model: Union[nn.Module,pl.LightningModule], filename: str) -> Union[nn.Module,pl.LightningModule]:
    """Loads saved/trained model, in which the model class **must** be provided.

    Args:
        model: Input model class objoect.
        filename: Input filename that corresponds to trained saved/trained model.

    Returns:
        Trained model.
    """
    # model = TheModelClass(*args, **kwargs)
    model.load_state_dict(torch.load(filename))
    model.eval() # sets dropout and batch normalization layers to evaluation mode
    return model

In [56]:
m = load_pt_model(model=model,filename='model.5.pt')

TypeError: Expected state_dict to be dict-like, got <class '__main__.ConvNet'>.

In [18]:
filename = 'model.5.pt'

In [21]:
('pt' or 'pth') in filename

True

In [25]:
f,e = os.path.splitext(filename)

In [27]:
f+e

'model.5.pt'

In [34]:
model

ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)

In [37]:
torch.save(model, f"model.5.json")

In [39]:
model.named_parameters

<bound method Module.named_parameters of ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)>

In [40]:
import json

In [48]:
with open("model.5.json", "w") as f:
    json.dump(model, f, indent=4)

TypeError: Object of type ConvNet is not JSON serializable

In [54]:
model.parameters

<bound method Module.parameters of ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)>

In [58]:
model.named_parameters

<bound method Module.named_parameters of ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)>

In [61]:
for name, param in model.named_parameters():
    print(name)

conv_layers.0.weight
conv_layers.0.bias
fc_layer.0.weight
fc_layer.0.bias


In [62]:
torch.save(model.named_parameters(), 'test')

TypeError: cannot pickle 'generator' object

In [63]:
model.eval()

ConvNet(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=6272, out_features=10, bias=True)
  )
)