In [8]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pytorch_lightning as pl

from typing import Dict
from torch.jit.annotations import TensorType

In [6]:
from pytorch_lightning import accelerators

In [9]:
torch.device

torch.device

In [10]:
pl.accelerators.mps.get_device_stats()

{'M1_vm_percent': 90.7, 'M1_percent': 9.1, 'M1_swap_percent': 0.0}

In [15]:
mps = pl.accelerators.mps.MPSAccelerator()

In [26]:
device = torch.device
device.type = 'mps'
mps.setup_device(device)

TypeError: cannot set 'type' attribute of immutable type 'torch.device'

In [2]:
import platform

In [3]:
platform.processor() # == "arm"

'arm'

In [4]:
torch.backends.mps.is_available()

True

# Download ``MNIST`` Dataset

In [5]:
# MNIST dataset and dataloaders
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = torchvision.datasets.MNIST(root=os.getcwd(), train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)


# Define Neural Network and ``LightningModule``

In [6]:
class NeuralNetwork(pl.LightningModule):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10)
        )
        
    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.model(x)
    
    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.001)
    
    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        self.log('train_loss', loss)
        return loss

# Train the model

In [12]:
# Initialize the Lightning Trainer
model = NeuralNetwork()
trainer = pl.Trainer(accelerator='mps',max_epochs=5,devices=1)  # Set max_epochs and gpus according to your environment

# Train the model using PyTorch Lightning
trainer.fit(model, train_loader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 175 K 
-------------------------------------
175 K     Trainable params
0         Non-trainable params
175 K     Total params
0.701     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [8]:
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f"Parameter: {name}")
        print("Gradient:")
        print(param.grad)
    else:
        print(f"Parameter {name} has no gradient.")

Parameter: model.0.weight
Gradient:
tensor([[ 0.0118,  0.0118,  0.0118,  ...,  0.0118,  0.0118,  0.0118],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0013, -0.0013, -0.0013,  ..., -0.0013, -0.0013, -0.0013],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0063,  0.0063,  0.0063,  ...,  0.0063,  0.0063,  0.0063]])
Parameter: model.0.bias
Gradient:
tensor([-1.1787e-02,  0.0000e+00,  1.2850e-03,  2.0139e-03, -6.4194e-03,
        -9.4879e-03, -6.5451e-05,  4.6064e-03,  0.0000e+00,  0.0000e+00,
        -5.6941e-04,  0.0000e+00, -8.5749e-03,  0.0000e+00,  0.0000e+00,
        -1.9480e-02,  0.0000e+00, -2.3550e-02,  0.0000e+00, -1.8377e-04,
        -7.7269e-07,  4.4411e-05, -8.7891e-04,  0.0000e+00,  1.0721e-02,
         0.0000e+00,  1.0475e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         5.1769e-03,  0.0000e+00, -5.5848e-03,  0.0000e+00

In [17]:
dict(model.named_parameters())['model.0.weight']

Parameter containing:
tensor([[ 0.0117,  0.0008,  0.0325,  ..., -0.0247,  0.0140, -0.0183],
        [ 0.0071, -0.0294,  0.0046,  ...,  0.0281,  0.0297,  0.0217],
        [ 0.0212,  0.0244,  0.0213,  ...,  0.0354,  0.0072, -0.0203],
        ...,
        [ 0.0051, -0.0048,  0.0358,  ...,  0.0052,  0.0282,  0.0235],
        [-0.0054,  0.0394,  0.0329,  ..., -0.0014,  0.0302, -0.0194],
        [-0.0174,  0.0419,  0.0034,  ...,  0.0072, -0.0078,  0.0077]],
       requires_grad=True)

In [24]:
from typing import Dict
from torch.jit.annotations import TensorType

In [68]:
def get_gradients(model: pl.LightningModule) -> Dict[str,str,TensorType]:
    """Given a model, the model's gradients for both the weights, and bias are returned.

    This function is intended to return the gradients of a trained model PyTorch Lightening 
    ``LighteningModule`` object.

    Example usage:
        >>> model: pl.LightningModule = NeuralNetwork() # Type hint added for clarity
        >>> gradients = get_gradients(model=model)
        gradients
        {'weight': {'model.0.weight': tensor([[ 0.0118,  0.0118,  0.0118,  ...,  0.0118,  0.0118,  0.0118],
        ...
        'model.0.bias': tensor([ 6.8496e-04,  6.1421e-05,  7.9987e-04, -2.4953e-02, -2.9005e-02,
          -3.3864e-04, -4.2585e-03, -1.1625e-02,  2.7028e-02,  4.1605e-02])},
        'param_no_grad': {}}

    Args:
        model: Input trained model.

    Returns:
        Dictionary of key mapped strings that consists of: 'weights', 'bias', and 'param_no_grad'.
    """
    # Gradients dictionary
    gradients: Dict[str,str,TensorType] = {
        'weight': None,
        'bias': None,
        'param_no_grad': None
    }

    # Temporary dictionaries
    _tmp_weight: Dict = {}
    _tmp_bias: Dict = {}
    _tmp_none: Dict = {}
    
    # Iterate through model parameters
    for name, param in model.named_parameters():
        if param.grad is not None:
            if 'weight' in name:
                _tmp_weight.update({name: param.grad})
            elif 'bias' in name:
                _tmp_bias.update({name: param.grad})
        else:
            _tmp_none.update({name: f"Parameter {name} has no gradient."})
    
    # Update gradients dictionary
    gradients.update({'weight':_tmp_weight})
    gradients.update({'bias':_tmp_bias})
    gradients.update({'param_no_grad':_tmp_none})
    return gradients

In [69]:
gradients = get_gradients(model=model)

In [70]:
gradients

{'weight': {'model.0.weight': tensor([[ 0.0118,  0.0118,  0.0118,  ...,  0.0118,  0.0118,  0.0118],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.0013, -0.0013, -0.0013,  ..., -0.0013, -0.0013, -0.0013],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0063,  0.0063,  0.0063,  ...,  0.0063,  0.0063,  0.0063]]),
  'model.2.weight': tensor([[-5.9722e-05,  0.0000e+00, -1.4242e-04,  ...,  0.0000e+00,
            0.0000e+00, -3.7346e-04],
          [ 2.2877e-03,  0.0000e+00, -1.2974e-04,  ...,  0.0000e+00,
            0.0000e+00, -3.0023e-04],
          [ 2.4847e-06,  0.0000e+00,  7.1310e-06,  ...,  0.0000e+00,
            0.0000e+00, -3.5155e-02],
          ...,
          [ 4.9927e-02,  0.0000e+00,  5.5324e-04,  ...,  0.0000e+00,
            0.0000e+00,  1.0668e-03],
          [-5.0865e-03,  0.0000e+00, -6.1987e-04,  ...,  0.000

In [73]:
gradients.get('weight').get('model.0.weight')

tensor([[ 0.0118,  0.0118,  0.0118,  ...,  0.0118,  0.0118,  0.0118],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0013, -0.0013, -0.0013,  ..., -0.0013, -0.0013, -0.0013],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0063,  0.0063,  0.0063,  ...,  0.0063,  0.0063,  0.0063]])