### HW 2-4 Using Pytorch C++ API to do model analysis on the transformed model graph

In [6]:
import torch
import torchvision

# An instance of your model.
model = torchvision.models.alexnet(pretrained=True)
model.eval()

# # An example input you would normally provide to your model's forward() method.
example = torch.rand(1, 3, 224, 224)

# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)

# Serializing Your Script Module to a File
traced_script_module.save("traced_resnet_model.pt")

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/guofangyu/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:08<00:00, 28.7MB/s] 


In [7]:
total_params = sum(p.numel() for p in model.parameters())
print("Total number of parameters: ", total_params)

Total number of parameters:  61100840


In [8]:
61100840*4

244403360

In [2]:
import torchvision.models as models

# 加載 GoogLeNet 模型
model = models.alexnet(pretrained=True)
print(model)

input_shape = (1, 3, 224, 224)


total_params = sum(p.numel() for p in model.parameters())
print("Total number of parameters: ", total_params)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/guofangyu/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [01:19<00:00, 3.08MB/s] 


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [4]:
import torchinfo
input_shape = (3, 224, 224)
torchinfo.summary(model, input_shape, batch_dim = 0, col_names=("input_size", "output_size"), verbose=0)

Layer (type:depth-idx)                   Input Shape               Output Shape
AlexNet                                  [1, 3, 224, 224]          [1, 1000]
├─Sequential: 1-1                        [1, 3, 224, 224]          [1, 256, 6, 6]
│    └─Conv2d: 2-1                       [1, 3, 224, 224]          [1, 64, 55, 55]
│    └─ReLU: 2-2                         [1, 64, 55, 55]           [1, 64, 55, 55]
│    └─MaxPool2d: 2-3                    [1, 64, 55, 55]           [1, 64, 27, 27]
│    └─Conv2d: 2-4                       [1, 64, 27, 27]           [1, 192, 27, 27]
│    └─ReLU: 2-5                         [1, 192, 27, 27]          [1, 192, 27, 27]
│    └─MaxPool2d: 2-6                    [1, 192, 27, 27]          [1, 192, 13, 13]
│    └─Conv2d: 2-7                       [1, 192, 13, 13]          [1, 384, 13, 13]
│    └─ReLU: 2-8                         [1, 384, 13, 13]          [1, 384, 13, 13]
│    └─Conv2d: 2-9                       [1, 384, 13, 13]          [1, 256, 13, 13]
│    └─R

In [6]:
import torchvision.models as models
import torch
activation = {}
# Define a hook function
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Load a pre-trained AlexNet model
# model = models.mobilenet_v2(pretrained=True)
# model.eval()
model.eval()
# Dictionary to store activations from each layer
activation = {}

# Register hook to each linear layer
for layer_name, layer in model.named_modules():
    layer.register_forward_hook(get_activation(layer_name))

# Run model inference
data = torch.randn(1, 3, 224, 224)
output = model(data)

# Access the saved activations
local_memory = 0
for layer in activation:
    # 所有 layer 的 tensor 皆為 float32，因此以 4 byte 計算
    local_memory += torch.numel(activation[layer])*4
    # print(f"Activation from layer {layer}: {activation[layer].shape}")

print(f"Activation memory storage requirement: {local_memory} byte ({round(local_memory/1048576, 2)}MB)")

Activation memory storage requirement: 4437728 byte (4.23MB)


In [12]:
import torch.nn.functional as F
import torch.nn as nn
import torch
import torchvision

def calculate_output_shape(input_shape, layer):
    # Calculate the output shape for Conv2d, MaxPool2d, and Linear layers
    if isinstance(layer, (nn.Conv2d, nn.MaxPool2d)):
        kernel_size = (
            layer.kernel_size
            if isinstance(layer.kernel_size, tuple)
            else (layer.kernel_size, layer.kernel_size)
        )
        stride = (
            layer.stride
            if isinstance(layer.stride, tuple)
            else (layer.stride, layer.stride)
        )
        padding = (
            layer.padding
            if isinstance(layer.padding, tuple)
            else (layer.padding, layer.padding)
        )
        dilation = (
            layer.dilation
            if isinstance(layer.dilation, tuple)
            else (layer.dilation, layer.dilation)
        )

        output_height = (
            input_shape[1] + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1
        ) // stride[0] + 1
        output_width = (
            input_shape[2] + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1
        ) // stride[1] + 1
        return (
            layer.out_channels if hasattr(layer, "out_channels") else input_shape[0],
            output_height,
            output_width,
        )
    elif isinstance(layer, nn.Linear):
        # For Linear layers, the output shape is simply the layer's output features
        return (layer.out_features,)
    else:
        return input_shape


def calculate_macs(layer, input_shape, output_shape):
    # Calculate MACs for Conv2d and Linear layers
    if isinstance(layer, nn.Conv2d):
        kernel_ops = (
            layer.kernel_size[0]
            * layer.kernel_size[1]
            * (layer.in_channels / layer.groups)
        )
        output_elements = output_shape[1] * output_shape[2]
        print(f"kernel ops = {kernel_ops}, {layer.kernel_size[0]}*{layer.kernel_size[1]}*{layer.in_channels} / {layer.groups}")
        print(f"output-elements = {output_elements}, 1 {output_shape[1]}*{output_shape[2]}")
        print(f"layer_out_channel = {layer.out_channels}")
        
        macs = int(kernel_ops * output_elements * layer.out_channels)
        return macs
    elif isinstance(layer, nn.Linear):
        # For Linear layers, MACs are the product of input features and output features
        macs = int(layer.in_features * layer.out_features)
        return macs
    else:
        return 0

model = torchvision.models.alexnet(pretrained=True)
model.eval()

# Initial input shape
input_shape = (3, 224, 224)
total_macs = 0

# Iterate through the layers of the model
for name, layer in model.named_modules():
    if isinstance(layer, (nn.Conv2d, nn.MaxPool2d, nn.ReLU, nn.Linear)):
        output_shape = calculate_output_shape(input_shape, layer)
        print(output_shape)
        macs = calculate_macs(layer, input_shape, output_shape)
        total_macs += macs
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            print(
                f"Layer: {name}, Type: {type(layer).__name__}, Input Shape: {input_shape}, Output Shape: {output_shape}, MACs: {macs}"
            )
            print("===")
        # elif isinstance(layer, nn.MaxPool2d):
        #     # Also print shape transformation for MaxPool2d layers (no MACs calculated)
        #     # print(
        #     #     f"Layer: {name}, Type: {type(layer).__name__}, Input Shape: {input_shape}, Output Shape: {output_shape}, MACs: N/A"
        #     # )
        input_shape = output_shape  # Update the input shape for the next layer

print(f"Total MACs: {total_macs}")

(64, 55, 55)
kernel ops = 363.0, 11*11*3 / 1
output-elements = 3025, 1 55*55
layer_out_channel = 64
Layer: features.0, Type: Conv2d, Input Shape: (3, 224, 224), Output Shape: (64, 55, 55), MACs: 70276800
===
(64, 55, 55)
(64, 27, 27)
(192, 27, 27)
kernel ops = 1600.0, 5*5*64 / 1
output-elements = 729, 1 27*27
layer_out_channel = 192
Layer: features.3, Type: Conv2d, Input Shape: (64, 27, 27), Output Shape: (192, 27, 27), MACs: 223948800
===
(192, 27, 27)
(192, 13, 13)
(384, 13, 13)
kernel ops = 1728.0, 3*3*192 / 1
output-elements = 169, 1 13*13
layer_out_channel = 384
Layer: features.6, Type: Conv2d, Input Shape: (192, 13, 13), Output Shape: (384, 13, 13), MACs: 112140288
===
(384, 13, 13)
(256, 13, 13)
kernel ops = 3456.0, 3*3*384 / 1
output-elements = 169, 1 13*13
layer_out_channel = 256
Layer: features.8, Type: Conv2d, Input Shape: (384, 13, 13), Output Shape: (256, 13, 13), MACs: 149520384
===
(256, 13, 13)
(256, 13, 13)
kernel ops = 2304.0, 3*3*256 / 1
output-elements = 169, 1 13*1

In [1]:
4437728 - 4392864

44864