# Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.utils.data as data
from torch.profiler import profile, record_function, ProfilerActivity
import numpy as np

# Model

In [2]:
class SimpleModel(nn.Module):
    """Small feed-forward network: Linear -> BatchNorm1d -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Width of the hidden layer (and of the batch norm).
            output_size: Number of values produced per input row.
        """
        super().__init__()
        # Creation order matters for seeded initialisation of the Linear layers.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.bn1 = nn.BatchNorm1d(num_features=hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply layer1, bn1, relu and layer2 in sequence and return the result."""
        hidden = self.relu(self.bn1(self.layer1(x)))
        return self.layer2(hidden)

# Build Model and Load Test Data

In [53]:
def load_test_data(data_dir='/kaggle/input/dmls-ca4/test_data', batch_size=32):
    """Build a DataLoader over the test features and labels stored as ``.npy`` files.

    Args:
        data_dir: Directory containing ``test_x.npy`` and ``test_y.npy``.
            Defaults to the Kaggle input path used by this notebook.
        batch_size: Number of samples per batch. Defaults to 32.

    Returns:
        A ``DataLoader`` yielding ``(features, labels)`` batches in file order
        (no shuffling). Features are converted to float32; labels keep the
        dtype stored in the file.
    """
    # Features are cast to float32 to match the default dtype of nn.Linear weights.
    test_x = np.load(f'{data_dir}/test_x.npy').astype(np.float32)
    test_y = np.load(f'{data_dir}/test_y.npy')

    test_dataset = data.TensorDataset(torch.tensor(test_x), torch.tensor(test_y))
    return data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [54]:
# Network dimensions passed to SimpleModel.
input_size = 512
hidden_size = 32
output_size = 20

# NOTE(review): the model is freshly initialised here; no trained weights are loaded.
model = SimpleModel(input_size, hidden_size, output_size).to("cpu")
# The model is only used for inference below, so switch to evaluation mode:
# BatchNorm1d then normalises with its running statistics instead of per-batch
# statistics and no longer updates those statistics on every forward pass.
model.eval()
test_loader = load_test_data()

# Part 1

### PyTorch profiler - CPU Only

In [55]:
# Profile CPU time of the forward pass over the first 100 test batches.
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    # torch.no_grad() keeps autograd graph construction out of the measurement,
    # since no backward pass is ever run on these outputs.
    with record_function("model_inference"), torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            if i >= 100:  # 100 batches
                break
            model(inputs)

In [56]:
# Aggregate the recorded events and print them ordered by total CPU time.
event_averages = prof.key_averages()
print(event_averages.table(sort_by="cpu_time_total"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        14.93%      17.080ms       100.00%     114.441ms     114.441ms             1  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        28.91%      33.083ms        61.94%      70.881ms     701.794us           101  
                                           aten::select        17.17%      19.653ms        19.89%      22.763ms       3.522us          6464  
                                           aten::linear         0.80%     916.856us        13.72%      15.698ms      78.489us           200  
      

### PyTorch profiler - CPU and Memory

In [61]:
# Profile CPU time and memory of the forward pass over the first 100 test batches.
with profile(
    activities=[ProfilerActivity.CPU],
    profile_memory=True,
    record_shapes=True,
) as prof:
    # torch.no_grad() prevents autograd from retaining intermediate tensors,
    # which would otherwise show up in the time and memory figures.
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            if i >= 100:  # 100 batches
                break
            model(inputs)

In [78]:
# Aggregate the recorded events and print them ordered by CPU memory usage.
event_averages = prof.key_averages()
print(event_averages.table(sort_by="cpu_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        33.96%      31.544ms        71.97%      66.844ms     661.826us       6.34 Mb           0 b           101  
                                            aten::stack         5.16%       4.791ms        15.16%      14.076ms      69.683us       6.34 Mb           0 b           202  
                                              aten::cat         3.87%       3.598ms         3.87%       3.598ms      17.812us       6.34 Mb       6.34

# Part 2

## Tanh

In [82]:
class SimpleModel(nn.Module):
    """Small feed-forward network: Linear -> BatchNorm1d -> Tanh -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Width of the hidden layer (and of the batch norm).
            output_size: Number of values produced per input row.
        """
        super(SimpleModel, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        # Named after the activation it actually holds; nn.Tanh has no
        # parameters, so the state_dict keys are unaffected by this name.
        self.tanh = nn.Tanh()
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply layer1, bn1, tanh and layer2 in sequence and return the result."""
        x = self.layer1(x)
        x = self.bn1(x)
        x = self.tanh(x)
        x = self.layer2(x)
        return x

# Rebuild the model from the SimpleModel class defined in this cell.
model = SimpleModel(input_size, hidden_size, output_size).to("cpu")
# Inference only: evaluation mode makes BatchNorm1d use (and not update) its
# running statistics instead of per-batch statistics.
model.eval()

# Profile CPU time and memory of the forward pass over the first 100 test batches.
with profile(
    activities=[ProfilerActivity.CPU],
    profile_memory=True,
    record_shapes=True,
) as prof:
    # torch.no_grad() keeps autograd bookkeeping out of the measurement.
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            if i >= 100:  # 100 batches
                break
            model(inputs)

print(prof.key_averages().table(sort_by="cpu_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        33.00%      29.237ms        72.03%      63.818ms     631.863us       6.34 Mb           0 b           101  
                                            aten::stack         4.97%       4.402ms        15.19%      13.455ms      66.610us       6.34 Mb           0 b           202  
                                              aten::cat         3.95%       3.500ms         3.95%       3.500ms      17.328us       6.34 Mb       6.34

## Sigmoid

In [83]:
class SimpleModel(nn.Module):
    """Small feed-forward network: Linear -> BatchNorm1d -> Sigmoid -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Width of the hidden layer (and of the batch norm).
            output_size: Number of values produced per input row.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Feed ``x`` through layer1, bn1, sigmoid and layer2, in that order."""
        for stage in (self.layer1, self.bn1, self.sigmoid, self.layer2):
            x = stage(x)
        return x

# Rebuild the model from the SimpleModel class defined in this cell.
model = SimpleModel(input_size, hidden_size, output_size).to("cpu")
# Inference only: evaluation mode makes BatchNorm1d use (and not update) its
# running statistics instead of per-batch statistics.
model.eval()

# Profile CPU time and memory of the forward pass over the first 100 test batches.
with profile(
    activities=[ProfilerActivity.CPU],
    profile_memory=True,
    record_shapes=True,
) as prof:
    # torch.no_grad() keeps autograd bookkeeping out of the measurement.
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            if i >= 100:  # 100 batches
                break
            model(inputs)

print(prof.key_averages().table(sort_by="cpu_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        32.44%      32.156ms        69.44%      68.821ms     681.401us       6.34 Mb           0 b           101  
                                            aten::stack         4.99%       4.947ms        15.35%      15.211ms      75.302us       6.34 Mb           0 b           202  
                                              aten::cat         3.88%       3.846ms         3.88%       3.846ms      19.040us       6.34 Mb       6.34

## GELU

In [85]:
class SimpleModel(nn.Module):
    """Small feed-forward network: Linear -> BatchNorm1d -> GELU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Width of the hidden layer (and of the batch norm).
            output_size: Number of values produced per input row.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.gelu = nn.GELU()
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return layer2(gelu(bn1(layer1(x))))."""
        normalized = self.bn1(self.layer1(x))
        activated = self.gelu(normalized)
        return self.layer2(activated)

# Rebuild the model from the SimpleModel class defined in this cell.
model = SimpleModel(input_size, hidden_size, output_size).to("cpu")
# Inference only: evaluation mode makes BatchNorm1d use (and not update) its
# running statistics instead of per-batch statistics.
model.eval()

# Profile CPU time and memory of the forward pass over the first 100 test batches.
with profile(
    activities=[ProfilerActivity.CPU],
    profile_memory=True,
    record_shapes=True,
) as prof:
    # torch.no_grad() keeps autograd bookkeeping out of the measurement.
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            if i >= 100:  # 100 batches
                break
            model(inputs)

print(prof.key_averages().table(sort_by="cpu_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        24.57%      31.881ms        53.06%      68.861ms     681.796us       6.34 Mb           0 b           101  
                                            aten::stack         3.73%       4.843ms        11.24%      14.581ms      72.184us       6.34 Mb           0 b           202  
                                              aten::cat         2.93%       3.806ms         2.93%       3.806ms      18.840us       6.34 Mb       6.34