In [5]:
import torch
import torchvision.models as models
import torch.nn as nn
import onnx
from onnx import shape_inference
import sys
from tabulate import tabulate
from onnx import onnx_ml_pb2 as xpb2
import onnx.helper as helper
from onnx import numpy_helper
import numpy as np
from onnx import TensorProto
import onnxruntime as ort
from torch import Tensor
from torch.nn.parameter import Parameter, UninitializedParameter
import math

### Original AlexNet

In [6]:
class AlexNet(nn.Module):
    """Reference AlexNet built from stock ``torch.nn`` layers.

    The network is organised as three stages:

    * ``features``   -- five convolutions with ReLU activations and three
      max-pooling layers,
    * ``avgpool``    -- adaptive average pooling to a 6x6 spatial grid,
    * ``classifier`` -- three fully connected layers (with dropout before the
      first two) producing 1000 outputs.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stage; the list order defines the Sequential indices.
        feature_layers = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Fixes the spatial size to 6x6 so the classifier input is 256*6*6.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))

        # Fully connected stage.
        classifier_layers = [
            nn.Dropout(),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=1000),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run features -> avgpool -> flatten -> classifier and return the logits."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, collapse everything else.
        flat = torch.flatten(pooled, start_dim=1)
        return self.classifier(flat)

In [7]:
# Build the reference AlexNet defined above; no saved weights are loaded here.
model = AlexNet()
input_data = torch.randn(1, 3, 224, 224)  # Assuming batch size is 1
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

### My Linear

In [8]:
class MyLinear(nn.Module):
    """Linear layer ``y = x @ W + b`` evaluated as a tiled (blocked) matmul.

    The weight is stored as ``(in_features, out_features)`` -- the transpose of
    ``nn.Linear``'s layout -- so the forward pass multiplies ``input @ weight``
    directly. The product is expressed as one ``torch.matmul`` per tile,
    combined with ``torch.add`` and ``torch.cat``.

    Args:
        in_features: Size of each input row.
        out_features: Size of each output row.
        bias: If ``False`` no bias parameter is created and none is added.
        block_size: Edge length of a tile. When ``None`` (the default) the
            module-level global ``Block_Size`` is read at call time, which is
            how the layer was configured before this argument existed.

    Raises:
        ValueError: If ``block_size`` is given and is not positive.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 block_size: "int | None" = None) -> None:
        super().__init__()
        if block_size is not None and block_size <= 0:
            raise ValueError(f"block_size must be positive, got {block_size}")
        self.in_features = in_features
        self.out_features = out_features
        self.block_size = block_size
        self.weight = Parameter(torch.empty((in_features, out_features)))
        if bias:
            self.bias = Parameter(torch.empty(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Initialise weight and bias uniformly in ``[-1/sqrt(in_features), +1/sqrt(in_features)]``.

        ``kaiming_uniform_(w, a=sqrt(5))`` reduces to exactly this bound, but the
        ``nn.init`` helpers take fan-in from ``size(1)``. Because the weight here
        is stored as ``(in_features, out_features)`` that would be
        ``out_features``, so the bound is computed from ``in_features`` directly.
        """
        bound = 1 / math.sqrt(self.in_features) if self.in_features > 0 else 0
        nn.init.uniform_(self.weight, -bound, bound)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -bound, bound)

    @staticmethod
    def _split_into_tiles(matrix: Tensor, block: int):
        """Split a 2-D tensor into a row-major grid (list of tuples) of tiles."""
        return [torch.split(rows, block, dim=1)
                for rows in torch.split(matrix, block, dim=0)]

    @staticmethod
    def _reduce_partials(partials) -> Tensor:
        """Sum partial products pairwise: add the two oldest, queue the result."""
        queue = list(partials)
        while len(queue) > 1:
            queue.append(torch.add(queue.pop(0), queue.pop(0)))
        return queue[0]

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input @ weight (+ bias)`` for a 2-D ``input`` of shape ``(M, in_features)``."""
        # Fall back to the notebook-level global when no size was given.
        block = self.block_size if self.block_size is not None else Block_Size

        a_tiles = self._split_into_tiles(input, block)
        b_tiles = self._split_into_tiles(self.weight, block)

        # Tile counts come from the actual splits, not from shape arithmetic.
        inner_blocks = len(b_tiles)
        out_blocks = len(b_tiles[0])

        out_rows = []
        for a_row in a_tiles:
            if len(a_row) != inner_blocks:
                raise ValueError(
                    f"input splits into {len(a_row)} column blocks but weight has "
                    f"{inner_blocks} row blocks (in_features={self.in_features})"
                )
            out_row = []
            for j in range(out_blocks):
                partials = [torch.matmul(a_row[k], b_tiles[k][j])
                            for k in range(inner_blocks)]
                out_row.append(self._reduce_partials(partials))
            out_rows.append(torch.cat(out_row, dim=1))

        out = torch.cat(out_rows, dim=0)
        # A layer built with bias=False has self.bias None; skip the add then.
        if self.bias is not None:
            out = torch.add(out, self.bias)
        return out

    def extra_repr(self) -> str:
        return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'

### My Alexnet

In [10]:
class modified_AlexNet(nn.Module):
    """AlexNet variant with every layer exposed as a named attribute.

    The layer sequence matches the reference ``AlexNet`` above, except that the
    three fully connected layers are ``MyLinear`` modules and flattening is done
    by an ``nn.Flatten`` module.
    """

    def __init__(self):
        super().__init__()

        # Small factories for the layers that repeat with identical settings.
        make_relu = lambda: nn.ReLU(inplace=True)
        make_pool = lambda: nn.MaxPool2d(kernel_size=3, stride=2)

        # --- convolutional stage ---
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=11, stride=4, padding=2)
        self.relu1 = make_relu()
        self.mp1 = make_pool()
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, padding=2)
        self.relu2 = make_relu()
        self.mp2 = make_pool()
        self.conv3 = nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1)
        self.relu3 = make_relu()
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)
        self.relu4 = make_relu()
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.relu5 = make_relu()
        self.mp3 = make_pool()

        # --- pooling to 6x6 and flattening to (batch, 9216) ---
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))
        self.flatten = nn.Flatten(start_dim=1, end_dim=-1)

        # --- classifier stage (attribute order here is registration order) ---
        self.dropout1 = nn.Dropout()
        self.linear1 = MyLinear(in_features=9216, out_features=4096)
        self.relu6 = make_relu()
        self.linear2 = MyLinear(in_features=4096, out_features=4096)
        self.dropout2 = nn.Dropout()
        self.relu7 = make_relu()
        self.linear3 = MyLinear(in_features=4096, out_features=1000)

    def forward(self, x: torch.Tensor) ->torch.Tensor:
        """Apply the layers in execution order and return the final output."""
        pipeline = (
            self.conv1, self.relu1, self.mp1,
            self.conv2, self.relu2, self.mp2,
            self.conv3, self.relu3,
            self.conv4, self.relu4,
            self.conv5, self.relu5, self.mp3,
            self.avgpool,
            self.flatten,
            self.dropout1, self.linear1, self.relu6,
            self.dropout2, self.linear2, self.relu7,
            self.linear3,
        )
        for stage in pipeline:
            x = stage(x)
        return x

In [11]:
# Tile edge length; MyLinear.forward reads this module-level name, so it must
# be defined before the model is run.
Block_Size = 64
modified_model = modified_AlexNet()
input_data = torch.randn(1, 3, 224, 224)  # Assuming batch size is 1
# Evaluation mode; as the cell's last expression this also displays the module structure.
modified_model.eval()

modified_AlexNet(
  (conv1): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (relu1): ReLU(inplace=True)
  (mp1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU(inplace=True)
  (mp2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU(inplace=True)
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU(inplace=True)
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU(inplace=True)
  (mp3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dropout1): Dropout(p=0.5, inplace=False)
  (linear1): MyLinear(in_features=9216, out_fe

### Export to TorchScript (.pt)

In [12]:
# Trace the model with the example input and write the traced module to disk.
# NOTE(review): presumably ./models must already exist for save() to succeed -- confirm.
traced_script_module = torch.jit.trace(modified_model, input_data, check_trace=True)
traced_script_module.save("./models/modified_alexnet_pytorch.pt")

In [13]:
# Round-trip check: reload the file written above and run it on the same input.
script_module = torch.jit.load('./models/modified_alexnet_pytorch.pt')
output = script_module(input_data)
# Print the output shape.
print(output.shape)

torch.Size([1, 1000])


### ONNX Version

In [2]:
class modified_AlexNet(nn.Module):
    """AlexNet whose classifier is three tiled matmuls over raw parameters.

    The convolutional stage and pooling match the reference AlexNet. The
    classifier holds its weights directly as ``W1/b1``, ``W2/b2`` and ``W3/b3``
    (weights stored as ``(in_features, out_features)``) and evaluates each
    layer with ``Build_Mylinear_Layer``.

    ``forward`` reads the tile size from the module-level global ``Block_Size``,
    which must be defined before the model is called.
    """

    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # classifier
        self.dropout1 = nn.Dropout()
        self.relu1 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout()
        self.relu2 = nn.ReLU(inplace=True)
        self.W1 = Parameter(torch.empty((9216, 4096)))
        self.b1 = Parameter(torch.empty(4096))
        self.W2 = Parameter(torch.empty((4096, 4096)))
        self.b2 = Parameter(torch.empty(4096))
        self.W3 = Parameter(torch.empty((4096, 1000)))
        self.b3 = Parameter(torch.empty(1000))

        # torch.empty leaves the storage uninitialised; give every classifier
        # tensor defined values before the model is run or exported.
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2), (self.W3, self.b3)):
            self._init_linear_params(weight, bias)

    @staticmethod
    def _init_linear_params(weight: torch.Tensor, bias: torch.Tensor) -> None:
        """Fill ``weight`` (in_features, out_features) and ``bias`` uniformly in ``+-1/sqrt(in_features)``."""
        fan_in = weight.size(0)
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        nn.init.uniform_(weight, -bound, bound)
        nn.init.uniform_(bias, -bound, bound)

    @staticmethod
    def _split_into_tiles(matrix: torch.Tensor, block: int):
        """Split a 2-D tensor into a row-major grid (list of tuples) of tiles."""
        return [torch.split(rows, split_size_or_sections=block, dim=1)
                for rows in torch.split(matrix, split_size_or_sections=block, dim=0)]

    @staticmethod
    def _reduce_partials(partials) -> torch.Tensor:
        """Sum partial products pairwise: add the two oldest, queue the result."""
        queue = list(partials)
        while len(queue) > 1:
            queue.append(torch.add(queue.pop(0), queue.pop(0)))
        return queue[0]

    def Build_Mylinear_Layer(self, A: torch.Tensor, Block_Size: int, idx: int, M: int, N: int, K: int) ->torch.Tensor:
        """Compute ``A @ W_idx + b_idx`` as a tiled matrix multiplication.

        Args:
            A: 2-D input of shape ``(rows, in_features)``.
            Block_Size: Edge length of a tile.
            idx: Selects the parameter pair: 1 -> ``W1/b1``, 2 -> ``W2/b2``,
                any other value -> ``W3/b3``.
            M, N, K: Nominal row count, inner size and output size. They are
                accepted for backward compatibility only; the tile counts are
                derived from the tensors themselves, so a batch larger than
                ``M`` is no longer truncated.

        Returns:
            Tensor of shape ``(rows, out_features)``.

        Raises:
            ValueError: If ``A`` and the selected weight disagree on the number
                of inner tiles.
        """
        if (idx == 1):
            B = self.W1
            bias = self.b1
        elif(idx == 2):
            B = self.W2
            bias = self.b2
        else:
            B = self.W3
            bias = self.b3

        A_blocks = self._split_into_tiles(A, Block_Size)
        B_blocks = self._split_into_tiles(B, Block_Size)

        # Counts taken from the real splits rather than the M/N/K hints.
        N_blocks = len(B_blocks)
        K_blocks = len(B_blocks[0])

        C_row = []
        for A_row in A_blocks:
            if len(A_row) != N_blocks:
                raise ValueError(
                    f"input splits into {len(A_row)} column blocks but weight "
                    f"{idx} has {N_blocks} row blocks"
                )
            C_tiles = []
            for j in range(K_blocks):
                partials = [torch.matmul(A_row[k], B_blocks[k][j]) for k in range(N_blocks)]
                C_tiles.append(self._reduce_partials(partials))
            C_row.append(torch.cat(C_tiles, dim=1))
        C = torch.cat(C_row, dim=0)
        OUT = torch.add(C, bias)
        return OUT

    def forward(self, x: torch.Tensor) ->torch.Tensor:
        """Run features -> avgpool -> flatten -> tiled classifier and return the result."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout1(x)
        x = self.Build_Mylinear_Layer(x, Block_Size, 1, 1, 9216, 4096)
        x = self.relu1(x)
        x = self.dropout2(x)
        x = self.Build_Mylinear_Layer(x, Block_Size, 2, 1, 4096, 4096)
        x = self.relu2(x)
        x = self.Build_Mylinear_Layer(x, Block_Size, 3, 1, 4096, 1000)
        return x


In [3]:
# Tile edge length; modified_AlexNet.forward reads this module-level name and
# passes it to Build_Mylinear_Layer, so it must be defined before the model is run.
Block_Size = 1024
modified_model = modified_AlexNet()
input_data = torch.randn(1, 3, 224, 224)  # Assuming batch size is 1
# Evaluation mode; as the cell's last expression this also displays the module structure.
modified_model.eval()

modified_AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (dropout1): Dropout(p=0.5, inplace=False)
  (relu1): ReLU(inplace=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (re

### Export to ONNX

In [4]:
# Example input of shape (1, 3, 224, 224) used to trace the model for export.
dummy_input = torch.randn(1, 3, 224, 224)

# Optional labels for the graph inputs and outputs: one data input followed by
# sixteen parameter names. The index is rotated by 10; in the exported graph
# printed by this cell the classifier tensors come first and so receive
# learned_10..learned_15, while the convolution tensors start at learned_0.
# (Unrotated alternative: "learned_%d" % i for i in range(16).)
input_names = ["actual_input_1"] + [f"learned_{(i + 10) % 16}" for i in range(16)]
output_names = ["output1"]

# Convert the model (architecture plus weights) to ONNX with torch's exporter.
torch.onnx.export(
    modified_model,
    dummy_input,
    "./models/modified_alexnet_pytorch.onnx",
    verbose=True,
    input_names=input_names,
    output_names=output_names,
)

Exported graph: graph(%actual_input_1 : Float(1, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cpu),
      %learned_10 : Float(9216, 4096, strides=[4096, 1], requires_grad=1, device=cpu),
      %learned_11 : Float(4096, strides=[1], requires_grad=1, device=cpu),
      %learned_12 : Float(4096, 4096, strides=[4096, 1], requires_grad=1, device=cpu),
      %learned_13 : Float(4096, strides=[1], requires_grad=1, device=cpu),
      %learned_14 : Float(4096, 1000, strides=[1000, 1], requires_grad=1, device=cpu),
      %learned_15 : Float(1000, strides=[1], requires_grad=1, device=cpu),
      %learned_0 : Float(64, 3, 11, 11, strides=[363, 121, 11, 1], requires_grad=1, device=cpu),
      %learned_1 : Float(64, strides=[1], requires_grad=1, device=cpu),
      %learned_2 : Float(192, 64, 5, 5, strides=[1600, 25, 5, 1], requires_grad=1, device=cpu),
      %learned_3 : Float(192, strides=[1], requires_grad=1, device=cpu),
      %learned_4 : Float(384, 192, 3, 3, strides=[1

### Check with onnx

In [6]:
# Load the exported graph (load_external_data=False: external tensor files are
# not read) and run the ONNX checker on it.
onnx_model = onnx.load("./models/modified_alexnet_pytorch.onnx", load_external_data=False)
onnx.checker.check_model(onnx_model)
# Create an ONNX Runtime inference session from the same file.
onnx_session = ort.InferenceSession("./models/modified_alexnet_pytorch.onnx")