In [1]:
import sys
import copy

import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms

In [2]:
# sys.path.append("/home/matthias/Documents/EmbeddedAI/deep-microcompression/")
sys.path.append("../../")

from development import (
    Sequential,
    BatchNorm2d,
    Conv2d,
    Linear,
    ReLU,
    MaxPool2d,
    Flatten
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
mobilenetv1_file = f"mobilenetv1_state_dict_{DEVICE}.pth"

LUCKY_NUMBER = 25
torch.manual_seed(LUCKY_NUMBER)
torch.random.manual_seed(LUCKY_NUMBER)
torch.cuda.manual_seed(LUCKY_NUMBER)


In [4]:
DEVICE

'cuda'

In [5]:
data_transform = transforms.Compose([
    # transforms.RandomCrop((24, 24)),
    # transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

cifar10_train_dataset = datasets.CIFAR10("./datasets", train=True, download=True, transform=data_transform)
cifar10_test_dataset = datasets.CIFAR10("./datasets", train=False, download=True, transform=data_transform)

cifar10_train_loader = data.DataLoader(cifar10_train_dataset, batch_size=32, shuffle=True)
cifar10_test_loader = data.DataLoader(cifar10_test_dataset, batch_size=32)

cifar100_train_dataset = datasets.CIFAR100("./datasets", train=True, download=True, transform=data_transform)
cifar100_test_dataset = datasets.CIFAR100("./datasets", train=False, download=True, transform=data_transform)

cifar100_train_loader = data.DataLoader(cifar100_train_dataset, batch_size=32, shuffle=True)
cifar100_test_loader = data.DataLoader(cifar100_test_dataset, batch_size=32)


In [6]:
def ConvBatchReLU(
        in_channels:int,
        out_channels:int,
        kernel_size:int,
        stride:int = 1,
        padding:int = 0,
        groups:int = 1,
):
    return (Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, groups=groups, padding=padding),
            BatchNorm2d(num_features=out_channels),
            ReLU())

In [7]:
def DeepWiseSeperableConv2d(
        in_channels:int,
        out_channels:int,
        kernel_size:int,
        stride:int,
        padding:int,
):
    return (
        *ConvBatchReLU(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, stride=stride, padding=padding,  groups=in_channels),
        *ConvBatchReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, groups=1)
    )

In [8]:
mobilenetv1_model = Sequential(
    *ConvBatchReLU(in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=0),
    *ConvBatchReLU(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0),
    *DeepWiseSeperableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0),
    *DeepWiseSeperableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0),
    *DeepWiseSeperableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0),
    *DeepWiseSeperableConv2d(in_channels=64, out_channels=16, kernel_size=3, stride=1, padding=0),
    Flatten(),
    Linear(in_features= 16*5*5, out_features=10)
).to(DEVICE)

top1_acc_fun = lambda y_pred, y_true: (y_pred.argmax(dim=1) == y_true).sum().item()
# top5_acc_fun = lambda y_pred, y_true: (y_true in (y_pred.topk(dim=1))).sum().item()

In [9]:
try:
    # raise RuntimeError
    mobilenetv1_model.load_state_dict(torch.load(mobilenetv1_file, weights_only=True))
    
except (RuntimeError, FileNotFoundError, RuntimeError) as e:
    
    criterion_fun = nn.CrossEntropyLoss()
    optimizion_fun = optim.Adam(mobilenetv1_model.parameters(), lr=1.e-3)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2, factor=.1)

    mobilenetv1_model.fit(
        cifar10_train_loader, 1, 
        criterion_fun, optimizion_fun, lr_scheduler,
        validation_dataloader=cifar10_test_loader, 
        metrics={"top1_acc" : top1_acc_fun},
        device=DEVICE,
    )
    torch.save(mobilenetv1_model.state_dict(), mobilenetv1_file)
    
mobilenetv1_model.evaluate(cifar10_test_loader, top1_acc_fun, device=DEVICE)

100%|██████████| 313/313 [00:03<00:00, 93.86it/s] 


0.4692

In [10]:
mobilenetv1_model.convert_to_c(var_name="mobilenetv1_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")
mobilenetv1_model.to(DEVICE)
mobilenetv1_model.test(DEVICE)

tensor([[-1.7732, -2.4490,  0.4521,  0.2767,  0.4370,  0.2624, -1.9979,  1.0788,
         -3.4145, -1.5809]], device='cuda:0')

In [11]:
mobilenetv1_model.cpu()

Sequential(
  (conv2d_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (batchnorm2d_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_0): ReLU()
  (conv2d_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm2d_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (conv2d_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64)
  (batchnorm2d_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (conv2d_3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  (batchnorm2d_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (conv2d_4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64)
  (batchnorm2d_4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_4): ReLU()
  (conv2d_5): Conv2d(64, 64, kernel_size=(1, 1), stride=(

## Prunning

In [12]:
sparsity = .34

mobilenetv1_model.cpu()
mobilenetv1_mcu_model = mobilenetv1_model.prune_channel(sparsity)

mobilenetv1_mcu_model.to(DEVICE)
print(f"Pruned with {sparsity}, acc = {mobilenetv1_mcu_model.evaluate(cifar10_test_loader, top1_acc_fun, device=DEVICE)}")

mobilenetv1_mcu_model.cpu()
mobilenetv1_mcu_model.convert_to_c(var_name="mobilenetv1_mcu_model", src_dir="./HP HP Pavilion Laptop 15-cs3xxx/src/", include_dir="./HP HP Pavilion Laptop 15-cs3xxx/include/")

mobilenetv1_mcu_model.to(DEVICE)
mobilenetv1_mcu_model.test(DEVICE)

100%|██████████| 313/313 [00:03<00:00, 94.01it/s] 


Pruned with 0.34, acc = 0.1665


tensor([[-1.4526, -2.7972,  0.7867,  1.6661,  0.2931,  0.6367,  1.9935, -1.4532,
         -1.7374, -3.2327]], device='cuda:0')

In [13]:
mobilenetv1_mcu_model.cpu()

Sequential(
  (conv2d_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (batchnorm2d_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_0): ReLU()
  (conv2d_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm2d_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (conv2d_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64)
  (batchnorm2d_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (conv2d_3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  (batchnorm2d_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (conv2d_4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64)
  (batchnorm2d_4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_4): ReLU()
  (conv2d_5): Conv2d(64, 64, kernel_size=(1, 1), stride=(

In [14]:
test_input = mobilenetv1_mcu_model.test_input.clone()
test_input.shape

torch.Size([1, 3, 32, 32])

In [15]:
torch.mul(test_input[0, 0, :3, :3], conv_0_weight[0, 0, :, :])

NameError: name 'conv_0_weight' is not defined

In [None]:
conv = mobilenetv1_mcu_model[0]
conv_0_weight, conv_01_bias = conv.get_compression_parameters()


prev  tensor([0, 1, 2])
current  tensor([0, 3])


In [None]:
    # *ConvBatchReLU(in_channels=3, out_channels=4, kernel_size=3, stride=5, padding=0),
nn.functional.conv2d(test_input, conv_0_weight, conv_01_bias, stride=5, padding=0, groups=1)

tensor([[[[-0.2533, -0.2824, -0.2574, -0.1810, -0.2239, -0.2670],
          [-0.3305, -0.3295, -0.3087, -0.2380, -0.2280, -0.2843],
          [-0.2637, -0.4784, -0.3115, -0.2665, -0.1900, -0.1550],
          [-0.3471, -0.3795, -0.5152, -0.2797, -0.4471, -0.4776],
          [-0.3192, -0.3125, -0.4427, -0.2451, -0.3010, -0.4832],
          [-0.3074, -0.3045, -0.3776, -0.1619, -0.2142, -0.3204]],

         [[-0.1799, -0.2380, -0.2670, -0.4844, -0.3505, -0.2095],
          [-0.5141, -0.5323, -0.5396, -0.1620, -0.4902, -0.4292],
          [-0.6067, -0.7609, -0.5807, -0.2428, -0.3661, -0.4772],
          [-0.6538, -0.6226, -0.8619, -0.3631, -0.3921, -0.7202],
          [-0.8195, -0.8020, -0.5469, -0.5109, -0.7914, -0.5722],
          [-0.7686, -0.7674, -0.6896, -0.3620, -0.5315, -0.4710]]]])

In [None]:

# criterion_fun = nn.CrossEntropyLoss()
# optimizion_fun = optim.Adam(mobilenetv1_mcu_model.parameters(), lr=1.e-3)
# lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizion_fun, mode="min", patience=2)

# mobilenetv1_mcu_model.fit(
#     cifar10_train_loader, 15, 
#     criterion_fun, optimizion_fun, lr_scheduler,
#     validation_dataloader=cifar10_test_loader, 
#     metrics={"top1_acc" : top1_acc_fun},
#     device=DEVICE,
# )