In [1]:
import os
import sys
import platform
# Make the directory above notebooks/pruning_quantization importable
# (presumably the project root that holds config, data and model_config).
# The sub-path is built with os.path.join so the separator matches the host OS,
# and the entry is only appended once so re-running this cell is idempotent.
_notebook_subdir = os.path.join("notebooks", "pruning_quantization")
_project_root = os.path.normpath(os.getcwd().replace(_notebook_subdir, ""))
if _project_root not in sys.path:
    sys.path.append(_project_root)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from config import Config
from data.dataset import NetworkDataset, load_datasets
from model_config import MLP_Models, LSTM_Models, GRU_Models, CNN_models
import copy
import torch.ao.quantization as quant
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Experiment configuration; later cells read conf.datasets, conf.batch_size,
# conf.epochs and conf.device from it.
conf = Config()
# Model family used for this run. Its `type` attribute is also passed to
# load_datasets() in the data-loading cell.
load_model = CNN_models()
# Alternative model family (disabled):
#load_model = MLP_Models()
# Pick the "cnn_4" configuration and build the model object from it.
# NOTE(review): the returned object is used as a wrapper below (it exposes
# .train(train_loader, val_loader, epochs) and the network as .model).
model_conf = load_model.cnn_4
model = load_model.get_model(model_conf)
# Show the device named in the config.
print(conf.device)

cuda


In [3]:
# data loader
# Fetch the train / validation / test splits for the selected model family.
X_train, y_train, X_val, y_val, X_test, y_test = load_datasets(
    conf.datasets,
    model_type=load_model.type,
)

# Training split: batches are reshuffled on every pass.
train_dataset = NetworkDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=conf.batch_size, shuffle=True)

# Validation split: shuffled as well.
val_dataset = NetworkDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=conf.batch_size, shuffle=True)

# Test split: default (sequential) ordering.
test_dataset = NetworkDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=conf.batch_size)

# Calibration data for quantization: an independent deep copy of the training
# dataset, iterated in a fixed order.
train_dataset_no_aug = copy.deepcopy(train_dataset)
calibration_loader = DataLoader(
    train_dataset_no_aug,
    batch_size=conf.batch_size,
    shuffle=False,
)

In [4]:
def evaluate(model, dataloader, device="cpu"):
    """Return the top-1 accuracy of ``model`` over every batch in ``dataloader``.

    The function does not change the model's train/eval mode and does not move
    the model itself; only the batches are moved to ``device``. Callers are
    expected to have put the model in eval mode and on ``device`` beforehand.

    Args:
        model: Callable mapping an input batch to per-class scores, with the
            class axis at dim 1.
        dataloader: Iterable of ``(inputs, labels)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples, since accuracy is
            undefined in that case.
    """
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)

            outputs = model(x)     # FP32 input
            # Predicted class = index of the largest score along dim 1.
            preds = outputs.argmax(dim=1)

            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        # Fail with an explicit message instead of a bare ZeroDivisionError.
        raise ValueError("evaluate() received a dataloader that yielded no samples")

    return correct / total

In [5]:
# Train the wrapper for conf.epochs epochs using the train and validation loaders.
# NOTE(review): `model` is the project wrapper, so this .train(...) is its own
# training routine (it takes loaders and an epoch count), not nn.Module.train().
# The three return values are presumably per-epoch metrics and loss histories —
# confirm against the wrapper's implementation.
train_metrics, train_loss, val_loss = model.train(train_loader, val_loader, conf.epochs)

Epoch: 1/10, Macro-F1 score: 0.59, Micro-F1 score: 0.77, Macro ROC AUC score: 0.97, Train loss: 0.409, Val loss: 0.229
Epoch: 2/10, Macro-F1 score: 0.65, Micro-F1 score: 0.80, Macro ROC AUC score: 0.97, Train loss: 0.223, Val loss: 0.202
Epoch: 3/10, Macro-F1 score: 0.68, Micro-F1 score: 0.81, Macro ROC AUC score: 0.97, Train loss: 0.199, Val loss: 0.183
Epoch: 4/10, Macro-F1 score: 0.69, Micro-F1 score: 0.81, Macro ROC AUC score: 0.97, Train loss: 0.186, Val loss: 0.178
Epoch: 5/10, Macro-F1 score: 0.73, Micro-F1 score: 0.89, Macro ROC AUC score: 0.98, Train loss: 0.154, Val loss: 0.127
Epoch: 6/10, Macro-F1 score: 0.75, Micro-F1 score: 0.90, Macro ROC AUC score: 0.98, Train loss: 0.123, Val loss: 0.119
Epoch: 7/10, Macro-F1 score: 0.75, Micro-F1 score: 0.90, Macro ROC AUC score: 0.98, Train loss: 0.114, Val loss: 0.119
Epoch: 8/10, Macro-F1 score: 0.76, Micro-F1 score: 0.90, Macro ROC AUC score: 0.99, Train loss: 0.106, Val loss: 0.107
Epoch: 9/10, Macro-F1 score: 0.76, Micro-F1 scor

In [6]:
# Take the trained network out of the wrapper and move it to the CPU in place
# (.cpu() returns the same module), where the quantization cells below operate.
fp32_model = model.model.cpu()
# Switch to eval mode; .eval() returns the module, so as the cell's last
# expression it also displays the architecture.
fp32_model.eval()

CNN(
  (ln1): L2ByteNorm()
  (conv): ModuleList(
    (0): Sequential(
      (0): Conv1d(1, 32, kernel_size=(23,), stride=(1,))
      (1): LeakyReLU(negative_slope=0.01)
      (2): Dropout(p=0.2, inplace=False)
    )
    (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Sequential(
      (0): Conv1d(32, 64, kernel_size=(19,), stride=(1,))
      (1): LeakyReLU(negative_slope=0.01)
      (2): Dropout(p=0.2, inplace=False)
    )
    (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Conv1d(64, 128, kernel_size=(15,), stride=(1,))
      (1): LeakyReLU(negative_slope=0.01)
      (2): Dropout(p=0.2, inplace=False)
    )
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Sequential(
      (0): Conv1d(128, 192, kernel_size=(11,), stride=(1,))
      (1): LeakyReLU(negative_slope=0.01)
      (2): Dropout(p=0.2, inplace=False)
    )
    (7): MaxPool1d(kernel

In [7]:
from itertools import islice

from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

# Single source of truth for the quantized backend, so the runtime engine and
# the qconfig can never disagree when switching targets.
QUANT_BACKEND = "fbgemm"  # x86
# QUANT_BACKEND = "qnnpack"  # ARM

# Number of calibration batches fed through the observers. The earlier loop
# (`if i > 20: break` placed after the forward pass) ran exactly 22 batches;
# that count is kept here, just made explicit.
NUM_CALIBRATION_BATCHES = 22

torch.backends.quantized.engine = QUANT_BACKEND
qconfig = quant.get_default_qconfig(QUANT_BACKEND)

# 1. Work on a copy so the FP32 reference model stays untouched.
model_to_quantize = copy.deepcopy(fp32_model)
model_to_quantize.cpu()
model_to_quantize.eval()

# 2. Quantize everything by default; a qconfig of None excludes that module
#    from quantization, keeping it in FP32.
qconfig_mapping = (
    quant.QConfigMapping()
    .set_global(qconfig)
    .set_module_name("ln1", None)     # normalization layer stays FP32
    .set_module_name("ln2", None)     # stays FP32
    .set_module_name("ln3", None)     # stays FP32
    .set_module_name("output", None)  # final layer stays FP32
)

# 3. Use one real calibration sample (batch dimension kept) as the example
#    input instead of a hard-coded shape, so it always matches what the
#    model is actually fed during calibration.
example_input = next(iter(calibration_loader))[0][:1]

prepared = prepare_fx(
    model_to_quantize,
    qconfig_mapping,
    example_inputs=(example_input,)
)

# 4. Calibrate: run representative batches through the observed model.
with torch.no_grad():
    for x, _ in islice(calibration_loader, NUM_CALIBRATION_BATCHES):
        prepared(x)

# 5. Replace the observed modules with their quantized counterparts.
int8_model = convert_fx(prepared)

In [9]:
# Print the converted model to check which layers were replaced by quantized
# modules and which were left in floating point.
print(int8_model)
# Disabled weight-dtype probe:
#print(int8_model.fc1.weight().dtype)

GraphModule(
  (conv): Module(
    (0): Module(
      (0): QuantizedConv1d(1, 32, kernel_size=(23,), stride=(1,), scale=1.2880123853683472, zero_point=66)
      (1): QuantizedLeakyReLU(negative_slope=0.01)
      (2): QuantizedDropout(p=0.2, inplace=False)
    )
    (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Module(
      (0): QuantizedConv1d(32, 64, kernel_size=(19,), stride=(1,), scale=0.6548764705657959, zero_point=69)
      (1): QuantizedLeakyReLU(negative_slope=0.01)
      (2): QuantizedDropout(p=0.2, inplace=False)
    )
    (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Module(
      (0): QuantizedConv1d(64, 128, kernel_size=(15,), stride=(1,), scale=0.3296360373497009, zero_point=78)
      (1): QuantizedLeakyReLU(negative_slope=0.01)
      (2): QuantizedDropout(p=0.2, inplace=False)
    )
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Module(
    

In [10]:
#Verify Quantization
fp32_acc = evaluate(fp32_model, test_loader)
int8_acc = evaluate(int8_model, test_loader)

print(f"FP32 accuracy: {fp32_acc:.4f}")
print(f"INT8 accuracy: {int8_acc:.4f}")
print(f"Drop: {fp32_acc - int8_acc:.4f}")

FP32 accuracy: 0.9069
INT8 accuracy: 0.8931
Drop: 0.0138


In [11]:
# Spot-check: compare labels with both models' predictions on the first test batch.
data, labels = next(iter(test_loader))

# Pure inference, so disable autograd tracking; otherwise the FP32 forward
# pass would build a graph that is never used.
with torch.no_grad():
    q_pred = int8_model(data).argmax(dim=1)
    pred = fp32_model(data).argmax(dim=1)

# First ten entries: ground truth, INT8 prediction, FP32 prediction.
print(labels[:10])
print(q_pred[:10])
print(pred[:10])

tensor([ 0,  0, 14,  0, 12, 10,  5, 15, 10,  5])
tensor([ 0,  0, 14,  0, 12, 10,  5, 15, 10,  5])
tensor([ 0,  0, 14,  0, 12, 10,  5, 15, 10,  5])


In [13]:
"""
int8_model = quant.quantize_dynamic(
    fp32_model,
    {nn.Linear},
    dtype=torch.qint8
)

int8_model.fc2 = fp32_model.fc2  # overwrite last layer with FP32
"""

'\nint8_model = quant.quantize_dynamic(\n    fp32_model,\n    {nn.Linear},\n    dtype=torch.qint8\n)\n\nint8_model.fc2 = fp32_model.fc2  # overwrite last layer with FP32\n'