In [1]:
# ====== Section 1: Imports and Setup ======
# Standard library imports for data handling and machine learning.
import pickle
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Imports from PyTorch for model definition.
import torch.nn as nn
import torch.optim as optim

# Imports for quantization and model inspection from NNDCT (Neural Network Distiller and Compiler Tools), a toolkit for optimizing models.
import pytorch_nndct
from pytorch_nndct import Inspector

# Set batch size for data loading.
batch_size = 1


[0;32m[VAIQ_NOTE]: Loading NNDCT kernels...[0m


No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [2]:
# ====== Section 2: Data Loading and Preparation ======
# Load dataset from a Pickle file.
data = pd.read_pickle("RML2016.10a_dict.pkl", compression='infer')

# Extract specific modulation types and signal-to-noise ratios (SNR) data.
qpsk_2_data_all = data[('QPSK', 2)]
bpsk_2_data_all = data[('BPSK', 2)]

# Generate labels for the two types of modulation.
qpsk_labels = [1] * 1000  # QPSK labeled as 1.
bpsk_labels = [0] * 1000  # BPSK labeled as 0.

# Combine data and labels from both modulation types.
data_combined = np.concatenate((qpsk_2_data_all, bpsk_2_data_all), axis=0)
labels_combined = np.array(qpsk_labels + bpsk_labels, dtype=np.int64)

# Convert combined data and labels to PyTorch tensors.
data_combined = torch.from_numpy(data_combined).float()
labels_combined = torch.from_numpy(labels_combined).long()

# convert labels 2 NumPy array and then 2 PyTorch tensor
labels_combined = np.array(labels_combined)
labels_combined = torch.from_numpy(labels_combined)

# Split data into training and testing sets.
data_train, data_test, labels_train, labels_test = train_test_split(
    data_combined, labels_combined, test_size=0.2, random_state=42)

In [3]:
# ====== Section 3: Dataset and DataLoader Definition ======
# Define a custom dataset class for loading the data.
class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
         # access a single data sample and label
        sample = self.data[idx]
        label = self.labels[idx]
    
        # Convert sample, min_vals, and max_vals to PyTorch tensors
        sample = torch.tensor(sample, dtype=torch.float32)
        min_vals = torch.tensor(sample.min(axis=1).values, dtype=torch.float32)
        max_vals = torch.tensor(sample.max(axis=1).values, dtype=torch.float32)
        
        #normalize
        epsilon = 1e-10
        normalized_sample = 2 * (sample - min_vals.unsqueeze(1)) / (max_vals.unsqueeze(1) - min_vals.unsqueeze(1) + epsilon) - 1
    
        return normalized_sample, label

# Initialize training and testing datasets and dataloaders.
train_dataset = MyDataset(data_train, labels_train)
test_dataset = MyDataset(data_test, labels_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [4]:
# ====== Section 4: Model Definition ======
# Define a convolutional neural network model.
class CNN2D(nn.Module):
    def __init__(self, num_classes):
        super(CNN2D, self).__init__()
        self.upsample = nn.Upsample(scale_factor=(1, 2), mode='bilinear', align_corners=False)
        self.conv1 = nn.Conv2d(2, 64, kernel_size=(1, 3), padding=(0, 1))
        self.relu1 = nn.ReLU()
        self.adaptive_pool1 = nn.AdaptiveAvgPool2d((1,64))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(1, 3), padding=(0, 1))
        self.relu2 = nn.ReLU()
        self.adaptive_pool2 = nn.AdaptiveAvgPool2d((1,32))
        self.adaptive_avg_pool2d = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc1 = nn.Linear(128, 256)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(256, num_classes)
    
    def forward(self, x):
        x = x.unsqueeze(-2)
        x = self.upsample(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.adaptive_pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.adaptive_pool2(x)
        x = self.adaptive_avg_pool2d(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x

# Initialize the CNN model and print structure.
model = CNN2D(num_classes=2)
print(model)

CNN2D(
  (upsample): Upsample(scale_factor=(1.0, 2.0), mode=bilinear)
  (conv1): Conv2d(2, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (relu1): ReLU()
  (adaptive_pool1): AdaptiveAvgPool2d(output_size=(1, 64))
  (conv2): Conv2d(64, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (relu2): ReLU()
  (adaptive_pool2): AdaptiveAvgPool2d(output_size=(1, 32))
  (adaptive_avg_pool2d): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)


In [5]:
# ====== Section 5: Training and Evaluation Functions ======
# Define functions for training and evaluating the model.
def train(model, train_loader, num_epochs=3):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(num_epochs):
        for data, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

def evaluate(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, labels in test_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Train and evaluate the original model
train(model, train_loader)
evaluate(model, test_loader)

  sample = torch.tensor(sample, dtype=torch.float32)
  min_vals = torch.tensor(sample.min(axis=1).values, dtype=torch.float32)
  max_vals = torch.tensor(sample.max(axis=1).values, dtype=torch.float32)


Epoch [1/3], Loss: 0.0123
Epoch [2/3], Loss: 0.0014
Epoch [3/3], Loss: 0.0032
Test Accuracy: 93.25%


93.25

In [6]:
# ====== Section 6: Dataset and DataLoader Definition ======
# Define target and initialize the inspector
target = "DPUCVDX8G_ISA3_C32B6"
inspector = Inspector(target)

# Set the device based on CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize your model, assuming CNN2D is imported from your model library
inspector_model = CNN2D(2).to(device)

# Assuming 'train_loader' is already defined and is a PyTorch DataLoader
data_dummy, _ = next(iter(train_loader))

# Logging type and shape of the dummy data
print(f"Type: {type(data_dummy)}, Shape: {data_dummy.shape}")

# Create a dummy input for inspection and log its shape
dummy_input = torch.randn(1, 2, 128, device=device)
print(dummy_input.shape)

# Inspect the model using a clone of the dummy data, detached with gradients enabled, and additional parameters
inspector.inspect(inspector_model, data_dummy.clone().detach().requires_grad_(True).to(device), device=device, output_dir="inspect", image_format=None)


[0;32m[VAIQ_NOTE]: Inspector is on.[0m
Type: <class 'torch.Tensor'>, Shape: torch.Size([1, 2, 128])
torch.Size([1, 2, 128])

[0;32m[VAIQ_NOTE]: =>Start to inspect model...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing CNN2D...[0m


  sample = torch.tensor(sample, dtype=torch.float32)
  min_vals = torch.tensor(sample.min(axis=1).values, dtype=torch.float32)
  max_vals = torch.tensor(sample.max(axis=1).values, dtype=torch.float32)



[0;32m[VAIQ_NOTE]: Start to trace and freeze model...[0m

[0;32m[VAIQ_NOTE]: The input model nndct_st_CNN2D_ed is torch.nn.Module.[0m

[0;32m[VAIQ_NOTE]: Finish tracing.[0m

[0;32m[VAIQ_NOTE]: Processing ops...[0m


██████████████████████████████████████████████████| 16/16 [00:00<00:00, 2066.80it/s, OpInfo: name = return_0, type = Return]                                                    


[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(inspect/CNN2D.py)[0m

[0;33m[VAIQ_WARN]: CNN2D::507 is not tensor.[0m

[0;32m[VAIQ_NOTE]: Find subgraph for convlike_fix_18:
node name:CNN2D::CNN2D/Linear[fc1]/ret.19, op type:nndct_dense, output shape: [1, 256]
node name:CNN2D::CNN2D/ReLU[relu3]/ret.21, op type:nndct_relu, output shape: [1, 256]

[0m

[0;32m[VAIQ_NOTE]: Find subgraph for convlike_fix_18:
node name:CNN2D::CNN2D/Conv2d[conv2]/ret.11, op type:nndct_conv2d, output shape: [1, 1, 64, 128]
node name:CNN2D::CNN2D/ReLU[relu2]/ret.13, op type:nndct_relu, output shape: [1, 1, 64, 128]

[0m

[0;32m[VAIQ_NOTE]: Find subgraph for convlike_fix_18:
node name:CNN2D::CNN2D/Conv2d[conv1]/ret.7, op type:nndct_conv2d, output shape: [1, 1, 256, 64]
node name:CNN2D::CNN2D/ReLU[relu1]/ret.9, op type:nndct_relu, output shape: [1, 1, 256, 64]

[0m

[0;32m[VAIQ_NOTE]: Find subgraph for reshape_fix_1:
node name:CNN2D::CNN2D/


I20240402 10:20:05.754566 241249 compile_pass_manager.cpp:352] [UNILOG][INFO] Compile mode: dpu
I20240402 10:20:05.754592 241249 compile_pass_manager.cpp:353] [UNILOG][INFO] Debug mode: null
I20240402 10:20:05.754599 241249 compile_pass_manager.cpp:357] [UNILOG][INFO] Target architecture: DPUCVDX8G_ISA3_C32B6
I20240402 10:20:05.754668 241249 compile_pass_manager.cpp:465] [UNILOG][INFO] Graph name: nndct_dense_nndct_relu_HZqtrWK36sX50Feo, with op num: 9
I20240402 10:20:05.754673 241249 compile_pass_manager.cpp:478] [UNILOG][INFO] Begin to compile...
I20240402 10:20:05.759114 241249 compile_pass_manager.cpp:489] [UNILOG][INFO] Total device subgraph number 3, DPU subgraph number 1
I20240402 10:20:05.759132 241249 compile_pass_manager.cpp:504] [UNILOG][INFO] Compile done.
I20240402 10:20:05.761606 241249 compile_pass_manager.cpp:352] [UNILOG][INFO] Compile mode: dpu
I20240402 10:20:05.761615 241249 compile_pass_manager.cpp:353] [UNILOG][INFO] Debug mode: null
I20240402 10:20:05.761618 241

In [7]:
# ====== Section 7: Model Quantization for Deployment ======
def quant_calib_test(CALIB):
    
    # Perform model quantization.
    quantizer = pytorch_nndct.torch_quantizer("calib" if CALIB else "test", model, (dummy_input,))
    quant_model = quantizer.quant_model
    evaluate(quant_model, test_loader)


    # Export the quantized model or config file.
    if CALIB: 
        quantizer.export_quant_config()
    else:
        quantizer.export_xmodel(deploy_check=False)

quant_calib_test(CALIB=True) # config file will be created
quant_calib_test(CALIB=False) # xmodel will be created


[0;33m[VAIQ_WARN][QUANTIZER_TORCH_CUDA_UNAVAILABLE]: CUDA (HIP) is not available, change device to CPU[0m

[0;32m[VAIQ_NOTE]: OS and CPU information:
               system --- Linux
                 node --- seakn-ThinkPad-P16s-Gen-1
              release --- 6.5.0-26-generic
              version --- #26~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Mar 12 10:22:43 UTC 2
              machine --- x86_64
            processor --- x86_64[0m

[0;32m[VAIQ_NOTE]: Tools version information:
                  GCC --- GCC 7.5.0
               python --- 3.8.6
              pytorch --- 1.13.1
        vai_q_pytorch --- 3.5.0+60df3f1+torch1.13.1[0m

[0;33m[VAIQ_WARN][QUANTIZER_TORCH_CUDA_UNAVAILABLE]: CUDA (HIP) is not available, change device to CPU.[0m

[0;32m[VAIQ_NOTE]: Quant config file is empty, use default quant configuration[0m

[0;32m[VAIQ_NOTE]: Quantization calibration process start up...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing CNN2

██████████████████████████████████████████████████| 16/16 [00:00<00:00, 2119.74it/s, OpInfo: name = return_0, type = Return]                                                    


[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quantize_result/CNN2D.py)[0m

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m



  sample = torch.tensor(sample, dtype=torch.float32)
  min_vals = torch.tensor(sample.min(axis=1).values, dtype=torch.float32)
  max_vals = torch.tensor(sample.max(axis=1).values, dtype=torch.float32)
  bnfp[1] = stats.mode(data)[0][0]


Test Accuracy: 92.75%

[0;32m[VAIQ_NOTE]: =>Exporting quant config.(quantize_result/quant_info.json)[0m

[0;33m[VAIQ_WARN][QUANTIZER_TORCH_CUDA_UNAVAILABLE]: CUDA (HIP) is not available, change device to CPU[0m

[0;32m[VAIQ_NOTE]: OS and CPU information:
               system --- Linux
                 node --- seakn-ThinkPad-P16s-Gen-1
              release --- 6.5.0-26-generic
              version --- #26~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Mar 12 10:22:43 UTC 2
              machine --- x86_64
            processor --- x86_64[0m

[0;32m[VAIQ_NOTE]: Tools version information:
                  GCC --- GCC 7.5.0
               python --- 3.8.6
              pytorch --- 1.13.1
        vai_q_pytorch --- 3.5.0+60df3f1+torch1.13.1[0m

[0;33m[VAIQ_WARN][QUANTIZER_TORCH_CUDA_UNAVAILABLE]: CUDA (HIP) is not available, change device to CPU.[0m

[0;32m[VAIQ_NOTE]: Quant config file is empty, use default quant configuration[0m

[0;32m[VAIQ_NOTE]: Quantization test process start

██████████████████████████████████████████████████| 16/16 [00:00<00:00, 2388.73it/s, OpInfo: name = return_0, type = Return]                                                    


[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quantize_result/CNN2D.py)[0m

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m





Test Accuracy: 93.00%

[0;32m[VAIQ_NOTE]: =>Converting to xmodel ...[0m

[0;33m[VAIQ_WARN]: CNN2D::507 is not tensor.[0m

[0;32m[VAIQ_NOTE]: =>Successfully convert 'CNN2D' to xmodel.(quantize_result/CNN2D_int.xmodel)[0m
