In [1]:
from imports import *
from utils import *
from model import *

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Data loading
dataset = 'Google Speech Command (GSC)'
data_path = 'raw/speech_commands'
labels = ['yes', 'no', 'stop', 'on', 'off']  # keywords passed to the dataset loader

# The helper (imported via the star imports above) returns the train and test datasets.
train_dataset, test_dataset = load_speech_command_datasets(data_path, labels)

# Both loaders share one batch size; only the training loader shuffles its samples.
loader_batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=loader_batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=loader_batch_size)

Class 'yes' has 2377 audio files.
Class 'no' has 2375 audio files.
Class 'stop' has 2380 audio files.
Class 'on' has 2367 audio files.
Class 'off' has 2357 audio files.


In [5]:
# Model Definition

# One output class per keyword in `labels` (defined in the data-loading cell), so the
# classifier size cannot drift from the label list.
num_classes = len(labels)

# Initialize the model with the number of classes, and move it to the specified device
model = NET_32k_GSC(num_classes).to(device)

# Report the weight count of the float model, before any quantization is applied
total_weights = count_weights(model)
print(f"The network has {total_weights} weights that need to be deployed.")

# Attach the default QAT qconfig, insert the fake-quant modules, then convert the model
# to its quantized form (presumably so a quantized state dict can be loaded into it
# afterwards -- no training or calibration happens in this cell).
# NOTE(review): 'fbgemm' is the x86 CPU quantization backend -- confirm this cell and the
# later inference behave as intended when `device` resolves to CUDA.
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
model = torch.quantization.prepare_qat(model, inplace=True)
model = torch.quantization.convert(model, inplace=True)

The network has 2600 weights that need to be deployed.


In [6]:
# Load Pretrained Model

# Path of the saved checkpoint (a plain string literal: nothing is interpolated into it)
model_filename = "checkpoint/GSC-3560-97.13_model.t7"

# Load the checkpoint from the file, mapping its tensors to the specified device.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
checkpoint = torch.load(model_filename, map_location=device)

# Restore the state dictionary stored under the checkpoint's 'model' key
model.load_state_dict(checkpoint['model'])

print(model) # The model has been quantized to int8

NET_32k_GSC(
  (conv1): QuantizedConv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), scale=0.027086596935987473, zero_point=64, padding=(2, 2))
  (bn1): QuantizedBatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation1): QuantizedHardswish()
  (conv2): QuantizedConv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), scale=0.12287382781505585, zero_point=80, padding=(1, 1))
  (bn2): QuantizedBatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation2): QuantizedHardswish()
  (conv3): QuantizedConv2d(8, 12, kernel_size=(3, 3), stride=(1, 1), scale=0.07499497383832932, zero_point=54, padding=(1, 1))
  (bn3): QuantizedBatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation3): QuantizedHardswish()
  (fc): QuantizedLinear(in_features=192, out_features=5, scale=0.36147260665893555, zero_point=68, qscheme=torch.per_channel_affine)
  (quant): Quantize(scale=tensor([0.0079]), zero_point=tensor([0]), 

In [4]:
# Switch the model to evaluation mode (Module.eval() returns the module itself) and
# measure its accuracy on the test loader.
test_accuracy = test_model(model.eval(), test_loader)

# Collect the int8 weights of the quantized layers; the helper also prints them.
quantized_weights_list = print_quantized_weights(model)

# Report the accuracy last, after the weight dump.
accuracy_report = f"The test accuracy after quantization is: {test_accuracy:.2f}%"
print(accuracy_report)

Extracting weights from layer: conv1
Weight Shape: (200,), Int8 Weights:
[ 127  107  -26  -60 -104   37  125  -17    6  -84  -33  -29   36  -20
   41  -91  -20   92   27   -2  -91  -20   29   40   42   58  -60 -104
 -128  -24   10  -25 -117  -98  -24  105   72  -29   24   -5   87  -50
  -70  -17   -2  107   90   -1  -26   96  -77   31   17  -37   36  -79
  -51   35   26   23  -44    0  -30  -27  -16   -9   45   72   71   33
   26  127   49   59   19    3  -29   11  -35  -44   86    5  -61  -39
  -33   15   -6  -27  -22   49  -38  -69   10   56   83 -128  -34   30
   12   19  -48  -22   26   28  127   25 -100   -2   39   14  -12  -20
   74   57  -95  -68   75   93  -98  -79  126  127  -40  -65 -113  -56
  -61    0  -66 -106    7   -2   47   43  -96  -41  -40   41   11  -19
   29   81  -18  -25  -45   69  124   77   17 -128   31  -66  -40   45
  -23  -48   20  -20   78   36  -16  -10  -39  -12   62   42  -33   51
  -30   85  -43  -67 -128  -98  -13  -85   -8   -1   65   30   -9   93
  12