In [4]:
from torch.utils.data import Dataset
import pandas as pd
import torchaudio
import os
from torch.utils.data import DataLoader
import torch
import numpy as np
import math

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm.notebook import tqdm
import torchvision.transforms as T

from os.path import abspath, dirname, join
import torchvision.models.quantization

import copy
import collections
import torch.quantization._numeric_suite as ns

import torch.nn.utils.prune as prune

ModuleNotFoundError: No module named 'pandas'

In [None]:
def count_parameters(model):
    """Return the number of trainable scalar values in ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def print_size_of_model(model):
    """Print the on-disk size of ``model``'s state dict in megabytes (1e6 bytes).

    The state dict is serialized to a scratch file inside a private temporary
    directory, so the helper behaves the same on every OS, cannot clobber a
    ``temp.pt`` in the working directory, and always cleans up after itself,
    even when ``torch.save`` raises.
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_path = os.path.join(scratch_dir, "temp.pt")
        torch.save(model.state_dict(), scratch_path)
        size_mb = os.path.getsize(scratch_path) / 1e6
    print("%.2f MB" % size_mb)

def number_of_correct(pred, target):
    """Count the positions where ``pred`` equals ``target``.

    Size-1 dimensions are squeezed out of ``pred`` before the element-wise
    comparison; the count is returned as a plain Python number.
    """
    matches = torch.eq(pred.squeeze(), target)
    return matches.sum().item()


def get_likely_index(tensor):
    """Return, for every entry along the leading dimensions, the index of the
    largest value in the last dimension (i.e. the most likely label)."""
    return torch.argmax(tensor, dim=-1)

def compute_error(x, y):
    """Return ``20 * log10(||x|| / ||x - y||)`` as a tensor.

    With ``x`` as a reference and ``y`` as its approximation this has the form
    of a signal-to-noise ratio in decibels; the result is ``inf`` when the two
    tensors are identical.
    """
    reference_norm = torch.norm(x)
    residual_norm = torch.norm(x - y)
    ratio = reference_norm / residual_norm
    return 20 * torch.log10(ratio)


: 

In [None]:
# Notebook-wide configuration.
# NOTE(review): both paths are absolute Windows paths on one specific machine;
# they must be edited before the notebook can run anywhere else.
# CSV with one row per clip; read by BabiesSoundDataset.
ANNOTATIONS_FILE = "D:\\units\\deep learning\\final_project\\data\\annotations_file_v2.csv"
# Root folder that contains the per-fold sub-directories ("fold1", "fold2", ...).
AUDIO_DIR = "D:\\units\\deep learning\\final_project\\data\\audio"
# Passed to BabiesSoundDataset as target_sample_rate (the dataset divides it by 10).
SAMPLE_RATE = 32000
# Device used for the model and for every batch.
DEVICE = 'cpu'

: 

In [None]:
class BabiesSoundDataset(Dataset):
    """Audio clips and integer labels selected by fold from an annotations CSV.

    The CSV is read positionally: column 0 holds the file name, column 1 the
    fold number and column 2 the label.  A clip is looked up on disk at
    ``<audio_dir>/fold<fold>/<file name>``.

    Indexing returns ``(signal, label)``: ``signal`` is a single-channel
    waveform zero-padded or cropped to ``num_samples`` samples, ``label`` is a
    0-d ``torch.int64`` tensor.
    """

    def __init__(self, annotations_file, audio_dir, target_sample_rate, folder_list,
                 num_samples=32000):
        """
        Args:
            annotations_file: path of the annotations CSV.
            audio_dir: directory containing the ``fold<N>`` sub-directories.
            target_sample_rate: nominal sample rate; the rate actually used
                for resampling is one tenth of this value (see note below).
            folder_list: collection of fold numbers whose rows are kept.
            num_samples: fixed length (in samples) every returned waveform is
                padded or cropped to.
        """
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        # NOTE(review): the requested rate is divided by 10 (e.g. 32000 -> 3200.0),
        # which decimates the audio heavily.  Kept exactly as before so that
        # previously trained checkpoints see the same inputs; confirm intent.
        self.target_sample_rate = target_sample_rate / 10
        self.num_samples = num_samples
        self.folder_list = folder_list

        # Keep only the rows whose fold is requested, in one vectorised pass
        # instead of a per-row ``iloc`` loop.
        fold_column = self.annotations.iloc[:, 1]
        selected = self.annotations[fold_column.isin(list(folder_list))]
        self.file_names = selected.iloc[:, 0].tolist()
        self.folders = selected.iloc[:, 1].tolist()
        self.labels = selected.iloc[:, 2].tolist()

    def __len__(self):
        """Number of clips in the selected folds."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Load clip ``index`` and return ``(signal, label)``."""
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)
        # Files do not all share one sample rate, hence the resampling step.
        signal, sample_rate = torchaudio.load(audio_sample_path)
        signal = self._resample_if_necessary(signal, sample_rate)
        signal = self._mix_down_if_necessary(signal)
        return signal, label

    def _get_audio_sample_path(self, index):
        """Build ``<audio_dir>/fold<fold>/<file name>`` for clip ``index``."""
        fold = f"fold{self.folders[index]}"
        return os.path.join(self.audio_dir, fold, self.file_names[index])

    def _get_audio_sample_label(self, index):
        """Return the label of clip ``index`` as a 0-d int64 tensor.

        The dtype is requested explicitly: NumPy's ``'long'`` alias is 32-bit
        on some platforms (notably Windows), which made the label dtype depend
        on the operating system.
        """
        return torch.tensor(int(self.labels[index]), dtype=torch.long)

    def _resample_if_necessary(self, signal, sr):
        """Downsample to the target rate and force a length of ``num_samples``.

        Only signals recorded at a rate *above* the target are resampled.
        Afterwards the waveform is zero-padded on the right when too short and
        cropped on the right when too long.
        """
        if sr > self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            signal = resampler(signal)
        missing = self.num_samples - signal.shape[1]
        if missing != 0:
            # A negative right-hand pad crops instead of padding.
            signal = F.pad(signal, (0, missing))
        return signal

    def _mix_down_if_necessary(self, signal):
        """Average multi-channel audio down to a single channel."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

: 

In [None]:
# Folds 1-4 (range(1, 5)) form the training split; fold 6 is the test split.
train_set = BabiesSoundDataset(
    annotations_file=ANNOTATIONS_FILE,
    audio_dir=AUDIO_DIR,
    target_sample_rate=SAMPLE_RATE,
    folder_list=range(1, 5),
)
test_set = BabiesSoundDataset(
    annotations_file=ANNOTATIONS_FILE,
    audio_dir=AUDIO_DIR,
    target_sample_rate=SAMPLE_RATE,
    folder_list=[6],
)

print(f"Train set size: {len(train_set)}")
print(f"Test set size: {len(test_set)}")

: 

In [None]:
# Load one clip straight from disk (no resampling, padding or mix-down) and plot it.
fold = "fold1"
file_name = '1TB3-XSom_A.wav'
path = os.path.join(AUDIO_DIR, fold, file_name)

# The sample rate is whatever the file was recorded at.
signal, sample_rate = torchaudio.load(path)
print(f"Shape of waveform: {signal.size()} audio: {file_name}")

plt.plot(signal.t().numpy())


: 

In [None]:
# Same kind of plot, but for the first item as the dataset delivers it
# (after resampling, padding/cropping and mix-down).
waveform, label = train_set[0]
# Report the file that item 0 really comes from instead of a hard-coded name.
print(f"Shape of waveform: {waveform.size()} audio: {train_set.file_names[0]}")
plt.plot(waveform.t().numpy())

: 

In [None]:
# DataLoader settings: one worker process and pinned memory on CUDA,
# in-process loading without pinning otherwise.
use_cuda = DEVICE == "cuda"
num_workers = 1 if use_cuda else 0
pin_memory = use_cuda

: 

In [None]:
def collate_fn(batch):
    """Collate ``(waveform, label)`` samples into one batch.

    Args:
        batch: iterable of ``(waveform, label)`` pairs as produced by the
            dataset; ``label`` may be a tensor or a plain number.

    Returns:
        tuple: ``(tensors, targets)``.  ``tensors`` is the *list* of waveforms
        in batch order (deliberately left unstacked, the training and test
        loops call ``torch.stack`` on it themselves); ``targets`` is an int64
        tensor with the labels stacked along a new first dimension.
    """
    tensors, targets = [], []

    for waveform, label in batch:
        tensors.append(waveform)
        # ``as_tensor`` converts to int64 without the copy-construct
        # UserWarning that ``torch.tensor(<tensor>)`` emits.
        targets.append(torch.as_tensor(label, dtype=torch.long))

    # Group the per-sample labels into a single batched tensor.
    targets = torch.stack(targets)

    return tensors, targets

: 

In [None]:
BATCH_SIZE = 128

# Options shared by the training and the test loader.
_loader_options = dict(
    batch_size=BATCH_SIZE,
    collate_fn=collate_fn,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

# Training batches are reshuffled every epoch; test batches keep file order
# and the final, possibly smaller, batch is kept.
train_loader = DataLoader(train_set, shuffle=True, **_loader_options)
test_loader = DataLoader(test_set, shuffle=False, drop_last=False, **_loader_options)

: 

In [None]:


class MR(nn.Module):
    """1-D convolutional classifier that works on raw waveforms.

    Five ``Conv1d -> BatchNorm1d -> ReLU`` stages (with a max-pool after the
    first one) are followed by global average pooling over time and a linear
    layer.  For an input of shape ``(batch, n_input, time)`` the output is a
    ``(batch, 1, n_output)`` tensor of log-probabilities.

    ``stride`` is accepted for signature compatibility but is not used.
    """

    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32):
        super().__init__()
        double, quad = 2 * n_channel, 4 * n_channel

        # Layers are created in this exact order so that parameter
        # initialisation and state-dict key order stay unchanged.
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=13, stride=1)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(n_channel, double, kernel_size=13, stride=1)
        self.bn2 = nn.BatchNorm1d(double)
        self.conv3 = nn.Conv1d(double, quad, kernel_size=15, stride=3)
        self.bn3 = nn.BatchNorm1d(quad)
        self.conv4 = nn.Conv1d(quad, quad, kernel_size=7, stride=5)
        self.bn4 = nn.BatchNorm1d(quad)
        self.conv5 = nn.Conv1d(quad, quad, kernel_size=11, stride=8)
        self.bn5 = nn.BatchNorm1d(quad)

        # Author's layer-table notes, kept verbatim:
        # Input (1 × M × N)
        # 3 × Conv (1~3) (15, 1) (1, 1) (1, 1) (1, 1) 32
        # 3 × Conv (4~6) (15, 1) (1, 1) (1, 1) (2, 1) 64

        self.fc1 = nn.Linear(quad, n_output)

    def forward(self, x):
        """Map ``(batch, n_input, time)`` to ``(batch, 1, n_output)`` log-probabilities."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))
        # Global average pooling: collapse whatever time axis is left to length 1.
        pooled = F.avg_pool1d(x, x.shape[-1])
        # (batch, channels, 1) -> (batch, 1, channels) so the linear layer sees channels last.
        logits = self.fc1(pooled.permute(0, 2, 1))
        return F.log_softmax(logits, dim=2)

# Build the MR network for 1 input channel and 2 output classes, then report its size.
model = MR(n_input=1, n_output=2)
model.to(DEVICE)
print(model)

n = count_parameters(model)
print(f"Number of parameters: {n}")

: 

In [None]:
class M5(nn.Module):
    """Four-stage 1-D CNN over raw waveforms with a linear classifier head.

    Every stage is ``Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(4)``.  The
    remaining time axis is then average-pooled to length 1 and a linear layer
    produces the class scores.  For an input of shape
    ``(batch, n_input, time)`` the output is a ``(batch, 1, n_output)`` tensor
    of log-probabilities.

    ``stride`` is accepted for signature compatibility but is not used.
    Attribute names (``conv1``, ``bn1``, ``relu1``, ...) are referenced by
    name elsewhere in the notebook (module fusion, pruning) and must be kept.
    """

    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32):
        super().__init__()
        wide = 2 * n_channel

        # Stage 1
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=25, stride=5)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool1d(4)
        # Stage 2
        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool1d(4)
        # Stage 3
        self.conv3 = nn.Conv1d(n_channel, wide, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(wide)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool1d(4)
        # Stage 4
        self.conv4 = nn.Conv1d(wide, wide, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(wide)
        self.relu4 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool1d(4)

        # Recurrent head from an earlier experiment, currently disabled:
        # self.lstm = nn.LSTM(2*n_channel, 64)

        # Linear layer mapping the pooled features to class scores.
        self.fc1 = nn.Linear(wide, n_output)

    def forward(self, x):
        """Map ``(batch, n_input, time)`` to ``(batch, 1, n_output)`` log-probabilities."""
        # Written as straight-line code (no loops over modules) so the model
        # stays compatible with TorchScript scripting.
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.pool4(self.relu4(self.bn4(self.conv4(x))))
        # Global average pooling over the remaining time steps.
        x = F.avg_pool1d(x, x.shape[-1])
        # (batch, channels, 1) -> (batch, 1, channels) for the linear layer.
        x = self.fc1(x.permute(0, 2, 1))
        return F.log_softmax(x, dim=2)

: 

In [None]:
# Replace the working model with an M5 network (1 input channel, 2 classes).
model = M5(n_input=1, n_output=2)
model.to(DEVICE)
print(model)

n = count_parameters(model)
print(f"Number of parameters: {n}")

: 

In [None]:
def evaluate_model(model, test_loader, device, criterion=None):
    """Evaluate ``model`` on ``test_loader`` and return ``(loss, accuracy)``.

    Works with the loaders of this notebook, whose batches carry the inputs as
    a *list* of tensors, and with models that emit ``(batch, 1, classes)``
    scores; standard tensor batches and ``(batch, classes)`` outputs are
    handled as before.

    Args:
        model: module to evaluate; it is switched to eval mode and moved to
            ``device``.
        test_loader: iterable of ``(inputs, labels)`` batches exposing
            ``.dataset`` (its length is the normaliser).
        device: device the model and the batches are moved to.
        criterion: optional callable ``criterion(outputs, labels)`` returning
            the mean loss of a batch; when omitted the loss is reported as 0.

    Returns:
        tuple: ``(eval_loss, eval_accuracy)`` with the sample-weighted mean
        loss (Python number) and the fraction of correct predictions (0-d tensor).

    Raises:
        ZeroDivisionError: if ``test_loader.dataset`` is empty.
    """
    model.eval()
    model.to(device)

    running_loss = 0
    running_corrects = 0

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for inputs, labels in test_loader:
            if isinstance(inputs, (list, tuple)):
                # Batches collated as a list of per-sample tensors.
                inputs = torch.stack(inputs)
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            if outputs.dim() == 3 and outputs.size(1) == 1:
                # (batch, 1, classes) -> (batch, classes); without this the
                # max below would run over the singleton axis.
                outputs = outputs.squeeze(1)
            _, preds = torch.max(outputs, 1)

            loss = criterion(outputs, labels).item() if criterion is not None else 0

            # statistics
            running_loss += loss * inputs.size(0)
            running_corrects += torch.sum(preds == labels)

    n_samples = len(test_loader.dataset)
    eval_loss = running_loss / n_samples
    eval_accuracy = running_corrects / n_samples

    return eval_loss, eval_accuracy

: 

In [None]:
def calibrate_model(model, loader, device=torch.device("cpu:0")):
    """Run every batch of ``loader`` through ``model`` in eval mode.

    The outputs are discarded; the forward passes only serve to let any
    observers attached to ``model`` record activation statistics.  A function
    with the same name is defined again further down in the notebook.

    Args:
        model: module to run; it is moved to ``device`` and put in eval mode.
        loader: iterable of ``(inputs, labels)`` batches.  ``inputs`` may be a
            tensor or a list/tuple of per-sample tensors (the form produced by
            this notebook's ``collate_fn``); labels are ignored.
        device: device used for the forward passes.
    """
    model.to(device)
    model.eval()

    # No gradients are needed for calibration.
    with torch.no_grad():
        for inputs, _ in loader:
            if isinstance(inputs, (list, tuple)):
                inputs = torch.stack(inputs)
            model(inputs.to(device))

: 

In [None]:
def create_model(num_classes=10):
    """Build a fresh, untrained ``M5`` network with ``num_classes`` outputs.

    The argument used to be ignored (the network was always built with two
    outputs); it is now honoured, so pass ``num_classes=2`` explicitly for the
    two-class task of this notebook.

    Args:
        num_classes: size of the output layer.

    Returns:
        M5: the newly constructed model.
    """
    return M5(n_output=num_classes)

: 

In [None]:
def train(model, epoch, log_interval, device, pbar_update):
    """Train ``model`` for one epoch over the global ``train_loader``.

    Relies on three notebook-level globals: ``train_loader`` (batches of
    ``(list of waveforms, targets)``), ``optimizer`` and the progress bar
    ``pbar``.

    Args:
        model: network returning ``(batch, 1, n_output)`` log-probabilities.
        epoch: epoch number, used only in the log line.
        log_interval: a log line is printed every ``log_interval`` batches.
        device: device the batch is moved to.
        pbar_update: amount the progress bar advances per batch.

    Returns:
        list[float]: the loss of every batch, in order.
    """
    model.train()
    losses = []

    for batch_idx, (data, target) in enumerate(train_loader):
        # The collate function delivers the waveforms as a list.
        data = torch.stack(data).to(device)
        target = target.to(device)

        output = model(data)

        # Negative log-likelihood for a tensor of size (batch x 1 x n_output).
        # Only the singleton middle axis is squeezed: a bare ``squeeze()``
        # would also drop the batch axis when the last batch has one sample.
        loss = F.nll_loss(output.squeeze(1), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print training stats
        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

        # update progress bar
        pbar.update(pbar_update)

        # record loss
        losses.append(loss.item())

    return losses

: 

In [None]:
def test(model, epoch, device, pbar_update):
    model.eval()
    correct = 0

    for data, target in test_loader:
        data = torch.stack(data)

        data = data.to(device)
        target = target.to(device)

        output = model(data)

        pred = get_likely_index(output)
        correct += number_of_correct(pred, target)

        # update progress bar
        pbar.update(pbar_update)

    print(f"\nTest Epoch: {epoch}\tAccuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n")



: 

In [None]:
# Training driver: trains and evaluates the current global `model` once per epoch.
LOG_INTERVAL = 20  # print a training log line every 20 batches
N_EPOCH = 50       # number of train + test passes

# One epoch consists of len(train_loader) + len(test_loader) batches, so this
# step size makes the progress bar advance by exactly 1 per epoch.
pbar_update = 1 / (len(train_loader) + len(test_loader))

# The optimizer is bound to the parameters of whatever `model` is at this point;
# train() reads `optimizer` as a global.
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)  # reduce the learning after 20 epochs by a factor of 10

# The bar must be bound to the name `pbar`: train() and test() use it as a global.
with tqdm(total=N_EPOCH) as pbar:
    for epoch in range(1, N_EPOCH + 1):
        train(model, epoch, LOG_INTERVAL,DEVICE, pbar_update)
        test(model, epoch, DEVICE, pbar_update)
        scheduler.step()

: 

In [None]:
    # # save the model
    # path_tosave = join('saved_models', 'model_for_onnx.pth')
    # #print(f'path to save: {path_tosave}')
    # with open('D:\\units\\deep learning\\final_project\\data\\saved_models\\model_ASOOB7_BEST_jit_NOlstM.pth', 'wb') as f:
    #     torch.save(model.state_dict(), f)

: 

In [None]:
# Restore the saved weights into the current model.  `map_location` makes the
# tensors land on DEVICE even if the checkpoint was written on another device
# (e.g. saved on a GPU and loaded on a CPU-only machine).
state_dict = torch.load(
    'D:\\units\\deep learning\\final_project\\data\\saved_models\\model_ASOOB7_BEST_jit_NOlstM.pth',
    map_location=DEVICE,
)
print(state_dict.keys())
model.load_state_dict(state_dict)


: 

In [None]:
# List every entry of the state dict together with its tensor shape.
print("Model's state dict:")
for param_tensor, values in model.state_dict().items():
    print(param_tensor, "\t", values.size())

: 

In [None]:
# NOTE: this is an alias, not a copy. Pruning modifies `model` itself.
pruned_model = model

# (sub-module name, parameter name) pairs that take part in global pruning.
# The LSTM entries only apply to the model variant with a recurrent head; they
# are skipped when the model has no `lstm` attribute instead of raising
# AttributeError.
_prune_candidates = (
    ('conv1', 'weight'),
    ('conv2', 'weight'),
    ('conv3', 'weight'),
    ('conv4', 'weight'),
    ('bn1', 'weight'),
    ('bn2', 'weight'),
    ('bn3', 'weight'),
    ('bn4', 'weight'),
    ('lstm', 'weight_ih_l0'),
    ('lstm', 'weight_hh_l0'),
    ('fc1', 'weight'),
)

parameters_to_prune = tuple(
    (getattr(pruned_model, module_name), param_name)
    for module_name, param_name in _prune_candidates
    if hasattr(pruned_model, module_name)
)

# Zero the 90% of weights with the smallest magnitude, ranked across all of
# the listed tensors together rather than per layer.
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.9,
)


: 

In [None]:
# Make the pruning permanent: fold each mask into its weight tensor and drop
# the re-parametrisation.  Sub-modules that do not exist on this model
# variant (the LSTM head) or that carry no pruning are skipped, so the cell
# neither raises AttributeError nor fails when it is run a second time.
for module_name, param_name in (
    ('conv1', 'weight'),
    ('conv2', 'weight'),
    ('conv3', 'weight'),
    ('conv4', 'weight'),
    ('bn1', 'weight'),
    ('bn2', 'weight'),
    ('bn3', 'weight'),
    ('bn4', 'weight'),
    ('lstm', 'weight_ih_l0'),
    ('lstm', 'weight_hh_l0'),
    ('fc1', 'weight'),
):
    module = getattr(pruned_model, module_name, None)
    if module is not None and hasattr(module, param_name + '_mask'):
        prune.remove(module, param_name)

: 

In [None]:
# Fine-tune and evaluate the pruned model for another N_EPOCH epochs.
# NOTE(review): the pruning masks were already removed in the previous cell, so
# nothing keeps the zeroed weights at zero during these updates; confirm that
# the sparsity survives, or fine-tune before calling prune.remove.
# NOTE(review): `optimizer` and `scheduler` are the objects created for the first
# training run; the scheduler therefore continues from its earlier step count.
with tqdm(total=N_EPOCH) as pbar:
    for epoch in range(1, N_EPOCH + 1):
        train(pruned_model, epoch, LOG_INTERVAL,DEVICE, pbar_update)
        test(pruned_model, epoch, DEVICE, pbar_update)
        scheduler.step()

: 

In [None]:
# Serialized size of the original and of the pruned model, in that order.
for candidate in (model, pruned_model):
    print_size_of_model(candidate)

: 

In [None]:
    # # save the model
    # path_tosave = join('saved_models', 'model_for_onnx.pth')
    # print(f'path to save: {path_tosave}')
    # with open(path_tosave, 'wb') as f:
    #     torch.save(model.state_dict(), f)

: 

In [None]:
# Serialized size of the float model, printed as a reference point before quantization.
print_size_of_model(model)

: 

In [None]:
class QuantizedModel(nn.Module):
    """Wrap a float model between a ``QuantStub`` and a ``DeQuantStub``.

    Attributes:
        m_weight / weight: the state dict of ``model_fp32`` as captured at
            construction time.  They are plain attributes (not registered
            parameters) kept for inspection only.
        quant: converts the float input to a quantized tensor once the model
            has been converted.
        dequant: converts the quantized output back to float once the model
            has been converted.
        model_fp32: the wrapped network.
    """

    def __init__(self, model_fp32):
        super().__init__()
        self.m_weight = model_fp32.state_dict()
        self.weight = self.m_weight
        # QuantStub converts tensors from floating point to quantized.
        # This will only be used for inputs.
        self.quant = torch.quantization.QuantStub()
        # DeQuantStub converts tensors from quantized to floating point.
        # This will only be used for outputs.
        self.dequant = torch.quantization.DeQuantStub()
        # FP32 model
        self.model_fp32 = model_fp32

    def forward(self, x):
        """Quantize ``x``, run the wrapped model and dequantize its output.

        Only the activation tensor goes through the stubs.  The stored state
        dict is not a tensor and must never be passed to ``dequant``: after
        conversion that module dequantizes its argument and would fail on a
        dictionary.
        """
        x = self.quant(x)
        x = self.model_fp32(x)
        return self.dequant(x)

: 

In [None]:
# I WAY
# Make a copy of the model for layer fusion
# model = create_model(2)
def calibrate_model(model, loader, device=torch.device("cpu:0")):
    """Feed every batch of ``loader`` through ``model`` to calibrate it.

    Used as the ``run_fn`` of ``torch.quantization.quantize``: the forward
    passes let the observers inserted into ``model`` record activation
    statistics; the outputs themselves are discarded.

    Args:
        model: prepared module; moved to ``device`` and put in eval mode.
        loader: iterable of ``(inputs, labels)`` batches.  ``inputs`` is
            normally the list of waveforms produced by ``collate_fn``; an
            already stacked tensor is accepted as well.  Labels are ignored.
        device: device used for the forward passes.
    """
    model.to(device)
    model.eval()

    # Calibration needs no gradients; skipping autograd saves memory.
    with torch.no_grad():
        for inputs, _ in loader:
            if isinstance(inputs, (list, tuple)):
                inputs = torch.stack(inputs)
            model(inputs.to(device))


# Work on a copy so the float model stays untouched; fusion requires eval mode.
model_copy = copy.deepcopy(model)
model_copy.eval()

# Fold each Conv1d + BatchNorm1d + ReLU triple into one fused module.
model_copy = torch.quantization.fuse_modules(model_copy, [['conv1', 'bn1', 'relu1'],['conv2', 'bn2','relu2'], ['conv3', 'bn3', 'relu3'], ['conv4', 'bn4', 'relu4']])

# #quantize everything
qmodel = QuantizedModel(model_fp32=model_copy)
qmodel.eval()

# A qconfig must be attached before calling quantize(): without one, no
# observers are inserted and convert() leaves every module in float.
qmodel.qconfig = torch.quantization.default_qconfig

# prepare -> calibrate (calibrate_model over train_loader) -> convert, on a deep copy.
quantized_model = torch.quantization.quantize(model=qmodel, run_fn=calibrate_model, run_args=[train_loader], mapping=None, inplace=False)

: 

In [None]:
# Make a copy of the model for layer fusion
# model = create_model(2)

#OR II WAY

# Work on a copy so the float model stays untouched; fusion requires eval mode.
model_copy = copy.deepcopy(model)
model_copy.eval()

# Fold each Conv1d + BatchNorm1d + ReLU triple into one fused module.
model_copy = torch.quantization.fuse_modules(model_copy, [['conv1', 'bn1', 'relu1'],['conv2', 'bn2','relu2'], ['conv3', 'bn3', 'relu3'], ['conv4', 'bn4', 'relu4']])

# #quantize everything
qmodel = QuantizedModel(model_fp32=model_copy)
qmodel.eval()

# Select quantization schemes from
#     https://pytorch.org/docs/stable/quantization-support.html
# NOTE(review): the "qnnpack" qconfig is meant for the QNNPACK backend; confirm
# that torch.backends.quantized.engine matches on the machine running this.
quantization_config = torch.quantization.get_default_qconfig("qnnpack")

qmodel.qconfig = quantization_config
model_copy.qconfig = quantization_config

# prepare(..., inplace=False) returns the observed copy; it has to be kept,
# otherwise calibration and convert() run on a model without observers.
qmodel = torch.quantization.prepare(qmodel, inplace=False)

# collect calibration statistics (forward passes only, no gradients needed)
with torch.no_grad():
    for batch, target in train_loader:
        qmodel(torch.stack(batch))

# get the quantized model
# converts operations from fp32 to int8 arithmetic
# simply divide the entire range of activations into 256 levels
qmodel = torch.quantization.convert(qmodel)

qmodel.eval()

# state_dict_q = torch.load('data/saved_models/model_quantized_jit.pth')
# print(state_dict_q.keys())
# qmodel.load_state_dict(state_dict_q)

# # save the q model
# path_tosave = join('data', 'saved_models', 'model_quantized_jit.pth')
# print(f'path to save: {path_tosave}')
# with open(path_tosave, 'wb') as f:
#     torch.save(qmodel.state_dict(), f)

: 

In [None]:
# 3rd WAY

# Quantize a real copy: a plain assignment would only alias `model`, so the
# qconfig attribute would end up on the original float model as well.
model_copy2 = copy.deepcopy(model).eval()
model_copy2.qconfig = torch.quantization.default_qconfig
mp = torch.quantization.prepare(model_copy2)

# Calibration: forward passes only, no gradients needed.
with torch.no_grad():
    for batch, target in train_loader:
        mp(torch.stack(batch))

# NOTE(review): the model is converted without Quant/DeQuant stubs and without
# module fusion, so `mq` is suitable for inspecting quantized weights; confirm
# before using it for inference on float inputs.
mq = torch.quantization.convert(mp)

: 

In [None]:
# Script the converted model, then unpack conv1's packed parameters to look
# at its quantized weight tensor.
mqj = torch.jit.script(mq)

wt, bias = mqj.conv1._weight_bias()
wt

: 

In [None]:
import pprint

# `w` and `new_w` used to exist only in commented-out code, so this cell
# raised NameError on a fresh kernel.  They are defined here again.
w, b = mqj.conv1._weight_bias()
new_w = torch.quantize_per_tensor(torch.randn(32,1,25), 0.1, 0, torch.qint8)

# The overwrite itself stays disabled: it would replace conv1's weights with
# random values.
# NOTE(review): while it is disabled, `new_w` is only printed, not applied.
# print('before', mqj.conv1.state_dict())
# mqj.conv1.set_weight_bias(new_w, b)
# print('after', mqj.conv1.state_dict())

print("Old values to conv1: ", w)
print("Set new values to conv1: ", new_w)
    

: 

In [None]:
# Show which entries of the first conv layer of the quantized wrapper are
# still registered as parameters.
qmodule = qmodel.model_fp32.conv1
print([entry for entry in qmodule.named_parameters()])

: 

In [None]:
# Compare the float model and the quantized model: serialized size and parameter count.
print_size_of_model(model)
print_size_of_model(qmodel)

print(f'Params count of model for q prep : {count_parameters(model)}')
print(f'Params saved of q model: {count_parameters(qmodel)}')

# A post-training-quantized model is an inference artefact: it cannot be
# trained with backpropagation, and the global `optimizer` is bound to the
# float model's parameters anyway.  Measure its accuracy once instead of
# running the train/test loop on it.
# `pbar` must carry this name because test() reads it as a global.
with tqdm(total=1) as pbar:
    test(quantized_model, N_EPOCH, DEVICE, 1 / len(test_loader))


: 

In [None]:
# Load the checkpoint to export.  `map_location` places the tensors on DEVICE
# even if the file was saved from a different device.
state_dict = torch.load('saved_models/model_for_onnx.pth', map_location=DEVICE)
print(state_dict.keys())
model.load_state_dict(state_dict)

# set the model to inference mode
model.eval()

: 

In [None]:
import torch

# Dummy batch shaped like the data the model is trained on: the dataset pads
# or crops every waveform to 32000 samples, so the time axis is 32000 here too.
x = torch.randn(128, 1, 32000, requires_grad=True)
x.shape

: 

In [None]:
# The export traces the model with this dummy input.  The model's global
# average pooling uses the traced sequence length as its kernel size, so the
# input must have the same 32000-sample length the dataset produces.  Only
# the batch axis is exported as dynamic.
x = torch.randn(128, 1, 32000, requires_grad=True)

torch_out = model(x)

# Export the model
torch.onnx.export(model,               # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "asoob7.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})

: 