<a href="https://colab.research.google.com/github/Hussien-Sayed/Audio-Classification-On-Edge-Devices/blob/main/Audio_Classifcation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [8]:
#imports
import cv2
import tqdm
import random
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
from pathlib import Path
from typing import Type
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch import Tensor
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split
import librosa
import scipy.io.wavfile as wav
import scipy

def set_seed(seed):
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  np.random.seed(seed)
  random.seed(seed)
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  g = torch.Generator()
  g.manual_seed(seed)
  return g

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading Data

In [7]:
!cp -r '/content/drive/MyDrive/zindi/Audio Classification/TechCabal Ewe Audio Files/' ./

!cp '/content/drive/MyDrive/zindi/Audio Classification/Train.csv' ./
!cp '/content/drive/MyDrive/zindi/Audio Classification/Test_1.csv' ./
!cp '/content/drive/MyDrive/zindi/Audio Classification/SampleSubmission_1.csv' ./

In [9]:
train_df = pd.read_csv('Train.csv')
test_df = pd.read_csv('Test_1.csv')

In [10]:
sub_df = pd.read_csv('SampleSubmission_1.csv')

In [15]:
train_df.head()

Unnamed: 0,id,audio_filepath,duration,class
0,id_6nazjwygdl,id_6nazjwygdl.wav,1.984,left
1,id_jpwl97yilf,id_jpwl97yilf.wav,2.027,down
2,id_9d131fb32p,id_9d131fb32p.wav,2.274,go
3,id_kat2oty4s0,id_kat2oty4s0.wav,2.176,down
4,id_g3hvae3jbe,id_g3hvae3jbe.wav,1.634,down


In [16]:
test_df.head()

Unnamed: 0,id,audio_filepath,duration
0,id_u5iqtgjzhx,id_u5iqtgjzhx.wav,2.358
1,id_l7ebzcfk5e,id_l7ebzcfk5e.wav,3.221
2,id_jbzci8uepl,id_jbzci8uepl.wav,2.94898
3,id_jzil0fw5vs,id_jzil0fw5vs.wav,4.331
4,id_o7mrvf5wj7,id_o7mrvf5wj7.wav,1.856


In [17]:
new_tests = glob.glob('/content/test_2/*')
new_tests_name = [t.split('/')[-1].split('.')[0] for t in new_tests]

In [20]:
# Listen to a sample
ix = 100
pth = train_df['audio_filepath'].iloc[ix]
import IPython.display as ipd
ipd.Audio(f'/content/TechCabal Ewe Audio Files/{pth}')


In [21]:
!mkdir soundfiles/

## Processing Data

In [22]:
def load_audio(file_path):
    sr, y = wav.read(file_path)
    if y.dtype == np.int16:
        y = y / 32768.0
    elif y.dtype == np.int32:
        y = y / 2147483648.0
    elif y.dtype == np.uint8:
        y = (y - 128) / 128.0
    if y.ndim == 2:
        y = y.mean(axis=1)
    return y

sound_files = glob.glob('./TechCabal Ewe Audio Files/*.wav')
for sound_file in tqdm.tqdm(sound_files):
      y = load_audio(sound_file)
      S = scipy.signal.spectrogram(y)[2]
      S = cv2.resize(S,(100,128))
      np.save(Path('soundfiles') / Path(sound_file).parts[-1].replace('.wav','.npy'),[S])
len(glob.glob('soundfiles/*'))


100%|██████████| 8280/8280 [01:01<00:00, 135.01it/s]


8280

In [23]:
class_mapper =        {'left':0,
                       'down':1,
                       'go':2,
                       'right':3,
                       'up':4,
                       'yes':5,
                       'no':6,
                       'stop':7,}

train_df['label'] = train_df['class'].map(class_mapper)

## Modelling

In [24]:
class BasicBlock(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None
    ) -> None:
        super(BasicBlock, self).__init__()

        self.expansion = expansion
        self.downsample = downsample
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return  out



class ResNet(nn.Module):
    def __init__(
        self,
        img_channels: int,
        block: Type[BasicBlock],
        num_classes: int  = 1000
    ) -> None:
        super(ResNet, self).__init__()
        layers = [1, 1, 1]
        self.expansion = 1

        self.in_channels = 8
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 8, layers[0])
        self.layer2 = self._make_layer(block, 16, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 32, layers[2], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(32*self.expansion, num_classes)

    def _make_layer(
        self,
        block: Type[BasicBlock],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        downsample = None
        if stride != 1:

            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = []
        layers.append(
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        )
        self.in_channels = out_channels * self.expansion

        for i in range(1, blocks):
            layers.append(block(
                self.in_channels,
                out_channels,
                expansion=self.expansion
            ))
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


In [25]:
class data_set_train(Dataset):
    def __init__(self,explore_mode = False,test=False):
        self.specs = train_df.audio_filepath.values
        self.labels = train_df.label.values
        self.explore_mode = explore_mode

    def __len__(self):
        return len(self.specs)

    def __getitem__(self, index):
        sound_file =  'soundfiles/' + self.specs[index].replace('.wav','.npy')
        spec = np.load(sound_file)
        label = self.labels[index]
        if self.explore_mode:
            return spec,label,sound_file
        else:
            return spec,label


In [26]:
model = ResNet(img_channels= 1,block=BasicBlock,num_classes = 8)

g = set_seed(0)

num_epochs = 60
lr = 0.001
weight_decay = 5e-4
batch_size = 16

if torch.cuda.is_available():
  model = model.cuda()
optimizer = optim.Adam(model.parameters(),lr=lr,weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience = 3,factor=0.1)

DataSet = data_set_train()
train_DataSet, val_DataSet = random_split(DataSet,[4000,1334])

train_loader = DataLoader(train_DataSet, batch_size=batch_size, shuffle=True, drop_last=True, generator=g)
val_loader = DataLoader(val_DataSet, batch_size=batch_size, shuffle=True, drop_last=False, generator=g)
test_len_data = val_DataSet.__len__()

loss_fn = nn.CrossEntropyLoss()

In [27]:
min_val_losses = np.inf

#training loop
train_loss_epochs = []
val_loss_epochs = []

for epoch in range(0,num_epochs):
  print('-'*10,f"Epoch :{epoch+1}/{num_epochs}",'-'*10)
  print('learning rate: ',optimizer.param_groups[0]["lr"])
  #train
  losses=[]
  model.train()
  for imgs,labels in tqdm.tqdm(train_loader):
    if torch.cuda.is_available():
      imgs = imgs.cuda()
      labels = labels.cuda()
    #forward
    preds = model(imgs.float())
    optimizer.zero_grad()
    loss = loss_fn(preds,labels)
    loss.backward()
    optimizer.step()

    losses.append(loss.detach().cpu())
  print('train loss: ',np.mean(losses))
  train_loss_epochs.append(np.mean(losses))

  #validation
  val_losses=[]
  model.eval()
  true_preds_tot = 0
  for imgs,labels in val_loader:
    if torch.cuda.is_available():
      imgs = imgs.cuda()
      labels = labels.cuda()
    preds = model(imgs.float())
    val_loss = loss_fn(preds,labels)
    val_losses.append(val_loss.detach().cpu())
    true_preds = (torch.max(preds,axis=1).indices == labels).sum()
    true_preds_tot += true_preds

  scheduler.step(np.mean(val_losses))
  print('validation loss: ',np.mean(val_losses))
  print('validation accuracy: ',100*(true_preds_tot.cpu().numpy()/test_len_data),'%')

  if np.mean(val_losses) < min_val_losses:
    min_val_losses = np.mean(val_losses)
    torch.save(model.state_dict(), 'best.pth')
  val_loss_epochs.append(np.mean(val_losses))


---------- Epoch :1/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:04<00:00, 56.37it/s] 


train loss:  1.8876666
validation loss:  1.6610978
validation accuracy:  55.62218890554723 %
---------- Epoch :2/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 128.93it/s]


train loss:  1.3428762
validation loss:  1.076502
validation accuracy:  74.5127436281859 %
---------- Epoch :3/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 129.43it/s]


train loss:  0.9326597
validation loss:  0.84317636
validation accuracy:  80.0599700149925 %
---------- Epoch :4/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 111.25it/s]


train loss:  0.6915309
validation loss:  0.57602996
validation accuracy:  85.45727136431785 %
---------- Epoch :5/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 87.48it/s]


train loss:  0.5335353
validation loss:  0.47590396
validation accuracy:  86.65667166416792 %
---------- Epoch :6/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 84.14it/s] 


train loss:  0.45030499
validation loss:  0.43901858
validation accuracy:  88.68065967016491 %
---------- Epoch :7/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 127.74it/s]


train loss:  0.38739386
validation loss:  0.40072477
validation accuracy:  89.73013493253373 %
---------- Epoch :8/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 130.02it/s]


train loss:  0.3516932
validation loss:  0.3428623
validation accuracy:  90.85457271364318 %
---------- Epoch :9/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 130.12it/s]


train loss:  0.28436875
validation loss:  0.33537892
validation accuracy:  90.62968515742129 %
---------- Epoch :10/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 124.97it/s]


train loss:  0.24809492
validation loss:  0.29398662
validation accuracy:  92.35382308845578 %
---------- Epoch :11/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 100.92it/s]


train loss:  0.25115705
validation loss:  0.2566064
validation accuracy:  93.10344827586206 %
---------- Epoch :12/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:03<00:00, 72.66it/s]


train loss:  0.1919637
validation loss:  0.21669877
validation accuracy:  94.22788605697151 %
---------- Epoch :13/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 122.30it/s]


train loss:  0.18199979
validation loss:  0.38464165
validation accuracy:  87.33133433283359 %
---------- Epoch :14/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 131.41it/s]


train loss:  0.17639779
validation loss:  0.23039319
validation accuracy:  93.62818590704649 %
---------- Epoch :15/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 131.16it/s]


train loss:  0.16197425
validation loss:  0.17859808
validation accuracy:  95.7271364317841 %
---------- Epoch :16/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 129.22it/s]


train loss:  0.15718654
validation loss:  0.21015297
validation accuracy:  93.85307346326837 %
---------- Epoch :17/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 107.75it/s]


train loss:  0.13580103
validation loss:  0.52385634
validation accuracy:  81.85907046476761 %
---------- Epoch :18/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 84.44it/s]


train loss:  0.13369127
validation loss:  0.11577576
validation accuracy:  97.82608695652173 %
---------- Epoch :19/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:02<00:00, 90.04it/s] 


train loss:  0.112827286
validation loss:  0.26484522
validation accuracy:  91.60419790104947 %
---------- Epoch :20/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 130.29it/s]


train loss:  0.10633287
validation loss:  0.12340923
validation accuracy:  97.52623688155923 %
---------- Epoch :21/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 126.28it/s]


train loss:  0.09318439
validation loss:  0.18734927
validation accuracy:  95.57721139430285 %
---------- Epoch :22/60 ----------
learning rate:  0.001


100%|██████████| 250/250 [00:01<00:00, 129.41it/s]


train loss:  0.13999997
validation loss:  0.1376246
validation accuracy:  97.15142428785607 %
---------- Epoch :23/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 123.17it/s]


train loss:  0.074972354
validation loss:  0.09088162
validation accuracy:  98.80059970014993 %
---------- Epoch :24/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 96.58it/s]


train loss:  0.06419888
validation loss:  0.08906581
validation accuracy:  98.50074962518741 %
---------- Epoch :25/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:03<00:00, 74.35it/s]


train loss:  0.053930037
validation loss:  0.08345763
validation accuracy:  98.80059970014993 %
---------- Epoch :26/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 127.77it/s]


train loss:  0.055556573
validation loss:  0.08481136
validation accuracy:  98.65067466266866 %
---------- Epoch :27/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 132.06it/s]


train loss:  0.05380737
validation loss:  0.07816225
validation accuracy:  98.80059970014993 %
---------- Epoch :28/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 130.83it/s]


train loss:  0.051596295
validation loss:  0.078423165
validation accuracy:  98.87556221889055 %
---------- Epoch :29/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 130.19it/s]


train loss:  0.052981038
validation loss:  0.08248823
validation accuracy:  98.80059970014993 %
---------- Epoch :30/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 103.82it/s]


train loss:  0.04884317
validation loss:  0.079119645
validation accuracy:  98.87556221889055 %
---------- Epoch :31/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 83.66it/s]


train loss:  0.04782546
validation loss:  0.0758912
validation accuracy:  98.95052473763118 %
---------- Epoch :32/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 87.88it/s] 


train loss:  0.04452814
validation loss:  0.07769403
validation accuracy:  98.80059970014993 %
---------- Epoch :33/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 129.75it/s]


train loss:  0.042569857
validation loss:  0.07193535
validation accuracy:  98.80059970014993 %
---------- Epoch :34/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 124.78it/s]


train loss:  0.0419213
validation loss:  0.06905008
validation accuracy:  99.10044977511244 %
---------- Epoch :35/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:01<00:00, 128.06it/s]


train loss:  0.04165998
validation loss:  0.073382586
validation accuracy:  98.95052473763118 %
---------- Epoch :36/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 123.21it/s]


train loss:  0.04265609
validation loss:  0.0748092
validation accuracy:  98.7256371814093 %
---------- Epoch :37/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:02<00:00, 97.32it/s]


train loss:  0.04096244
validation loss:  0.07150432
validation accuracy:  98.87556221889055 %
---------- Epoch :38/60 ----------
learning rate:  0.0001


100%|██████████| 250/250 [00:03<00:00, 71.64it/s]


train loss:  0.038683433
validation loss:  0.07346884
validation accuracy:  98.87556221889055 %
---------- Epoch :39/60 ----------
learning rate:  1e-05


100%|██████████| 250/250 [00:01<00:00, 126.25it/s]


train loss:  0.037860386
validation loss:  0.0723163
validation accuracy:  99.10044977511244 %
---------- Epoch :40/60 ----------
learning rate:  1e-05


100%|██████████| 250/250 [00:01<00:00, 129.07it/s]


train loss:  0.035222862
validation loss:  0.072389215
validation accuracy:  98.95052473763118 %
---------- Epoch :41/60 ----------
learning rate:  1e-05


100%|██████████| 250/250 [00:01<00:00, 127.33it/s]


train loss:  0.03679003
validation loss:  0.07341369
validation accuracy:  98.95052473763118 %
---------- Epoch :42/60 ----------
learning rate:  1e-05


100%|██████████| 250/250 [00:01<00:00, 129.34it/s]


train loss:  0.037464887
validation loss:  0.07299031
validation accuracy:  98.87556221889055 %
---------- Epoch :43/60 ----------
learning rate:  1.0000000000000002e-06


100%|██████████| 250/250 [00:02<00:00, 101.03it/s]


train loss:  0.035243213
validation loss:  0.0707364
validation accuracy:  98.95052473763118 %
---------- Epoch :44/60 ----------
learning rate:  1.0000000000000002e-06


100%|██████████| 250/250 [00:03<00:00, 81.13it/s]


train loss:  0.035602957
validation loss:  0.07174047
validation accuracy:  98.95052473763118 %
---------- Epoch :45/60 ----------
learning rate:  1.0000000000000002e-06


100%|██████████| 250/250 [00:02<00:00, 93.32it/s] 


train loss:  0.03562222
validation loss:  0.07146362
validation accuracy:  98.95052473763118 %
---------- Epoch :46/60 ----------
learning rate:  1.0000000000000002e-06


100%|██████████| 250/250 [00:01<00:00, 129.79it/s]


train loss:  0.035728466
validation loss:  0.072246656
validation accuracy:  98.95052473763118 %
---------- Epoch :47/60 ----------
learning rate:  1.0000000000000002e-07


100%|██████████| 250/250 [00:01<00:00, 128.94it/s]


train loss:  0.036523465
validation loss:  0.06867862
validation accuracy:  98.95052473763118 %
---------- Epoch :48/60 ----------
learning rate:  1.0000000000000002e-07


100%|██████████| 250/250 [00:01<00:00, 130.79it/s]


train loss:  0.037489623
validation loss:  0.072333455
validation accuracy:  99.10044977511244 %
---------- Epoch :49/60 ----------
learning rate:  1.0000000000000002e-07


100%|██████████| 250/250 [00:02<00:00, 119.03it/s]


train loss:  0.035031706
validation loss:  0.07157588
validation accuracy:  99.10044977511244 %
---------- Epoch :50/60 ----------
learning rate:  1.0000000000000002e-07


100%|██████████| 250/250 [00:02<00:00, 91.79it/s]


train loss:  0.038915426
validation loss:  0.07006124
validation accuracy:  99.10044977511244 %
---------- Epoch :51/60 ----------
learning rate:  1.0000000000000002e-07


100%|██████████| 250/250 [00:03<00:00, 74.69it/s]


train loss:  0.03699674
validation loss:  0.073565096
validation accuracy:  98.95052473763118 %
---------- Epoch :52/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 127.25it/s]


train loss:  0.036868382
validation loss:  0.072873406
validation accuracy:  98.95052473763118 %
---------- Epoch :53/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 128.34it/s]


train loss:  0.035696875
validation loss:  0.069753736
validation accuracy:  99.10044977511244 %
---------- Epoch :54/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 131.42it/s]


train loss:  0.036619216
validation loss:  0.07012485
validation accuracy:  99.10044977511244 %
---------- Epoch :55/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 128.39it/s]


train loss:  0.03508225
validation loss:  0.07413797
validation accuracy:  98.87556221889055 %
---------- Epoch :56/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:02<00:00, 100.65it/s]


train loss:  0.03630732
validation loss:  0.0713422
validation accuracy:  98.95052473763118 %
---------- Epoch :57/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:03<00:00, 75.70it/s]


train loss:  0.034368973
validation loss:  0.06983441
validation accuracy:  99.02548725637182 %
---------- Epoch :58/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:02<00:00, 111.10it/s]


train loss:  0.0368677
validation loss:  0.07219696
validation accuracy:  98.95052473763118 %
---------- Epoch :59/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 128.95it/s]


train loss:  0.034745384
validation loss:  0.07016882
validation accuracy:  98.95052473763118 %
---------- Epoch :60/60 ----------
learning rate:  1.0000000000000004e-08


100%|██████████| 250/250 [00:01<00:00, 129.09it/s]


train loss:  0.037277028
validation loss:  0.06990811
validation accuracy:  98.80059970014993 %


## Saving ONNX Model

In [28]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.17.0


In [None]:
model.cpu()

In [32]:
model = ResNet(img_channels= 1,block=BasicBlock,num_classes = 8)

if torch.cuda.is_available():
  model.load_state_dict(torch.load('best.pth'))
  model = model.cuda()
else:
  model.load_state_dict(torch.load('best.pth',map_location=torch.device('cpu') ))

model.eval()
torch.onnx.export(model.cpu(), torch.randn(1, 1, 128, 100, requires_grad=True), "best.onnx",opset_version=11,do_constant_folding=False)

  model.load_state_dict(torch.load('best.pth'))


In [None]:
# copy best model to / from drive
#!cp best.onnx '/content/drive/MyDrive/zindi/Audio Classification/'
#!cp '/content/drive/MyDrive/zindi/Audio Classification/best.onnx' ./

## Apply On Test-set

In [33]:
#re-validate
import os
os.sched_setaffinity(0,{0})
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
import cv2
cv2.setNumThreads(1)
import time

model = cv2.dnn.readNetFromONNX('best.onnx')

start_time = time.time()


all_preds = []
all_labels = []
for img,label in tqdm.tqdm(val_DataSet):
  blob = cv2.dnn.blobFromImage(img[0,:,:].astype('float32'), 1, (100, 128))
  model.setInput(blob)
  pred = model.forward()[0]
  all_preds.append(pred.argmax())
  all_labels.append(label)

end_time = time.time()
execution_time = end_time - start_time

print(f"Code execution time: {execution_time:.2f} seconds")

acc = (np.array(all_preds)==np.array(all_labels)).sum()/len(all_preds)
print('test accuracy: ',100*acc,'%')


100%|██████████| 1334/1334 [00:01<00:00, 922.60it/s] 

Code execution time: 1.46 seconds
test accuracy:  98.95052473763118 %





In [34]:
import numpy as np
from numpy.lib.stride_tricks import as_strided
import scipy.io.wavfile as wav
import scipy
from scipy.signal import resample
import pandas as pd
import cv2
import time
import tqdm
import os

os.sched_setaffinity(0,{0})
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
cv2.setNumThreads(1)

main_dir = '/content/'
#main_dir =  '/storage/emulated/0/Android/data/org.qpython.qpy/files/'

train_df = pd.read_csv(main_dir+'Train.csv')
test_df = pd.read_csv(main_dir+'Test_1.csv')

i_class_mapper =        {0:'left',
                         1:'down',
                         2:'go',
                         3:'right',
                         4:'up',
                         5:'yes',
                         6:'no',
                         7:'stop'}


def get_all_audio(data_dir):
    raw_data = []
    for sample_dir in data_dir:
        sample_dir = main_dir + 'TechCabal Ewe Audio Files/' + sample_dir
        _, y = wav.read(sample_dir)
        raw_data.append(y)
    return raw_data

def process_audio_sample(y):
    if y.dtype == np.int16:
        y = y / 32768.0
    elif y.dtype == np.int32:
        y = y / 2147483648.0
    elif y.dtype == np.uint8:
        y = (y - 128) / 128.0
    if y.ndim == 2:
        y = y.mean(axis=1)
    return  y

model = cv2.dnn.readNetFromONNX(main_dir+'best.onnx')
test_pathes = test_df.audio_filepath.values
raw_data = get_all_audio(test_pathes)


start_time = time.time()


all_preds = []
for sample_data in tqdm.tqdm(raw_data):
    y = process_audio_sample(sample_data)
    S = scipy.signal.spectrogram(y)[2]
    img = cv2.resize(S,(100,128))


    blob = cv2.dnn.blobFromImage(img.astype('float32'), 1, (100, 128))
    model.setInput(blob)
    pred = model.forward()[0]
    all_preds.append(i_class_mapper[pred.argmax()])

end_time = time.time()
execution_time = end_time - start_time

print(f"Code execution time: {execution_time:.2f} seconds")


ss = pd.read_csv(main_dir +'SampleSubmission_1.csv')
name_to_pred = dict(zip(test_df.id.values,all_preds))
ss['class'] = ss.id.map(name_to_pred)
ss.to_csv('submission.csv',index=False)

100%|██████████| 2946/2946 [00:14<00:00, 199.75it/s]

Code execution time: 14.77 seconds



