# KWS 35 Class Tests

In [1]:
import os
import sys

import numpy as np
import torch

import matplotlib.pyplot as plt
import seaborn as sns

import scipy.io.wavfile as wav

import importlib
import librosa
import soundfile as sf

import torchnet.meter as tnt
from collections import OrderedDict

from IPython.display import clear_output

sys.path.append(os.path.join(os.getcwd(), '..'))
sys.path.append(os.path.join(os.getcwd(), '../models/'))
sys.path.append(os.path.join(os.getcwd(), '../datasets/'))

import ai8x

from types import SimpleNamespace

In [2]:
# Checkpoint evaluated in this section, relative to the notebook directory.
# Every path component is passed separately so os.path.join chooses the separator.
trained_checkpoint_path = os.path.join('..', 'logs', '2024.06.05-221115', 'qat_best.pth.tar')

# The model module name contains hyphens, so it cannot be used in a plain
# `import` statement; importlib resolves it through the sys.path entries
# appended in the import cell.
mod_qat = importlib.import_module("ai85net-kws20-nas")
dataset = importlib.import_module("kws20")

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Working with device:", device)

# Configure ai8x for device 85 (reported as MAX78000 in the cell output),
# with simulation and average rounding both switched off.
ai8x.set_device(device=85, simulate=False, round_avg=False)
# Kept for reference only (not used in this notebook):
# qat_policy = {'start_epoch': 10, 'weight_bits': 8, 'bias_bits': 8}

Working with device: cuda
Configuring device: MAX78000, simulate=False.


In [4]:
# Build the 35-class network; activations are left unquantized.
model = mod_qat.AI85KWS20NetNAS(num_classes=35, num_channels=128, dimensions=(128, 1), bias=True,
                                quantize_activation=False)

# map_location='cpu' lets a checkpoint that was saved on a GPU be loaded on a
# machine without CUDA; the model is never moved off the CPU in this notebook.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(trained_checkpoint_path, map_location='cpu')

# Strip the 'module.' prefix from parameter names
# (presumably added by nn.DataParallel during training - confirm).
wrapper_prefix = 'module.'
checkpoint['state_dict'] = OrderedDict(
    (name[len(wrapper_prefix):] if name.startswith(wrapper_prefix) else name, tensor)
    for name, tensor in checkpoint['state_dict'].items()
)

# Batch-norm layers are fused before the weights are loaded
# (presumably because the QAT checkpoint stores fused parameters - confirm).
ai8x.fuse_bn_layers(model)

# strict=False tolerates key mismatches, so report them instead of hiding them.
load_result = model.load_state_dict(checkpoint['state_dict'], strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print("Missing keys:", load_result.missing_keys)
    print("Unexpected keys:", load_result.unexpected_keys)

ai8x.update_model(model)

In [5]:
# Options object handed to the dataset factory together with the data root.
sn = SimpleNamespace(truncate_testset=False, act_mode_8bit=False)

# Only the test split is requested; the first returned item is discarded.
data_args = ('/data_ssd', sn)
_, test = dataset.KWS_35_get_datasets(data_args, load_train=False, load_test=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=256)


Processing test...
test set: 11413 elements
Filtering silence elements...
test set: 11005 elements
Filtering librispeech elements...
test set: 11005 elements
Class backward (# 0): 165 elements
Class bed (# 1): 207 elements
Class bird (# 2): 185 elements
Class cat (# 3): 194 elements
Class dog (# 4): 220 elements
Class down (# 5): 406 elements
Class eight (# 6): 408 elements
Class five (# 7): 445 elements
Class follow (# 8): 172 elements
Class forward (# 9): 155 elements
Class four (# 10): 400 elements
Class go (# 11): 402 elements
Class happy (# 12): 203 elements
Class house (# 13): 191 elements
Class learn (# 14): 161 elements
Class left (# 15): 412 elements
Class marvin (# 17): 195 elements
Class nine (# 18): 408 elements
Class no (# 19): 405 elements
Class off (# 20): 402 elements
Class on (# 21): 396 elements
Class one (# 22): 399 elements
Class right (# 23): 396 elements
Class seven (# 24): 406 elements
Class sheila (# 25): 212 elements
Class six (# 27): 394 elements
Class stop (

In [6]:
# 35-class evaluation of `model` on `test_loader`.
num_classes = 35

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes, 5)))
test_confusion = tnt.ConfusionMeter(num_classes)
outputs_all = np.zeros((len(test), num_classes))
targets_all = np.zeros((len(test)))

# Running write position in outputs_all / targets_all, so the bookkeeping does
# not depend on the batch size the DataLoader was built with.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        classerr_test.add(outputs, targets)
        test_confusion.add(outputs, targets)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs.cpu().numpy()
        targets_all[row:next_row] = targets.cpu().numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  83.93457519309405


In [7]:
# Lift NumPy's summarisation threshold and widen lines so every row of the
# confusion matrix is displayed in full. This changes the global print options.
full_matrix_print = {'threshold': np.inf, 'linewidth': 500}
np.set_printoptions(**full_matrix_print)
test_confusion.value()

array([[141,   0,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0,   2,   0,   0,   0,   0,   0,   1,   2,   0,   0,   4,   0,   0,   0,   3,  11,   0,   0,   0],
       [  3, 136,   5,   4,   1,   6,   5,   0,   0,   0,   0,   4,   1,   0,   0,  14,   1,   1,   2,   0,   0,   0,   8,   4,   0,   1,   1,   1,   0,   3,   3,   1,   0,   1,   1],
       [  1,   8, 139,   0,   2,   2,   1,   1,   1,   0,   1,   2,   0,   0,   1,   6,   1,   2,   2,   0,   1,   0,   6,   1,   0,   1,   0,   3,   0,   1,   2,   0,   0,   0,   0],
       [  0,   6,   0, 129,   3,   7,   0,   1,   0,   0,   1,   9,   0,   0,   0,  10,   0,   0,   1,   0,   0,   0,   4,   1,   0,   1,   7,   0,   0,   0,   7,   1,   0,   6,   0],
       [  0,   2,   1,   0, 140,  21,   0,   0,   0,   0,   0,  12,   0,   0,   0,   2,   0,   0,   6,   1,   1,   1,   2,   0,   0,   1,  28,   0,   0,   0,   2,   0,   0,   0,   0],
       [  0,   1,   0,   0,   1, 374,   0,   0,   0,   0,   1,   8,   0,   0,   

In [8]:
# The ten keyword classes that are scored individually in the reduced test.
classes = [
    'down', 'go', 'left', 'no', 'off',
    'on', 'right', 'stop', 'up', 'yes',
]

In [9]:
# Labels, in the 35-class label space, that are kept as separate classes in the
# reduced test; position in this list becomes the reduced label.
# Presumably one label per entry of `classes`, in the same order - verify
# against the dataset's new_class_dict.
classes_35 = [
    5, 11, 15, 18, 19,
    20, 22, 26, 30, 33,
]

In [10]:
# Score the 35-class model as an 11-class classifier: the labels listed in
# classes_35 become labels 0-9 (in list order) and everything else becomes 10.
keyword_ids = torch.tensor(classes_35)
other_label = len(classes_35)
num_classes_11 = other_label + 1

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes_11, 5)))
test_confusion = tnt.ConfusionMeter(num_classes_11)
outputs_all = np.zeros((len(test), num_classes_11))
targets_all = np.zeros((len(test)))

# Running write position, independent of the DataLoader's batch size.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs).cpu()
        targets = targets.cpu()

        # (batch, 10) mask: True where a label equals the keyword id of that column.
        target_hits = targets.unsqueeze(1) == keyword_ids
        targets_11 = torch.where(
            target_hits.any(dim=1),
            target_hits.int().argmax(dim=1),  # column index of the matching keyword
            torch.full_like(targets, other_label, dtype=torch.long),
        )

        # Is the 35-class top-1 prediction one of the keywords?
        pred_35 = outputs.argmax(dim=1)
        pred_is_keyword = (pred_35.unsqueeze(1) == keyword_ids).any(dim=1)

        # Keyword prediction: keep the ten keyword scores and suppress "other" (-100).
        # Non-keyword prediction: force "other" (100) and zero the keyword scores.
        outputs_11 = torch.empty((len(inputs), num_classes_11))
        outputs_11[:, :other_label] = outputs[:, keyword_ids]
        outputs_11[:, other_label] = -100
        outputs_11[~pred_is_keyword, :other_label] = 0
        outputs_11[~pred_is_keyword, other_label] = 100

        classerr_test.add(outputs_11, targets_11)
        test_confusion.add(outputs_11, targets_11)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs_11.numpy()
        targets_all[row:next_row] = targets_11.numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  89.64107223989096


In [11]:
# np.disp is deprecated since NumPy 2.0; print() writes the same text to stdout.
print(test_confusion.value())

[[ 374    8    2    7    1    1    0    4    2    1    6]
 [   4  377    1    9    0    2    0    2    2    3    2]
 [   1    0  395    0    0    0    3    1    2    7    3]
 [   7    4    7  375    1    0    2    1    0    1    7]
 [   1    2    0    0  353   12    0    0   30    1    3]
 [   0    0    0    0    8  380    0    2    4    0    2]
 [   2    0    7    0    0    2  378    0    2    0    5]
 [   1    0    0    0    0    0    0  403    6    0    1]
 [   0    1    1    0   22    1    0    4  393    0    3]
 [   0    2    0    0    0    0    4    0    1  410    2]
 [  65  136  106  105   52  135  139   74   62   30 6027]]


## benchmark test

To run the benchmark test with the 35-class model, load the benchmark data by passing `benchmark=True` to the dataset loader:

In [12]:
# Options object handed to the dataset factory together with the data root.
sn = SimpleNamespace(truncate_testset=False, act_mode_8bit=False)

# benchmark=True requests the benchmark data. Both splits are loaded because
# the class-balancing cells further down inspect `train` as well as `test`.
data_args = ('/data_ssd', sn)
train, test = dataset.KWS_35_get_datasets(data_args, load_train=True, load_test=True, benchmark=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=256)


Processing train...
train set: 331395 elements
validation set: 32943 elements
Filtering silence elements...
train set: 316395 elements
validation set: 29943 elements
Filtering librispeech elements...
train set: 254529 elements
validation set: 29943 elements
Class backward (# 0): 4497 elements
Class bed (# 1): 5421 elements
Class bird (# 2): 5637 elements
Class cat (# 3): 5511 elements
Class dog (# 4): 5724 elements
Class down (# 5): 10533 elements
Class eight (# 6): 10137 elements
Class five (# 7): 10821 elements
Class follow (# 8): 4221 elements
Class forward (# 9): 4206 elements
Class four (# 10): 9984 elements
Class go (# 11): 10434 elements
Class happy (# 12): 5553 elements
Class house (# 13): 5766 elements
Class learn (# 14): 4242 elements
Class left (# 15): 10167 elements
Class marvin (# 17): 5715 elements
Class nine (# 18): 10578 elements
Class no (# 19): 10608 elements
Class off (# 20): 10029 elements
Class on (# 21): 10347 elements
Class one (# 22): 10473 elements
Class right

In [13]:
# 35-class evaluation of `model` on the benchmark `test_loader`.
num_classes = 35

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes, 5)))
test_confusion = tnt.ConfusionMeter(num_classes)
outputs_all = np.zeros((len(test), num_classes))
targets_all = np.zeros((len(test)))

# Running write position in outputs_all / targets_all, so the bookkeeping does
# not depend on the batch size the DataLoader was built with.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        classerr_test.add(outputs, targets)
        test_confusion.add(outputs, targets)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs.cpu().numpy()
        targets_all[row:next_row] = targets.cpu().numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  92.70577738121794


In [14]:
# Score the 35-class model as an 11-class classifier on the benchmark data: the
# labels listed in classes_35 become labels 0-9 (in list order), the rest become 10.
keyword_ids = torch.tensor(classes_35)
other_label = len(classes_35)
num_classes_11 = other_label + 1

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes_11, 5)))
test_confusion = tnt.ConfusionMeter(num_classes_11)
outputs_all = np.zeros((len(test), num_classes_11))
targets_all = np.zeros((len(test)))

# Running write position, independent of the DataLoader's batch size.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs).cpu()
        targets = targets.cpu()

        # (batch, 10) mask: True where a label equals the keyword id of that column.
        target_hits = targets.unsqueeze(1) == keyword_ids
        targets_11 = torch.where(
            target_hits.any(dim=1),
            target_hits.int().argmax(dim=1),  # column index of the matching keyword
            torch.full_like(targets, other_label, dtype=torch.long),
        )

        # Is the 35-class top-1 prediction one of the keywords?
        pred_35 = outputs.argmax(dim=1)
        pred_is_keyword = (pred_35.unsqueeze(1) == keyword_ids).any(dim=1)

        # Keyword prediction: keep the ten keyword scores and suppress "other" (-100).
        # Non-keyword prediction: force "other" (100) and zero the keyword scores.
        outputs_11 = torch.empty((len(inputs), num_classes_11))
        outputs_11[:, :other_label] = outputs[:, keyword_ids]
        outputs_11[:, other_label] = -100
        outputs_11[~pred_is_keyword, :other_label] = 0
        outputs_11[~pred_is_keyword, other_label] = 100

        classerr_test.add(outputs_11, targets_11)
        test_confusion.add(outputs_11, targets_11)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs_11.numpy()
        targets_all[row:next_row] = targets_11.numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  93.55342404639751


# Class balancing with weights

In [53]:
# Per-class sample counts of the test split, plus keyword / non-keyword totals.
# The label list is built once up front: calling .tolist() inside the loop
# would re-convert the whole target array for every class.
test_labels = test.targets.tolist()

sum_others = 0
sum_keywords = 0
for class_name, class_label in test.new_class_dict.items():
    # Entries are matched as one-element lists, hence [class_label].
    count = test_labels.count([class_label])
    print(class_name, count)
    if class_name in classes:
        sum_keywords += count
    else:
        sum_others += count

print(sum_others)
print(sum_keywords)

backward 9
bed 11
bird 10
cat 10
dog 7
down 406
eight 34
five 23
follow 9
forward 13
four 18
go 402
happy 21
house 10
learn 10
left 412
marvin 11
nine 24
no 405
off 402
on 396
one 25
right 396
seven 18
sheila 17
six 28
stop 411
three 26
tree 10
two 20
up 425
visual 6
wow 15
yes 419
zero 24
UNKNOWN 0
409
4074


In [54]:
# Per-class sample counts of the train split, plus keyword / non-keyword totals.
# The label list is built once up front: calling .tolist() inside the loop
# would re-convert the whole target array for every class.
train_labels = train.targets.tolist()

sum_others = 0
sum_keywords = 0
for class_name, class_label in train.new_class_dict.items():
    # Entries are matched as one-element lists, hence [class_label].
    count = train_labels.count([class_label])
    print(class_name, count)
    if class_name in classes:
        sum_keywords += count
    else:
        sum_others += count

print(sum_others)
print(sum_keywords)

backward 4497
bed 5421
bird 5637
cat 5511
dog 5724
down 10533
eight 10137
five 10821
follow 4221
forward 4206
four 9984
go 10434
happy 5553
house 5766
learn 4242
left 10167
marvin 5715
nine 10578
no 10608
off 10029
on 10347
one 10473
right 10146
seven 10776
sheila 5430
six 10398
stop 10383
three 9966
tree 4698
two 10368
up 9894
visual 4281
wow 5751
yes 10875
zero 10902
UNKNOWN 0
181056
103416


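Weighted test samples per class:

- benchmark (keyword) classes: 0.04 * 4074 / 10 ≈ 16.30
- other classes: 1 * 409 / 25 = 16.36

Class weights used in the calculation above:

- 1 for other classes
- 0.04 for benchmark classes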

Test distribution:

- each "other" class: 0.4 %
- each keyword class: 9 %

Weights:
7240 * x => 0.4
10340 * y => 9

1
0.06


In [50]:
# Number of keyword classes in `classes`.
len(classes)

10

## 35 class tests

In [None]:
# Checkpoint for this section. The default is the absolute path on the original
# author's machine; set the KWS35_CHECKPOINT environment variable to point at
# your own copy instead of editing the notebook.
trained_checkpoint_path = os.environ.get(
    "KWS35_CHECKPOINT",
    "/home/merveeyuboglu/Github/ai8x-training-merve/ai8x-training/logs/train_kws35/2024.05.23-103316/qat_best.pth.tar",
)
# The model module name contains hyphens, so it has to be imported through importlib.
mod_qat = importlib.import_module("ai85net-kws20-nas")
dataset = importlib.import_module("kws20")

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Working with device:", device)

# Configure ai8x for device 85 (reported as MAX78000 in the cell output),
# with simulation and average rounding both switched off.
ai8x.set_device(device=85, simulate=False, round_avg=False)
# Kept for reference only (not used in this notebook):
# qat_policy = {'start_epoch': 10, 'weight_bits': 8, 'bias_bits': 8}

Working with device: cuda
Configuring device: MAX78000, simulate=False.


In [None]:
# Build the 35-class network; activations are left unquantized.
model = mod_qat.AI85KWS20NetNAS(num_classes=35, num_channels=128, dimensions=(128, 1), bias=True,
                                quantize_activation=False)

# map_location='cpu' lets a checkpoint that was saved on a GPU be loaded on a
# machine without CUDA; the model is never moved off the CPU in this notebook.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(trained_checkpoint_path, map_location='cpu')

# Strip the 'module.' prefix from parameter names
# (presumably added by nn.DataParallel during training - confirm).
wrapper_prefix = 'module.'
checkpoint['state_dict'] = OrderedDict(
    (name[len(wrapper_prefix):] if name.startswith(wrapper_prefix) else name, tensor)
    for name, tensor in checkpoint['state_dict'].items()
)

# Batch-norm layers are fused before the weights are loaded
# (presumably because the QAT checkpoint stores fused parameters - confirm).
ai8x.fuse_bn_layers(model)

# strict=False tolerates key mismatches, so report them instead of hiding them.
load_result = model.load_state_dict(checkpoint['state_dict'], strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print("Missing keys:", load_result.missing_keys)
    print("Unexpected keys:", load_result.unexpected_keys)

ai8x.update_model(model)

In [None]:
# Options object handed to the dataset factory together with the data root.
sn = SimpleNamespace(truncate_testset=False, act_mode_8bit=False)

# Only the test split is requested; the first returned item is discarded.
data_args = ('/data_ssd', sn)
_, test = dataset.KWS_35_get_datasets(data_args, load_train=False, load_test=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=256)


Processing test...
test set: 11413 elements
Filtering silence elements...
test set: 11005 elements
Class backward (# 0): 165 elements
Class bed (# 1): 207 elements
Class bird (# 2): 185 elements
Class cat (# 3): 194 elements
Class dog (# 4): 220 elements
Class down (# 5): 406 elements
Class eight (# 6): 408 elements
Class five (# 7): 445 elements
Class follow (# 8): 172 elements
Class forward (# 9): 155 elements
Class four (# 10): 400 elements
Class go (# 11): 402 elements
Class happy (# 12): 203 elements
Class house (# 13): 191 elements
Class learn (# 14): 161 elements
Class left (# 15): 412 elements
Class marvin (# 17): 195 elements
Class nine (# 18): 408 elements
Class no (# 19): 405 elements
Class off (# 20): 402 elements
Class on (# 21): 396 elements
Class one (# 22): 399 elements
Class right (# 23): 396 elements
Class seven (# 24): 406 elements
Class sheila (# 25): 212 elements
Class six (# 27): 394 elements
Class stop (# 28): 411 elements
Class three (# 29): 405 elements
Class 

In [None]:
# 35-class evaluation of `model` on `test_loader`.
num_classes = 35

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes, 5)))
test_confusion = tnt.ConfusionMeter(num_classes)
outputs_all = np.zeros((len(test), num_classes))
targets_all = np.zeros((len(test)))

# Running write position in outputs_all / targets_all, so the bookkeeping does
# not depend on the batch size the DataLoader was built with.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        classerr_test.add(outputs, targets)
        test_confusion.add(outputs, targets)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs.cpu().numpy()
        targets_all[row:next_row] = targets.cpu().numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  92.88505224897774


In [None]:
# Display the confusion matrix accumulated by the preceding evaluation cell.
test_confusion.value()

array([[160,   1,   0, ...,   0,   0,   1],
       [  0, 181,   5, ...,   0,   0,   0],
       [  1,   5, 171, ...,   0,   0,   2],
       ...,
       [  0,   0,   0, ..., 189,   0,   0],
       [  0,   0,   0, ...,   0, 407,   1],
       [  0,   0,   0, ...,   0,   0, 398]], dtype=int32)

In [None]:
# The ten keyword classes that are scored individually in the reduced test.
classes = [
    'down', 'go', 'left', 'no', 'off',
    'on', 'right', 'stop', 'up', 'yes',
]

In [None]:
# Labels, in the 35-class label space, that are kept as separate classes in the
# reduced test; position in this list becomes the reduced label.
# Presumably one label per entry of `classes`, in the same order - verify
# against the dataset's new_class_dict.
classes_35 = [
    5, 11, 15, 18, 19,
    20, 22, 26, 30, 33,
]

In [None]:
# Score the 35-class model as an 11-class classifier: the labels listed in
# classes_35 become labels 0-9 (in list order) and everything else becomes 10.
keyword_ids = torch.tensor(classes_35)
other_label = len(classes_35)
num_classes_11 = other_label + 1

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes_11, 5)))
test_confusion = tnt.ConfusionMeter(num_classes_11)
outputs_all = np.zeros((len(test), num_classes_11))
targets_all = np.zeros((len(test)))

# Running write position, independent of the DataLoader's batch size.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs).cpu()
        targets = targets.cpu()

        # (batch, 10) mask: True where a label equals the keyword id of that column.
        target_hits = targets.unsqueeze(1) == keyword_ids
        targets_11 = torch.where(
            target_hits.any(dim=1),
            target_hits.int().argmax(dim=1),  # column index of the matching keyword
            torch.full_like(targets, other_label, dtype=torch.long),
        )

        # Is the 35-class top-1 prediction one of the keywords?
        pred_35 = outputs.argmax(dim=1)
        pred_is_keyword = (pred_35.unsqueeze(1) == keyword_ids).any(dim=1)

        # Keyword prediction: keep the ten keyword scores and suppress "other" (-100).
        # Non-keyword prediction: force "other" (100) and zero the keyword scores.
        outputs_11 = torch.empty((len(inputs), num_classes_11))
        outputs_11[:, :other_label] = outputs[:, keyword_ids]
        outputs_11[:, other_label] = -100
        outputs_11[~pred_is_keyword, :other_label] = 0
        outputs_11[~pred_is_keyword, other_label] = 100

        classerr_test.add(outputs_11, targets_11)
        test_confusion.add(outputs_11, targets_11)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs_11.numpy()
        targets_all[row:next_row] = targets_11.numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  95.93820990458882


In [None]:
# np.disp is deprecated since NumPy 2.0; print() writes the same text to stdout.
print(test_confusion.value())

[[ 360   12    0    8    0    0    0    3    1    0   22]
 [   2  375    1    8    0    0    1    0    1    1   13]
 [   0    1  399    1    0    0    0    0    1    2    8]
 [   1    1    6  383    0    0    0    0    0    1   13]
 [   0    1    0    1  352    4    0    0   34    0   10]
 [   0    0    0    0   11  372    1    0    1    0   11]
 [   1    0    3    0    2    0  365    0    1    0   24]
 [   0    1    0    2    0    0    0  391    6    0   11]
 [   0    1    1    0   24    0    0    4  384    0   11]
 [   0    2    2    0    0    0    3    0    0  407    5]
 [  17   20   19   26   15   23   12   14   13    2 6770]]


## benchmark test

To run the benchmark test with the 35-class model, load the benchmark data by passing `benchmark=True` to the dataset loader:

In [None]:
# Options object handed to the dataset factory together with the data root.
sn = SimpleNamespace(truncate_testset=False, act_mode_8bit=False)

# benchmark=True requests the benchmark data; only the test split is loaded
# and the first returned item is discarded.
data_args = ('/data_ssd', sn)
_, test = dataset.KWS_35_get_datasets(data_args, load_train=False, load_test=True, benchmark=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=256)


Processing test...
test set: 4891 elements
Filtering silence elements...
test set: 4483 elements
Filtering librispeech elements...
test set: 4483 elements
Class backward (# 0): 9 elements
Class bed (# 1): 11 elements
Class bird (# 2): 10 elements
Class cat (# 3): 10 elements
Class dog (# 4): 7 elements
Class down (# 5): 406 elements
Class eight (# 6): 34 elements
Class five (# 7): 23 elements
Class follow (# 8): 9 elements
Class forward (# 9): 13 elements
Class four (# 10): 18 elements
Class go (# 11): 402 elements
Class happy (# 12): 21 elements
Class house (# 13): 10 elements
Class learn (# 14): 10 elements
Class left (# 15): 412 elements
Class marvin (# 17): 11 elements
Class nine (# 18): 24 elements
Class no (# 19): 405 elements
Class off (# 20): 402 elements
Class on (# 21): 396 elements
Class one (# 22): 25 elements
Class right (# 23): 396 elements
Class seven (# 24): 18 elements
Class sheila (# 25): 17 elements
Class six (# 27): 28 elements
Class stop (# 28): 411 elements
Class

In [None]:
# 35-class evaluation of `model` on the benchmark `test_loader`.
num_classes = 35

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes, 5)))
test_confusion = tnt.ConfusionMeter(num_classes)
outputs_all = np.zeros((len(test), num_classes))
targets_all = np.zeros((len(test)))

# Running write position in outputs_all / targets_all, so the bookkeeping does
# not depend on the batch size the DataLoader was built with.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        classerr_test.add(outputs, targets)
        test_confusion.add(outputs, targets)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs.cpu().numpy()
        targets_all[row:next_row] = targets.cpu().numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  92.90653580191835


In [None]:
# Score the 35-class model as an 11-class classifier on the benchmark data: the
# labels listed in classes_35 become labels 0-9 (in list order), the rest become 10.
keyword_ids = torch.tensor(classes_35)
other_label = len(classes_35)
num_classes_11 = other_label + 1

# Inference mode: turns off train-only behaviour such as dropout, if the model has any.
model.eval()

classerr_test = tnt.ClassErrorMeter(accuracy=True, topk=(1, min(num_classes_11, 5)))
test_confusion = tnt.ConfusionMeter(num_classes_11)
outputs_all = np.zeros((len(test), num_classes_11))
targets_all = np.zeros((len(test)))

# Running write position, independent of the DataLoader's batch size.
row = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs).cpu()
        targets = targets.cpu()

        # (batch, 10) mask: True where a label equals the keyword id of that column.
        target_hits = targets.unsqueeze(1) == keyword_ids
        targets_11 = torch.where(
            target_hits.any(dim=1),
            target_hits.int().argmax(dim=1),  # column index of the matching keyword
            torch.full_like(targets, other_label, dtype=torch.long),
        )

        # Is the 35-class top-1 prediction one of the keywords?
        pred_35 = outputs.argmax(dim=1)
        pred_is_keyword = (pred_35.unsqueeze(1) == keyword_ids).any(dim=1)

        # Keyword prediction: keep the ten keyword scores and suppress "other" (-100).
        # Non-keyword prediction: force "other" (100) and zero the keyword scores.
        outputs_11 = torch.empty((len(inputs), num_classes_11))
        outputs_11[:, :other_label] = outputs[:, keyword_ids]
        outputs_11[:, other_label] = -100
        outputs_11[~pred_is_keyword, :other_label] = 0
        outputs_11[~pred_is_keyword, other_label] = 100

        classerr_test.add(outputs_11, targets_11)
        test_confusion.add(outputs_11, targets_11)

        next_row = row + len(inputs)
        outputs_all[row:next_row] = outputs_11.numpy()
        targets_all[row:next_row] = targets_11.numpy()
        row = next_row

# First entry of value() corresponds to the first topk entry, i.e. top-1 accuracy.
test_acc = classerr_test.value()[0]
print("Total Accuracy: ", test_acc)

Total Accuracy:  93.44189159045283
