# Результаты train, dev, test

In [None]:
# Display the sample rate configured for the test dataset.
config.model.test_ds.sample_rate

22050

In [None]:
# Restore the trained classifier from its checkpoint.
restored_model = nemo_asr.models.EncDecClassificationModel.load_from_checkpoint('/content/epoch=19-step=26099.ckpt')


def _eval_split_config(manifest_filepath):
    """Build the data config for one split.

    Every split uses the same sample rate, label list, batch size and
    shuffle=False; only the manifest path differs. A fresh dict (and a fresh
    label list) is returned on every call so the configs never share state.
    """
    return {
        'manifest_filepath': manifest_filepath,
        'sample_rate': 22050,
        'labels': ['marvin', 'unknow'],
        'batch_size': 128,
        # NOTE(review): shuffling is off for train as well, presumably so the
        # sample indices reported later follow manifest order; confirm.
        'shuffle': False,
    }


# Attach the test, dev and train manifests to the restored model.
restored_model.setup_multiple_test_data(_eval_split_config('/content/test.json'))
restored_model.setup_multiple_validation_data(_eval_split_config('/content/dev.json'))
restored_model.setup_training_data(_eval_split_config('/content/train.json'))

In [None]:
# Keep direct handles on the model's test / validation / train data loaders.
test_dl, dev_dl, train_dl = (
    restored_model._test_dl,
    restored_model._validation_dl,
    restored_model._train_dl,
)

In [None]:
# Move the restored model to CPU and switch it to evaluation mode; the
# following cells run inference through `cpu_model`.
cpu_model = restored_model.cpu()
cpu_model.eval()

In [None]:
@torch.no_grad()
def extract_logits(model, dataloader):
    """Run `model` over every batch of `dataloader` and gather the results.

    Gradient tracking is disabled for the whole call. Each batch must unpack
    into four items: signal, signal length, labels, label length. One dot is
    printed per processed batch as a progress indicator.

    Args:
        model: Callable accepting `input_signal` and `input_signal_length`
            keyword arguments and returning the logits for the batch.
        dataloader: Iterable yielding the four-item batches described above.

    Returns:
        Tuple `(logits, labels)`, each concatenated over batches along dim 0.
    """
    collected_logits, collected_labels = [], []

    for audio_signal, audio_signal_len, batch_labels, _batch_labels_len in dataloader:
        collected_logits.append(
            model(input_signal=audio_signal, input_signal_length=audio_signal_len)
        )
        collected_labels.append(batch_labels)
        print(".", end='')  # progress: one dot per batch
    print()

    print("Finished extracting logits !")
    return torch.cat(collected_logits, 0), torch.cat(collected_labels, 0)


In [None]:
def get_labels_logits(model, dl):
    """Extract logits and labels for one data loader and report their shapes.

    Delegates to `extract_logits`, prints the shapes of both returned tensors
    and hands them back as a `(logits, labels)` tuple.
    """
    split_logits, split_labels = extract_logits(model, dl)
    print("Logits:", split_logits.shape, "Labels :", split_labels.shape)
    return split_logits, split_labels

In [None]:
# Extract logits and labels for the train split (one dot is printed per batch).
logits_train, labels_train = get_labels_logits(cpu_model, train_dl)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [None]:
# Extract logits and labels for the dev split (one dot is printed per batch).
logits_dev, labels_dev = get_labels_logits(cpu_model, dev_dl)

..........................................................................................................................................................
Finished extracting logits !
Logits: torch.Size([19692, 2]) Labels : torch.Size([19692])


In [None]:
# Extract logits and labels for the test split (one dot is printed per batch).
logits_test, labels_test = get_labels_logits(cpu_model, test_dl)

.......................................
Finished extracting logits !
Logits: torch.Size([4890, 2]) Labels : torch.Size([4890])


In [None]:
# train accuracy
# The metric result is indexable; element 0 is scaled to a percentage for display.
# NOTE(review): presumably element 0 is the top-1 accuracy; confirm.
acc = cpu_model._accuracy(logits=logits_train, labels=labels_train)
print("Accuracy : ", float(acc[0]*100))

Accuracy :  99.85322570800781


In [None]:
# dev accuracy
# The metric result is indexable; element 0 is scaled to a percentage for display.
# NOTE(review): presumably element 0 is the top-1 accuracy; confirm.
acc = cpu_model._accuracy(logits=logits_dev, labels=labels_dev)
print("Accuracy : ", float(acc[0]*100))

Accuracy :  99.61405181884766


In [None]:
# test accuracy
# The metric result is indexable; element 0 is scaled to a percentage for display.
# NOTE(review): presumably element 0 is the top-1 accuracy; confirm.
acc = cpu_model._accuracy(logits=logits_test, labels=labels_test)
print("Accuracy : ", float(acc[0]*100))

Accuracy :  99.8364028930664


## Метрики

In [None]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score

In [None]:
# Per split: class probabilities, then the winning probability and the
# predicted class id for every sample.
probs_test = torch.softmax(logits_test, dim=-1)
probs_train = torch.softmax(logits_train, dim=-1)
probs_dev = torch.softmax(logits_dev, dim=-1)

probas_test, preds_test = torch.max(probs_test, dim=-1)
probas_train, preds_train = torch.max(probs_train, dim=-1)
probas_dev, preds_dev = torch.max(probs_dev, dim=-1)

# Balanced accuracy, printed in the order: test, train, dev.
print(balanced_accuracy_score(labels_test, preds_test))
print(balanced_accuracy_score(labels_train, preds_train))
print(balanced_accuracy_score(labels_dev, preds_dev))

# F1 score, printed in the order: test, train, dev.
# NOTE(review): f1_score is called with its defaults, so the positive class is
# label id 1, presumably 'unknow' given the label order used in this notebook;
# confirm this is the class of interest.
print(f1_score(labels_test, preds_test))
print(f1_score(labels_train, preds_train))
print(f1_score(labels_dev, preds_dev))

0.9084760289925282
0.998532318936081
0.9961405646963234
0.9991798236620874
0.9985335224790055
0.9961526779386453


# Посмотрим на неверно классифицированные данные

In [None]:
# Helper for printing human-readable labels.
class ReverseMapLabel:
    """Translate class ids into label names using a data loader's dataset.

    On construction, copies the `label2id` and `id2label` mappings found on
    `data_loader.dataset` into plain dicts.
    """

    def __init__(self, data_loader):
        dataset = data_loader.dataset
        self.label2id = dict(dataset.label2id)
        self.id2label = dict(dataset.id2label)

    def __call__(self, pred_idx, label_idx):
        """Return `(predicted_label_name, true_label_name)` for the two ids."""
        names = self.id2label
        return names[pred_idx], names[label_idx]

In [None]:
def indices_inc_lables(dl, logits, labels, cpu_model):
    """Collect the misclassified samples of one dataset split.

    Args:
        dl: Data loader whose `dataset` provides the `label2id` / `id2label`
            mappings consumed by `ReverseMapLabel`.
        logits: Per-sample class scores; softmax is taken over the last dim.
        labels: 1-D tensor of ground-truth class ids, one per row of `logits`.
        cpu_model: Unused. Kept in the signature so existing call sites keep
            working.

    Returns:
        List of `(sample_index, predicted_label, true_label, confidence)`
        tuples, sorted by ascending confidence of the (wrong) prediction.
    """
    rev_map = ReverseMapLabel(dl)

    probs = torch.softmax(logits, dim=-1)
    probas, preds = torch.max(probs, dim=-1)

    # Count the samples that were actually passed in. The sample count used to
    # be read from cpu_model._accuracy.total_counts_k[0], i.e. the state left
    # by the metric's most recent call, so it reported the size of whichever
    # split was scored last instead of the split being analysed here.
    total_count = int(labels.shape[0])

    incorrect_ids = torch.nonzero(preds != labels).flatten().tolist()
    incorrect_preds = [
        (idx, *rev_map(int(preds[idx]), int(labels[idx])), float(probas[idx]))
        for idx in incorrect_ids
    ]

    print(f"Num test samples : {total_count}")
    print(f"Num errors : {len(incorrect_preds)}")

    # Least confident mistakes first.
    return sorted(incorrect_preds, key=lambda x: x[-1], reverse=False)

In [None]:
# train: the "Num test samples" figure printed below is mixed up; it should be 166930
incorrect_preds_train = indices_inc_lables(train_dl, logits_train, labels_train, cpu_model)

Num test samples : 4890
Num errors : 245


In [None]:
# dev: the "Num test samples" figure printed below is mixed up; it should be 19692
incorrect_preds_dev = indices_inc_lables(dev_dl, logits_dev, labels_dev, cpu_model)

Num test samples : 4890
Num errors : 76


In [None]:
# test: misclassified samples, sorted by ascending prediction confidence
incorrect_preds_test = indices_inc_lables(test_dl, logits_test, labels_test, cpu_model)

Num test samples : 4890
Num errors : 8


### Списки неверно классифицированных

In [None]:
# One line per misclassified test sample: (index, predicted, true, confidence).
for incorrect_sample in incorrect_preds_test:
    print(incorrect_sample)

(4208, 'marvin', 'unknow', 0.834339439868927)
(3896, 'unknow', 'marvin', 0.847510814666748)
(1581, 'marvin', 'unknow', 0.9664788246154785)
(1554, 'marvin', 'unknow', 0.9675670266151428)
(3899, 'unknow', 'marvin', 0.9837485551834106)
(4413, 'marvin', 'unknow', 0.9890121221542358)
(1076, 'marvin', 'unknow', 0.9970698356628418)
(1098, 'marvin', 'unknow', 0.9992333650588989)


In [None]:
# One line per misclassified dev sample: (index, predicted, true, confidence).
for incorrect_sample in incorrect_preds_dev:
    print(incorrect_sample)

(5101, 'unknow', 'marvin', 0.5211857557296753)
(10777, 'unknow', 'marvin', 0.5339115262031555)
(8243, 'unknow', 'marvin', 0.5707648396492004)
(6131, 'unknow', 'marvin', 0.5930638909339905)
(6238, 'unknow', 'marvin', 0.5960043668746948)
(11784, 'unknow', 'marvin', 0.5960043668746948)
(7412, 'unknow', 'marvin', 0.603659987449646)
(3842, 'unknow', 'marvin', 0.636572003364563)
(9742, 'unknow', 'marvin', 0.6439694166183472)
(9484, 'unknow', 'marvin', 0.647192120552063)
(17556, 'marvin', 'unknow', 0.6553643941879272)
(4914, 'unknow', 'marvin', 0.6633752584457397)
(9535, 'unknow', 'marvin', 0.6696017980575562)
(12172, 'unknow', 'marvin', 0.6702904105186462)
(8812, 'unknow', 'marvin', 0.6907464265823364)
(736, 'marvin', 'unknow', 0.7040918469429016)
(5295, 'unknow', 'marvin', 0.710063099861145)
(4551, 'unknow', 'marvin', 0.7370499968528748)
(18315, 'marvin', 'unknow', 0.7713037133216858)
(12136, 'unknow', 'marvin', 0.8083779811859131)
(11182, 'unknow', 'marvin', 0.8126702904701233)
(5258, 'unk

# FAR per hour

In [None]:
# Point the model's test data at the FAR manifest, using the same settings as
# the other splits.
far_ds_config = {
    'manifest_filepath': '/content/far.json',
    'sample_rate': 22050,
    'labels': ['marvin', 'unknow'],
    'batch_size': 128,
    'shuffle': False,
}
restored_model.setup_multiple_test_data(far_ds_config)

[NeMo I 2023-04-17 09:28:32 collections:298] Filtered duration for loading collection is 0.000000.
[NeMo I 2023-04-17 09:28:32 collections:301] Dataset loaded with 7198 items, total duration of  2.00 hours.
[NeMo I 2023-04-17 09:28:33 collections:303] # 7198 files loaded accounting to # 1 labels


In [None]:
# Read the model's current test data loader (presumably the FAR set configured
# in the previous cell) and extract its logits and labels.
far_dl = restored_model._test_dl
logits_far, labels_far = get_labels_logits(cpu_model, far_dl)

.........................................................
Finished extracting logits !
Logits: torch.Size([7198, 2]) Labels : torch.Size([7198])


In [None]:
# far accuracy
# The metric result is indexable; element 0 is scaled to a percentage for display.
# NOTE(review): presumably element 0 is the top-1 accuracy; confirm.
acc = cpu_model._accuracy(logits=logits_far, labels=labels_far)
print("Accuracy : ", float(acc[0]*100))

Accuracy :  97.74937438964844


In [None]:
# far: misclassified samples on the FAR set, sorted by ascending prediction confidence
incorrect_preds_far = indices_inc_lables(far_dl, logits_far, labels_far, cpu_model)

Num test samples : 7198
Num errors : 162


In [None]:
# Errors per hour on the FAR set, derived from the computed results instead of
# numbers copied by hand from earlier cell output (162 errors / 7198 samples).
# NOTE(review): scaling the error rate by 3600 treats every clip as one second
# long (the loader log reports 7198 items over 2.00 hours); confirm. It also
# assumes every error on this set is a false alarm, since the manifest holds a
# single label; verify.
SECONDS_PER_HOUR = 60 * 60
len(incorrect_preds_far) / len(labels_far) * SECONDS_PER_HOUR

81.0225062517366