In [1]:
from sklearn.metrics import multilabel_confusion_matrix, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json

In [2]:
# Class names, in the order used for the confusion-matrix rows and columns.
labels = [
    "backward", "forward", "right", "left", "down", "up", "go",
    "stop", "on", "off", "yes", "no", "learn", "follow",
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
    "bed", "bird", "cat", "dog", "happy", "house",
    "read", "write", "tree", "visual", "wow",
]

In [3]:
def results(preds_path, labels, print_cm=False, cm_path="confusion_matrix.png"):
    """Print a classification report for saved predictions; optionally plot a confusion matrix.

    Parameters
    ----------
    preds_path : str
        Path to a JSON file holding a ``{sample_path: predicted_label}`` mapping.
        The ground-truth label of each sample is the name of the directory that
        directly contains it (second-to-last ``/``-separated component of the key).
    labels : list of str
        Class names fixing the row/column order of the confusion matrix.
        Only used when ``print_cm`` is true.
    print_cm : bool, default False
        When true, also draw a row-normalised (percentage) confusion matrix.
    cm_path : str or None, default "confusion_matrix.png"
        Where the confusion-matrix figure is saved. Pass ``None`` to skip saving.

    Returns
    -------
    None
        The report is printed and the figure is shown; nothing is returned.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(preds_path, encoding="utf-8") as preds_file:
        data = json.load(preds_file)

    # Ground truth comes from the sample's parent directory name.
    y_true = [sample_path.split('/')[-2] for sample_path in data]
    y_pred = list(data.values())

    print(classification_report(y_true, y_pred, digits=4))

    if not print_cm:
        return

    # Row-normalise to percentages. Classes from `labels` with no ground-truth
    # samples have a zero row sum; keep those rows at 0 instead of producing NaN.
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    row_totals = cm.sum(axis=1, keepdims=True)
    cmn = np.divide(
        cm.astype('float'),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    ) * 100
    cmn = np.round(cmn, 1)

    fig, ax = plt.subplots(figsize=(16, 16))
    sns.heatmap(cmn, annot=True, ax=ax, fmt=".1f", linewidths=.1,
                cmap='YlGn', cbar=False, square=True, linecolor='white',
                xticklabels=labels, yticklabels=labels)

    # Axis labels, tick formatting and title.
    ax.set_xlabel('Predicted commands', fontsize=14)
    ax.set_ylabel('Actual commands', fontsize=14)
    ax.tick_params(axis='x', labelrotation=90, labelsize=12)
    ax.tick_params(axis='y', labelrotation=0, labelsize=12)
    ax.set_title('Confusion Matrix', fontsize=16)

    if cm_path:
        fig.savefig(cm_path)
    plt.show()

In [4]:
def results_2(preds_path, labels, print_cm=False, cm_path="confusion_matrix.png"):
    """Like ``results``, for predictions whose labels carry an ``_<suffix>`` tail.

    Prints the language tag parsed from the run directory, then a
    classification report, and optionally plots a confusion matrix.

    Parameters
    ----------
    preds_path : str
        Path to a JSON file holding a ``{sample_path: predicted_label}`` mapping,
        e.g. ``outputs/multi_ru_2/preds.json``. The directory containing the file
        is expected to be named ``[aug_]<model>_<lang>[_...]``.
    labels : list of str
        Class names fixing the row/column order of the confusion matrix.
        Only used when ``print_cm`` is true.
    print_cm : bool, default False
        When true, also draw a row-normalised (percentage) confusion matrix.
    cm_path : str or None, default "confusion_matrix.png"
        Where the confusion-matrix figure is saved. Pass ``None`` to skip saving.

    Returns
    -------
    None
        Output is printed / shown; nothing is returned.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(preds_path, encoding="utf-8") as preds_file:
        data = json.load(preds_file)

    # Parse the language from the directory holding the predictions file.
    # A leading "aug" token is skipped so that "aug_multi_ru_2" reports "ru",
    # the same as "multi_ru_2" (a fixed token index would report "multi").
    path_parts = preds_path.split('/')
    run_name = path_parts[-2] if len(path_parts) > 1 else ''
    tokens = run_name.split('_')
    if tokens[0] == 'aug':
        tokens = tokens[1:]
    lang = tokens[1] if len(tokens) > 1 else 'unknown'
    print(lang)

    # Ground truth comes from the sample's parent directory name.
    y_true = [sample_path.split('/')[-2] for sample_path in data]
    # Keep only the part of each prediction before the first underscore
    # (presumably this drops a language suffix -- TODO confirm against the label map).
    y_pred = [pred.split('_')[0] for pred in data.values()]

    print(classification_report(y_true, y_pred, digits=4))

    if not print_cm:
        return

    # Row-normalise to percentages. Classes from `labels` with no ground-truth
    # samples have a zero row sum; keep those rows at 0 instead of producing NaN.
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    row_totals = cm.sum(axis=1, keepdims=True)
    cmn = np.divide(
        cm.astype('float'),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    ) * 100
    cmn = np.round(cmn, 1)

    fig, ax = plt.subplots(figsize=(16, 16))
    sns.heatmap(cmn, annot=True, ax=ax, fmt=".1f", linewidths=.1,
                cmap='YlGn', cbar=False, square=True, linecolor='white',
                xticklabels=labels, yticklabels=labels)

    # Axis labels, tick formatting and title.
    ax.set_xlabel('Predicted commands', fontsize=14)
    ax.set_ylabel('Actual commands', fontsize=14)
    ax.tick_params(axis='x', labelrotation=90, labelsize=12)
    ax.tick_params(axis='y', labelrotation=0, labelsize=12)
    ax.set_title('Confusion Matrix', fontsize=16)

    if cm_path:
        fig.savefig(cm_path)
    plt.show()
    

In [5]:
# Run inference.py with the mono-35-ru checkpoint on test_data_ru/ (CPU, batch size 256); predictions go to outputs/mono_ru/.
!python3 inference.py --conf checkpoints/mono-35-ru/kwmlp_rscd.yaml \
                      --ckpt checkpoints/mono-35-ru/best.pth \
                      --inp test_data_ru/ \
                      --out outputs/mono_ru/ \
                      --lmap checkpoints/mono-35-ru/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  1.97it/s]
Saved preds to outputs/mono_ru/preds.json
[0m

In [6]:
# Classification report for the predictions stored in outputs/mono_ru/.
results(preds_path='outputs/mono_ru/preds.json', labels=labels)

              precision    recall  f1-score   support

    backward     0.9524    1.0000    0.9756        20
         bed     0.9524    1.0000    0.9756        20
        bird     0.9000    0.9000    0.9000        20
         cat     1.0000    0.9500    0.9744        20
         dog     1.0000    1.0000    1.0000        20
        down     1.0000    0.9000    0.9474        20
       eight     1.0000    1.0000    1.0000        20
        five     1.0000    0.9500    0.9744        20
      follow     0.8947    0.8500    0.8718        20
     forward     0.8636    0.9500    0.9048        20
        four     1.0000    0.8500    0.9189        20
          go     0.9524    1.0000    0.9756        20
       happy     0.9048    0.9500    0.9268        20
       house     0.9500    0.9500    0.9500        20
       learn     0.9524    1.0000    0.9756        20
        left     0.9500    0.9500    0.9500        20
        nine     1.0000    1.0000    1.0000        20
          no     0.9000    

In [7]:
# Run inference.py with the mono-35-ru checkpoint on aug_test_data_ru/ (CPU, batch size 256); predictions go to outputs/aug_mono_ru/.
!python3 inference.py --conf checkpoints/mono-35-ru/kwmlp_rscd.yaml \
                      --ckpt checkpoints/mono-35-ru/best.pth \
                      --inp aug_test_data_ru/ \
                      --out outputs/aug_mono_ru/ \
                      --lmap checkpoints/mono-35-ru/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|███████████████████████████████████████████| 28/28 [00:15<00:00,  1.76it/s]
Saved preds to outputs/aug_mono_ru/preds.json
[0m

In [8]:
# Classification report for the predictions stored in outputs/aug_mono_ru/.
results(preds_path='outputs/aug_mono_ru/preds.json', labels=labels)

              precision    recall  f1-score   support

    backward     0.9343    0.9950    0.9637       200
         bed     0.9381    0.9850    0.9610       200
        bird     0.8873    0.9050    0.8960       200
         cat     0.9948    0.9500    0.9719       200
         dog     0.9899    0.9800    0.9849       200
        down     0.9721    0.8700    0.9182       200
       eight     0.9709    1.0000    0.9852       200
        five     1.0000    0.9300    0.9637       200
      follow     0.9105    0.8650    0.8872       200
     forward     0.8610    0.9600    0.9078       200
        four     0.9655    0.8400    0.8984       200
          go     0.9302    1.0000    0.9639       200
       happy     0.9139    0.9550    0.9340       200
       house     0.8821    0.9350    0.9078       200
       learn     0.9302    1.0000    0.9639       200
        left     0.9588    0.9300    0.9442       200
        nine     0.9848    0.9750    0.9799       200
          no     0.8883    

In [9]:
# Run inference.py with the multi-35 checkpoint on test_data_ru/ (CPU, batch size 256); predictions go to outputs/multi_ru/.
!python3 inference.py --conf checkpoints/multi-35/kwmlp_multi_35.yaml \
                      --ckpt checkpoints/multi-35/best.pth \
                      --inp test_data_ru/ \
                      --out outputs/multi_ru/ \
                      --lmap checkpoints/multi-35/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  2.05it/s]
Saved preds to outputs/multi_ru/preds.json
[0m

In [10]:
# Classification report for the predictions stored in outputs/multi_ru/.
results(preds_path='outputs/multi_ru/preds.json', labels=labels)

              precision    recall  f1-score   support

    backward     0.9524    1.0000    0.9756        20
         bed     1.0000    1.0000    1.0000        20
        bird     0.9524    1.0000    0.9756        20
         cat     1.0000    1.0000    1.0000        20
         dog     0.9524    1.0000    0.9756        20
        down     0.8636    0.9500    0.9048        20
       eight     1.0000    0.9000    0.9474        20
        five     1.0000    1.0000    1.0000        20
      follow     0.9500    0.9500    0.9500        20
     forward     1.0000    0.9500    0.9744        20
        four     1.0000    0.9000    0.9474        20
          go     0.9524    1.0000    0.9756        20
       happy     1.0000    0.9500    0.9744        20
       house     0.9474    0.9000    0.9231        20
       learn     1.0000    1.0000    1.0000        20
        left     1.0000    0.9500    0.9744        20
        nine     1.0000    1.0000    1.0000        20
          no     0.9524    

In [11]:
# Run inference.py with the multi-35 checkpoint on aug_test_data_ru/ (CPU, batch size 256); predictions go to outputs/aug_multi_ru/.
!python3 inference.py --conf checkpoints/multi-35/kwmlp_multi_35.yaml \
                      --ckpt checkpoints/multi-35/best.pth \
                      --inp aug_test_data_ru/ \
                      --out outputs/aug_multi_ru/ \
                      --lmap checkpoints/multi-35/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|███████████████████████████████████████████| 28/28 [00:13<00:00,  2.13it/s]
Saved preds to outputs/aug_multi_ru/preds.json
[0m

In [12]:
# Classification report for the predictions stored in outputs/aug_multi_ru/.
results(preds_path='outputs/aug_multi_ru/preds.json', labels=labels)

              precision    recall  f1-score   support

    backward     0.9612    0.9900    0.9754       200
         bed     1.0000    0.9900    0.9950       200
        bird     0.9471    0.9850    0.9657       200
         cat     0.9852    1.0000    0.9926       200
         dog     0.9754    0.9900    0.9826       200
        down     0.9126    0.9400    0.9261       200
       eight     1.0000    0.9100    0.9529       200
        five     1.0000    1.0000    1.0000       200
      follow     0.9497    0.9450    0.9474       200
     forward     0.9947    0.9400    0.9666       200
        four     0.9830    0.8650    0.9202       200
          go     0.9346    1.0000    0.9662       200
       happy     0.9797    0.9650    0.9723       200
       house     0.9314    0.9500    0.9406       200
       learn     0.9901    1.0000    0.9950       200
        left     0.9894    0.9350    0.9614       200
        nine     0.9950    0.9900    0.9925       200
          no     0.9606    

In [13]:
# Run inference.py with the multi-140 checkpoint on test_data_ru/ (CPU, batch size 256); predictions go to outputs/multi_ru_2/.
!python3 inference.py --conf checkpoints/multi-140/kwmlp_multi_140.yaml \
                      --ckpt checkpoints/multi-140/best.pth \
                      --inp test_data_ru/ \
                      --out outputs/multi_ru_2/ \
                      --lmap checkpoints/multi-140/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  2.13it/s]
Saved preds to outputs/multi_ru_2/preds.json
[0m

In [14]:
# Classification report for the multi-140 predictions stored in outputs/multi_ru_2/.
results_2(preds_path='outputs/multi_ru_2/preds.json', labels=labels)

ru
              precision    recall  f1-score   support

    backward     0.9524    1.0000    0.9756        20
         bed     1.0000    1.0000    1.0000        20
        bird     0.9500    0.9500    0.9500        20
         cat     0.9524    1.0000    0.9756        20
         dog     1.0000    1.0000    1.0000        20
        down     0.9500    0.9500    0.9500        20
       eight     1.0000    0.9000    0.9474        20
        five     1.0000    1.0000    1.0000        20
      follow     0.9000    0.9000    0.9000        20
     forward     0.9444    0.8500    0.8947        20
        four     0.9500    0.9500    0.9500        20
          go     0.9524    1.0000    0.9756        20
       happy     1.0000    1.0000    1.0000        20
       house     0.9474    0.9000    0.9231        20
       learn     0.9524    1.0000    0.9756        20
        left     1.0000    0.9500    0.9744        20
        nine     0.9500    0.9500    0.9500        20
          no     1.0000 

In [15]:
# Run inference.py with the multi-140 checkpoint on aug_test_data_ru/ (CPU, batch size 256); predictions go to outputs/aug_multi_ru_2/.
!python3 inference.py --conf checkpoints/multi-140/kwmlp_multi_140.yaml \
                      --ckpt checkpoints/multi-140/best.pth \
                      --inp aug_test_data_ru/ \
                      --out outputs/aug_multi_ru_2/ \
                      --lmap checkpoints/multi-140/label_map.json \
                      --device cpu \
                      --batch_size 256                     

100%|███████████████████████████████████████████| 28/28 [00:15<00:00,  1.78it/s]
Saved preds to outputs/aug_multi_ru_2/preds.json
[0m

In [16]:
# Classification report for the multi-140 predictions stored in outputs/aug_multi_ru_2/.
results_2(preds_path='outputs/aug_multi_ru_2/preds.json', labels=labels)

multi
              precision    recall  f1-score   support

    backward     0.9660    0.9950    0.9803       200
         bed     0.9802    0.9900    0.9851       200
        bird     0.9466    0.9750    0.9606       200
         cat     0.9569    1.0000    0.9780       200
         dog     0.9851    0.9950    0.9900       200
        down     0.9485    0.9200    0.9340       200
       eight     0.9895    0.9400    0.9641       200
        five     0.9899    0.9850    0.9875       200
      follow     0.9184    0.9000    0.9091       200
     forward     0.9514    0.8800    0.9143       200
        four     0.9474    0.9000    0.9231       200
          go     0.9479    1.0000    0.9732       200
       happy     0.9900    0.9900    0.9900       200
       house     0.9444    0.9350    0.9397       200
       learn     0.9706    0.9900    0.9802       200
        left     0.9794    0.9500    0.9645       200
        nine     0.9798    0.9700    0.9749       200
          no     0.98