In [1]:
from sklearn.metrics import multilabel_confusion_matrix, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json

In [2]:
labels = ["backward", "forward", "right", "left", "down", "up", "go", "stop", "on", "off", "yes", "no", 
          "learn", "follow", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", 
          "nine", "bed", "bird", "cat", "dog", "happy", "house", "read", "write", "tree", "visual", "wow"]

In [3]:
def results(preds_path, labels, print_cm=False):
    # groundtruth and predicted 
    # labels
    y_true = []
    y_pred = []

    # opening a JSON file
    f = open(preds_path)

    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # iterating through the json list
    # and adding true and predicted labels
    for t, p in data.items():
        t = t.split('/')[-2]
 
        y_true.append(t)
        y_pred.append(p)

    # closing file
    f.close()
    # generate the classification report
    print(classification_report(y_true,y_pred, digits=4))
    
    if print_cm:
        # generate a confusion matrix in %
        cm = confusion_matrix(y_true, y_pred, labels=labels)
        cmn = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
        cmn = np.round(cmn,1)

        # plot the confusion matrix in a beautiful manner
        fig = plt.figure(figsize=(16, 16))
        ax= plt.subplot()
        sns.heatmap(cmn, annot=True, ax = ax, fmt=".1f", linewidth=.1, 
                    cmap='YlGn', cbar=False, square=True, linecolor='white')

        # labels, title, and ticks
        ax.set_xlabel('Predicted commands', fontsize=14)
        ax.xaxis.set_label_position('bottom')
        plt.xticks(rotation=90)
        ax.xaxis.set_ticklabels(labels, fontsize=12)
        ax.xaxis.tick_bottom()
        ax.set_ylabel('Actual commands', fontsize=14)
        ax.yaxis.set_ticklabels(labels, fontsize=12)
        plt.yticks(rotation=0)
        plt.title('Confusion Matrix', fontsize=16)
        plt.savefig("confusion_matrix.png")
        plt.show()

In [4]:
def results_2(preds_path, labels, print_cm=False):
    # groundtruth and predicted 
    # labels
    y_true = []
    y_pred = []

    # opening a JSON file
    f = open(preds_path)
    lang = preds_path.split('/')[1].split('_')[1]
    print(lang)
    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # iterating through the json list
    # and adding true and predicted labels
    for t, p in data.items():
        t = t.split('/')[-2]
        p = p.split('_')[0]
 
        y_true.append(t)
        y_pred.append(p)

    # closing file
    f.close()
    # generate the classification report
    print(classification_report(y_true,y_pred, digits=4))
    
    if print_cm:
        # generate a confusion matrix in %
        cm = confusion_matrix(y_true, y_pred, labels=labels)
        cmn = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
        cmn = np.round(cmn,1)

        # plot the confusion matrix in a beautiful manner
        fig = plt.figure(figsize=(16, 16))
        ax= plt.subplot()
        sns.heatmap(cmn, annot=True, ax = ax, fmt=".1f", linewidth=.1, 
                    cmap='YlGn', cbar=False, square=True, linecolor='white')

        # labels, title, and ticks
        ax.set_xlabel('Predicted commands', fontsize=14)
        ax.xaxis.set_label_position('bottom')
        plt.xticks(rotation=90)
        ax.xaxis.set_ticklabels(labels, fontsize=12)
        ax.xaxis.tick_bottom()
        ax.set_ylabel('Actual commands', fontsize=14)
        ax.yaxis.set_ticklabels(labels, fontsize=12)
        plt.yticks(rotation=0)
        plt.title('Confusion Matrix', fontsize=16)
        plt.savefig("confusion_matrix.png")
        plt.show()
    

In [5]:
!python3 inference.py --conf checkpoints/mono-35-tt/kwmlp_tscd.yaml \
                      --ckpt checkpoints/mono-35-tt/best.pth \
                      --inp test_data_tt/ \
                      --out outputs/mono_tt/ \
                      --lmap checkpoints/mono-35-tt/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.05it/s]
Saved preds to outputs/mono_tt/preds.json
[0m

In [6]:
results('outputs/mono_tt/preds.json', labels)

              precision    recall  f1-score   support

    backward     1.0000    0.9667    0.9831        30
         bed     1.0000    1.0000    1.0000        30
        bird     1.0000    1.0000    1.0000        30
         cat     1.0000    1.0000    1.0000        30
         dog     0.9375    1.0000    0.9677        30
        down     0.9677    1.0000    0.9836        30
       eight     1.0000    1.0000    1.0000        30
        five     1.0000    0.9333    0.9655        30
      follow     1.0000    1.0000    1.0000        30
     forward     1.0000    1.0000    1.0000        30
        four     0.9655    0.9333    0.9492        30
          go     1.0000    0.9667    0.9831        30
       happy     1.0000    1.0000    1.0000        30
       house     0.9667    0.9667    0.9667        30
       learn     1.0000    1.0000    1.0000        30
        left     1.0000    0.9667    0.9831        30
        nine     1.0000    1.0000    1.0000        30
          no     0.9677    

In [7]:
!python3 inference.py --conf checkpoints/mono-35-tt/kwmlp_tscd.yaml \
                      --ckpt checkpoints/mono-35-tt/best.pth \
                      --inp aug_test_data_tt/ \
                      --out outputs/aug_mono_tt/ \
                      --lmap checkpoints/mono-35-tt/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|███████████████████████████████████████████| 42/42 [00:20<00:00,  2.05it/s]
Saved preds to outputs/aug_mono_tt/preds.json
[0m

In [8]:
results('outputs/aug_mono_tt/preds.json', labels)

              precision    recall  f1-score   support

    backward     0.9929    0.9367    0.9640       300
         bed     1.0000    0.9967    0.9983       300
        bird     0.9933    0.9933    0.9933       300
         cat     0.9901    1.0000    0.9950       300
         dog     0.9346    1.0000    0.9662       300
        down     0.9375    1.0000    0.9677       300
       eight     1.0000    0.9833    0.9916       300
        five     0.9861    0.9467    0.9660       300
      follow     1.0000    1.0000    1.0000       300
     forward     0.9801    0.9867    0.9834       300
        four     0.9589    0.9333    0.9459       300
          go     1.0000    0.9667    0.9831       300
       happy     1.0000    1.0000    1.0000       300
       house     0.9664    0.9600    0.9632       300
       learn     0.9967    0.9967    0.9967       300
        left     1.0000    0.9667    0.9831       300
        nine     0.9967    0.9933    0.9950       300
          no     0.9836    

In [9]:
!python3 inference.py --conf checkpoints/multi-35/kwmlp_multi_35.yaml \
                      --ckpt checkpoints/multi-35/best.pth \
                      --inp test_data_tt/ \
                      --out outputs/multi_tt/ \
                      --lmap checkpoints/multi-35/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.39it/s]
Saved preds to outputs/multi_tt/preds.json
[0m

In [10]:
results('outputs/multi_tt/preds.json', labels)

              precision    recall  f1-score   support

    backward     1.0000    1.0000    1.0000        30
         bed     0.9677    1.0000    0.9836        30
        bird     1.0000    1.0000    1.0000        30
         cat     1.0000    1.0000    1.0000        30
         dog     1.0000    1.0000    1.0000        30
        down     1.0000    1.0000    1.0000        30
       eight     1.0000    1.0000    1.0000        30
        five     1.0000    0.9667    0.9831        30
      follow     0.9677    1.0000    0.9836        30
     forward     1.0000    1.0000    1.0000        30
        four     1.0000    1.0000    1.0000        30
          go     1.0000    0.9667    0.9831        30
       happy     1.0000    1.0000    1.0000        30
       house     1.0000    1.0000    1.0000        30
       learn     1.0000    1.0000    1.0000        30
        left     1.0000    0.9333    0.9655        30
        nine     1.0000    1.0000    1.0000        30
          no     1.0000    

In [11]:
!python3 inference.py --conf checkpoints/multi-35/kwmlp_multi_35.yaml \
                      --ckpt checkpoints/multi-35/best.pth \
                      --inp aug_test_data_tt/ \
                      --out outputs/aug_multi_tt/ \
                      --lmap checkpoints/multi-35/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|███████████████████████████████████████████| 42/42 [00:19<00:00,  2.14it/s]
Saved preds to outputs/aug_multi_tt/preds.json
[0m

In [12]:
results('outputs/aug_multi_tt/preds.json', labels)

              precision    recall  f1-score   support

    backward     1.0000    0.9967    0.9983       300
         bed     0.9740    1.0000    0.9868       300
        bird     1.0000    1.0000    1.0000       300
         cat     1.0000    1.0000    1.0000       300
         dog     0.9967    1.0000    0.9983       300
        down     0.9967    1.0000    0.9983       300
       eight     0.9934    0.9967    0.9950       300
        five     0.9965    0.9600    0.9779       300
      follow     0.9868    1.0000    0.9934       300
     forward     0.9901    0.9967    0.9934       300
        four     0.9967    0.9967    0.9967       300
          go     0.9898    0.9733    0.9815       300
       happy     1.0000    1.0000    1.0000       300
       house     1.0000    0.9967    0.9983       300
       learn     0.9967    0.9967    0.9967       300
        left     1.0000    0.9500    0.9744       300
        nine     0.9967    0.9933    0.9950       300
          no     0.9967    

In [13]:
!python3 inference.py --conf checkpoints/multi-140/kwmlp_multi_140.yaml \
                      --ckpt checkpoints/multi-140/best.pth \
                      --inp test_data_tt/ \
                      --out outputs/multi_tt_2/ \
                      --lmap checkpoints/multi-140/label_map.json \
                      --device cpu \
                      --batch_size 256 

100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.36it/s]
Saved preds to outputs/multi_tt_2/preds.json
[0m

In [14]:
results_2('outputs/multi_tt_2/preds.json', labels)

tt
              precision    recall  f1-score   support

    backward     1.0000    1.0000    1.0000        30
         bed     1.0000    1.0000    1.0000        30
        bird     1.0000    1.0000    1.0000        30
         cat     1.0000    1.0000    1.0000        30
         dog     1.0000    1.0000    1.0000        30
        down     1.0000    1.0000    1.0000        30
       eight     1.0000    1.0000    1.0000        30
        five     0.9667    0.9667    0.9667        30
      follow     1.0000    1.0000    1.0000        30
     forward     0.9375    1.0000    0.9677        30
        four     1.0000    1.0000    1.0000        30
          go     0.9667    0.9667    0.9667        30
       happy     1.0000    1.0000    1.0000        30
       house     1.0000    1.0000    1.0000        30
       learn     1.0000    1.0000    1.0000        30
        left     1.0000    0.9333    0.9655        30
        nine     1.0000    1.0000    1.0000        30
          no     1.0000 

In [15]:
!python3 inference.py --conf checkpoints/multi-140/kwmlp_multi_140.yaml \
                      --ckpt checkpoints/multi-140/best.pth \
                      --inp aug_test_data_tt/ \
                      --out outputs/aug_multi_tt_2/ \
                      --lmap checkpoints/multi-140/label_map.json \
                      --device cpu \
                      --batch_size 256                     

100%|███████████████████████████████████████████| 42/42 [00:18<00:00,  2.23it/s]
Saved preds to outputs/aug_multi_tt_2/preds.json
[0m

In [16]:
results_2('outputs/aug_multi_tt_2/preds.json', labels)

multi
              precision    recall  f1-score   support

    backward     1.0000    0.9933    0.9967       300
         bed     1.0000    0.9867    0.9933       300
        bird     1.0000    0.9967    0.9983       300
         cat     1.0000    1.0000    1.0000       300
         dog     0.9934    1.0000    0.9967       300
        down     0.9934    1.0000    0.9967       300
       eight     0.9934    0.9967    0.9950       300
        five     0.9666    0.9633    0.9649       300
      follow     0.9967    1.0000    0.9983       300
     forward     0.9550    0.9900    0.9722       300
        four     1.0000    0.9900    0.9950       300
          go     0.9731    0.9633    0.9682       300
       happy     1.0000    1.0000    1.0000       300
       house     1.0000    0.9833    0.9916       300
       learn     1.0000    1.0000    1.0000       300
        left     1.0000    0.9433    0.9708       300
        nine     0.9967    0.9967    0.9967       300
          no     0.99