# Spoken commands example
This example uses an audio classifier model from a TensorFlow tutorial:
https://www.tensorflow.org/tutorials/sequences/audio_recognition

**N.B. This script downloads a large (2.3GB) speech commands dataset!**

In [None]:
import sys
sys.path.append('..')
from pathlib import Path
import tarfile
import shutil
import numpy as np
import pandas as pd
from scipy.io.wavfile import read, write
from sklearn.metrics import confusion_matrix
from src.problemgenerator.series import Series
from src.problemgenerator.tuple import Tuple
from src.problemgenerator.filters import ClipWAV, ApplyToTuple
from src.plotting.utils import visualize_confusion_matrix

In [None]:
# In this cell we download the dataset unless it is already present.
# If you have downloaded and extracted the dataset into a different directory,
# change the data_dir variable accordingly.

data_url = "https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz"
fname = "speech_commands_v0.02.tar.gz"
data_dir = Path.home() / "datasets/speech_data"

if not data_dir.exists():
    !mkdir -p {data_dir}
    !wget {data_url} -P {data_dir}
    tarfile.open(data_dir / fname, "r:gz").extractall(data_dir)

In [None]:
# Categories the model's guesses are compared against, and the full label set
# used for the confusion matrices.
trained_categories = ["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go"]
labels = ["_silence_", "_unknown_", "yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go"]

# Every non-blank line of testing_list.txt is a path relative to data_dir whose
# first component is the category name. The file is read with pathlib instead
# of shelling out to cat/cut/sort, so this cell does not depend on Unix tools.
testing_list_lines = (data_dir / "testing_list.txt").read_text(encoding="utf-8").splitlines()
test_set_rel_paths = [line for line in testing_list_lines if line]
test_set_files = [data_dir / p for p in test_set_rel_paths]
test_categories = sorted({p.split("/", 1)[0] for p in test_set_rel_paths})

len(test_set_files), len(test_categories), len(trained_categories)

In [None]:
# Directory holding the speech_commands example scripts and the trained model.
# Adjust this if dpEmu is not checked out directly under your home directory.

example_path = Path.home().joinpath("dpEmu", "src", "examples", "speech_commands")

In [None]:
# Choose a category in which to generate errors.
# Later on we will generate errors in all of the test set categories.

category = "stop"
data_subset_dir = data_dir / category

# List the directory once and read the clips from that same listing, so that
# fs[i] and wavs[i] are guaranteed to refer to the same file.
fs = list(data_subset_dir.iterdir())
wavs = [read(f) for f in fs]

In [None]:
# Build the error generating tree (a Series of Tuples with a ClipWAV filter
# attached through ApplyToTuple) and run it on the clips loaded above.
# NOTE(review): the second ApplyToTuple argument (1) presumably selects the
# tuple element the filter acts on - confirm against ApplyToTuple.

err_params = dict(dyn_range=.2)

wav_node = Tuple()
clip_filter = ClipWAV("dyn_range")
wav_node.addfilter(ApplyToTuple(clip_filter, 1))
root_node = Series(wav_node)

clipped = root_node.generate_error(wavs, err_params)

In [None]:
# Index into the `fs` and `clipped` lists of the clip that the following cells
# write to disk, play back and classify.
example_index = 123  # Arbitrarily chosen speech command example – try changing the index!

In [None]:
clipped_filename = data_dir / 'clipped.wav'
# Each clip is a (sample rate, samples) pair as returned by scipy's `read`.
# Write the clip with its own sample rate instead of assuming 16 kHz, the same
# way errorify_directory does.
clipped_rate = clipped[example_index][0]
clipped_samples = clipped[example_index][1]
write(clipped_filename, clipped_rate, clipped_samples)

In [None]:
# Play the original, unmodified clip through the external `aplay` command.
!aplay {fs[example_index]}

In [None]:
# Play the errorified copy of the same clip (written to clipped_filename above).
!aplay {clipped_filename}

In [None]:
# Define a function to filter out irrelevant output (e.g. Python deprecation warnings)

def filter_scores(output):
    """Return the lines of `output` that contain "score" or ".wav".

    Args:
        output: Iterable of strings, e.g. the captured output lines of a
            labelling script run.

    Returns:
        A new list with the matching lines in their original order.
    """
    relevant = []
    for entry in output:
        if ".wav" in entry or "score" in entry:
            relevant.append(entry)
    return relevant

In [None]:
# Run label_wav.py with the frozen graph and label file from example_path on
# the clean clip selected above, capturing the script's output lines.

scores_clean = !python {example_path}/label_wav.py \
--graph={example_path}/trained_model/my_frozen_graph.pb \
--labels={example_path}/trained_model/conv_labels.txt \
--wav={fs[example_index]}

# Display only the lines that mention a score or a .wav file.
filter_scores(scores_clean)

In [None]:
# Run the same labelling script on the corresponding errorified clip
# (clipped_filename), capturing the script's output lines.

scores_clipped = !python {example_path}/label_wav.py \
--graph={example_path}/trained_model/my_frozen_graph.pb \
--labels={example_path}/trained_model/conv_labels.txt \
--wav={clipped_filename}

# Display only the lines that mention a score or a .wav file.
filter_scores(scores_clipped)

In [None]:
# You can also run the model on an entire directory of .wav files in one go:
# label_wav_dir.py takes a --wav_dir argument instead of a single --wav file.

scores_clean_dir = !python {example_path}/label_wav_dir.py \
--graph={example_path}/trained_model/my_frozen_graph.pb \
--labels={example_path}/trained_model/conv_labels.txt \
--wav_dir={data_subset_dir}

# Display only the lines that mention a score or a .wav file.
filter_scores(scores_clean_dir)

In [None]:
# That was not pretty! We'd better define some helper functions to extract
# the model's guesses from that messy output.

def get_guesses(scores):
    """Pair every scored file with the first label reported for it.

    After filtering, the output is treated as consecutive groups of four
    lines: the first line of a group names the file and the second line
    starts with the label that is taken as the guess (presumably the
    top-ranked one - confirm against the labelling script's output).

    Args:
        scores: Iterable of output lines from a labelling script run.

    Returns:
        A one-shot zip iterator of (file line, guessed label) pairs.

    Raises:
        ValueError: If the number of filtered lines is not a multiple of 4.
    """
    scores = filter_scores(scores)
    if len(scores) % 4 != 0:
        raise ValueError(f"Expected scores list to have a length divisible by 4 after filtering but got length {len(scores)}")
    fnames = scores[0::4]
    # Only the text before the first space of the label line is the label.
    guesses = [guess.split(' ')[0] for guess in scores[1::4]]
    return zip(fnames, guesses)

def score_directory(directory):
    """Run label_wav_dir.py on `directory` and return the relevant output lines.

    The script, frozen graph and label file are all taken from the global
    `example_path`. The captured output is passed through `filter_scores`, so
    only lines containing "score" or ".wav" are returned.
    """
    scores = !python {example_path}/label_wav_dir.py \
        --graph={example_path}/trained_model/my_frozen_graph.pb \
        --labels={example_path}/trained_model/conv_labels.txt \
        --wav_dir={directory}
    return filter_scores(scores)

In [None]:
# Define a function to generate errors in all wav files in a given directory.
# If an inclusion list is provided, only files on the list will be processed.

def errorify_directory(data_root_dir, dir_name, tree_root, err_params, inclusion_list=None):
    """Write errorified copies of the wav files of one directory.

    The copies are written to a sibling directory named `dir_name` + "_err"
    under `data_root_dir`, keeping the original file names.

    Args:
        data_root_dir: Path of the directory that contains `dir_name`.
        dir_name: Name of the sub-directory holding the clean wav files.
        tree_root: Error generating tree; its `generate_error` method is
            called with a one-element list holding the value returned by
            `read` for each file.
        err_params: Error parameters passed on to `generate_error`.
        inclusion_list: Optional list of file paths to process. When None,
            every file in the clean directory with a ".wav" suffix is
            processed. An explicitly passed empty list processes nothing.

    Returns:
        Path of the directory the errorified files were written to.

    Raises:
        ValueError: If the clean data directory does not exist.
    """
    clean_data_dir = data_root_dir / dir_name
    if not clean_data_dir.exists():
        raise ValueError(f"Directory {clean_data_dir} does not exist.")
    err_data_dir = data_root_dir / (dir_name + "_err")
    # The parent directory exists at this point, so a plain mkdir suffices;
    # exist_ok makes re-runs harmless.
    err_data_dir.mkdir(exist_ok=True)
    # Compare against None rather than testing truthiness: an empty inclusion
    # list means "no files", not "all files".
    if inclusion_list is None:
        inclusion_list = [f for f in clean_data_dir.iterdir() if f.suffix == ".wav"]
    for wav_path in inclusion_list:
        wav = read(wav_path)
        errorified = tree_root.generate_error([wav], err_params)[0]
        # Element 0 is the sample rate and element 1 the sample data.
        write(err_data_dir / wav_path.name, errorified[0], errorified[1])
    return err_data_dir

In [None]:
# Define a function to generate errors in all wav files on a list.
# The function is needed when files from multiple categories are present on the list.
# To facilitate comparisons between clean and errorified data, the clean files
# on the list can be automatically copied to suitably named directories. To do
# this, provide the parameter copy_clean=True.

def errorify_list(data_files, categories, tree_root, err_params, copy_clean=False):
    """Generate errors in the listed wav files, one category at a time.

    The data root directory is taken to be two levels above the first file
    on the list. For every category, the listed files whose parent directory
    is named after the category are passed to `errorify_directory`.

    Args:
        data_files: Non-empty list of file paths; an empty list raises
            IndexError when the data root is derived from its first entry.
        categories: Category (directory) names to process.
        tree_root: Error generating tree passed on to `errorify_directory`.
        err_params: Error parameters passed on to `errorify_directory`.
        copy_clean: If True, also copy each category's clean files into a
            directory named after the category plus "_clean" under the data
            root.
    """
    data_root_dir = data_files[0].parents[1]
    print(f"data root dir: {data_root_dir}")
    for cat in categories:
        # Match on the parent directory name. A substring test on the whole
        # path would also match unrelated components, e.g. a home directory
        # whose name ends in "on" for the category "on".
        files_in_cat = [f for f in data_files if f.parent.name == cat]
        print("category:", cat)
        print(f"{len(files_in_cat)}")
        errorify_directory(data_root_dir, cat, tree_root, err_params, inclusion_list=files_in_cat)
        if copy_clean:
            copy_dir = data_root_dir / (cat + "_clean")
            # exist_ok keeps re-runs of the notebook from failing here.
            copy_dir.mkdir(exist_ok=True)
            for file in files_in_cat:
                shutil.copy(file, copy_dir)

In [None]:
# Define a function to compare the model's guesses on clean and errorified data.
# The results are returned in a Pandas dataframe.

def compare(data_root, category, clean_ext="_clean", err_ext="_err"):
    """Collect the model's guesses on clean and errorified clips of a category.

    Args:
        data_root: Path of the directory containing the per-category
            directories.
        category: Category name; it is also stored as the true label.
        clean_ext: Suffix of the directory with the clean clips.
        err_ext: Suffix of the directory with the errorified clips.

    Returns:
        A DataFrame with the columns "file", "clean_guess", "err_guess" and
        "true_label", holding the files that were scored in both directories.
    """
    clean_dir = data_root / (category + clean_ext)
    clean_pairs = get_guesses(score_directory(clean_dir))
    err_dir = data_root / (category + err_ext)
    err_pairs = get_guesses(score_directory(err_dir))

    clean_frame = pd.DataFrame(clean_pairs, columns=["file", "clean_guess"])
    err_frame = pd.DataFrame(err_pairs, columns=["file", "err_guess"])

    # Inner join: keep only files present in both result sets.
    merged = clean_frame.merge(err_frame, on="file", how="inner")
    merged['true_label'] = category
    return merged

In [None]:
# Generate errors in all test set audio clips of the trained categories.
# Because copy_clean=True, the clean test clips are also copied, so each
# category ends up with a "_err" and a "_clean" directory under the data root.

errorify_list(test_set_files, trained_categories, root_node, err_params, copy_clean=True)

In [None]:
# Run model on clean and errorified data.

# One comparison frame per trained category, stacked into a single frame.
results = list(map(lambda name: compare(data_dir, name), trained_categories))
df = pd.concat(results)

In [None]:
# Create confusion matrices for clean and errorified data, respectively.

# Both matrices share the true labels and the label order given by `labels`;
# only the column holding the model's guesses differs.
cm_clean, cm_err = (
    confusion_matrix(df['true_label'], df[guess_column], labels=labels)
    for guess_column in ('clean_guess', 'err_guess')
)

In [None]:
# Visualize the confusion matrix for clean data (cm_clean, built from the
# "true_label" and "clean_guess" columns of df).
# NOTE(review): the meaning of the positional arguments 0 and "dyn_range" is
# not visible here - confirm against visualize_confusion_matrix.

visualize_confusion_matrix(df, cm_clean, 0, labels, "dyn_range", "true_label", "clean_guess")

In [None]:
# Visualize the confusion matrix for errorified data (cm_err, built from the
# "true_label" and "err_guess" columns of df).
# NOTE(review): the meaning of the positional arguments 0 and "dyn_range" is
# not visible here - confirm against visualize_confusion_matrix.

visualize_confusion_matrix(df, cm_err, 0, labels, "dyn_range", "true_label", "err_guess")