# SC09 speech keyword detection benchmark model using the Time-Frequency TiFGAN features as input

In this notebook, we train several classifiers for keyword detection on the SC09 dataset (the Speech Commands digits zero to nine), using the same time-frequency features that are used as input to a TiFGAN.

### Import packages

In [1]:
import os

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import plot_confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, f1_score, precision_score, recall_score

### Define paths

In [2]:
# Root directory of the SpeechCommands dataset on the datastore volume.
data_dir = os.path.join(
    os.path.join("/media", "datastore", "c-matsty-data"),
    "datasets",
    "SpeechCommands",
)

#### Training data paths

In [3]:
# The training split keeps its inputs, labels and actor ids in three sibling sub-directories.
train_dir = os.path.join(data_dir, "SpeechCommands_Preproc_2_training")
train_input_path, train_labels_path, train_actors_path = (
    os.path.join(train_dir, subdir) for subdir in ("input_data", "labels", "actors")
)

#### Test data paths

In [4]:
# The test split keeps its inputs, labels and actor ids in three sibling sub-directories.
test_dir = os.path.join(data_dir, "SpeechCommands_Preproc_2_test")
test_input_path, test_labels_path, test_actors_path = (
    os.path.join(test_dir, subdir) for subdir in ("input_data", "labels", "actors")
)

### Load data

In [5]:
def load_data(input_path, labels_path, actors_path):
    """Load and stack the features, labels and actor ids stored in three directories.

    Every entry of ``input_path`` is opened with ``np.load`` and its
    ``"logspecs"`` array is read. The matching label and actor arrays are
    loaded from ``<stem>_labels.npy`` and ``<stem>_actors.npy``, where
    ``<stem>`` is the part of the input file name before its first ``"."``.

    Files are processed in sorted name order, so the order of the returned
    rows is deterministic and does not depend on the order in which the
    operating system happens to list the directory.

    Parameters
    ----------
    input_path : str
        Directory containing the input archives.
    labels_path : str
        Directory containing the ``<stem>_labels.npy`` files.
    actors_path : str
        Directory containing the ``<stem>_actors.npy`` files.

    Returns
    -------
    X, Y, A : tuple of np.ndarray
        The per-file inputs, labels and actors, each vertically stacked
        (``np.vstack``) across files. Labels and actors receive a trailing
        axis before stacking.

    Raises
    ------
    ValueError
        If ``input_path`` contains no files.
    """
    input_file_names = sorted(os.listdir(input_path))
    if not input_file_names:
        raise ValueError("No input files found in {!r}".format(input_path))

    X, Y, A = [], [], []
    for input_file_name in input_file_names:
        file_name = input_file_name.split(".")[0]
        # The context manager closes the archive as soon as the array is read
        # (assumes the inputs are .npz archives, as the "logspecs" key suggests).
        with np.load(os.path.join(input_path, input_file_name)) as archive:
            X.append(archive["logspecs"])
        labels = np.load(os.path.join(labels_path, file_name + "_labels.npy"))
        actors = np.load(os.path.join(actors_path, file_name + "_actors.npy"))
        # Add a trailing axis so that vstack concatenates along the sample axis.
        Y.append(labels[..., np.newaxis])
        A.append(actors[..., np.newaxis])

    return np.vstack(X), np.vstack(Y), np.vstack(A)

###### Load training data

In [6]:
# Read the whole training split into memory.
X_tr, y_tr, actors_tr = load_data(
    input_path=train_input_path,
    labels_path=train_labels_path,
    actors_path=train_actors_path,
)

###### Load test data

In [7]:
# Read the whole test split into memory.
X_ts, y_ts, actors_ts = load_data(
    input_path=test_input_path,
    labels_path=test_labels_path,
    actors_path=test_actors_path,
)

In [8]:
# Map each distinct training label to its position in the sorted unique labels.
label_dict = {
    label: position
    for position, label in enumerate(np.unique(y_tr), start=0)
}

### Prepare data for training

#### Turn labels from strings to integer identifiers

In [9]:
# Replace every label with its integer identifier from label_dict, for both splits.
y_tr, y_ts = (
    np.vectorize(label_dict.get)(labels) for labels in (y_tr, y_ts)
)

#### Reshape input arrays and labels

In [10]:
# Flatten every sample into a single feature vector (one row per sample).
# The row width is the product of all trailing axes, so re-running this cell on
# already-flattened arrays is a no-op instead of raising IndexError on shape[2].
X_tr = X_tr.reshape((X_tr.shape[0], int(np.prod(X_tr.shape[1:]))))
X_ts = X_ts.reshape((X_ts.shape[0], int(np.prod(X_ts.shape[1:]))))

In [11]:
# Collapse the label arrays to one dimension.
y_tr, y_ts = (labels.flatten() for labels in (y_tr, y_ts))

### Normalize data

In [12]:
# Standardize both splits with the global mean and standard deviation of the
# training inputs (scalars taken over all elements of X_tr).
mean, std = X_tr.mean(), X_tr.std()
X_tr, X_ts = ((features - mean) / std for features in (X_tr, X_ts))

### Define class weights (used to build per-sample weights)

In [13]:
# Number of training samples per class, in the order of label_dict's values.
class_counts = [np.count_nonzero(y_tr == class_idx) for class_idx in label_dict.values()]
# Weight each class by how much rarer it is than the most frequent class.
largest_class_count = max(class_counts)
class_weights = [largest_class_count / count for count in class_counts]
class_weight_dict = dict(zip(label_dict.values(), class_weights))

In [14]:
# Display the label-to-index mapping so the integer classes in the reports below can be read back as words.
label_dict

{'eight': 0,
 'five': 1,
 'four': 2,
 'nine': 3,
 'one': 4,
 'seven': 5,
 'six': 6,
 'three': 7,
 'two': 8,
 'zero': 9}

### Train and test logistic regression classifier

In [15]:
# Logistic regression configured with a fixed seed and a generous iteration budget.
multi_class = 'multinomial'
model = LogisticRegression(
    random_state=0,
    max_iter=15000,
    multi_class=multi_class,
)

In [16]:
# Give every training sample the weight of its class.
sample_weight = list(map(class_weight_dict.__getitem__, y_tr))

In [17]:
# Fit the logistic regression on the training split using the per-sample weights.
model = model.fit(X=X_tr, y=y_tr, sample_weight=sample_weight)

In [18]:
# Predict on the test split, then print the per-class report followed by the balanced accuracy.
y_preds_lg = model.predict(X_ts)
print(
    classification_report(y_ts, y_preds_lg),
    balanced_accuracy_score(y_ts, y_preds_lg),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.44      0.50      0.47       257
           1       0.52      0.53      0.52       270
           2       0.65      0.55      0.59       253
           3       0.50      0.49      0.50       259
           4       0.49      0.50      0.49       248
           5       0.53      0.61      0.57       239
           6       0.75      0.66      0.70       244
           7       0.49      0.46      0.48       267
           8       0.52      0.50      0.51       264
           9       0.61      0.63      0.62       250

    accuracy                           0.54      2551
   macro avg       0.55      0.54      0.55      2551
weighted avg       0.55      0.54      0.54      2551

0.5438286657138892


### Train and test RandomForest classifier

In [19]:
# Random forest with 400 trees and a fixed seed.
model = RandomForestClassifier(
    random_state=0,
    n_estimators=400,
)

In [20]:
# Fit the random forest on the training split using the per-sample weights.
model = model.fit(X=X_tr, y=y_tr, sample_weight=sample_weight)

In [21]:
# Predict on the test split, then print the per-class report followed by the balanced accuracy.
y_preds_rf = model.predict(X_ts)
print(
    classification_report(y_ts, y_preds_rf),
    balanced_accuracy_score(y_ts, y_preds_rf),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.77      0.84      0.80       257
           1       0.82      0.80      0.81       270
           2       0.74      0.79      0.76       253
           3       0.78      0.84      0.81       259
           4       0.78      0.81      0.80       248
           5       0.92      0.82      0.87       239
           6       0.90      0.83      0.86       244
           7       0.83      0.78      0.81       267
           8       0.74      0.77      0.75       264
           9       0.85      0.80      0.83       250

    accuracy                           0.81      2551
   macro avg       0.81      0.81      0.81      2551
weighted avg       0.81      0.81      0.81      2551

0.8087174901113162
