In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Spoken words the classifier is trained to tell apart.  A word's position in
# this list is the integer class id used for the training targets.
SELECTED_CLASSES = ["yes", "no", "stop", "go", "right"]

# Load the "train" split as (audio, label) pairs together with the dataset
# metadata.  NOTE: this requires the `tensorflow-datasets` package; without it
# the `tensorflow_datasets` import fails with ModuleNotFoundError.
dataset, info = tfds.load(
    "speech_commands", split="train", with_info=True, as_supervised=True
)

# Translate every selected word into the integer label id the dataset uses
# for it (raises ValueError if a word is not one of the dataset's labels).
label_names = info.features["label"].names
class_indices = list(map(label_names.index, SELECTED_CLASSES))

def extract_mfcc(audio, sample_rate=16000, max_pad_len=40, n_mfcc=40):
    """Turn one audio clip into a fixed-length, flattened MFCC feature vector.

    Args:
        audio: NumPy array of audio samples. It is cast to float32 and
            flattened to 1-D before analysis.
        sample_rate: Sampling rate in Hz, forwarded to librosa.
        max_pad_len: Number of MFCC frames (columns) in the result. Shorter
            clips are zero-padded at the end, longer ones are truncated.
        n_mfcc: Number of coefficients computed per frame. Defaults to 40,
            the value that used to be hard-coded.

    Returns:
        A 1-D array holding the ``n_mfcc x max_pad_len`` MFCC matrix
        flattened in row-major order.

    Raises:
        ValueError: If ``max_pad_len`` is negative (a negative value would
            otherwise slice frames off the end and yield the wrong width).
    """
    if max_pad_len < 0:
        raise ValueError(f"max_pad_len must be non-negative, got {max_pad_len}")

    # NOTE(review): samples are only cast, not rescaled.  If callers pass
    # integer PCM, librosa receives values outside [-1, 1] -- confirm that
    # this is intended.
    samples = audio.astype(np.float32).flatten()
    mfccs = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)

    # Force a fixed number of frames so every clip has the same feature size.
    missing_frames = max_pad_len - mfccs.shape[1]
    if missing_frames > 0:
        mfccs = np.pad(
            mfccs, pad_width=((0, 0), (0, missing_frames)), mode="constant"
        )
    else:
        mfccs = mfccs[:, :max_pad_len]
    return mfccs.flatten()

# Collect one MFCC feature vector and one class id per clip whose label is
# among the selected words.
# NOTE(review): only the first 10000 examples of the split are scanned, and
# the class filter is applied afterwards, so fewer than 10000 samples are
# kept -- confirm that this ordering is intended.
features, targets = [], []

for waveform, label_id in tfds.as_numpy(dataset.take(10000)):
    if label_id not in class_indices:
        continue
    features.append(extract_mfcc(waveform))
    # Position within class_indices == position within SELECTED_CLASSES.
    targets.append(class_indices.index(label_id))

x = np.array(features, dtype=np.float32)
y = np.array(targets, dtype=np.int64)

# Hold out 20% of the samples for testing, keeping the class proportions
# equal in both parts; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# The output layer needs exactly one unit per selected word; deriving the
# width from SELECTED_CLASSES keeps the model valid when that list changes.
num_classes = len(SELECTED_CLASSES)

# Fully connected classifier over the flattened MFCC features.  The input
# width is declared with an explicit Input object rather than the deprecated
# `input_shape=` argument on the first Dense layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])

# Targets are integer class ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Train for 20 epochs in mini-batches of 32 samples.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics and the final test metrics are computed on
# the same samples -- confirm that no separate validation set is wanted.
held_out = (x_test, y_test)
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=held_out,
)

# Persist the trained model to disk.
model.save("speech_command_model.h5")

# Report loss and accuracy on the held-out samples.
test_loss, test_acc = model.evaluate(*held_out)
print(f"\nTest Accuracy: {test_acc:.2f}")

# Every class id the model can emit, in SELECTED_CLASSES order.  Passing this
# explicitly keeps the report and the matrix at one row per class even when a
# class is missing from both the test targets and the predictions; without it
# classification_report raises on the target_names length mismatch and the
# confusion matrix shrinks, misaligning the tick labels.
class_ids = list(range(len(SELECTED_CLASSES)))

# Per-class probabilities -> index of the most probable class per sample.
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred_classes,
        labels=class_ids,
        target_names=SELECTED_CLASSES,
    )
)

# Rows are the actual classes, columns the predicted ones.
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=SELECTED_CLASSES,
    yticklabels=SELECTED_CLASSES
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

ModuleNotFoundError: No module named 'tensorflow_datasets'