<a href="https://colab.research.google.com/github/Teja5164/Keyword-spotting-embedded-ML/blob/main/Keyword_Spotting_Embedded_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Libraries

In [None]:
!pip install numpy==1.23.5
!pip install tensorflow==2.12 librosa matplotlib scikit-learn

Download Dataset

In [None]:
import os
import tensorflow as tf
import pathlib

# Download the Speech Commands v0.02 archive and unpack it.
#
# The archive is placed in its own cache sub-directory because, with the
# pinned TensorFlow 2.12, `get_file(extract=True)` unpacks the archive into the
# same directory the archive is saved in. The previous code looked for a
# "speech_commands" folder that nothing ever created, so every later glob came
# back empty.
DATASET_PATH = tf.keras.utils.get_file(
    'speech_commands_v0.02.tar.gz',
    origin='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
    extract=True,
    cache_subdir=os.path.join('datasets', 'speech_commands'),
)

# Older Keras returns the path of the archive file (data sits beside it);
# newer Keras returns the extraction directory itself. Handle both.
_downloaded = pathlib.Path(DATASET_PATH)
DATASET_DIR = _downloaded if _downloaded.is_dir() else _downloaded.parent

# Fail early with a readable message instead of silently producing an empty
# dataset further down the notebook.
if not any(child.is_dir() for child in DATASET_DIR.iterdir()):
    raise FileNotFoundError(
        f"No extracted keyword folders found in {DATASET_DIR}; "
        "the archive may not have been extracted."
    )

print("Dataset directory:", DATASET_DIR)

Selection of Keywords

In [None]:
# The five spoken commands used as target classes. The order matters: the
# position of a word in this list is used as its class label.
KEYWORDS = [
    'stop',
    'go',
    'left',
    'right',
    'yes',
]
# Note: the background noise folder is named '_background_noise_'.

Preprocessing

In [None]:
import librosa
import numpy as np

def extract_mfcc(file_path, sr=16000, n_mfcc=13):
    """Load an audio file and return its MFCC features, time-major.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load`` (audio is resampled to it).
        n_mfcc: Number of MFCC coefficients to compute per frame.

    Returns:
        Array of shape ``(time_steps, n_mfcc)``.
    """
    signal, sample_rate = librosa.load(file_path, sr=sr)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # librosa yields (n_mfcc, time_steps); flip so that time is the first axis.
    return np.transpose(coefficients)

Load the dataset and prepare X, y

In [None]:
import glob

# Every example is forced to exactly this many MFCC frames so the samples can
# be stacked into one array (the CNN below expects 50 frames per example).
N_FRAMES = 50

X = []
y = []

for idx, kw in enumerate(KEYWORDS):
    # Sorted so the sample order is reproducible across runs and file systems.
    files = sorted(glob.glob(str(DATASET_DIR / kw / '*.wav')))
    if not files:
        # An empty class would silently corrupt training; stop with a clear hint.
        raise FileNotFoundError(
            f"No .wav files found for keyword '{kw}' in {DATASET_DIR / kw}"
        )
    for f in files:
        mfcc = extract_mfcc(f)
        # With librosa's default hop of 512 samples, a 1-second clip at 16 kHz
        # gives only about 32 frames, so the old `>= 50` filter discarded every
        # clip. Zero-pad short clips and truncate long ones instead.
        n_missing = N_FRAMES - mfcc.shape[0]
        if n_missing > 0:
            mfcc = np.pad(mfcc, ((0, n_missing), (0, 0)), mode='constant')
        X.append(mfcc[:N_FRAMES, :])  # Fix length
        y.append(idx)

# float32 is what both Keras and the TFLite interpreter expect as input.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y)

print("Data shape:", X.shape)

Build and train the CNN model

In [None]:
from tensorflow.keras import layers, models

# Seed Python, NumPy and TensorFlow so weight initialisation and the shuffle
# below are reproducible between runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Small 2-D CNN over the (frames, coefficients) MFCC "image" with one channel.
model = models.Sequential([
    layers.Input(shape=(50, 13, 1)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(KEYWORDS), activation='softmax')
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Reshape X for CNN: add a trailing channel axis.
X_cnn = X[..., np.newaxis]

# `validation_split` takes the LAST fraction of the arrays, before any
# shuffling. The data was collected keyword by keyword, so without this
# shuffle the validation set would consist almost entirely of the last
# keyword. X_cnn itself is left untouched for later cells.
shuffle_idx = np.random.default_rng(SEED).permutation(len(X_cnn))

# Train model
model.fit(X_cnn[shuffle_idx], y[shuffle_idx], epochs=10, batch_size=32, validation_split=0.2)

Convert to TFLite Model

In [None]:
# Build a TFLite converter from the trained Keras model and run the conversion.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode as a .tflite file.
with open('kws_model.tflite', mode='wb') as f:
    f.write(tflite_model)

print("TFLite model saved!")

Test TFLite model

In [None]:
# Load the saved .tflite file and allocate its tensors before inference.
interpreter = tf.lite.Interpreter(model_path='kws_model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Example test — using first sample (kept as a batch of one).
# Cast to the dtype the interpreter declares for its input: `set_tensor`
# rejects arrays whose dtype does not match.
sample_input = X_cnn[0:1].astype(input_details[0]['dtype'], copy=False)

interpreter.set_tensor(input_details[0]['index'], sample_input)
interpreter.invoke()
output = interpreter.get_tensor(output_details[0]['index'])

# The highest-scoring output index maps back to a keyword.
predicted_label = np.argmax(output)
print("Predicted label:", KEYWORDS[predicted_label])
# Show the ground truth next to the prediction so the check is meaningful.
print("True label:", KEYWORDS[y[0]])