<a href="https://colab.research.google.com/github/Abhinav-1604/gdsc-embedded-induction/blob/main/KWS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install librosa numpy soundfile tqdm




In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import tensorflow as tf
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the dataset path used
# later in this notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from numpy._core.defchararray import center
# --- Audio / feature-extraction configuration -------------------------------
SAMPLE_RATE = 16000                       # target sampling rate passed to librosa (Hz)
DURATION = 1.0                            # clip length in seconds
SAMPLES = int(DURATION * SAMPLE_RATE)     # samples per fixed-length clip
N_MFCC = 13                               # MFCC coefficients kept per frame

# Despite its name, FRAME_RATE is used as the FFT window length (n_fft):
# 30 ms worth of samples. HOP_LENGTH is the 20 ms stride between windows.
FRAME_RATE = int(SAMPLE_RATE * 0.03)
HOP_LENGTH = int(SAMPLE_RATE * 0.02)
def load_audio(path):
    """Load an audio file at SAMPLE_RATE and force it to exactly SAMPLES samples.

    Clips shorter than SAMPLES are zero-padded at the end; longer clips are
    truncated to their first SAMPLES samples.
    """
    waveform, _ = librosa.load(path, sr=SAMPLE_RATE)
    missing = SAMPLES - len(waveform)
    if missing > 0:
        # np.pad defaults to constant (zero) padding; pad only on the right.
        return np.pad(waveform, (0, missing))
    return waveform[:SAMPLES]
def extract_mfcc(audio):
    """Compute MFCC features for one clip and return them as (frames, N_MFCC).

    Uses a FRAME_RATE-sample FFT window with a HOP_LENGTH stride and no
    centre padding, so only full windows inside the clip produce frames.
    """
    mfcc_settings = dict(
        sr=SAMPLE_RATE,
        n_mfcc=N_MFCC,
        n_fft=FRAME_RATE,
        hop_length=HOP_LENGTH,
        center=False,
    )
    coefficients = librosa.feature.mfcc(y=audio, **mfcc_settings)
    # librosa returns (n_mfcc, frames); flip to time-major for the model.
    return np.transpose(coefficients)

# Root folder containing one sub-folder of audio clips per class.
DATASET_PATH = "/content/drive/MyDrive/dataset"
CLASSES = ["on", "off", "unknown"]
# Class name -> integer label (its index in CLASSES).
label_map = { label: idx for idx, label in enumerate(CLASSES) }

X = []
y = []
for label in CLASSES:
    folder = os.path.join(DATASET_PATH, label)
    # os.listdir returns entries in arbitrary order; sort so the sample order
    # (and therefore the seeded train/val/test split) is reproducible.
    files = sorted(os.listdir(folder))
    print(f"Processing: {label} ({len(files)} files)")
    for f in tqdm(files):
        path = os.path.join(folder, f)

        audio = load_audio(path)
        mfcc = extract_mfcc(audio)

        X.append(mfcc)
        y.append(label_map[label])

# Stack into arrays and add a trailing channel axis for the Conv2D input.
X = np.array(X)
y = np.array(y)
X = X[..., np.newaxis]

print(X.shape)
print(y.shape)


Processing: on (2367 files)


100%|██████████| 2367/2367 [01:08<00:00, 34.41it/s] 


Processing: off (2357 files)


100%|██████████| 2357/2357 [00:54<00:00, 43.46it/s] 


Processing: unknown (1519 files)


100%|██████████| 1519/1519 [00:33<00:00, 44.86it/s] 

(6243, 49, 13, 1)
(6243,)





In [None]:
from sklearn.model_selection import train_test_split

# 70 / 15 / 15 train / validation / test split.
# The classes are imbalanced, so stratify both splits to keep the class
# proportions the same in every subset.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42, shuffle=True, stratify=y
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, shuffle=True, stratify=y_temp
)

print("Train:", X_train.shape)
print("Validation:", X_val.shape)
print("Test:", X_test.shape)


Train: (4370, 49, 13, 1)
Validation: (936, 49, 13, 1)
Test: (937, 49, 13, 1)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D, BatchNormalization, ReLU, Reshape
from tensorflow.keras.optimizers import Adam

def build_dscnn(input_shape=(49, 13, 1), num_classes=3):
    """Build a small depthwise-separable CNN (DS-CNN) classifier.

    Args:
        input_shape: Shape of one input example, (frames, coefficients, channels).
        num_classes: Number of output classes of the softmax classifier.

    Returns:
        An uncompiled Keras Sequential model.
    """
    model = Sequential()

    # Declare the input explicitly instead of passing `input_shape` to the
    # first layer, which Keras warns against for Sequential models.
    model.add(tf.keras.Input(shape=input_shape))

    # 1. Standard convolution (feature extractor).
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding='same', use_bias=False))
    model.add(BatchNormalization())
    model.add(ReLU())

    # 2. Depthwise-separable convolution blocks.
    for filters in [64, 128, 128]:
        # Depthwise: one 3x3 filter per input channel.
        model.add(DepthwiseConv2D(kernel_size=(3, 3), strides=(1, 1),
                                  padding='same', use_bias=False))
        model.add(BatchNormalization())
        model.add(ReLU())

        # Pointwise: 1x1 convolution that mixes channels.
        model.add(Conv2D(filters=filters, kernel_size=(1, 1),
                         strides=(1, 1), padding='same', use_bias=False))
        model.add(BatchNormalization())
        model.add(ReLU())

    # 3. Global average pooling removes the time/frequency dimensions.
    model.add(GlobalAveragePooling2D())

    # 4. Final classifier.
    model.add(Dense(num_classes, activation='softmax'))

    return model

# Seed the Python, NumPy and TensorFlow RNGs before the weights are created so
# that initialisation and training-time shuffling are repeatable across runs.
tf.keras.utils.set_random_seed(42)

# Take the input shape and class count from the data rather than relying on
# the hard-coded defaults, so the model follows any change to the features.
model = build_dscnn(input_shape=X_train.shape[1:], num_classes=len(CLASSES))
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Integer labels -> sparse categorical cross-entropy; track plain accuracy.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train for 25 epochs in mini-batches of 64, validating after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=25,
    validation_data=(X_val, y_val),
)


Epoch 1/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 177ms/step - accuracy: 0.5121 - loss: 0.9622 - val_accuracy: 0.3814 - val_loss: 1.0867
Epoch 2/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8237 - loss: 0.5221 - val_accuracy: 0.3814 - val_loss: 1.1251
Epoch 3/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9017 - loss: 0.3293 - val_accuracy: 0.3814 - val_loss: 1.1417
Epoch 4/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9317 - loss: 0.2385 - val_accuracy: 0.3814 - val_loss: 1.1387
Epoch 5/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9451 - loss: 0.1947 - val_accuracy: 0.4145 - val_loss: 1.0489
Epoch 6/25
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9503 - loss: 0.1714 - val_accuracy: 0.6538 - val_loss: 0.9197
Epoch 7/25
[1m69/69[0m [32m━━

In [None]:
# Convert the trained Keras model to a TFLite flatbuffer with the converter's
# default settings (no quantization options set), then write it to disk.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("model_float.tflite", "wb") as float_model_file:
    float_model_file.write(tflite_model)

print("Float TFLite model saved as model_float.tflite")


Saved artifact at '/tmp/tmp2_5frg2b'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 49, 13, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  140197920173584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920177040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920175696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007260560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007260944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007259984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007259408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176656:

In [None]:
def representative_dataset(samples=None, num_samples=200):
    """Yield calibration inputs for TFLite post-training quantization.

    Args:
        samples: Array-like of examples whose first axis indexes examples.
            Defaults to the notebook's global ``X_train``.
        num_samples: Maximum number of examples to yield. Fewer are yielded
            when ``samples`` is shorter, instead of producing empty batches.

    Yields:
        A one-element list holding a float32 array with a leading batch axis
        of size 1, which is the format the TFLite converter expects.
    """
    if samples is None:
        samples = X_train
    # Slicing clamps to the available data, so no empty batches are produced.
    for example in samples[:num_samples]:
        yield [np.asarray(example)[np.newaxis, ...].astype(np.float32)]

# Full-integer post-training quantization, calibrated with the samples yielded
# by representative_dataset.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.representative_dataset = representative_dataset
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Restrict the converter to INT8 builtin ops only.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Use int8 tensors at both the model input and the model output.
converter.inference_input_type = converter.inference_output_type = tf.int8

int8_tflite_model = converter.convert()

with open("model_int8.tflite", "wb") as int8_model_file:
    int8_model_file.write(int8_tflite_model)

print("INT8 quantized TFLite model saved as model_int8.tflite")


Saved artifact at '/tmp/tmpq9zzq3kx'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 49, 13, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  140197920173584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920177040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920175696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007260560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007260944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007259984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140198007259408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140197920176656:



INT8 quantized TFLite model saved as model_int8.tflite


In [None]:
import numpy as np
import tensorflow as tf

# Load the INT8 model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="model_int8.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
input_index = input_details["index"]
output_index = output_details["index"]

# Run inference on 1 sample (slice keeps the batch axis).
sample = X_test[1:2]

# Quantize the float input: q = round(x / scale + zero_point), clipped to the
# int8 range. A bare astype(np.int8) truncates toward zero and wraps
# out-of-range values around, which corrupts the input.
scale, zero_point = input_details["quantization"]
int8_range = np.iinfo(np.int8)
sample_int8 = np.clip(
    np.round(sample / scale + zero_point), int8_range.min, int8_range.max
).astype(np.int8)

interpreter.set_tensor(input_index, sample_int8)
interpreter.invoke()

output = interpreter.get_tensor(output_index)

# Dequantize the int8 output back to real values: x = (q - zero_point) * scale.
output_scale, output_zero_point = output_details["quantization"]
probabilities = (output.astype(np.float32) - output_zero_point) * output_scale

print("TFLite prediction:", output)
print("Dequantized probabilities:", probabilities)
print("Predicted class:", np.argmax(output))


TFLite prediction: [[ 127 -128 -128]]
Predicted class: 0


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [None]:
# Copy the INT8 model to kws_model.tflite, then dump that file as a C include
# (xxd -i emits a byte array plus a length variable named after the input
# file) — presumably for embedding in microcontroller firmware; confirm.
!cp model_int8.tflite kws_model.tflite
!xxd -i kws_model.tflite > kws_model.h
