In [1]:
# Mount Google Drive at /content/drive; the dataset and model paths used
# throughout this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# NOTE: the next CONFIG cell in this notebook reassigns every one of these
# names (with a different DATASET_PATH and NUM_CLASSES), so the values here
# only take effect if that cell is not executed afterwards.
DATASET_PATH = "/content/drive/MyDrive/IOT/SoundClasification/Dataset"  # Change to your dataset path
SAMPLE_RATE = 16000  # Hz; passed to librosa.load as the target sampling rate
DURATION = 9  # seconds of audio read per clip; shorter clips are zero-padded
N_MFCC = 13  # number of MFCC coefficients computed per frame
TARGET_FRAMES = 260  # the MFCC time axis is padded/truncated to this length
NUM_CLASSES = 8  # Change if different
BATCH_SIZE = 16  # batch_size passed to model.fit
EPOCHS = 20  # epochs passed to model.fit

In [None]:
# =============================
# CONFIG
# =============================
# Root folder whose immediate sub-folders are treated as class names by load_dataset().
DATASET_PATH = "/content/drive/MyDrive/IOT/SoundClasification/AgumentedData"  # Change to your dataset path
SAMPLE_RATE = 16000  # Hz; passed to librosa.load as the target sampling rate
DURATION = 9  # seconds of audio read per clip; shorter clips are zero-padded
N_MFCC = 13  # number of MFCC coefficients computed per frame
TARGET_FRAMES = 260  # the MFCC time axis is padded/truncated to this length
# Width of the one-hot labels (to_categorical) and of the final softmax layer.
NUM_CLASSES = 7  # Change if different
BATCH_SIZE = 16  # batch_size passed to model.fit
EPOCHS = 20  # epochs passed to model.fit

# =============================
# Load and Preprocess Dataset
# =============================
def extract_mfcc(file_path):
    """Turn one audio file into a fixed-size MFCC tensor.

    The clip is loaded at ``SAMPLE_RATE`` for at most ``DURATION`` seconds and
    zero-padded at the end up to exactly ``SAMPLE_RATE * DURATION`` samples.
    The MFCC matrix is then zero-padded or truncated along the frame axis to
    ``TARGET_FRAMES`` columns.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        Array of shape ``(N_MFCC, TARGET_FRAMES, 1)``.
    """
    expected_samples = SAMPLE_RATE * DURATION
    waveform, rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Pad short recordings with trailing zeros up to the full clip length.
    missing_samples = expected_samples - len(waveform)
    if missing_samples > 0:
        waveform = np.pad(waveform, (0, missing_samples))

    coeffs = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=N_MFCC)

    # Force the time axis to TARGET_FRAMES: pad on the right or cut the tail.
    missing_frames = TARGET_FRAMES - coeffs.shape[1]
    if missing_frames > 0:
        coeffs = np.pad(coeffs, ((0, 0), (0, missing_frames)), mode='constant')
    else:
        coeffs = coeffs[:, :TARGET_FRAMES]

    # Trailing singleton axis is the channel dimension expected by Conv2D.
    return coeffs.reshape(N_MFCC, TARGET_FRAMES, 1)

from tqdm import tqdm  # Add this import at the top

def load_dataset():
    """Load every ``.wav`` file under ``DATASET_PATH`` as an MFCC tensor.

    Each immediate sub-directory of ``DATASET_PATH`` is one class; class
    indices follow the alphabetical order of the directory names.

    Returns:
        tuple: ``(X, y, label_map)`` where
            * ``X`` is a float32 array with one ``extract_mfcc`` result per file,
            * ``y`` is the one-hot label matrix with ``NUM_CLASSES`` columns,
            * ``label_map`` maps class directory name -> integer label.
    """
    # Only directories are classes. A stray file in the dataset root used to
    # be added to label_map and then crash os.listdir(class_dir).
    class_names = sorted(
        entry for entry in os.listdir(DATASET_PATH)
        if os.path.isdir(os.path.join(DATASET_PATH, entry))
    )
    label_map = {name: idx for idx, name in enumerate(class_names)}

    # Build the work list first so the progress-bar total equals the number of
    # files actually processed (the old os.walk count included non-.wav files,
    # so the bar never reached 100%). Sorting makes the sample order stable.
    wav_files = []
    for class_name in class_names:
        class_dir = os.path.join(DATASET_PATH, class_name)
        for file in sorted(os.listdir(class_dir)):
            if file.endswith(".wav"):
                wav_files.append((os.path.join(class_dir, file), label_map[class_name]))

    X, y = [], []
    for file_path, label in tqdm(wav_files, desc="Loading dataset", unit="file"):
        X.append(extract_mfcc(file_path))
        y.append(label)

    X = np.array(X, dtype=np.float32)
    y = to_categorical(np.array(y), num_classes=NUM_CLASSES)
    return X, y, label_map


# =============================
# Model Definition
# =============================
def build_cnn_patch_embedding(input_shape=(13, 260, 1), embed_dim=256):
    """Assemble the (uncompiled) CNN classifier for MFCC inputs.

    Two Conv2D + MaxPooling2D stages are followed by a flatten and a
    128 -> 64 -> ``NUM_CLASSES`` dense head with a softmax output.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        embed_dim: Accepted for interface compatibility; not used by the body.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    feature_extractor = [
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        # Output width comes from the module-level NUM_CLASSES at call time.
        layers.Dense(NUM_CLASSES, activation='softmax'),
    ]
    return tf.keras.Sequential(feature_extractor + classifier_head)

# =============================
# Train the Model
# =============================
# Seed Python, NumPy and TensorFlow so the train/validation split, the weight
# initialisation and the batch shuffling are repeatable on Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

X, y, label_map = load_dataset()
# Stratified 80/20 split; random_state pins which samples land in each side.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

model = build_cnn_patch_embedding()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# =============================
# Train the Model & Capture History
# =============================
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val)
)

# =============================
# Print Final Accuracies
# =============================
# These are the metrics of the LAST epoch, not the best epoch.
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_acc * 100:.2f}%")
print(f"Final Validation Accuracy: {final_val_acc * 100:.2f}%")


Loading dataset:  71%|███████   | 399/563 [03:58<01:38,  1.67file/s]


Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 188ms/step - accuracy: 0.1617 - loss: 57.8178 - val_accuracy: 0.2750 - val_loss: 1.7526
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5147 - loss: 1.3391 - val_accuracy: 0.7750 - val_loss: 0.6651
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9174 - loss: 0.3373 - val_accuracy: 0.7250 - val_loss: 0.6592
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9513 - loss: 0.1400 - val_accuracy: 0.8375 - val_loss: 0.5076
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9778 - loss: 0.0521 - val_accuracy: 0.8750 - val_loss: 0.3634
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9876 - loss: 0.0230 - val_accuracy: 0.8000 - val_loss: 0.5628
Epoch 7/20
[1m20/20[0m [32m━━━━━━━

In [None]:
# model.save("/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa.h5")

# =============================
# Convert to TFLite
# =============================
def convert_to_tflite(model_path, output_path):
    """Convert a saved Keras model file into a TFLite flatbuffer on disk.

    Args:
        model_path: Path of the saved Keras model to load.
        output_path: Destination path; the file is written in binary mode
            and overwritten if it already exists.
    """
    keras_model = tf.keras.models.load_model(model_path)
    flatbuffer = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()
    with open(output_path, "wb") as tflite_file:
        tflite_file.write(flatbuffer)

# NOTE: the model.save(...) line at the top of this cell is commented out, so
# this converts whatever .h5 file already exists at this path rather than the
# `model` object currently in memory.
convert_to_tflite("/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa.h5", "/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa.tflite")



Saved artifact at '/tmp/tmpjxrq61w6'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 13, 260, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 8), dtype=tf.float32, name=None)
Captures:
  132676635141392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635132368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635135440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635135056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635141584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635135824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635137552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635143312: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635134864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132676635145424: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [4]:
# =============================
# CONFIG
# =============================
# Root folder whose immediate sub-folders are treated as class names by load_dataset().
DATASET_PATH = "/content/drive/MyDrive/IOT/SoundClasification/AgumentedData"  # Change to your dataset path
SAMPLE_RATE = 16000  # Hz; passed to librosa.load as the target sampling rate
DURATION = 9  # seconds of audio read per clip; shorter clips are zero-padded
N_MFCC = 13  # number of MFCC coefficients computed per frame
TARGET_FRAMES = 260  # the MFCC time axis is padded/truncated to this length
# Width of the one-hot labels (to_categorical) and of the final softmax layer.
NUM_CLASSES = 6  # Changed to 6 for your 6-class dataset
BATCH_SIZE = 16  # batch_size passed to model.fit
EPOCHS = 20  # epochs passed to model.fit

# =============================
# Load and Preprocess Dataset
# =============================
def extract_mfcc(file_path):
    """Turn one audio file into a fixed-size MFCC tensor.

    The clip is loaded at ``SAMPLE_RATE`` for at most ``DURATION`` seconds and
    zero-padded at the end up to exactly ``SAMPLE_RATE * DURATION`` samples.
    The MFCC matrix is then zero-padded or truncated along the frame axis to
    ``TARGET_FRAMES`` columns.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        Array of shape ``(N_MFCC, TARGET_FRAMES, 1)``.
    """
    expected_samples = SAMPLE_RATE * DURATION
    waveform, rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Pad short recordings with trailing zeros up to the full clip length.
    missing_samples = expected_samples - len(waveform)
    if missing_samples > 0:
        waveform = np.pad(waveform, (0, missing_samples))

    coeffs = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=N_MFCC)

    # Force the time axis to TARGET_FRAMES: pad on the right or cut the tail.
    missing_frames = TARGET_FRAMES - coeffs.shape[1]
    if missing_frames > 0:
        coeffs = np.pad(coeffs, ((0, 0), (0, missing_frames)), mode='constant')
    else:
        coeffs = coeffs[:, :TARGET_FRAMES]

    # Trailing singleton axis is the channel dimension expected by Conv2D.
    return coeffs.reshape(N_MFCC, TARGET_FRAMES, 1)

from tqdm import tqdm  # Add this import at the top

def load_dataset():
    """Load every ``.wav`` file under ``DATASET_PATH`` as an MFCC tensor.

    Each immediate sub-directory of ``DATASET_PATH`` is one class. Real
    classes are numbered in alphabetical order and the ``'no_class'``
    placeholder always receives the last index, so the mapping is the same
    whether or not the placeholder folder existed before the call.

    Side effect: creates ``DATASET_PATH/no_class`` if it does not exist.

    Returns:
        tuple: ``(X, y, label_map)`` where
            * ``X`` is a float32 array with one ``extract_mfcc`` result per file,
            * ``y`` is the one-hot label matrix with ``NUM_CLASSES`` columns,
            * ``label_map`` maps class directory name -> integer label.
    """
    placeholder = 'no_class'

    # Create the placeholder BEFORE listing the classes. Previously label_map
    # was built first, so the folder was absent from the mapping on the first
    # run but present (at its alphabetical position) on every later run,
    # silently shifting the indices of the other classes between runs.
    os.makedirs(os.path.join(DATASET_PATH, placeholder), exist_ok=True)

    # Only directories are classes; stray files in the root are ignored.
    class_names = sorted(
        entry for entry in os.listdir(DATASET_PATH)
        if entry != placeholder and os.path.isdir(os.path.join(DATASET_PATH, entry))
    )
    # Pin the placeholder to the final index, which is what the old
    # `.get(class_name, NUM_CLASSES-1)` fallback intended but never did
    # (every iterated name was already a key, so the default was dead code).
    class_names.append(placeholder)
    label_map = {name: idx for idx, name in enumerate(class_names)}

    # Build the work list first so the progress-bar total equals the number of
    # .wav files actually processed. Sorting makes the sample order stable.
    wav_files = []
    for class_name in class_names:
        class_dir = os.path.join(DATASET_PATH, class_name)
        for file in sorted(os.listdir(class_dir)):
            if file.endswith(".wav"):
                wav_files.append((os.path.join(class_dir, file), label_map[class_name]))

    X, y = [], []
    for file_path, label in tqdm(wav_files, desc="Loading dataset", unit="file"):
        X.append(extract_mfcc(file_path))
        y.append(label)

    X = np.array(X, dtype=np.float32)
    y = to_categorical(np.array(y), num_classes=NUM_CLASSES)
    return X, y, label_map

# =============================
# Model Definition
# =============================
def build_cnn_patch_embedding(input_shape=(13, 260, 1), embed_dim=256):
    """Assemble the (uncompiled) CNN classifier for MFCC inputs.

    Two Conv2D + MaxPooling2D stages are followed by a flatten and a
    128 -> 64 -> ``NUM_CLASSES`` dense head with a softmax output.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        embed_dim: Accepted for interface compatibility; not used by the body.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    feature_extractor = [
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        # Output width comes from the module-level NUM_CLASSES at call time.
        layers.Dense(NUM_CLASSES, activation='softmax'),
    ]
    return tf.keras.Sequential(feature_extractor + classifier_head)

# =============================
# Train the Model
# =============================
# Seed Python, NumPy and TensorFlow so the train/validation split, the weight
# initialisation and the batch shuffling are repeatable on Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

X, y, label_map = load_dataset()
# Stratified 80/20 split; random_state pins which samples land in each side.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

model = build_cnn_patch_embedding()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# =============================
# Train the Model & Capture History
# =============================
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val)
)

# =============================
# Print Final Accuracies
# =============================
# These are the metrics of the LAST epoch, not the best epoch.
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_acc * 100:.2f}%")
print(f"Final Validation Accuracy: {final_val_acc * 100:.2f}%")


Loading dataset: 100%|██████████| 1196/1196 [00:44<00:00, 26.69file/s]


Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 62ms/step - accuracy: 0.4368 - loss: 20.8393 - val_accuracy: 0.8417 - val_loss: 0.4312
Epoch 2/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9162 - loss: 0.2632 - val_accuracy: 0.9458 - val_loss: 0.1495
Epoch 3/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9500 - loss: 0.1466 - val_accuracy: 0.9083 - val_loss: 0.2046
Epoch 4/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9577 - loss: 0.1278 - val_accuracy: 0.9417 - val_loss: 0.1438
Epoch 5/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9784 - loss: 0.0556 - val_accuracy: 0.9917 - val_loss: 0.0433
Epoch 6/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9934 - loss: 0.0192 - val_accuracy: 0.9917 - val_loss: 0.0308
Epoch 7/20
[1m60/60[0m [32m━━━━━━━━

In [5]:
# Persist the in-memory `model` to an .h5 file; the conversion call at the end
# of this cell reloads it from this same path.
model.save("/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa_1.h5")

# =============================
# Convert to TFLite
# =============================
def convert_to_tflite(model_path, output_path):
    """Convert a saved Keras model file into a TFLite flatbuffer on disk.

    Args:
        model_path: Path of the saved Keras model to load.
        output_path: Destination path; the file is written in binary mode
            and overwritten if it already exists.
    """
    keras_model = tf.keras.models.load_model(model_path)
    flatbuffer = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()
    with open(output_path, "wb") as tflite_file:
        tflite_file.write(flatbuffer)

# Reload the .h5 written by model.save at the top of this cell and write the
# converted .tflite file into the same Model_keras folder.
convert_to_tflite("/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa_1.h5", "/content/drive/MyDrive/IOT/SoundClasification/Model_keras/model_libarosa_1.tflite")



Saved artifact at '/tmp/tmpk4q0jw31'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 13, 260, 1), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  135046512961168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046512965584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046512956560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046512960208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516218320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516211792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516213328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516212944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516214672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135046516211984: TensorSpec(shape=(), dtype=tf.resource, name=None)
