In [1]:
import os
import numpy as np
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt


In [2]:
DATASET_PATH = "dataset/"

# Every visible sub-directory of DATASET_PATH is one class.
# Regular files (e.g. .DS_Store) and hidden directories (e.g. the
# .ipynb_checkpoints folder Jupyter may create) are ignored so they can never
# become a bogus class label. Sorting keeps the label indices stable between runs.
class_names = sorted(
    d for d in os.listdir(DATASET_PATH)
    if not d.startswith(".") and os.path.isdir(os.path.join(DATASET_PATH, d))
)

print("Classes detected:", class_names)

# Persist the index -> name mapping so the labels can be decoded later.
np.save("class_names.npy", np.array(class_names))


Classes detected: ['baby_cry', 'door_open', 'glass', 'gunshot', 'man_shout', 'silence', 'women_shout']


In [3]:
def extract_features(path, sr=22050, n_mels=128, n_frames=128):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    Parameters
    ----------
    path : str
        Path of the audio file to load.
    sr : int, optional
        Sampling rate the audio is resampled to when loading (default 22050).
    n_mels : int, optional
        Number of mel bands, i.e. rows of the result (default 128).
    n_frames : int, optional
        Number of time frames, i.e. columns of the result (default 128).
        Longer clips are truncated, shorter clips are padded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mels, n_frames)`` holding dB values relative to
        the clip's own peak (``ref=np.max``), so the maximum is 0 dB.
    """
    audio, sr = librosa.load(path, sr=sr)
    mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    # fix_length pads with 0 by default, but 0 dB is the loudest value on this
    # scale; pad with the clip's quietest value so the padding reads as silence.
    # NOTE(review): any inference code must use the same padding.
    mel_db = librosa.util.fix_length(
        mel_db, size=n_frames, axis=1, constant_values=mel_db.min()
    )
    return mel_db


In [4]:
X = []
Y = []

for idx, cls in enumerate(class_names):
    folder = os.path.join(DATASET_PATH, cls)
    print("Loading:", cls)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) reproducible.
    for f in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, f)

        # Skip hidden files (.DS_Store, ...) and anything that is not a file.
        if f.startswith(".") or not os.path.isfile(file_path):
            continue

        try:
            mel = extract_features(file_path)
        except Exception as e:
            # Best effort: report unreadable files and carry on with the rest.
            print("Skipped:", f, "| Error:", e)
            continue

        X.append(mel)
        Y.append(idx)

# Fail here with a clear message instead of later inside train_test_split.
if not X:
    raise RuntimeError("No audio files could be loaded from " + DATASET_PATH)

X = np.array(X)
Y = np.array(Y)

print("Dataset loaded!")
print("X shape:", X.shape)
print("Y shape:", Y.shape)

# Cache the extracted features and labels on disk.
np.save("X.npy", X)
np.save("Y.npy", Y)


Loading: baby_cry
Loading: door_open
Loading: glass
Loading: gunshot
Loading: man_shout
Loading: silence
Loading: women_shout
Dataset loaded!
X shape: (58, 128, 128)
Y shape: (58,)


In [5]:
# Normalize and add 3 channels
# X holds dB values produced by power_to_db(ref=np.max): the peak is 0 dB and
# the default top_db=80 clips everything below -80 dB, so values lie in [-80, 0].
# Dividing by 255 (an 8-bit image convention) would squeeze them into
# roughly [-0.31, 0]; rescale with the real range to get [0, 1] instead.
# NOTE(review): any inference code must apply the same scaling.
TOP_DB = 80.0
X_norm = np.clip((X + TOP_DB) / TOP_DB, 0.0, 1.0)
X_norm = np.stack([X_norm, X_norm, X_norm], axis=-1)

# Train-test split (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X_norm, Y, test_size=0.2, random_state=42
)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (46, 128, 128, 3)
Test shape: (12, 128, 128, 3)


In [6]:
# Small CNN: three convolution + max-pooling stages (32, 64, 128 filters)
# followed by a dense head with one softmax output per class.
model = Sequential()

# Stage 1 also declares the input: 128x128 spectrograms with 3 channels.
model.add(Conv2D(32, (3,3), activation="relu", input_shape=(128,128,3)))
model.add(MaxPool2D(pool_size=2, strides=2))

# Stages 2 and 3 only differ in the number of filters.
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, (3,3), activation="relu"))
    model.add(MaxPool2D(pool_size=2, strides=2))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dense(len(class_names), activation="softmax"))

# Labels are integer class indices, hence the sparse loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train for 20 epochs with mini-batches of 32; the held-out split is evaluated
# after every epoch and its metrics are recorded in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=20,
)


Epoch 1/20


In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12,5))

panels = [
    ("accuracy", "val_accuracy", "Accuracy"),
    ("loss", "val_loss", "Loss"),
]

for ax, (train_key, val_key, title) in zip(axes, panels):
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.legend(["Train", "Validation"])

plt.show()


In [None]:
# Make sure the target directory exists instead of relying on the saver to
# create it, then write the trained model in HDF5 format.
os.makedirs("models", exist_ok=True)
model.save("models/sound_model.h5")
print("Model saved successfully!")


In [None]:
import tensorflow as tf

# Diagnostic: show every physical device (CPU/GPU/...) TensorFlow can see.
physical_devices = tf.config.list_physical_devices()
physical_devices


In [None]:
# Recorded result of tf.config.list_physical_devices() on this machine.
# It was pasted into a code cell; kept as a comment because executing it
# raises NameError (PhysicalDevice is not defined in the notebook):
# [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [None]:
import os

# Print how many entries each class folder contains.
# Non-directory entries of "dataset" (e.g. .DS_Store) are skipped: calling
# os.listdir on a regular file raises NotADirectoryError.
for c in sorted(os.listdir("dataset")):
    class_dir = os.path.join("dataset", c)
    if not os.path.isdir(class_dir):
        continue
    print(c, "→", len(os.listdir(class_dir)))


In [1]:
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import matplotlib.pyplot as plt


In [2]:
DATASET_PATH = "dataset/"

# One class per visible sub-directory of DATASET_PATH.
# Hidden entries (.DS_Store, .ipynb_checkpoints, ...) and regular files are
# excluded: a stray file would otherwise become a class and break the loader,
# which calls os.listdir on every class folder.
class_names = sorted(
    c for c in os.listdir(DATASET_PATH)
    if not c.startswith(".") and os.path.isdir(os.path.join(DATASET_PATH, c))
)

print("Detected classes:", class_names)


Detected classes: ['baby_cry', 'door_open', 'fight', 'glass', 'gunshot', 'man_shout', 'silence', 'women_shout']


In [3]:
X = []
y = []

# power_to_db(ref=np.max) with the default top_db=80 yields values in [-80, 0] dB.
TOP_DB = 80.0

for idx, cname in enumerate(class_names):
    folder = os.path.join(DATASET_PATH, cname)
    # os.listdir order is arbitrary; sort so the sample order (and the seeded
    # train/test split that follows) is reproducible.
    for file in sorted(os.listdir(folder)):
        # Case-insensitive match so files such as "CLIP.WAV" are not dropped.
        if not file.lower().endswith((".wav", ".mp3")):
            continue

        path = os.path.join(folder, file)

        audio, sr = librosa.load(path, sr=22050)

        mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        # Pad short clips with their quietest value: the default zero padding
        # would be 0 dB, i.e. the loudest value on this scale.
        mel_db = librosa.util.fix_length(
            mel_db, size=128, axis=1, constant_values=mel_db.min()
        )

        # Map [-80, 0] dB onto [0, 1]. Dividing by 255 (an image convention)
        # would leave the inputs in roughly [-0.31, 0].
        # NOTE(review): any inference code must apply the same padding and scaling.
        mel_db = np.clip((mel_db + TOP_DB) / TOP_DB, 0.0, 1.0)

        # Replicate the single channel three times for the (128, 128, 3) input.
        mel_db = np.stack([mel_db, mel_db, mel_db], axis=-1)

        X.append(mel_db)
        y.append(idx)

X = np.array(X)
y = np.array(y)

print("Feature shape:", X.shape)
print("Labels shape:", y.shape)


Feature shape: (65, 128, 128, 3)
Labels shape: (65,)


In [4]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, random_state=42, test_size=0.2
)

# One-hot encode both label vectors, as required by categorical_crossentropy.
y_train, y_test = (
    to_categorical(labels, num_classes=len(class_names))
    for labels in (y_train, y_test)
)


In [5]:
# Small CNN: three convolution + max-pooling stages (32, 64, 128 filters),
# then a dense head with dropout and one softmax output per class.
model = Sequential()

# Stage 1 also declares the input: 128x128 spectrograms with 3 channels.
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(128,128,3)))
model.add(MaxPooling2D(pool_size=(2,2)))

# Stages 2 and 3 only differ in the number of filters.
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, (3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

# Classifier head; dropout randomly zeroes 30% of the dense activations during training.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(len(class_names), activation='softmax'))

# Labels are one-hot encoded, hence the non-sparse categorical loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train for 20 epochs with mini-batches of 16; the held-out split is evaluated
# after every epoch and its metrics are recorded in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=20,
    validation_data=(X_test, y_test),
)


Epoch 1/20


In [None]:
# Make sure the target directory exists instead of relying on the saver to
# create it, then write the trained model (HDF5) together with the
# index -> class-name mapping.
os.makedirs("models", exist_ok=True)
model.save("models/sound_model.h5")
np.save("class_names.npy", np.array(class_names))
