In [2]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [3]:
import warnings

# Silence DeprecationWarning messages so they do not clutter the cell outputs
# below. Only this category is ignored; every other warning is still shown.
warnings.filterwarnings(
    action="ignore",
    category=DeprecationWarning,
)

In [4]:
# Root folder of the dataset: one sub-folder per class, each sub-folder holding
# that class's audio clips (the sub-folder name is used as the label).
# Set the BEATBOX_DATASET_PATH environment variable to run the notebook on
# another machine; the original local path remains the default.
DATASET_PATH = os.environ.get(
    "BEATBOX_DATASET_PATH",
    r'C:\Users\KIIT\Desktop\Projects\Beatbox Sound Identifier\BeatBox Dataset',
)

# File extensions treated as audio. Anything else found in a class folder
# (e.g. Thumbs.db / desktop.ini created by Windows) is skipped instead of being
# handed to librosa.load, which would raise on it. Extend this tuple if the
# dataset uses another container format.
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac', '.ogg', '.m4a', '.aiff', '.aif')

# Re-created on every run so re-executing this cell never duplicates samples.
labels = []
features = []

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# stable so the seeded train/test split further down is reproducible.
for label in sorted(os.listdir(DATASET_PATH)):
    folder = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(folder):
        continue
    for file in sorted(os.listdir(folder)):
        if not file.lower().endswith(AUDIO_EXTENSIONS):
            continue
        file_path = os.path.join(folder, file)
        # Load the clip resampled to 16 kHz (same rate the live recorder uses).
        signal, sr = librosa.load(file_path, sr=16000)
        # 13 MFCCs per frame, averaged over time -> one fixed-length vector per clip.
        mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
        mfcc_scaled = np.mean(mfcc.T, axis=0)
        features.append(mfcc_scaled)
        labels.append(label)

# Fail here with a clear message rather than later with an opaque shape error.
if not features:
    raise RuntimeError(
        f"No audio files with extensions {AUDIO_EXTENSIONS} found under {DATASET_PATH!r}"
    )

  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


In [5]:
# Convert the Python lists collected by the loading cell into NumPy arrays:
# X holds one feature vector per clip, y the matching class-folder name.
X, y = np.array(features), np.array(labels)

In [6]:
# Map the string class names to integer ids, then expand the ids into one-hot
# rows. The fitted encoder is kept so predictions can be mapped back to names.
encoder = LabelEncoder()
encoder.fit(y)
y_encoded = encoder.transform(y)
y_onehot = to_categorical(y_encoded)

In [7]:
# Hold out 20% of the clips for evaluation. The split is seeded for
# reproducibility and stratified on the integer class ids so every class keeps
# the same proportion in the train and test parts.
# NOTE: stratification requires at least two clips per class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_onehot,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [8]:
# Small fully-connected classifier: the input width follows the feature matrix
# and the softmax output has one unit per class known to the label encoder.
n_features = X.shape[1]
n_classes = len(encoder.classes_)

layer_stack = [
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),  # drop 30% of activations during training
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(n_classes, activation='softmax'),
]
model = tf.keras.Sequential(layer_stack)

In [9]:
# Categorical cross-entropy pairs with the one-hot targets and softmax output;
# accuracy is tracked alongside the loss during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Train for 30 epochs; the held-out split is passed as validation data, so the
# val_* metrics in the log are computed on X_test / y_test after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    validation_data=(X_test, y_test),
)

Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.9389 - loss: 0.1553 - val_accuracy: 0.9792 - val_loss: 0.0627
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9391 - loss: 0.1264 - val_accuracy: 0.9861 - val_loss: 0.0485
Epoch 3/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9491 - loss: 0.1207 - val_accuracy: 0.9861 - val_loss: 0.0552
Epoch 4/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9405 - loss: 0.1900 - val_accuracy: 0.9792 - val_loss: 0.0649
Epoch 5/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9542 - loss: 0.0968 - val_accuracy: 0.9792 - val_loss: 0.0396
Epoch 6/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9483 - loss: 0.1356 - val_accuracy: 0.9583 - val_loss: 0.0516
Epoch 7/30
[1m18/18[0m [32m━━━━

<keras.src.callbacks.history.History at 0x185dc3b8740>

In [12]:
import sounddevice as sd

In [18]:
import sounddevice as sd

def record_audio(duration=1, fs=16000):
    """Record a mono clip from the default input device.

    Args:
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz used for the recording.

    Returns:
        The recorded samples flattened to a one-dimensional array.
    """
    print("Listening...")
    n_frames = int(duration * fs)
    buffer = sd.rec(n_frames, samplerate=fs, channels=1)
    sd.wait()  # wait for the recording to finish before reading the buffer
    return buffer.flatten()

def predict_sound(duration=1, fs=16000):
    """Record a clip from the microphone and print the predicted class name.

    The clip is reduced to the same feature used for training (13 MFCCs
    averaged over time) and passed to the trained ``model``; the winning class
    index is mapped back to its name with the fitted ``encoder``.

    Args:
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz. It is used for both the recording and the MFCC
            computation so the two can never disagree. Keep it at the rate the
            training clips were loaded with (16000) unless the model is
            retrained at another rate.
    """
    audio = record_audio(duration=duration, fs=fs)
    mfcc = librosa.feature.mfcc(y=audio, sr=fs, n_mfcc=13)
    # Average over frames and add a batch axis: the model expects (1, n_features).
    mfcc_scaled = np.mean(mfcc.T, axis=0).reshape(1, -1)
    prediction = model.predict(mfcc_scaled)
    predicted_label = encoder.inverse_transform([np.argmax(prediction)])
    print("Predicted:", predicted_label[0].upper())


predict_sound()

Listening...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
Predicted: CLOP


In [None]:
# Persist the trained network in the native Keras format.
MODEL_PATH = "C:/Users/KIIT/Desktop/Projects/Beatbox Sound Identifier/beatbox_model.keras"
model.save(MODEL_PATH)