In [2]:
# Training and saving the model
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Function to extract MFCC features from audio files
def extract_mfcc(audio_file, max_pad_len=174):
    audio, sr = librosa.load(audio_file, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)

    # Ensure the fixed length
    if mfccs.shape[1] > max_pad_len:
        mfccs = mfccs[:, :max_pad_len]
    else:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfccs

# Function to load and process the audio data
def load_data(data_folder):
    mfccs = []
    labels = []

    for folder in os.listdir(data_folder):
        label = 'Spark' if folder == 'Spark' else 'Not_Spark'
        folder_path = os.path.join(data_folder, folder)

        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            mfcc = extract_mfcc(file_path)
            mfccs.append(mfcc)
            labels.append(label)

    return np.array(mfccs), np.array(labels)

# Load and preprocess the data
data_folder = r"C:\Users\srika\Data"
X, y = load_data(data_folder)

# Convert labels to one-hot encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_onehot = to_categorical(y_encoded)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Reshape data for CNN input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))  # Two classes: 'Spark' and 'Not_Spark'

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Save the model
model.save('spark_detection_model.h5')


Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100


Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Test Accuracy: 100.00%


  saving_api.save_model(


In [3]:
X_train.shape

(480, 40, 174, 1)

In [4]:
X_test.shape

(120, 40, 174, 1)

In [7]:
# Testing with small dataset
import librosa
import numpy as np
from keras.models import load_model

# Load the trained model
model = load_model('spark_detection_model.h5')

# Function to extract MFCC features from a new audio file
def extract_mfcc(audio_file, max_pad_len=174):
    audio, sr = librosa.load(audio_file, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)

    # Ensure the fixed length
    if mfccs.shape[1] > max_pad_len:
        mfccs = mfccs[:, :max_pad_len]
    else:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1)

# Test a new audio file
test_audio_file = "C:/Users/srika/Data_Test/test44.wav"
test_mfcc = extract_mfcc(test_audio_file)

# Make a prediction
prediction = model.predict(test_mfcc)
predicted_class = np.argmax(prediction)

# Map the predicted class index back to the original label
class_labels = {0: 'Not_Spark', 1: 'Spark'}
predicted_label = class_labels[predicted_class]

print(f"Predicted class: {predicted_label}")
print(f"Prediction probabilities: {prediction}")

Predicted class: Spark
Prediction probabilities: [[2.613446e-24 1.000000e+00]]


In [9]:
import sounddevice as sd
import numpy as np
import librosa
from keras.models import load_model

# Load the trained model
model = load_model('spark_detection_model.h5')

# Function to extract MFCC features from audio data
def extract_mfcc(audio_data, sr, max_pad_len=174):
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=40)

    # Ensure the fixed length
    if mfccs.shape[1] > max_pad_len:
        mfccs = mfccs[:, :max_pad_len]
    else:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1)

# Real-time audio recording and prediction
def real_time_prediction(device, channels=1, duration=6, sample_rate=22050):
    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=channels, dtype=np.float32)
    print("Recording...")

    sd.wait()
    print("Recording complete")

    # Extract MFCC features
    mfcc = extract_mfcc(audio_data.flatten(), sr=sample_rate)

    # Make a prediction
    prediction = model.predict(mfcc)
    predicted_class = np.argmax(prediction)

    # Map the predicted class index back to the original label
    class_labels = {0: 'Not_Spark', 1: 'Spark'}
    predicted_label = class_labels[predicted_class]

    print(f"Predicted class: {predicted_label}")
    print(f"Prediction probabilities: {prediction}")

# Test real-time prediction
real_time_prediction(device=0, channels=1, duration=10, sample_rate=22050)


Recording...
Recording complete
Predicted class: Not_Spark
Prediction probabilities: [[0.997209   0.00279102]]
