In [1]:
import tensorflow as tf
import numpy as np
import librosa
import os

# Define the CNN model
def snore_detector(input_shape=(128, 44, 1)):
    """Build and compile a small 2-D CNN for binary snore classification.

    The network stacks two Conv2D (3x3, ReLU, 'same' padding) + MaxPooling2D
    (2x2) stages with 16 and 32 filters, flattens the result, and finishes
    with a 64-unit ReLU dense layer and a single sigmoid output unit.

    Args:
        input_shape: Shape of ONE input sample, without the batch axis.
            Defaults to ``(128, 44, 1)``; presumably (mel bands, time frames,
            channels) -- it has to match whatever the preprocessing step
            produces, so change both together.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        # Single sigmoid unit: output is a score in [0, 1] for the positive class.
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Preprocess the audio file into a spectrogram
def preprocess_audio(file_path):
    """Turn one audio file into a log-scaled mel spectrogram with a channel axis.

    The file is loaded with librosa's default settings, converted to a mel
    power spectrogram, expressed in decibels relative to its own maximum
    (``ref=np.max``), and given a trailing axis of length 1.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The dB-scaled spectrogram with one extra trailing axis. No batch axis
        is added, and the size of the time axis presumably depends on the
        clip duration -- TODO confirm all clips share the same length.
    """
    waveform, sample_rate = librosa.load(file_path)
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    # Trailing axis of length 1 acts as the single image channel.
    return mel_db[..., np.newaxis]

# Load the data
def load_data(data_path):
    """Load every audio clip under ``data_path`` as a labelled spectrogram.

    ``data_path`` is expected to contain one sub-directory per class. Clips in
    the sub-directory named ``'snoring'`` are labelled 1; clips in any other
    sub-directory are labelled 0.

    Entries that cannot be audio clips are skipped instead of crashing the
    loader: anything directly under ``data_path`` that is not a directory,
    and, inside a class folder, anything that is not a regular file or whose
    name starts with ``'.'`` (e.g. ``.DS_Store``). Directory listings are
    sorted so the sample order does not depend on the file system.

    Args:
        data_path: Root directory of the dataset.

    Returns:
        ``(x_train, y_train)``: a NumPy array of the spectrograms returned by
        ``preprocess_audio`` and a NumPy array of the matching 0/1 labels, in
        the same order. Both are empty when no clip is found.
    """
    x_train = []
    y_train = []
    for folder_name in sorted(os.listdir(data_path)):
        folder_path = os.path.join(data_path, folder_name)
        if not os.path.isdir(folder_path):
            # Stray file next to the class folders; listing it would raise.
            continue
        # Label snoring sounds as 1 and non-snoring sounds as 0
        label = 1 if folder_name == 'snoring' else 0
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            if file_name.startswith('.') or not os.path.isfile(file_path):
                # Hidden OS metadata or nested folders are not audio clips.
                continue
            # Preprocess the audio files into spectrograms
            x_train.append(preprocess_audio(file_path))
            y_train.append(label)
    # NOTE: stacking assumes every spectrogram has the same shape.
    return np.array(x_train), np.array(y_train)

# Split the data into training and testing sets
def split_data(x, y, test_size=0.2, seed=None):
    """Randomly split samples and labels into training and test subsets.

    Args:
        x: Samples (array-like); indexed along the first axis.
        y: Labels (array-like) with the same length as ``x``.
        test_size: Fraction of the samples to put in the test subset. The
            number of test samples is ``int(len(x) * test_size)``, i.e. it is
            rounded down.
        seed: Optional seed for a reproducible shuffle. When ``None`` (the
            default) the global NumPy random state is used, exactly as
            before, so a prior ``np.random.seed(...)`` call still applies.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as NumPy arrays; sample/label
        pairs stay aligned.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    # Accept plain lists as well as arrays (fancy indexing needs ndarrays).
    x = np.asarray(x)
    y = np.asarray(y)
    n_samples = len(x)
    if n_samples != len(y):
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )

    if seed is None:
        indices = np.random.permutation(n_samples)
    else:
        indices = np.random.default_rng(seed).permutation(n_samples)

    # Keep the fraction and the derived count in separate names.
    n_test = int(n_samples * test_size)
    test_idx, train_idx = indices[:n_test], indices[n_test:]
    return x[train_idx], y[train_idx], x[test_idx], y[test_idx]

# Train the model
def train_model(model, x_train, y_train, x_test, y_test, epochs=10,
                save_path='snoring_local.h5'):
    """Fit ``model`` and save it to disk.

    Args:
        model: Object exposing Keras-style ``fit`` and ``save`` methods.
        x_train: Training samples.
        y_train: Training labels.
        x_test: Samples passed to ``fit`` as validation data.
        y_test: Labels passed to ``fit`` as validation data.
        epochs: Number of training epochs.
        save_path: File the trained model is saved to. Defaults to
            ``'snoring_local.h5'``, the path used before this parameter
            existed.

    Returns:
        The same ``model`` object, after fitting.
    """
    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs)
    model.save(save_path)
    return model

# Evaluate the model
def evaluate_model(model, x_test, y_test):
    """Score ``model`` on the given samples and labels.

    ``model.evaluate`` must return exactly two values, loss first and accuracy
    second -- which is what a model compiled with a single ``'accuracy'``
    metric is expected to give.

    Args:
        model: Object exposing a Keras-style ``evaluate`` method.
        x_test: Samples to evaluate on.
        y_test: Labels matching ``x_test``.

    Returns:
        A ``(loss, accuracy)`` tuple.
    """
    test_loss, test_accuracy = model.evaluate(x_test, y_test)
    return test_loss, test_accuracy

# Run the program
if __name__ == '__main__':
    # End-to-end run: load the dataset, split it, build the CNN, train it
    # (train_model also saves it to disk) and report loss/accuracy.
    # The names bound here (model, x_train, ...) stay in the notebook's global
    # namespace after this cell has run.
    # NOTE(review): 'snoriing_1/' looks misspelled, but the recorded output
    # shows the run succeeded, so the folder presumably has this exact name --
    # confirm before "fixing" it.
    data_path = 'snoriing_1/'
    x, y = load_data(data_path)
    # The split is random and unseeded, so the numbers below vary per run.
    x_train, y_train, x_test, y_test = split_data(x, y)
    model = snore_detector()
    # x_test/y_test are passed to training as the validation set ...
    model = train_model(model, x_train, y_train, x_test, y_test)
    # ... and are reused here, so this is not an independent held-out score.
    loss, accuracy = evaluate_model(model, x_test, y_test)
    print('Test loss:', loss)
    print('Test accuracy:', accuracy)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.1622239351272583
Test accuracy: 0.953177273273468


In [2]:
from tensorflow.keras.models import load_model

# Load the model from a file: 'snoring_local.h5' is the path train_model()
# saves to. This rebinds the global `model`, so later cells use the reloaded
# network and can run without retraining as long as the file exists.
model = load_model('snoring_local.h5')


In [16]:
import sounddevice as sd
from scipy.io.wavfile import write

# Sampling frequency (Hz) for the microphone capture
freq = 44100

# Duration of recording in seconds
duration = 1

# Start recording audio (one channel, freq * duration samples)
print("Recording...")
audio = sd.rec(int(freq * duration), samplerate=freq, channels=1)

# Wait for recording to finish
sd.wait()

# Save audio to a WAV file
write("audio.wav", freq, audio)
print("Audio file saved to audio.wav")

# Turn the clip into model input. The model is defined on single samples of
# shape (128, 44, 1), so predict() needs a leading batch axis. Depending on
# which preprocess_audio definition is active, the result may or may not
# already carry that axis -- add it only when it is missing.
x = preprocess_audio("audio.wav")
if x.ndim == 3:
    x = np.expand_dims(x, axis=0)
model.predict(x)

Recording...
Audio file saved to audio.wav


array([[1.8746925e-15]], dtype=float32)

In [15]:
def preprocess_audio(filename):
    """Convert an audio file into a single-sample batch for the model.

    The audio is loaded at 16 kHz, converted to a log-scaled mel spectrogram
    (STFT with ``n_fft=2048``, ``hop_length=512``), standardized to zero mean
    and unit variance, resized to 128 x 44 and reshaped to
    ``(1, 128, 44, 1)``.

    NOTE(review): this redefinition replaces the earlier ``preprocess_audio``
    in this notebook, and its features differ from that one (16 kHz instead
    of librosa's default rate, z-score normalization instead of dB relative
    to the maximum, extra batch axis). If the model was trained on the
    earlier features, predictions on these inputs are unlikely to be
    meaningful -- confirm which pipeline the model expects.

    Args:
        filename: Path of the audio file to load.

    Returns:
        Array of shape ``(1, 128, 44, 1)``.
    """
    # Imported here so the function does not depend on the notebook cell
    # that imports cv2 having been run first.
    import cv2

    # load audio file
    audio, sample_rate = librosa.load(filename, sr=16000)

    # extract features
    stft = np.abs(librosa.stft(audio, hop_length=512, n_fft=2048))
    mel = librosa.feature.melspectrogram(sr=sample_rate, S=stft**2)
    log_mel = librosa.power_to_db(mel)

    # normalize to mean=0, std=1
    mean = np.mean(log_mel)
    std = np.std(log_mel)
    if std == 0:
        # Constant spectrogram (e.g. a silent recording): avoid 0/0 -> NaN
        # and emit all zeros instead.
        std = 1.0
    norm_mel = (log_mel - mean) / std

    # resize to match expected input shape of model
    # (cv2.resize takes the target size as (width, height))
    resized = cv2.resize(norm_mel, (44, 128), interpolation=cv2.INTER_AREA)
    return resized.reshape((1, 128, 44, 1))


In [9]:
import cv2 
