In [1]:
import tensorflow as tf
import numpy as np
import librosa
import os
import pyaudio
import wave
import scipy.io.wavfile as wav
import numpy as np


In [None]:
def snore_detector(input_shape=(128, 44, 1)):
    """Build and compile the convolutional snore classifier.

    The network is two Conv2D + MaxPooling2D stages followed by a dense head
    that ends in a single sigmoid unit, compiled with the Adam optimizer and
    binary cross-entropy loss.

    Args:
        input_shape: Shape of one input spectrogram as
            ``(height, width, channels)``. Defaults to ``(128, 44, 1)``,
            the shape this notebook was written for.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential([
        layers.Input(shape=input_shape),
        # Feature extractor: each stage halves both spatial dimensions.
        layers.Conv2D(16, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        # Classifier head: one sigmoid output for the binary decision.
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Preprocess the audio file into a fixed-length spectrogram
def preprocess_audio(file_path, duration=1.0):
    """Load an audio file and convert it to a log-scaled mel spectrogram.

    The waveform is zero-padded or truncated to ``duration`` seconds before
    the spectrogram is computed, so every clip yields the same number of
    time frames. Without this, clips of differing length produce arrays that
    cannot be stacked into one batch or fed to a fixed-shape model input.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        duration: Clip length in seconds to pad/truncate to. Pass ``None``
            to keep the clip's native length.

    Returns:
        Array with a trailing channel axis of size 1, holding decibel values
        relative to the loudest bin. With librosa's default sample rate, hop
        length and mel-band count, a 1-second clip is expected to give
        ``(128, 44, 1)`` -- TODO confirm against the installed librosa.
    """
    y, sr = librosa.load(file_path)
    if duration is not None:
        target_samples = int(round(sr * duration))
        if len(y) < target_samples:
            # Pad the tail with silence.
            y = np.pad(y, (0, target_samples - len(y)), mode='constant')
        else:
            y = y[:target_samples]
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    # Append the channel axis expected by Conv2D layers.
    return np.expand_dims(spectrogram, axis=-1)

# Load the data
def load_data(data_path):
    """Read every audio file under ``data_path`` into spectrograms and labels.

    ``data_path`` is expected to contain one sub-directory per class. Files
    inside the sub-directory named ``'snoring'`` are labelled 1; files in
    any other sub-directory are labelled 0.

    Entries are visited in sorted order so the returned arrays are
    reproducible across runs and platforms. Non-directory entries at the top
    level and hidden entries (names starting with ``'.'``, e.g.
    ``.DS_Store``) are skipped instead of being passed to the audio loader.

    Args:
        data_path: Root directory of the dataset.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: the stacked results of
        ``preprocess_audio`` and the matching 0/1 labels.
    """
    x_train = []
    y_train = []
    for folder_name in sorted(os.listdir(data_path)):
        folder_path = os.path.join(data_path, folder_name)
        if folder_name.startswith('.') or not os.path.isdir(folder_path):
            continue
        # Label snoring sounds as 1 and non-snoring sounds as 0
        label = 1 if folder_name == 'snoring' else 0
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            if file_name.startswith('.') or not os.path.isfile(file_path):
                continue
            # Preprocess the audio files into spectrograms
            x_train.append(preprocess_audio(file_path))
            y_train.append(label)
    return np.array(x_train), np.array(y_train)

# Split the data into training and testing sets
def split_data(x, y, test_size=0.2, seed=None):
    """Randomly partition samples and labels into train and test subsets.

    Args:
        x: NumPy array of samples, indexed along axis 0.
        y: NumPy array of labels with the same length as ``x``.
        test_size: Fraction of samples (between 0 and 1) placed in the test
            subset; the count is truncated towards zero.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global ``np.random`` state is used, as before.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length or ``test_size`` is
            outside ``[0, 1]``.
    """
    if len(x) != len(y):
        raise ValueError(
            'x and y must have the same length, got %d and %d' % (len(x), len(y)))
    if not 0 <= test_size <= 1:
        raise ValueError('test_size must be between 0 and 1, got %r' % (test_size,))

    rng = np.random if seed is None else np.random.default_rng(seed)
    indices = rng.permutation(len(x))
    n_test = int(len(x) * test_size)
    test_idx, train_idx = indices[:n_test], indices[n_test:]
    return x[train_idx], y[train_idx], x[test_idx], y[test_idx]

# Train the model
def train_model(model, x_train, y_train, x_test, y_test, epochs=10,
                model_path='snoring_local.h5'):
    """Fit ``model`` and save it to disk.

    Args:
        model: Object exposing Keras-style ``fit`` and ``save`` methods.
        x_train: Training samples.
        y_train: Training labels.
        x_test: Samples passed to ``fit`` as validation data.
        y_test: Labels passed to ``fit`` as validation data.
        epochs: Number of training epochs.
        model_path: File the fitted model is saved to. Defaults to
            ``'snoring_local.h5'``, the file the prediction cells load.

    Returns:
        The same ``model`` object, after fitting.
    """
    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs)
    model.save(model_path)
    return model

# Score a fitted model on a held-out set
def evaluate_model(model, x_test, y_test):
    """Run ``model.evaluate`` and hand back its two results as a pair.

    Args:
        model: Object exposing a Keras-style ``evaluate(x, y)`` method that
            yields exactly two values.
        x_test: Samples to evaluate on.
        y_test: Labels matching ``x_test``.

    Returns:
        Tuple ``(loss, accuracy)`` in the order ``model.evaluate`` produced
        them.
    """
    test_loss, test_accuracy = model.evaluate(x_test, y_test)
    return (test_loss, test_accuracy)

# Run the program: load the dataset, train the CNN, and report held-out metrics.
if __name__ == '__main__':
    # NOTE(review): 'snoriing_1/' is spelled with a double "i" -- confirm this
    # matches the dataset directory on disk.
    data_path = 'snoriing_1/'
    # One spectrogram per audio file, with 1/0 labels derived from folder names.
    x, y = load_data(data_path)
    # Random split using split_data's default test fraction.
    x_train, y_train, x_test, y_test = split_data(x, y)
    model = snore_detector()
    # train_model fits the network and also saves it to disk.
    model = train_model(model, x_train, y_train, x_test, y_test)
    # x_test/y_test were already passed to fit() as validation data, so these
    # numbers are measured on the same samples monitored during training.
    loss, accuracy = evaluate_model(model, x_test, y_test)
    print('Test loss:', loss)
    print('Test accuracy:', accuracy)


In [221]:
# Recording configuration
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 1
WAVE_OUTPUT_FILENAME = "output.wav"
PADDED_OUTPUT_FILENAME = 'my_padded_wave_file.wav'

# Capture RECORD_SECONDS of audio from the default input device. The
# try/finally blocks make sure the stream and the PortAudio session are
# released even if opening or reading fails.
audio = pyaudio.PyAudio()
try:
    # Query the sample width while the PyAudio session is still open.
    sample_width = audio.get_sample_size(FORMAT)
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        # Only whole chunks are read, so slightly fewer than
        # RATE * RECORD_SECONDS samples are captured; the gap is padded below.
        frames = [stream.read(CHUNK)
                  for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
    finally:
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

# Write the raw recording
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

# Read the wave file
sample_rate, data = wav.read(WAVE_OUTPUT_FILENAME)

# Calculate the number of frames needed to reach the full clip length
target_frames = int(RATE * RECORD_SECONDS)
frames_to_add = max(0, target_frames - len(data))

# Create zero padding with the same channel layout and dtype as the recording.
# Keeping the dtype matters: float padding would upcast the int16 samples and
# make wav.write emit a float WAV whose values are far outside [-1, 1].
padding = np.zeros((frames_to_add,) + data.shape[1:], dtype=data.dtype)

# Concatenate the padding with the original data
padded_data = np.concatenate((data, padding), axis=0)

# Write the padded data to a new wave file
wav.write(PADDED_OUTPUT_FILENAME, sample_rate, padded_data)
# The model is loaded once, right after this definition; the extra load that
# used to sit here was redundant.
def preprocess_audio(file_path, duration=1.0):
    """Load an audio file and convert it to a log-scaled mel spectrogram.

    The waveform is zero-padded or truncated to ``duration`` seconds before
    the spectrogram is computed, so every clip yields the same number of
    time frames regardless of the recording's exact length.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        duration: Clip length in seconds to pad/truncate to. Pass ``None``
            to keep the clip's native length.

    Returns:
        Array with a trailing channel axis of size 1, holding decibel values
        relative to the loudest bin.
    """
    y, sr = librosa.load(file_path)
    if duration is not None:
        target_samples = int(round(sr * duration))
        if len(y) < target_samples:
            # Pad the tail with silence.
            y = np.pad(y, (0, target_samples - len(y)), mode='constant')
        else:
            y = y[:target_samples]
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    # Append the channel axis expected by Conv2D layers.
    return np.expand_dims(spectrogram, axis=-1)

# Restore the trained network from the HDF5 file on disk
model = tf.keras.models.load_model('snoring_local.h5')

# Turn the padded recording into a spectrogram
new_audio_file_path = 'my_padded_wave_file.wav'
new_spectrogram = preprocess_audio(new_audio_file_path)

# Score a batch holding just this one spectrogram
prediction = model.predict(np.array([new_spectrogram]))

# NOTE(review): 0.01 is far below the usual 0.5 cut-off for a sigmoid
# output -- confirm this threshold is intentional.
message = ('The audio file contains snoring.'
           if prediction[0][0] > 0.01
           else 'The audio file does not contain snoring.')
print(message, prediction)


The audio file does not contain snoring. [[0.00072291]]


In [186]:
# The model is loaded once, right after this definition; the extra load that
# used to sit here was redundant.
def preprocess_audio(file_path, duration=1.0):
    """Load an audio file and convert it to a log-scaled mel spectrogram.

    The waveform is zero-padded or truncated to ``duration`` seconds before
    the spectrogram is computed, so every clip yields the same number of
    time frames regardless of the recording's exact length.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        duration: Clip length in seconds to pad/truncate to. Pass ``None``
            to keep the clip's native length.

    Returns:
        Array with a trailing channel axis of size 1, holding decibel values
        relative to the loudest bin.
    """
    y, sr = librosa.load(file_path)
    if duration is not None:
        target_samples = int(round(sr * duration))
        if len(y) < target_samples:
            # Pad the tail with silence.
            y = np.pad(y, (0, target_samples - len(y)), mode='constant')
        else:
            y = y[:target_samples]
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    # Append the channel axis expected by Conv2D layers.
    return np.expand_dims(spectrogram, axis=-1)

# Restore the trained network from the HDF5 file on disk
model = tf.keras.models.load_model('snoring_local.h5')

# Turn the padded recording into a spectrogram
new_audio_file_path = 'my_padded_wave_file.wav'
new_spectrogram = preprocess_audio(new_audio_file_path)

# Score a batch holding just this one spectrogram
prediction = model.predict(np.array([new_spectrogram]))

# NOTE(review): 0.01 is far below the usual 0.5 cut-off for a sigmoid
# output -- confirm this threshold is intentional.
message = ('The audio file contains snoring.'
           if prediction[0][0] > 0.01
           else 'The audio file does not contain snoring.')
print(message, prediction)


The audio file does not contain snoring. [[0.00185027]]


In [None]:


# Placeholder polling loop: repeat the body until the 'a' key is reported as
# pressed, then stop.
# NOTE(review): `keyboard` is not imported in any cell shown here, so this
# cell raises NameError unless the name is bound elsewhere -- confirm the
# import exists.
# NOTE(review): the loop contains no sleep or blocking call, so it spins
# continuously while waiting for the key.
while True:
    # your code here
    if keyboard.is_pressed('a'):
        break