In [4]:
import librosa
import numpy as np
import os

# Definir la ruta de la carpeta de sonidos
sounds_folder = 'dataset'

# Obtener la lista de archivos de audio y sus etiquetas
data = []
labels = []

# Definir los tipos de alarmas
alarm_types = ['Autisim mild','Autisim moderate', 'normal']

max_time_steps = 0

for alarm_type in alarm_types:
    folder_path = os.path.join(sounds_folder, alarm_type)
    for file in os.listdir(folder_path):
        if file.endswith('.wav'):
            file_path = os.path.join(folder_path, file)
            signal, sr = librosa.load(file_path, sr=None)
            spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr)
            data.append(spectrogram)
            labels.append(alarm_type)
            max_time_steps = max(max_time_steps, spectrogram.shape[1])

# Convertir las listas a matrices numpy
num_samples = len(data)
num_mel_bins = data[0].shape[0]

# Crear un arreglo numpy tridimensional para los datos
data_padded = np.zeros((num_samples, num_mel_bins, max_time_steps))

# Rellenar el arreglo con los espectrogramas y sus longitudes reales
for i in range(num_samples):
    data_padded[i, :, :data[i].shape[1]] = data[i]

# Convertir las etiquetas a un arreglo numpy
labels = np.array(labels)

print('Shape of the data after padding:', data_padded.shape)
print('Shape of the labels:', labels.shape)


Shape of the data after padding: (422, 128, 2508)
Shape of the labels: (422,)


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
import numpy as np

# Encode the labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_padded, labels_encoded, test_size=0.2, random_state=42)

# Expand the dimensions of the input data to add the channel
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Build the CNN model
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 alarm classes
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Accuracy on test data:', test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on test data: 0.9529411792755127


In [7]:
# Save the model
model.save("Sound.h5")



  saving_api.save_model(


In [5]:
from keras.models import load_model

# Load the model
loaded_model = load_model("sound.h5")


In [6]:
import librosa
import numpy as np
from keras.models import load_model
from playsound import playsound
from IPython.display import Audio

def print_prediction(file_name, play_audio=True, display_audio=True):
    # Load the audio file
    signal, sr = librosa.load(file_name, sr=None)
    # Extract the spectrogram
    spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr)
    # Pad the spectrogram to match the input shape of the model
    padded_spectrogram = np.zeros((1, spectrogram.shape[0], data_padded.shape[2]))
    padded_spectrogram[0, :, :spectrogram.shape[1]] = spectrogram
    # Expand the dimensions to add the channel
    padded_spectrogram = np.expand_dims(padded_spectrogram, axis=-1)
    # Make predictions using the loaded model
    prediction = loaded_model.predict(padded_spectrogram)
    # Decode the predicted label
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    print("Predicted alarm type:", predicted_label[0])
    
    # Check if display_audio is True
    if display_audio:
        # Display the audio widget
        display(Audio(file_name))
    
    # Check if play_audio is True
    if play_audio:
        # Play the sound
        playsound(file_name)

# Define the file name
file_name = r'C:\Users\Admin\Desktop\Ambulance_Project\1\sound_1.wav'
# Call the print_prediction function with play_audio set to False to suppress audio playback
print_prediction(file_name, play_audio=False)


  signal, sr = librosa.load(file_name, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Admin\\Desktop\\Ambulance_Project\\1\\sound_1.wav'

In [7]:
pip install playsound

Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: playsound
  Building wheel for playsound (setup.py): started
  Building wheel for playsound (setup.py): finished with status 'done'
  Created wheel for playsound: filename=playsound-1.3.0-py3-none-any.whl size=7046 sha256=66d5981dbe86502e1e8af708892894dcfadcc9e20d8443b32854bd20ccedfa9e
  Stored in directory: c:\users\admin\appdata\local\pip\cache\wheels\50\98\42\62753a9e1fb97579a0ce2f84f7db4c21c09d03bb2091e6cef4
Successfully built playsound
Installing collected packages: playsound
Successfully installed playsound-1.3.0
Note: you may need to restart the kernel to use updated packages.
