In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from keras.utils.vis_utils import plot_model
from tensorflow.keras import regularizers
import cv2
from keras.callbacks import EarlyStopping

In [None]:
# Dataset folders: training set, test set, and the clips to classify
train_dir, test_dir, predict_dir = 'treinamentov2', 'testev2', 'validacaov2'

# Spectrogram size (rows x columns)
spec_height = spec_width = 128

# Number of output categories
num_classes = 4

# Training hyperparameters: batch size, epoch budget, Adam learning rate
# NOTE(review): the original had a bare "#100" comment between batch_size and
# epochs — presumably an earlier value that was tried; confirm before relying on it.
batch_size = 32
epochs = 200
learningRate = 8e-5


In [None]:
import librosa


def load_audio_data(directory, height, width):
    """Build spectrograms and labels for every file under a class-per-folder tree.

    Each immediate subfolder of ``directory`` is treated as one class; the
    subfolder name is used as the label for every file inside it.

    Args:
        directory: Root folder containing one subfolder per class.
        height: Spectrogram height forwarded to ``create_spectrogram``.
        width: Spectrogram width forwarded to ``create_spectrogram``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked spectrograms and the
        matching folder-name labels, in ``os.listdir`` traversal order.
    """
    X = []
    y = []
    for folder in os.listdir(directory):
        subfolder = os.path.join(directory, folder)
        # Stray files in the dataset root (e.g. .DS_Store) are not classes;
        # listing them as directories would raise NotADirectoryError.
        if not os.path.isdir(subfolder):
            continue
        for filename in os.listdir(subfolder):
            filepath = os.path.join(subfolder, filename)
            y.append(folder)
            X.append(create_spectrogram(filepath, height, width))
    return np.array(X), np.array(y)

def create_spectrogram(filepath, height, width):
    """Load an audio file and return its dB-scaled mel spectrogram.

    Args:
        filepath: Path of the audio file to load.
        height: Number of mel bands, and the output height after resizing.
        width: Output width after resizing.

    Returns:
        Array of shape ``(height, width, 1)``: the resized spectrogram with a
        trailing channel axis.
    """
    # Load the audio with librosa, resampled to 22050 Hz
    signal, sr = librosa.load(filepath, sr=22050)

    # Mel spectrogram with `height` mel bands
    mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=height, n_fft=1024, hop_length=512)

    # Convert power to dB, using the spectrogram's own maximum as reference
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Resize BEFORE adding the channel axis: cv2.resize returns a 2-D array
    # for single-channel input, so an axis added earlier would be dropped.
    # cv2 takes the target size as (width, height).
    resized = cv2.resize(mel_db, (width, height))

    # Add the channel dimension expected by the Conv2D input
    return np.expand_dims(resized, axis=-1)


# Build the spectrogram/label arrays for the training set, then the test set
X_train, y_train = load_audio_data(directory=train_dir, height=spec_height, width=spec_width)
X_test, y_test = load_audio_data(directory=test_dir, height=spec_height, width=spec_width)


In [None]:
# Encode the string labels as integers (encoder is fitted on the training labels only)
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# One-hot encode with an explicit width so both matrices always have
# `num_classes` columns, even if a split lacks the highest class index.
y_train_full_onehot = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_onehot = to_categorical(y_test_encoded, num_classes=num_classes)

# Split the training data into training and validation parts.
# The result goes into new names instead of overwriting X_train, so this cell
# can be re-run without shrinking the training set or mismatching lengths.
X_train_split, X_val, y_train_onehot, y_val_onehot = train_test_split(
    X_train, y_train_full_onehot, test_size=0.2, random_state=42
)

# Scale the spectrograms.
# NOTE(review): 255 is an image-style constant, while these inputs are dB
# spectrograms — confirm the scaling is intended. It is kept unchanged here so
# that inputs stay consistent with the prediction cell, which uses the same divisor.
X_train_norm = X_train_split / 255.0
X_test_norm = X_test / 255.0
X_val_norm = X_val / 255.0


In [None]:
# Take the input size from the spectrogram configuration so the model and the
# data cannot drift apart if spec_height / spec_width are changed.
height = spec_height
width = spec_width

# CNN: four Conv2D -> MaxPooling2D -> Dropout stages with increasing dropout,
# followed by a dense head ending in a softmax over `num_classes` categories.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(height, width, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.4),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Compile with Adam at the configured learning rate; labels are one-hot,
# hence categorical cross-entropy.
opt = Adam(learning_rate=learningRate)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

model.summary()


In [None]:
# Compile the model with a fresh Adam optimizer at the configured learning rate.
# NOTE(review): the original carried a bare "#0.0001" comment here — presumably
# an earlier learning rate; confirm.
optimizer = Adam(learning_rate=learningRate)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Stop when val_loss has not improved for 10 epochs, and roll the model back to
# the best epoch's weights; without restore_best_weights the model that gets
# evaluated and saved is the one from the last epoch run.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

# Train on the training split, validating on the held-out validation split
history = model.fit(
    X_train_norm,
    y_train_onehot,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val_norm, y_val_onehot),
    callbacks=[early_stopping],
)


In [None]:
# Score the model on the test set; evaluate() yields the loss followed by the
# compiled metric (accuracy).
test_scores = model.evaluate(
    X_test_norm,
    y_test_onehot,
    batch_size=batch_size,
)
loss, accuracy = test_scores

print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')


In [None]:
import os

# Load the clips to classify
X_predict, y_predict = load_audio_data(predict_dir, spec_height, spec_width)

# Rebuild the list of audio files in the same traversal order that
# load_audio_data uses (subfolder by subfolder). Listing predict_dir alone
# would yield the class subfolders, not the files.
predict_files = []
for folder in os.listdir(predict_dir):
    subfolder = os.path.join(predict_dir, folder)
    if not os.path.isdir(subfolder):
        continue
    for filename in os.listdir(subfolder):
        predict_files.append(os.path.join(folder, filename))

# Guard against the file list and the loaded samples getting out of step
if len(predict_files) != len(X_predict):
    raise RuntimeError(
        f'Found {len(predict_files)} files but loaded {len(X_predict)} spectrograms'
    )

# Apply the same scaling used for the training data
X_predict_norm = X_predict / 255.0

# Run the model
y_predict_onehot = model.predict(X_predict_norm)

# Convert class probabilities to label strings
y_predict_encoded = np.argmax(y_predict_onehot, axis=1)
y_predict_labels = le.inverse_transform(y_predict_encoded)

# Print one "<subfolder>/<file>: <predicted label>" line per clip
for file_name, predicted_label in zip(predict_files, y_predict_labels):
    print(f'{file_name}: {predicted_label}')


In [None]:
# Persist the trained model to disk in HDF5 format
saved_model_path = 'meu_modelo.h5'
model.save(saved_model_path)

In [None]:
import numpy as np
from keras.models import load_model
import librosa

# Reload the trained model from disk
model = load_model('meu_modelo.h5')

# Audio clip to classify
audio_path = 'treinamentov2/Choro/0a983cd2-0078-4698-a048-99ac01eb167a-1433917038889-1.7-f-04-hu.wav'

# Preprocess exactly as the training data was: same mel parameters and resize
# via create_spectrogram, then the same /255.0 scaling. Different preprocessing
# here would feed the model inputs it was never trained on.
spectrogram = create_spectrogram(audio_path, spec_height, spec_width)

# Shape into a batch of one, (1, height, width, 1); reshape works whether the
# helper returns (height, width) or (height, width, 1).
spectrogram = np.reshape(spectrogram, (1, spec_height, spec_width, 1)) / 255.0

# Class labels by output index.
# NOTE(review): this order must match the LabelEncoder fitted during training
# (le.classes_) — confirm against the training folder names.
class_names = ['Barulho', 'Choro', 'Risada', 'Silencio']

# Run the model
preds = model.predict(spectrogram)

# Index of the most probable class
class_idx = np.argmax(preds, axis=1)[0]

# Map the class index to its label
class_label = class_names[class_idx]

print('O áudio é da classe: ', class_label)