<a href="https://colab.research.google.com/github/MoAbner/Classificador-de-Audios/blob/main/Classificador_de_A%C3%BAdio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
!pip install librosa

# Instala a librosa



In [29]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Dataset root: each sub-directory is treated as one class label and the
# .wav files inside it as the examples of that class.
DATASET_PATH = "/content/Dataset/Audios"

# Feature-extraction parameters
# SAMPLE_RATE  - rate (Hz) passed to librosa.load for every file
# MFCC_NUM     - number of MFCC coefficients per frame (rows of the feature matrix)
# MAX_PAD_LEN  - fixed number of frames (columns) every feature matrix is
#                padded or truncated to
SAMPLE_RATE = 22050
MFCC_NUM = 13
MAX_PAD_LEN = 100


# MFCC feature extraction
def extract_features(file_path):
    """Turn one audio file into a fixed-size MFCC matrix.

    The file is loaded with librosa at SAMPLE_RATE, MFCC_NUM coefficients are
    computed per frame, and the time axis is forced to exactly MAX_PAD_LEN
    frames: longer clips are cut off, shorter ones are right-padded with zeros.

    Parameters
    ----------
    file_path : path of the audio file to read.

    Returns
    -------
    numpy.ndarray of shape (MFCC_NUM, MAX_PAD_LEN).
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=MFCC_NUM)

    # Keep at most MAX_PAD_LEN frames (a no-op for clips that are shorter).
    coeffs = coeffs[:, :MAX_PAD_LEN]

    # Right-pad the time axis with zeros when the clip was too short.
    missing_frames = MAX_PAD_LEN - coeffs.shape[1]
    if missing_frames > 0:
        coeffs = np.pad(
            coeffs,
            pad_width=((0, 0), (0, missing_frames)),
            mode='constant',
        )

    return coeffs


# Load the dataset: every sub-directory of DATASET_PATH is one class and every
# .wav file inside it is one example of that class.
samples = []
labels = []

# sorted() makes the sample order independent of the file system's listing
# order, so the seeded train/test split selects the same files on every run.
for label in sorted(os.listdir(DATASET_PATH)):
    folder_path = os.path.join(DATASET_PATH, label)

    # Skip stray files sitting next to the class folders.
    if not os.path.isdir(folder_path):
        continue

    for file in sorted(os.listdir(folder_path)):
        # Case-insensitive match so recordings saved as ".WAV" are not
        # silently dropped.
        if not file.lower().endswith(".wav"):
            continue

        file_path = os.path.join(folder_path, file)
        features = extract_features(file_path)
        samples.append(features)
        labels.append(label)

# Fail early with a readable message instead of an obscure error further down
# (train_test_split / model.fit on an empty array).
if not samples:
    raise ValueError(f"No .wav files found under {DATASET_PATH!r}")

# X: (n_samples, MFCC_NUM, MAX_PAD_LEN) feature tensor, y: string labels.
X = np.array(samples)
y = np.array(labels)

# Map the string labels to integer class ids (the encoder is kept so that
# predictions can be translated back to names later).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20 % of the samples; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Conv2D expects a trailing channel axis: (samples, MFCC_NUM, MAX_PAD_LEN, 1).
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Build the model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling start from the same random state on every run.
tf.keras.utils.set_random_seed(42)

# Size the output layer from the labels actually found in the dataset instead
# of assuming exactly three classes; a mismatch would break the sparse
# cross-entropy loss or leave dead output units.
num_classes = len(label_encoder.classes_)

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(MFCC_NUM, MAX_PAD_LEN, 1)),

    # Two conv/pool stages. With the default 13x100 input the spatial size
    # goes 13x100 -> 11x98 -> 5x49 -> 3x47 -> 1x23.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),

    # Regularisation; flagged as important by the original author.
    tf.keras.layers.Dropout(0.5),

    # One probability per class.
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

# Labels are integer ids (not one-hot), hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, reporting metrics on the held-out split after each one.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=20,
)

# Final score. Note this is the same split used for per-epoch validation
# above, so it is not an independent estimate.
loss, accuracy = model.evaluate(X_test, y_test)
print("Accuracy:", accuracy)


Epoch 1/20
[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 450ms/step - accuracy: 0.3318 - loss: 18.8224 - val_accuracy: 0.2857 - val_loss: 20.7324
Epoch 2/20
[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.4807 - loss: 14.9256 - val_accuracy: 0.4286 - val_loss: 13.0379
Epoch 3/20
[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 189ms/step - accuracy: 0.5759 - loss: 5.7485 - val_accuracy: 0.4286 - val_loss: 7.2977
Epoch 4/20
[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.4583 - loss: 5.1534 - val_accuracy: 0.5000 - val_loss: 3.0412
Epoch 5/20
[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 219ms/step - accuracy: 0.5461 - loss: 2.2676 - val_accuracy: 0.5000 - val_loss: 1.7

In [30]:
def prever_audio(caminho_audio):
    """Classify a single audio file and print a per-class probability report.

    The file goes through the same extract_features() pipeline used for the
    training data, is run through the trained `model`, and the winning class
    index is translated back to its label with `label_encoder`.

    Parameters
    ----------
    caminho_audio : path of the audio file to classify.

    Returns
    -------
    The predicted class label, as produced by label_encoder.inverse_transform.
    """
    # Add the batch axis in front and the channel axis at the end:
    # (MFCC_NUM, MAX_PAD_LEN) -> (1, MFCC_NUM, MAX_PAD_LEN, 1).
    batch = extract_features(caminho_audio)[np.newaxis, ..., np.newaxis]

    prediction = model.predict(batch)
    scores = prediction[0]

    # The batch holds one sample, so argmax over the whole array is the
    # winning class index.
    best_label = label_encoder.inverse_transform([np.argmax(prediction)])[0]

    print("üîé Resultado da an√°lise:")
    print(f"Classe prevista: {best_label}")
    print("\nProbabilidades por classe:")

    for class_id in range(len(scores)):
        class_label = label_encoder.inverse_transform([class_id])[0]
        print(f"{class_label}: {scores[class_id]:.4f}")

    return best_label


In [35]:
# Classify one recording stored outside the training folder (DATASET_PATH).
# The call is the last expression of the cell, so the returned label is also
# shown as the cell output.
prever_audio("/content/Dataset/Testes/Teste.wav")


[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 41ms/step
üîé Resultado da an√°lise:
Classe prevista: Desligar

Probabilidades por classe:
Abrir: 0.0181
Desligar: 0.4922
Ligar: 0.4897


np.str_('Desligar')