<a href="https://colab.research.google.com/github/ReiAkio/AI_Project_Semester2/blob/main/Maua_AI_Projeto_Semestre2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Filtro dos dados

### Import das Bibliotecas

In [2]:
import librosa
import numpy as np
import os
import re
import wave


from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier


import requests
import json

### Utlização dos dados

In [3]:
!apt-get install git
!git clone https://github.com/ReiAkio/AI_Project_Semester2.git

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.10).
0 upgraded, 0 newly installed, 0 to remove and 19 not upgraded.
Cloning into 'AI_Project_Semester2'...
remote: Enumerating objects: 171, done.[K
remote: Counting objects: 100% (171/171), done.[K
remote: Compressing objects: 100% (166/166), done.[K
remote: Total 171 (delta 1), reused 165 (delta 1), pack-reused 0[K
Receiving objects: 100% (171/171), 27.78 MiB | 14.66 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Updating files: 100% (163/163), done.


### Carregar arquivos de audio do path especificado e aplicar STFT

In [4]:
def process_audio_file(file_path):
    # Carregar o arquivo de áudio
    y, sr = librosa.load(file_path, mono=True)
    # Calcular o STFT
    stft = np.abs(librosa.stft(y))
    return stft

### Extraindo arquivos de audio com STFT

In [5]:
%pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python_speech_features
  Building wheel for python_speech_features (setup.py) ... [?25l[?25hdone
  Created wheel for python_speech_features: filename=python_speech_features-0.6-py3-none-any.whl size=5870 sha256=0b2aeae0c779ec7881d79595f2758a029fb3628079158aa335e8ff8a074b85a6
  Stored in directory: /root/.cache/pip/wheels/5a/9e/68/30bad9462b3926c29e315df16b562216d12bdc215f4d240294
Successfully built python_speech_features
Installing collected packages: python_speech_features
Successfully installed python_speech_features-0.6


In [6]:
# Função para extrair características do STFT (frequência dominante e outras)
def extract_features(stft, y, sr):
    # Calcular frequências para cada bin da FFT
    freqs = librosa.fft_frequencies(sr=22050, n_fft=2048)
    # Calcular a média do STFT em cada frequência
    mean_stft = np.mean(stft, axis=1)
    # Encontrar a frequência dominante
    dominant_freq = freqs[np.argmax(mean_stft)]

    # Calcular MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Calcular o espectrograma de Mel
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

    # Calcular o cromagrama
    chromagram = librosa.feature.chroma_stft(S=stft, sr=sr)

    # Calcular o contraste espectral
    spectral_contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)

    # Calcular o Rolloff espectral
    spectral_rolloff = librosa.feature.spectral_rolloff(S=stft, sr=sr)

    # Calcular o Tom médio
    pitch = librosa.pitch_tuning(y)
    mean_pitch = np.mean(pitch)

    # Calcular o Ponto culminante do Spectrograma
    spec_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

    # Filtrar o sinal harmônico e percussivo
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Calcular o STFT para o sinal harmônico
    stft_harmonic = librosa.stft(y_harmonic)

    # Calcular o STFT para o sinal percussivo
    stft_percussive = librosa.stft(y_percussive)

    # Concatenar todas as características extraídas
    features = np.concatenate((np.mean(mfccs, axis=1),
                               np.mean(mel_spectrogram, axis=1),
                               np.mean(chromagram, axis=1),
                               np.mean(spectral_contrast, axis=1),
                               [dominant_freq],
                               [np.mean(spectral_rolloff)],
                               [mean_pitch],
                               [np.max(spec_centroid)]))

    return features

## Aprendizado Supervisionado

### Rotulando por caminho

In [7]:
def get_label_from_path(file_path):
    # Extracts the label 'speaker' from 'speakerX.wav'
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    # Use regular expression to remove trailing digits
    label = re.sub(r'\d+', '', base_name)
    return label

### Organizando os dados para treinos e testes

In [8]:
def prepare_data(audio_files):
    X = []
    y = []
    for file_path, label in audio_files:

        y_data, sr = librosa.load(file_path)
        stft = process_audio_file(file_path)

        features = extract_features(stft,y_data, sr)


        X.append(features)
        y.append(label)

    X = np.array(X)
    y = np.array(y)

    print(f"Shape of X: {X.shape}, Shape of y: {y.shape}")
    return X, y

In [9]:
def load_data_from_directory(directory_path):
    audio_files = []

    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.wav') or file.endswith('.mp3'):
                file_path = os.path.join(root, file)
                label = get_label_from_path(file_path)
                audio_files.append((file_path, label))

    print(f"Number of audio files: {len(audio_files)}")
    return prepare_data(audio_files)

### Normalização

In [10]:
def normalization(X_train,X_test):
    global scaler
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

### Preparando os dados

In [17]:
train_data_directory = 'AI_Project_Semester2/data/train_audio/AudiosDeTreino'
test_data_directory = 'AI_Project_Semester2/data/test_audio/_audios_teste'
X_train, y_train = load_data_from_directory(train_data_directory)
X_test, y_test = load_data_from_directory(test_data_directory)
normalization(X_train, X_test)


Number of audio files: 73
Shape of X: (73, 164), Shape of y: (73,)
Number of audio files: 25
Shape of X: (25, 164), Shape of y: (25,)


### Aplicando Logistic Regression

In [18]:
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)
logreg_predictions = logreg_model.predict(X_test)
print('Logistic Regression Accuracy: ', accuracy_score(y_test, logreg_predictions))
print('Classification Report for logistic Regression Model:')
print(classification_report(y_test, logreg_predictions))

Logistic Regression Accuracy:  0.4
Classification Report for logistic Regression Model:
              precision    recall  f1-score   support

       Acoba       1.00      1.00      1.00         1
        Akio       0.00      0.00      0.00         0
      Alexia       0.00      0.00      0.00         0
          Ba       0.00      0.00      0.00         1
       Cindy       0.00      0.00      0.00         1
     Eduardo       0.00      0.00      0.00         2
          Ha       1.00      1.00      1.00         2
       Harry       1.00      0.50      0.67         2
       Nakai       1.00      1.00      1.00         2
   Penterist       0.00      0.00      0.00         2
     Raphael       1.00      1.00      1.00         2
       Ruivo       1.00      1.00      1.00         2
        Teko       0.00      0.00      0.00         2
         bia       0.00      0.00      0.00         1
     eduardo       0.00      0.00      0.00         0
    fernanda       0.00      0.00      0.00    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Aplicando SVM

In [19]:
svm_clf = SVC()
svm_clf.fit(X_train, y_train)
svm_predictions = svm_clf.predict(X_test)
print('SVM Accuracy: ', accuracy_score(y_test, svm_predictions))
print('Classification Report for SVM Model:')
print(classification_report(y_test, svm_predictions))

SVM Accuracy:  0.44
Classification Report for SVM Model:
              precision    recall  f1-score   support

       Acoba       0.00      0.00      0.00         1
          Ba       0.50      1.00      0.67         1
       Cindy       0.00      0.00      0.00         1
     Eduardo       0.00      0.00      0.00         2
          Ha       0.00      0.00      0.00         2
       Harry       0.33      0.50      0.40         2
       Nakai       0.00      0.00      0.00         2
   Penterist       0.67      1.00      0.80         2
     Raphael       0.40      1.00      0.57         2
       Ruivo       1.00      1.00      1.00         2
        Teko       0.25      0.50      0.33         2
         bia       0.00      0.00      0.00         1
    fernanda       0.00      0.00      0.00         1
irmamichelli       0.00      0.00      0.00         1
    michelli       0.50      1.00      0.67         1
      nicole       0.00      0.00      0.00         1
      sergio       0.33 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Aplicando KNN

In [20]:
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
knn_predictions = knn_clf.predict(X_test)
print('KNN Accuracy: ', accuracy_score(y_test, knn_predictions))
print('Classification Report for KNN Model:')
print(classification_report(y_test, knn_predictions))

KNN Accuracy:  0.4
Classification Report for KNN Model:
              precision    recall  f1-score   support

       Acoba       0.00      0.00      0.00         1
        Akio       0.00      0.00      0.00         0
      Alexia       0.00      0.00      0.00         0
          Ba       0.50      1.00      0.67         1
       Cindy       0.00      0.00      0.00         1
     Eduardo       0.40      1.00      0.57         2
          Ha       0.00      0.00      0.00         2
       Harry       0.33      0.50      0.40         2
       Nakai       0.00      0.00      0.00         2
   Penterist       1.00      1.00      1.00         2
     Raphael       0.00      0.00      0.00         2
       Ruivo       1.00      1.00      1.00         2
        Teko       0.50      0.50      0.50         2
         bia       0.00      0.00      0.00         1
    fernanda       0.00      0.00      0.00         1
irmamichelli       0.00      0.00      0.00         1
    michelli       1.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Aplicando Neural Network

In [22]:
nn_clf = MLPClassifier()
nn_clf.fit(X_train, y_train)
nn_predictions = nn_clf.predict(X_test)
print('Neural Network Accuracy: ', accuracy_score(y_test, nn_predictions))
print('Classification Report for Neural Network Model:')
print(classification_report(y_test, nn_predictions))

Neural Network Accuracy:  0.0
Classification Report for Neural Network Model:
              precision    recall  f1-score   support

       Acoba       0.00      0.00      0.00       1.0
          Ba       0.00      0.00      0.00       1.0
       Cindy       0.00      0.00      0.00       1.0
     Eduardo       0.00      0.00      0.00       2.0
          Ha       0.00      0.00      0.00       2.0
       Harry       0.00      0.00      0.00       2.0
       Nakai       0.00      0.00      0.00       2.0
   Penterist       0.00      0.00      0.00       2.0
     Raphael       0.00      0.00      0.00       2.0
       Ruivo       0.00      0.00      0.00       2.0
        Teko       0.00      0.00      0.00       2.0
         bia       0.00      0.00      0.00       1.0
    fernanda       0.00      0.00      0.00       1.0
irmamichelli       0.00      0.00      0.00       1.0
    michelli       0.00      0.00      0.00       1.0
      nicole       0.00      0.00      0.00       1.0
   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
