# Trabalho 02 - Extração de Atributos e k-NN com k-fold   


## Setup inicial

### Importando bibliotecas:

In [15]:
import os
from collections import Counter
from typing import Iterator, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as io
import scipy.stats as stats
from IPython.display import Audio

### Criando a base de dados
Vou criar uma base de dados original, limpa e pura, contendo apenas os nomes dos arquivos de áudio e a classe deles.

In [16]:
dataFolder = "./Acordes"
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# `chords` reproducible across machines and file systems.
files = sorted(os.listdir(dataFolder))

columns = ["file", "class"]

# Collect every row first and build the DataFrame in a single call. Only the
# file names are needed for this table, so the audio itself is not decoded here.
records = []
for file in files:
    # The class label is the file-name prefix before the first "_", lower-cased
    # (e.g. "Major_0.wav" -> "major").
    className = file.split("_")[0].lower()
    records.append([file, className])

chords = pd.DataFrame(records, columns=columns)

chords.shape

(100, 2)

- Visualizando o início do banco de dados:

In [17]:
# Preview the first ten rows of the file/class table.
chords.head(10)

Unnamed: 0,file,class
0,Major_0.wav,major
1,Major_1.wav,major
2,Major_10.wav,major
3,Major_11.wav,major
4,Major_12.wav,major
5,Major_13.wav,major
6,Major_14.wav,major
7,Major_15.wav,major
8,Major_16.wav,major
9,Major_17.wav,major


- Função para facilitar a leitura do arquivo de áudio:

In [18]:
# Helper to read one audio file from the dataset folder.
def readAudio(audioFile: str) -> Tuple[int, np.ndarray]:
    """Read a WAV file stored inside ``dataFolder``.

    Args:
        audioFile: File name relative to ``dataFolder`` (e.g. "Major_0.wav").

    Returns:
        The ``(sample_rate, samples)`` pair produced by ``scipy.io.wavfile.read``.
    """
    # Build the path from the argument. Reading the notebook-level `file`
    # variable instead would ignore the parameter and always open whatever
    # file the indexing loop happened to leave in that variable.
    return io.wavfile.read(f"{dataFolder}/{audioFile}")

### Importando os dataframes

Agora vou importar os dataframes que gerei no notebook "parameters_comparision"

#### Chords_attributes

In [19]:
# Load the per-file attribute table from its CSV export.
chords_attributes = pd.read_csv('./parameters_dataframes/chords_attributes.csv')

# Preview the first rows to check the columns that were loaded.
chords_attributes.head()

Unnamed: 0,file,mean,std,skewness,kurtosis,class,FFT_skewness,FFT_kurtosis,interval_1,interval_2,interval_3,interval_4,interval_5,interval_6,interval_7,interval_8,Interval[1-3],Interval[1-4]
0,Major_0.wav,-0.978784,0.841934,-0.648675,-0.611911,major,-0.776569,-0.704986,-0.566282,-0.512331,1.945798,1.304765,0.165638,1.493784,-0.963349,-0.236693,-0.913464,-0.010816
1,Major_1.wav,1.099954,0.015608,0.954677,-0.120714,major,0.800353,0.619204,-0.588024,-0.492308,1.883701,1.380998,0.145091,1.489714,-0.991063,-0.179521,-0.920559,-0.045621
2,Major_10.wav,-0.979883,0.813109,-0.981712,-0.935719,major,-0.173633,-0.420248,0.191236,0.088398,-0.633153,1.266145,-0.529336,-1.293051,0.370359,0.600499,0.305355,-0.022188
3,Major_11.wav,0.737474,-0.695597,1.368172,0.111714,major,-0.141709,-0.183398,-0.164321,-1.500025,0.991384,-0.58155,1.013087,-1.758674,-0.641737,-1.034352,-1.265897,-0.788659
4,Major_12.wav,-0.978954,0.871567,-0.805537,-0.978717,major,-0.507814,-0.711351,0.199858,-1.002333,1.022064,1.275756,-0.498283,-1.335796,-0.967921,1.543078,-0.553036,-0.043427


#### Chords_MFCCs

In [20]:
# Load the per-file MFCC table from its CSV export.
chords_MFCCs= pd.read_csv('./parameters_dataframes/chords_MFCCs.csv')

# Preview the first rows to check the columns that were loaded.
chords_MFCCs.head()

Unnamed: 0,file,class,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,...,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,freq1,freq2,freq3,interval1,interval2
0,Major_0.wav,major,0.877174,1.026916,-0.556108,-0.66597,0.666087,0.012591,-0.983117,-1.165128,...,0.681732,-0.356065,-1.299441,-0.981533,0.480302,1.514925,0.702296,0.816067,-1.198063,0.494488
1,Major_1.wav,major,-0.804121,-0.883676,1.255906,1.283044,-0.155795,-0.165989,0.862655,1.190435,...,0.54697,-0.008841,-0.696605,-0.857771,-0.051511,-0.946726,-0.727729,-1.231282,0.522439,-1.112879
2,Major_10.wav,major,0.446118,0.876868,0.642371,-0.262782,0.120034,0.490266,-0.478995,0.009408,...,0.356846,-0.839851,0.553774,2.1553,0.332027,0.798311,0.465696,0.374065,-0.556437,0.067761
3,Major_11.wav,major,-1.140415,-0.767082,0.963276,1.070627,-0.292478,-0.328488,1.012192,1.327301,...,-0.003783,-0.398096,0.210546,0.73061,0.673509,-0.946726,-1.041753,-1.234431,0.276442,-0.770738
4,Major_12.wav,major,1.001941,0.778585,-1.048394,-1.738154,-2.445553,-1.957739,-0.362844,-0.162074,...,-0.440171,0.998102,0.387419,1.818919,1.46496,0.798311,0.02644,0.374065,-0.900538,0.553207


#### Dataframe de features

In [21]:
# Join the two feature tables on the shared file name and class label.
# The default inner join keeps only the files present in both tables.
merge_keys = ['file', 'class']
features_df = chords_attributes.merge(chords_MFCCs, on=merge_keys)

features_df.head()

Unnamed: 0,file,mean,std,skewness,kurtosis,class,FFT_skewness,FFT_kurtosis,interval_1,interval_2,...,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,freq1,freq2,freq3,interval1,interval2
0,Major_0.wav,-0.978784,0.841934,-0.648675,-0.611911,major,-0.776569,-0.704986,-0.566282,-0.512331,...,0.681732,-0.356065,-1.299441,-0.981533,0.480302,1.514925,0.702296,0.816067,-1.198063,0.494488
1,Major_1.wav,1.099954,0.015608,0.954677,-0.120714,major,0.800353,0.619204,-0.588024,-0.492308,...,0.54697,-0.008841,-0.696605,-0.857771,-0.051511,-0.946726,-0.727729,-1.231282,0.522439,-1.112879
2,Major_10.wav,-0.979883,0.813109,-0.981712,-0.935719,major,-0.173633,-0.420248,0.191236,0.088398,...,0.356846,-0.839851,0.553774,2.1553,0.332027,0.798311,0.465696,0.374065,-0.556437,0.067761
3,Major_11.wav,0.737474,-0.695597,1.368172,0.111714,major,-0.141709,-0.183398,-0.164321,-1.500025,...,-0.003783,-0.398096,0.210546,0.73061,0.673509,-0.946726,-1.041753,-1.234431,0.276442,-0.770738
4,Major_12.wav,-0.978954,0.871567,-0.805537,-0.978717,major,-0.507814,-0.711351,0.199858,-1.002333,...,-0.440171,0.998102,0.387419,1.818919,1.46496,0.798311,0.02644,0.374065,-0.900538,0.553207


## Lista de principais atributos e combinações entre eles:

Principais atributos observados:


- Skewness da FFT e curtose da FFT


- Intervalos entre os picos 
    > Intervalo 1,3,4,6, (1,3) e (1,4)

- MFCCs
    > MFCC (8-12), (10-11), (10-12) e (11-12)

- Chromas



## Extraindo atributos

In [22]:
# Column groups of interest, one list per attribute family.

# Skewness and kurtosis of the FFT.
fft_features = ['FFT_skewness', 'FFT_kurtosis']

# Single intervals 1, 3, 4 and 6, followed by the combined 1-3 and 1-4 columns.
interval_features = [f'interval_{n}' for n in (1, 3, 4, 6)] + ['Interval[1-3]', 'Interval[1-4]']

# MFCC coefficients 8 through 12 (inclusive).
mfcc_features = [f'mfcc_{n}' for n in range(8, 13)]

# All twelve chroma columns, chroma_0 through chroma_11.
chroma_features = [f'chroma_{n}' for n in range(12)]

## Criando o modelo do K-NN

In [23]:
def euclideanDistance(a: np.array, b: np.array) -> float:
    """Return the Euclidean (L2) distance between two equally-shaped vectors."""
    return np.sqrt(np.sum((a - b) ** 2))


class KNN():
    """k-nearest-neighbours classifier using Euclidean distance and a majority vote.

    Vote ties are resolved in favour of the label seen first while walking the
    neighbours from nearest to farthest, because ``Counter.most_common`` keeps
    first-encountered order among equal counts. One consequence: with ``k=2``
    the prediction is always the label of the single nearest neighbour.
    """

    def __init__(self, k: int=3, dataset: pd.DataFrame=None):
        """Configure the classifier.

        Args:
            k: Number of neighbours that take part in the vote; must be >= 1.
            dataset: Optional DataFrame. A copy is stored in ``self.data``; it
                is not used by ``fit`` or ``predict``. Defaults to ``None`` so
                that defining the class does not depend on any notebook-level
                variable.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")

        self.k = k
        self.data = dataset.copy() if dataset is not None else None
        # Filled in by fit(); None means "not fitted yet".
        self.X_train = None
        self.y_train = None

    def fit(self, features, labels):
        """Store the training samples and their labels.

        Args:
            features: Array-like with one row of feature values per sample.
            labels: Array-like with one label per row of ``features``.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        X_train = np.array(features)
        y_train = np.array(labels)

        if len(X_train) != len(y_train):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(X_train)} and {len(y_train)}"
            )
        if len(X_train) == 0:
            raise ValueError("cannot fit on an empty training set")

        self.X_train = X_train
        self.y_train = y_train

    def predict(self, features: np.array) -> list:
        """Predict the class of every sample in ``features``.

        Args:
            features: Array-like with one row of feature values per sample.

        Returns:
            A list with one predicted label per input row.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None:
            raise RuntimeError("KNN.predict called before fit")

        X = np.array(features)
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples closest to ``x``."""
        distances = [euclideanDistance(x, x_train) for x_train in self.X_train]
        # Indices of the k smallest distances, nearest first. If k exceeds the
        # number of training samples, every sample votes.
        k_indices = np.argsort(distances)[:self.k]
        k_labels = [self.y_train[i] for i in k_indices]
        most_common = Counter(k_labels).most_common(1)
        return most_common[0][0]

## Criando o modelo do K-Fold


In [24]:
class KFold:
    """K-fold cross-validation splitter: yields train/test index arrays per fold."""

    def __init__(self, n_splits: int = 10, shuffle: bool = True, random_seed: int = None):
        """Configure the splitter.

        Args:
            n_splits: Number of folds; must be at least 2 so that every fold
                leaves some samples for training.
            shuffle: Whether to permute the sample indices before splitting.
            random_seed: Seed passed to ``np.random.default_rng`` when
                shuffling; ``None`` gives a different permutation on each call.

        Raises:
            ValueError: If ``n_splits`` is smaller than 2.
        """
        if n_splits < 2:
            raise ValueError(f"n_splits must be at least 2, got {n_splits}")

        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_seed = random_seed

    def split(self, features: np.array, labels: np.array = None) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Generate the train/test indices of each fold.

        Args:
            features: Array-like of samples; only its length is used.
            labels: Accepted for call-site symmetry with ``features``; unused.

        Yields:
            ``(train_idx, test_idx)`` pairs of index arrays, one pair per fold.
            Every sample index appears in exactly one test fold.

        Raises:
            ValueError: If there are fewer samples than folds (raised when the
                first fold is requested, since this is a generator).
        """
        n_samples = len(features)
        if self.n_splits > n_samples:
            raise ValueError(
                f"cannot split {n_samples} samples into {self.n_splits} folds"
            )

        indices = np.arange(n_samples)

        if self.shuffle:
            rng = np.random.default_rng(self.random_seed)
            indices = rng.permutation(indices)

        # Every fold gets n_samples // n_splits items; the first
        # n_samples % n_splits folds take one extra to absorb the remainder.
        fold_sizes = np.full(self.n_splits, n_samples // self.n_splits, dtype=int)
        fold_sizes[:n_samples % self.n_splits] += 1

        current = 0
        for fold_size in fold_sizes:
            start, stop = current, current + fold_size

            # The current slice is the test fold; everything else is training.
            test_idx = indices[start:stop]
            train_idx = np.concatenate([indices[:start], indices[stop:]])

            current = stop
            yield train_idx, test_idx

## Função para facilitar o uso do KNN com qualquer grupo de atributos

In [31]:
def _cross_val_accuracies(X, y, k, n_splits=10, random_seed=42):
    """Return the list of per-fold accuracies of a k-NN under shuffled k-fold CV."""
    kf = KFold(n_splits=n_splits, shuffle=True, random_seed=random_seed)
    knn = KNN(k=k)

    accuracies = []
    for train_idx, test_idx in kf.split(X, y):
        knn.fit(X[train_idx], y[train_idx])
        y_pred = knn.predict(X[test_idx])

        # Fraction of test samples whose predicted label matches the true one.
        accuracies.append(np.mean(np.array(y_pred) == y[test_idx]))

    return accuracies


def evaluate_features(features_df, feature_list, k=3, n_splits=10, random_seed=42):
    """Cross-validate a k-NN on the given feature columns and print the results.

    Args:
        features_df: DataFrame holding the feature columns and a 'class' column.
        feature_list: Names of the columns used as features.
        k: Number of neighbours for the k-NN.
        n_splits: Number of cross-validation folds.
        random_seed: Seed for the fold shuffling, so runs are repeatable.

    Returns:
        None. The per-fold and mean accuracies are printed.
    """
    X = features_df[feature_list].values
    y = features_df['class'].values

    accuracies = _cross_val_accuracies(X, y, k, n_splits, random_seed)

    print(f'✅ Usando: {feature_list}')
    print(f'📈 Acurácias por fold: {np.round(accuracies, 3)}')
    print(f'🎯 Acurácia média: {np.mean(accuracies):.4f}\n')

def evaluate_features_k_range(features_df, feature_list, k_range=range(2, 8), n_splits=10, random_seed=42):
    """Run the same cross-validation for every k in ``k_range`` and print the results.

    The same seed is used for every k, so all values of k are scored on
    identical folds.

    Args:
        features_df: DataFrame holding the feature columns and a 'class' column.
        feature_list: Names of the columns used as features.
        k_range: Iterable of neighbour counts to evaluate.
        n_splits: Number of cross-validation folds.
        random_seed: Seed for the fold shuffling, so runs are repeatable.

    Returns:
        None. The per-fold and mean accuracies are printed for each k.
    """
    X = features_df[feature_list].values
    y = features_df['class'].values

    print(f'✅ Usando: {feature_list}\n')

    for k in k_range:
        accuracies = _cross_val_accuracies(X, y, k, n_splits, random_seed)

        print(f'🔢 k = {k}')
        print(f'📈 Acurácias por fold: {np.round(accuracies, 3)}')
        print(f'🎯 Acurácia média: {np.mean(accuracies):.4f}\n')


## Testando com diferentes atributos:

In [None]:
# Sweep k for each feature group in turn: FFT, intervals, MFCCs, chroma.
for feature_group in (fft_features, interval_features, mfcc_features, chroma_features):
    evaluate_features_k_range(features_df, feature_group)

✅ Usando: ['FFT_skewness', 'FFT_kurtosis']

🔢 k = 2
📈 Acurácias por fold: [0.5 0.6 0.5 0.6 0.5 0.7 0.1 0.5 0.4 0.4]
🎯 Acurácia média: 0.4800

🔢 k = 3
📈 Acurácias por fold: [0.2 0.5 0.5 0.4 0.5 0.6 0.3 0.4 0.3 0.3]
🎯 Acurácia média: 0.4000

🔢 k = 4
📈 Acurácias por fold: [0.5 0.6 0.5 0.4 0.4 0.6 0.1 0.4 0.3 0.3]
🎯 Acurácia média: 0.4100

🔢 k = 5
📈 Acurácias por fold: [0.4 0.4 0.4 0.3 0.5 0.5 0.2 0.4 0.5 0.4]
🎯 Acurácia média: 0.4000

🔢 k = 6
📈 Acurácias por fold: [0.5 0.4 0.3 0.3 0.5 0.6 0.2 0.3 0.3 0.3]
🎯 Acurácia média: 0.3700

🔢 k = 7
📈 Acurácias por fold: [0.4 0.  0.4 0.4 0.7 0.4 0.3 0.3 0.3 0.6]
🎯 Acurácia média: 0.3800

✅ Usando: ['interval_1', 'interval_3', 'interval_4', 'interval_6', 'Interval[1-3]', 'Interval[1-4]']

🔢 k = 2
📈 Acurácias por fold: [0.9 0.7 0.9 1.  0.9 0.9 0.9 0.8 0.9 0.9]
🎯 Acurácia média: 0.8800

🔢 k = 3
📈 Acurácias por fold: [0.8 0.9 1.  0.9 0.8 0.8 0.8 0.6 0.7 0.7]
🎯 Acurácia média: 0.8000

🔢 k = 4
📈 Acurácias por fold: [0.8 0.8 1.  1.  0.8 0.8 0.8 0.6 0.8 0.9

📢 Recomendo que abra o resto do output acima para verificar todos os casos.

🔍 Observações gerais:

- Um valor de K muito alto faz o K-NN ter resultados piores nessa base de dados (provavelmente porque a base de dados é pequena).
- FFT_skewness e FFT_kurtosis foram parâmetros que se mostraram horríveis para reconhecer os acordes.
- Os parâmetros de chroma também se mostraram pouco eficientes para a base de dados.
- Os MFCCs que eu escolhi também não desempenharam bem e isso me surpreendeu, porque nos gráficos eles pareciam bons atributos. Talvez eu devesse testar usando uma combinação menor de MFCCs.
- Os intervalos, como é de se esperar, foram os MELHORES atributos possíveis para classificar os acordes. Isso é óbvio, já que pela própria definição musical os intervalos determinam os acordes.

O melhor resultado que consegui foi usando os intervalos dos acordes como atributos de classificação e usando k=2 como hiperparâmetro do K-NN. De qualquer forma, esse resultado também é suscetível à sorte, porque a base de dados é pequena. Também posso ter causado um overfitting por ter usado 6 parâmetros. Ainda assim, se eu tivesse mais tempo eu tentaria polir um pouco melhor essa classificação, possivelmente ficando um pouco acima de 90%.

In [32]:
# Evaluate the interval features on their own with k=2.
evaluate_features(features_df, interval_features, k=2)

✅ Usando: ['interval_1', 'interval_3', 'interval_4', 'interval_6', 'Interval[1-3]', 'Interval[1-4]']
📈 Acurácias por fold: [0.9 0.7 0.9 1.  0.9 0.9 0.9 0.8 0.9 0.9]
🎯 Acurácia média: 0.8800

