In [15]:
import os

import numpy as np
import pandas as pd

import librosa
import librosa.display
import soundfile as sf # librosa fails when reading files on Kaggle.

import matplotlib.pyplot as plt
import IPython.display as ipd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

In [16]:
def load_wav_16k_mono(filename):
  file_contents = tf.io.read_file(filename)
  wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
  wav = tf.squeeze(wav, axis=-1)
  sample_rate = tf.cast(sample_rate, dtype=tf.int64)
  wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
  return wav

In [17]:
PIANO = os.path.join('data', 'pia')
GUITAR = os.path.join('data', 'gac')
VIOLIN = os.path.join('data', 'vio')

In [18]:
# Tensor com caminho dos arquivos
# piano = tf.data.Dataset.list_files(PIANO + '/*.wav')
# guitar = tf.data.Dataset.list_files(GUITAR + '/*.wav')
# violin = tf.data.Dataset.list_files(VIOLIN + '/*.wav')

In [81]:
def mean_mfccs(x):
    return [np.mean(feature) for feature in librosa.feature.mfcc(y=x)]

def parse_audio(x):
    return x.flatten('F')[:x.shape[0]] 

def get_audios_label(path, label):
    # train_path = "../input/train/Train/"
    file_names = os.listdir(path)
    #file_names.sort(key=lambda x: int(x.partition('.')[0]))
    
    samples = []
    for file_name in file_names:
        # TODO: Deveríamos reduzir a qualidade dos áudios para 16K:
        x, sr = sf.read(os.path.join(path, file_name), always_2d=True)
        x = parse_audio(x)
        samples.append(mean_mfccs(x))
        
    return np.array(samples), np.full((len(samples)), label)

In [95]:
pianos, pianos_lab = get_audios_label(PIANO, 0) 
guitars, guitars_lab = get_audios_label(GUITAR, 1)
violins, violins_lab = get_audios_label(VIOLIN, 2)

In [96]:
data = np.concatenate((pianos, guitars, violins))
labels = np.concatenate((pianos_lab, guitars_lab, violins_lab))

shuffler = np.random.permutation(len(data))

X = data[shuffler]
Y = labels[shuffler]

In [97]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.7)

In [98]:
print(f'Shape: {x_train.shape}')
print(f'Observation: \n{x_train[0]}')
print(f'Labels: {y_train[:5]}')

Shape: (1356, 20)
Observation: 
[-241.63508536  138.46386254  -41.10277019   26.19139333   -5.937273
    9.09176476  -13.17019891    7.32286867  -13.49215399   -5.28453328
  -13.10782863   -0.87355979   -3.94783182    6.71187563    1.45589265
    2.13836847   -1.33073469   -1.64434215   -7.56497945   -7.77527432]
Labels: [2 0 0 0 2]


# Treinando modelo

In [108]:
grid_params = {
    'n_neighbors': [3, 5, 7, 9, 11, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

model = GridSearchCV(KNeighborsClassifier(), grid_params, cv=5, n_jobs=-1, scoring='accuracy',)

In [109]:
model.fit(x_train, y_train)

In [111]:
print(f'Model Score: {model.score(x_test, y_test)}')

Model Score: 0.7903780068728522


In [104]:
model.cv_results_

{'mean_fit_time': array([0.00209656, 0.00228958, 0.00182757, 0.0013422 , 0.0015801 ,
        0.00159693, 0.00178347, 0.00125465, 0.00218801, 0.00142412,
        0.00150719, 0.00204816, 0.00131269, 0.00214725, 0.0009686 ,
        0.00097332, 0.00130324, 0.00123634, 0.00103736, 0.00147877,
        0.00127392, 0.00118632, 0.00097933, 0.00106673]),
 'std_fit_time': array([0.00116387, 0.00058129, 0.00056622, 0.00038064, 0.00030762,
        0.00080076, 0.00060278, 0.00010886, 0.00110039, 0.00020395,
        0.00029365, 0.00172629, 0.00044959, 0.00125649, 0.0001116 ,
        0.00015523, 0.00036108, 0.00038557, 0.00020223, 0.00094211,
        0.00049102, 0.00055408, 0.00019701, 0.00018176]),
 'mean_score_time': array([0.05272512, 0.02821617, 0.02619371, 0.006213  , 0.02083135,
        0.00731874, 0.02183514, 0.00747447, 0.02392168, 0.00970402,
        0.02327738, 0.0096137 , 0.01420784, 0.01859841, 0.01633019,
        0.01677504, 0.01818972, 0.01867242, 0.01856198, 0.01660175,
        0.020557