In [1]:
# %pip install librosa
# %pip install scikit-learn

In [3]:
import pathlib
import subprocess
from IPython.display import Audio
import numpy as np
import librosa
import librosa.display as dp
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import seaborn as sns
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import time
import joblib 
import pickle
from sklearn.pipeline import make_pipeline

In [4]:
def cleasing(song):
    """Make sure a song is available as a WAV file and return the path to it.

    Parameters
    ----------
    song : str or pathlib.Path
        Path to the input audio file.

    Returns
    -------
    pathlib.Path
        ``song`` itself when it already has a ``.wav`` suffix (compared
        case-insensitively); otherwise the path ``<stem>.wav`` in the current
        working directory, produced by ffmpeg.

    Raises
    ------
    FileNotFoundError
        If the ``ffmpeg`` executable cannot be found.
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    # TODO: handle the case where the song name already exists in the Bucket.
    song = pathlib.Path(song)
    if song.suffix.lower() == '.wav':
        return song

    # Same output location the original command used: the current working
    # directory, named after the input file's stem.
    wav_path = pathlib.Path(f'{song.stem}.wav')

    # Reuse a previous conversion instead of letting ffmpeg stop at its
    # "file already exists, overwrite?" prompt.
    if not wav_path.exists():
        # check=True surfaces conversion failures instead of silently
        # continuing with a file that was never written.
        subprocess.run(['ffmpeg', '-i', str(song), str(wav_path)], check=True)

    # Return the converted file so downstream steps actually use it.
    return wav_path

In [5]:
def stft(song, n_fft=2048, hop_length=512, win_length=1024):
    """Extract the strongest frequency of every STFT frame of an audio file.

    Reference: https://willdrevo.com/fingerprinting-and-audio-recognition-with-python/

    Parameters
    ----------
    song : str or path-like
        Audio file to load. It is read at its native sampling rate
        (``sr=None``) and down-mixed to mono.
    n_fft : int, optional
        FFT size passed to ``librosa.stft`` and ``librosa.fft_frequencies``.
    hop_length : int, optional
        Hop between frames, passed to ``librosa.stft`` and
        ``librosa.frames_to_time``.
    win_length : int, optional
        Window length passed to ``librosa.stft``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Frequencia'`` (bin centre frequency) and ``'Tempo'``
        (frame time). There is one row per (bin, frame) pair whose magnitude
        equals that frame's maximum, so a frame with tied bins (for example
        an all-zero frame) contributes one row per tied bin.
    """
    y, sr = librosa.load(song, sr=None, mono=True)

    # Magnitude spectrogram; rows are frequency bins, columns are frames.
    magnitudes = np.abs(
        librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    )

    # (bin, frame) index pairs where the magnitude equals the per-frame peak.
    max_indices = np.argwhere(magnitudes == np.max(magnitudes, axis=0))

    # Pass the same hop_length / n_fft used for the transform explicitly,
    # rather than relying on the library defaults happening to match.
    times = librosa.frames_to_time(max_indices[:, 1], sr=sr, hop_length=hop_length)
    frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)[max_indices[:, 0]]

    return pd.DataFrame({'Frequencia': frequencies, 'Tempo': times})

In [6]:
def data_2_training(song_stft, number):
    """Label a song's feature table with its class id.

    Parameters
    ----------
    song_stft : pandas.DataFrame
        Feature rows for one song. It is not modified.
    number : int
        Class id written to every row of the ``'target'`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows plus a ``'target'`` column and a fresh
        0..n-1 index.
    """
    # assign() returns a copy, so the caller's frame does not silently gain a
    # 'target' column.
    return song_stft.assign(target=number).reset_index(drop=True)

In [7]:
def training_model(data, test_size=0.2, random_state=None):
    """Fit KNN classifiers for several k and predict with the most accurate one.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus a ``'target'`` column holding the class ids.
        The frame is not modified.
    test_size : float, optional
        Fraction of rows held out for evaluation (passed to
        ``train_test_split``).
    random_state : int or None, optional
        Seed for the train/test split; ``None`` keeps the split random.

    Returns
    -------
    numpy.ndarray
        Predictions for the held-out rows, made by the classifier whose k
        (searched over 2..14) gave the highest test accuracy. Ties go to the
        smallest k.
    """
    # drop() / column access instead of pop(), so the caller's frame keeps
    # its 'target' column.
    features = data.drop(columns='target')
    labels = data['target'].astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # NOTE(review): dividing by 255 looks like a leftover image-pixel
    # normalisation; it is kept so results do not change. The original code
    # also built a StandardScaler pipeline that was never fitted or used, so
    # standardisation may have been the intent - confirm.
    X_train_n = X_train / 255
    X_test_n = X_test / 255

    pca = PCA(n_components=2)
    X_train_pca = pca.fit_transform(X_train_n)
    X_test_pca = pca.transform(X_test_n)

    best_accuracy = None
    best_prediction = None
    for k in range(2, 15):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train_pca, y_train)

        candidate_prediction = knn.predict(X_test_pca)
        candidate_accuracy = accuracy_score(y_test, candidate_prediction)

        # Strict '>' keeps the first (smallest) k when accuracies tie.
        if best_accuracy is None or candidate_accuracy > best_accuracy:
            best_accuracy = candidate_accuracy
            best_prediction = candidate_prediction

    # Return the best model's predictions, not those of whichever model the
    # loop happened to fit last.
    return best_prediction

In [8]:
def visualization(prediction):
    """Plot how often each class id (0-5) appears in ``prediction``.

    Draws a bar chart with one bar per known class, shows it, then prints the
    number of predictions.

    Parameters
    ----------
    prediction : indexable sequence
        Predicted class ids, read by integer position ``0..len-1``.

    Returns
    -------
    tuple
        ``(plot, frequencies)``: ``plot`` is whatever ``plt.show()`` returned
        and ``frequencies`` is the list of six counts ordered by class id.
    """
    class_ids = (5, 4, 3, 2, 1, 0)
    frequencies = [0] * len(class_ids)

    for position in range(len(prediction)):
        for class_id in class_ids:
            if prediction[position] == class_id:
                frequencies[class_id] += 1
                break  # a value is counted for at most one class

    labels = ['Assovio Matheus', 'Assovio João', 'Assovio Carol', 'Ratos', 'Soulfly', 'Dido']

    plt.bar(labels, frequencies, color='blue')
    plt.xlabel('Valores')
    plt.ylabel('Frequência')
    plt.title('Frequência dos Valores')
    plot = plt.show()
    print(len(prediction))

    return plot, frequencies

In [9]:
# Labelled feature rows of every song passed to etl(..., 'training', ...).
df_musics = pd.DataFrame()
# Predictions returned by the most recent training run.
df_trained = pd.DataFrame()

def etl(song, type, number):
    """Run the pipeline for one song, either to train or to predict.

    Parameters
    ----------
    song : str or pathlib.Path
        Path to the audio file.
    type : str
        ``'training'`` to add the song to the training set and retrain, or
        ``'predict'`` (prediction itself is not implemented yet).
    number : int
        Class id for the song; only used when training.

    Returns
    -------
    None
        Progress is reported with ``print``.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'training'`` nor ``'predict'``.

    Notes
    -----
    Training appends to the module-level ``df_musics`` and replaces
    ``df_trained``, so calling this twice for the same song adds its rows
    twice.
    """
    # Without this, the assignments below would create locals and the
    # module-level accumulators would stay empty.
    global df_musics, df_trained

    if type == 'training':
        song_clean = cleasing(song)
        song_stft = stft(song_clean)
        training_list = data_2_training(song_stft, number)
        if training_list.empty:
            print('Treinamento com dados insuficientes/vazio...')
            return None

        # pd.concat needs a list of frames; skip it while the accumulator is
        # still empty so an empty frame never takes part in the concat.
        if df_musics.empty:
            df_musics = training_list
        else:
            df_musics = pd.concat([df_musics, training_list], ignore_index=True)

        # Pass a copy so the accumulated set keeps its 'target' column
        # whatever the trainer does to its argument.
        predictions = training_model(df_musics.copy())
        # NOTE(review): keeping the held-out predictions here is a guess at
        # what df_trained was meant to hold - confirm.
        df_trained = pd.DataFrame({'prediction': predictions})
        print('Treinamento realizado com sucesso!')
        return None

    if type == 'predict':
        song_clean = cleasing(song)
        song_stft = stft(song_clean)
        # TODO: add the function that validates the prediction.
        print('Add a função para validar a predição')
        return None

    raise ValueError(f"type must be 'training' or 'predict', got {type!r}")


In [None]:
# (song path, class id) pairs, trained one after another in this order.
# NOTE(review): './dido3.mp3' appears twice with different class ids (2, then
# 0) - confirm the last entry is intentional.
training_runs = [
    ('./audio.mp3', 0),
    ('./dido2.mp3', 1),
    ('./dido3.mp3', 2),
    ('./leans.mp3', 3),
    ('./dido3.mp3', 0),
]

for song_path, class_id in training_runs:
    etl(song_path, 'training', class_id)
