In [None]:
# Standard library
import glob
import os

# Numerical / plotting stack
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Audio processing library
import pyAudioAnalysis

# Scikit-learn
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, SelectKBest, VarianceThreshold, chi2
from sklearn.metrics import classification_report, roc_curve, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Collect the feature-file paths and read the label table
path = "pyaudio/data"
# glob returns matches in arbitrary, filesystem-dependent order; sorting fixes the
# dataset row order so the seeded train/validation/test split is reproducible.
filenames_ravdess = sorted(glob.glob(path+"/*.npy"))
# Column names are supplied explicitly, so the first CSV row is read as data.
ravdess_labels = pd.read_csv('pyaudio/labels.csv',delimiter=',',names=['filename','label'])
ravdess_labels.head(5)

In [None]:
# Build the RAVDESS dataset: one row per .npy file with its flattened features and label
# Lookup table filename -> label, built once instead of scanning the whole label
# table for every file. drop_duplicates keeps the first occurrence of a filename.
label_by_filename = ravdess_labels.drop_duplicates('filename').set_index('filename')['label']
rows = []
for filepath in filenames_ravdess:
    data = np.load(filepath).flatten()
    # os.path.basename copes with the platform's path separator (glob yields "\\"
    # on Windows); the lookup key is everything before the first dot of the name.
    filename = os.path.basename(filepath).split(".")[0]
    if filename not in label_by_filename.index:
        raise KeyError("No label found in labels.csv for file '{}'".format(filepath))
    label = label_by_filename.loc[filename]
    rows.append([data,label])
# The rows are accumulated under a separate name so dataset_ravdess is only ever a DataFrame.
dataset_ravdess = pd.DataFrame(rows,columns=['feats','label'])
dataset_ravdess.head(5)

In [None]:
# Split the dataset into training, validation and test sets
X = dataset_ravdess.loc[:, 'feats']
y = dataset_ravdess.loc[:, 'label']
# Step 1: hold out 20% of all samples as the test set.
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Step 2: hold out 20% of the remaining samples as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.2, random_state=42
)

In [None]:
# Definición de experimentos a realizar 
def experiment_1():
    """Grid-search an SVM on standardized features.

    Fits a ``StandardScaler`` -> ``SVC(gamma='auto')`` pipeline on the
    module-level ``X_train`` / ``y_train``, searching the kernel (linear, rbf)
    and C (0.1, 1, 10, 20) with 5-fold cross-validation on all available cores,
    then prints the best parameter combination.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    search_space = {
        'svm__kernel': ('linear', 'rbf'),
        'svm__C': [0.1, 1, 10, 20],
    }
    scaled_svm = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('svm', SVC(gamma='auto')),
    ])
    search = GridSearchCV(scaled_svm, search_space, cv=5, n_jobs=-1)
    # X_train is a Series with one feature vector per row; pass it as a list of rows.
    search.fit(X_train.to_list(), y_train)
    print("Mejores parámetros encontrados: ", search.best_params_)
    return search
    
def experiment_2():
    """Grid-search an SVM on the raw (unscaled) features.

    Same search as ``experiment_1`` without the scaling step: an
    ``SVC(gamma='auto')`` pipeline is fitted on the module-level
    ``X_train`` / ``y_train`` over kernel (linear, rbf) and C (0.1, 1, 10, 20)
    with 5-fold cross-validation, and the best combination is printed.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    search_space = {
        'svm__kernel': ('linear', 'rbf'),
        'svm__C': [0.1, 1, 10, 20],
    }
    svm_only = Pipeline(steps=[('svm', SVC(gamma='auto'))])
    search = GridSearchCV(svm_only, search_space, cv=5, n_jobs=-1)
    search.fit(X_train.to_list(), y_train)
    print("Mejores parámetros encontrados: ", search.best_params_)
    return search

def experiment_3():
    """Fit a 1000-tree random forest on the raw training features.

    Trains on the module-level ``X_train`` / ``y_train``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Seeded with the same value as the dataset splits: an unseeded forest grows
    # different trees, and so reports different validation scores, on every run.
    clf = RandomForestClassifier(n_estimators=1000, random_state=42)
    clf.fit(X_train.to_list(), y_train)
    return clf

def experiment_4():
    """Fit an RBF SVM after variance-based feature selection.

    The pipeline first applies ``VarianceThreshold`` with threshold
    ``.8 * (1 - .8)`` and then ``SVC(gamma='auto', C=10, kernel='rbf')``.
    It is trained on the module-level ``X_train`` / ``y_train``.

    Returns:
        The fitted ``Pipeline``.
    """
    variance_floor = .8 * (1 - .8)
    stages = [
        ('feat_selection', VarianceThreshold(threshold=variance_floor)),
        ('svm', SVC(gamma='auto', C=10, kernel='rbf')),
    ]
    model = Pipeline(stages)
    model.fit(X_train.to_list(), y_train)
    return model

def experiment_5():
    """Fit an RBF SVM on features selected by a random forest.

    ``SelectFromModel`` wraps a 500-tree ``RandomForestClassifier`` and feeds
    the features it keeps to ``SVC(gamma='auto', C=10, kernel='rbf')``.
    The pipeline is trained on the module-level ``X_train`` / ``y_train``.

    Returns:
        The fitted ``Pipeline``.
    """
    # The forest is seeded (same value as the dataset splits) so the set of
    # selected features, and with it the SVM result, is the same on every run.
    selector = SelectFromModel(RandomForestClassifier(n_estimators=500, random_state=42))
    clf = Pipeline([('feat_selection', selector),
                    ('svm', SVC(gamma='auto', C=10, kernel='rbf'))])
    clf.fit(X_train.to_list(), y_train)
    return clf
    
def experiment_6():
    """Grid-search the number of features kept by ``SelectKBest`` before an RBF SVM.

    The pipeline is ``SelectKBest()`` (default scoring function) followed by
    ``SVC(gamma='auto', C=10, kernel='rbf')``; ``k`` is searched over
    100, 500 and 1000 with 5-fold cross-validation on the module-level
    ``X_train`` / ``y_train``. The best ``k`` is printed.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    k_candidates = {'feat_selection__k': [100, 500, 1000]}
    kbest_svm = Pipeline([
        ('feat_selection', SelectKBest()),
        ('svm', SVC(gamma='auto', C=10, kernel='rbf')),
    ])
    search = GridSearchCV(kbest_svm, k_candidates, cv=5, n_jobs=-1)
    search.fit(X_train.to_list(), y_train)
    print("Mejor Cantidad de Features: ", search.best_params_)
    return search

def experiment_7():
    """Grid-search the PCA size after random-forest feature selection, then an RBF SVM.

    Pipeline: ``SelectFromModel`` over a 500-tree random forest -> ``PCA`` ->
    ``SVC(gamma='auto', C=10, kernel='rbf')``. ``n_components`` is searched over
    10, 50, 100 and 500 with 5-fold cross-validation on the module-level
    ``X_train`` / ``y_train``. The best value is printed.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    param_grid = {'pca__n_components': [10, 50, 100, 500]}
    # Both stochastic steps are seeded (same value as the dataset splits): the
    # forest decides which features survive, and PCA may pick a randomized SVD
    # solver, so unseeded runs are not comparable with each other.
    # NOTE(review): a candidate n_components larger than the number of selected
    # features or of samples in a CV fold makes that fit fail -- confirm the grid
    # against the actual data dimensions.
    pipe = Pipeline([
        ('feat_selection',
         SelectFromModel(RandomForestClassifier(n_estimators=500, random_state=42))),
        ('pca', PCA(random_state=42)),
        ('svm', SVC(gamma='auto', C=10, kernel='rbf')),
    ])
    clf = GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1)
    clf.fit(X_train.to_list(), y_train)
    print("Mejor Reducción de Dimensionalidad: ", clf.best_params_)
    return clf

def experiment_8():
    """Grid-search the PCA size in front of an RBF SVM.

    Pipeline: ``PCA`` -> ``SVC(gamma='auto', C=10, kernel='rbf')``.
    ``n_components`` is searched over 10, 50, 100, 500 and 1000 with 5-fold
    cross-validation on the module-level ``X_train`` / ``y_train``. The best
    value is printed.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    param_grid = {'pca__n_components': [10, 50, 100, 500, 1000]}
    # PCA is seeded (same value as the dataset splits) because it may pick a
    # randomized SVD solver, which would make repeated runs give different scores.
    # NOTE(review): n_components cannot exceed min(n_samples, n_features) of a CV
    # training fold; the larger candidates may fail to fit -- confirm against the
    # actual training-set size.
    pipe = Pipeline([
        ('pca', PCA(random_state=42)),
        ('svm', SVC(gamma='auto', C=10, kernel='rbf')),
    ])
    clf = GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1)
    clf.fit(X_train.to_list(), y_train)
    print("Mejor Reducción de Dimensionalidad: ", clf.best_params_)
    return clf

In [None]:
# Run one experiment; `clf` holds the fitted estimator it returns
clf = experiment_6()

In [None]:
# Evaluate the fitted estimator on the validation set
pred = clf.predict(X_val.to_list())
val_report = classification_report(y_val, pred)
val_confusion = confusion_matrix(y_val, pred)
print(val_report)
print(f"MATRIZ DE CONFUSIÓN:\n\n{val_confusion}")