Environment: voice

# Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import time
import random
import matplotlib.pyplot as plt
from collections import Counter
from csv import DictWriter

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold # GroupShuffleSplit, RepeatedStratifiedKFold, PredefinedSplit, StratifiedKFold, LeavePGroupsOut
from sklearn.model_selection import ParameterGrid, GridSearchCV

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.linear_model import LogisticRegression

from imblearn.pipeline import Pipeline # Se usa este y no sklearn.pipeline.Pipeline para poder añadir over_sampling
from imblearn.over_sampling import RandomOverSampler, SMOTE # ADASYN, SMOTENC
# from sklearn.feature_selection import SequentialFeatureSelector as SFS

from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix, cohen_kappa_score, f1_score, balanced_accuracy_score
from sklearn.metrics import roc_auc_score, recall_score, precision_score, recall_score, roc_curve

# Functions

In [2]:
# Base estimators, keyed by the short name that is also used as the key of
# dict_parameters and as the pipeline step name.
dict_classifiers = {
        "SVC": SVC(C=1, kernel= 'rbf', gamma = 'scale', probability=True),
        "KNN": KNN(n_neighbors=5, weights='uniform', metric='minkowski'),
}
# Grid-search spaces per classifier. Parameter names carry the '<step>__' prefix
# required when the classifier is a pipeline step; transform_dict() strips the
# prefix when the bare estimator is searched instead.
dict_parameters = {
        "SVC": {'SVC__kernel': ['rbf','sigmoid'],
                'SVC__gamma': [0.001, 0.01, 0.1, 1, 'auto','scale'],
                'SVC__C': [1, 10, 100, 1000],
                # probability=True is needed so the fitted SVC exposes predict_proba
                'SVC__probability': [True]},
        "KNN": {'KNN__n_neighbors': [1,3,5,7],
                'KNN__weights': ['uniform','distance'],
                'KNN__metric': ['euclidean','manhattan','minkowski']},
        }

# Optional over-samplers (currently disabled). SMOTE can be used when all variables are numeric.
# dict_resample = {'SMOTE': SMOTE(random_state=11, k_neighbors=4),
#                  'RandomOverSampler': RandomOverSampler(sampling_strategy='not majority')}

# Scorers evaluated on every CV fold; the keys become the 'mean_test_<key>' /
# 'std_test_<key>' entries of GridSearchCV.cv_results_.
scoring = {'WAcc': 'accuracy',
           'UAcc': make_scorer(balanced_accuracy_score),
           'kappa': make_scorer(cohen_kappa_score), 
           # NOTE(review): `needs_proba` is deprecated in recent scikit-learn releases in favour of
           # response_method='predict_proba' -- confirm against the installed version before upgrading.
           'auc':make_scorer(roc_auc_score, multi_class = 'ovr', needs_proba=True),
           'f1':make_scorer(f1_score, average = 'weighted'), # per-label metric, averaged with weights = support (number of true instances of each label)
           'precision':make_scorer(precision_score, average = 'weighted', zero_division = 0),
           'recall':make_scorer(recall_score, average = 'weighted'), # Sensitivity = recall
#            'specificity': make_scorer(recall_score, pos_label=0), 
          }

def transform_dict(dict_params):
    """Return a copy of `dict_params` with the leading ``<step>__`` prefix removed from each key.

    Pipeline-style parameter names such as ``'SVC__C'`` become ``'C'`` so they can be
    passed straight to the bare estimator.

    Only the first prefix is stripped: ``'pipe__SVC__C'`` becomes ``'SVC__C'``, so nested
    parameter names keep their remaining path instead of being truncated. Keys that
    contain no ``'__'`` are kept unchanged rather than raising IndexError.

    Parameters
    ----------
    dict_params : dict
        Mapping from (possibly prefixed) parameter name to value.

    Returns
    -------
    dict
        New dictionary with the same values and the renamed keys.
    """
    new_dict = dict()
    for key, value in dict_params.items():
        _, separator, remainder = key.partition('__')
        # `separator` is empty when the key has no '__' at all -> keep the key as it is
        new_dict[remainder if separator else key] = value
    return new_dict

def binarizar(data):
    """Collapse ratings from the [-2,-1,0,1,2] scale to the [-1,0,1] scale.

    -2 and -1 map to -1, 0 stays 0, and 1 and 2 map to 1.

    Any other value (including NaN) is reported with a printed message and stored as
    NaN. The output therefore always has the same length as the input, and an
    unexpected value can never inherit the label of the previous element.

    Parameters
    ----------
    data : iterable
        Ratings on the five-point scale.

    Returns
    -------
    list
        One collapsed label per input element (NaN for unrecognised values).
    """
    scale_map = {-2: -1, -1: -1, 0: 0, 1: 1, 2: 1}
    new_data = list()
    for n in data:
        if n in scale_map:
            label = scale_map[n]
        else:
            print('Problemas en',n)
            # Explicit missing marker instead of reusing a stale (or undefined) label
            label = float('nan')
        new_data.append(label)
    return new_data

def load_dataframe(dataset):
    """Build the per-file table holding partition info, labels and eGeMAPS features.

    Reads, relative to the working directory:
      * ``data/<dataset>/features/eGeMAPS_functionals.xlsx`` (its 'id' column is renamed to 'file'),
      * the tab-separated ``split_training.csv``, ``split_validation.csv`` and
        ``split_testing.csv`` under ``data/<dataset>/data_partitions/TrainValTest/``.

    Each partition row is tagged with a 'split' column ('train', 'val' or 'test').
    For ``dataset == 'VOSOME'`` the psychologists' labels and ``labels_extra_v2.xlsx``
    are merged in as well.

    Parameters
    ----------
    dataset : str
        Name of the dataset folder under ``data/``.

    Returns
    -------
    pandas.DataFrame
        Partitions joined with the features on 'file' (for VOSOME, left-joined onto
        the extra-labels table).
    """
    base_dir = 'data/'+dataset
    split_dir = base_dir+'/data_partitions/TrainValTest/'

    def _read_split(filename, tag):
        # One partition file, tagged with the split it belongs to
        frame = pd.read_csv(split_dir+filename, sep="\t")
        frame['split'] = tag
        return frame

    # Features
    df_features = pd.read_excel(base_dir+'/features/eGeMAPS_functionals.xlsx')
    df_features = df_features.rename(columns={"id": "file"})

    # Partitions, stacked in train / val / test order with a fresh 0..n-1 index
    test_partition = _read_split('split_testing.csv', 'test')
    validation_partition = _read_split('split_validation.csv', 'val')
    training_partition = _read_split('split_training.csv', 'train')
    partitions = pd.concat([training_partition, validation_partition, test_partition])
    partitions = partitions.reset_index(drop=True)

    if dataset != 'VOSOME':
        return pd.merge(partitions, df_features, on="file")

    # Include files with a negative id (those selected to be evaluated first)
    partitions['file'] = np.abs(partitions.file.values)

    # The labels shipped with the partitions become the '*_3r' columns
    partitions = partitions.rename(columns={'valence':'valence_3r','arousal':'arousal_3r'})

    # Labels from the psychologists; the file id is the filename minus its last 4 characters
    df_gold = pd.read_excel('data/VOSOME/labels/labels_Psicologas.xlsx')
    df_gold['file'] = df_gold['Filename'].str[0:-4].astype('int')
    df_gold['arousal_binarize'] = binarizar(df_gold['Arousal'].values)
    df_gold['valencia_binarize'] = binarizar(df_gold['Valencia'].values)

    gold_columns = ['file','valencia_binarize' ,'arousal_binarize','Emotion']
    merged_df = partitions.merge(df_gold[gold_columns], on='file', how='left')
    column_pairs = (('valence_psy', 'valencia_binarize'),
                    ('arousal_psy', 'arousal_binarize'),
                    ('emotion', 'Emotion'))
    for target, source in column_pairs:
        partitions[target] = merged_df[source]

    # Extra column with only 4 emotions: the listed categories are replaced by 10
    categories_out = ['Tristeza', 'Temor','Asco']
    partitions['emotion4'] = partitions['emotion'].apply(
        lambda emo: 10 if emo in categories_out else emo)

    # Attach the features
    df = pd.merge(partitions, df_features, on="file")

    # Extra labels (full agreement between evaluators, raters + evaluators combination)
    df_extra = pd.read_excel('data/'+dataset+'/labels/labels_extra_v2.xlsx')
    return df_extra.merge(df, on='file', how='left')

def separate_Xy(df, label):
    """Split a data table into features, integer targets and subject groups.

    The input frame is not modified: categorical labels are mapped on the fly, so the
    function can be called repeatedly on the same frame with the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing `label`, a 'sub_id' column and the feature columns, which are
        taken to be every column from 'F0semitoneFrom27.5Hz_sma3nz_amean' to the last one.
        Rows to discard (label == 10) must already have been removed by the caller.
    label : str
        Name of the target column.

    Returns
    -------
    X : pandas.DataFrame
        Copy of the feature columns.
    y : numpy.ndarray
        Targets: valence/arousal labels shifted by +1 ([-1,0,1] -> [0,1,2]); emotion
        names mapped to integer codes (names missing from the mapping become NaN); any
        other label column is returned unchanged.
    groups : numpy.ndarray
        Values of the 'sub_id' column.
    """
    X = df.loc[:,'F0semitoneFrom27.5Hz_sma3nz_amean':].copy()

    dimensional_labels = ['valence_3r', 'valencia_3rAcuerdo', 'valence_psy', 'valencia_combined', 'valencia_combined_v2',
                          'arousal_3r', 'arousal_3rAcuerdo', 'arousal_psy', 'arousal_combined',  'arousal_combined_v2',
                          'valence','arousal']
    if label in dimensional_labels:
        y = df[label].values + 1 # shift the labels so that [-1,0,1] => [0,1,2]
    elif label in ['emotion', 'emotion4']:
        emotion_mapping = {'Alegría': 0,'Ira': 1,'Neutral': 2,'Sorpresa': 3,'Asco': 4,'Temor': 5,'Tristeza': 6}
        # Map without writing back into `df`, so the caller's frame keeps its original labels
        y = df[label].map(emotion_mapping).values
    elif label == 'categories':
        emotion_mapping = {'hap': 0,'ang': 1,'neu': 2,'sad': 3}
        y = df[label].map(emotion_mapping).values
    else: # emotion_code
        y = df[label].values
    groups = df['sub_id'].values
    return X, y, groups

def pipeline_classic_models(database, labels, path_main = 'results/', save = False):
    """Grid-search every classifier in `dict_classifiers` per label and score it on the test split.

    For each entry of `labels` and each feature fraction in [0.25, 0.5, 0.75] the function:
      1. takes the table returned by `load_dataframe(database)` and drops rows whose label is 10,
      2. separates the 'test' split from the rest (development = train + val),
      3. standardises the features with statistics of the development set,
      4. drops features whose absolute correlation with an earlier feature exceeds 0.95,
      5. keeps the requested fraction of features with SelectKBest(f_classif),
      6. runs GridSearchCV (multi-metric `scoring`, refit on 'auc') with
         StratifiedGroupKFold grouped by 'sub_id',
      7. retrains the best configuration on the whole development set and evaluates it
         on the test set.

    NOTE(review): steps 3-5 are fitted on the full development set before the
    cross-validation of step 6, so the validation folds have already influenced the
    preprocessing and the reported cv_* scores are likely optimistic. The test split is
    not used for fitting. Moving the preprocessing into the searched pipeline would
    change the published numbers, so it is left as is.

    Parameters
    ----------
    database : str
        Dataset name passed to `load_dataframe`. 'IEMOCAP' uses 4 CV folds, any other value 5.
    labels : list of str
        Label columns to predict; one experiment per label.
    path_main : str
        Root output folder. It is concatenated as-is, so it must end with a path separator.
    save : bool
        When True, writes one summary CSV per label
        (``<path_main><database>/classicalSVM_FeatLong/``) and one predictions CSV per
        (feature fraction, resample, classifier) combination in a sub-folder named after the label.

    Returns
    -------
    pandas.DataFrame
        One row per (label, feature fraction, resample, classifier) with the selected
        parameters, the cross-validation mean/std of every metric, the test metrics and
        the row-normalised test confusion matrix (as a string).
    """
    set_seed(seed_value=13)
    tic = time.time()
    results = []

    # Metric names must match the keys of the module-level `scoring` dict
    metric_names = ['WAcc', 'UAcc', 'kappa', 'auc', 'f1', 'precision', 'recall']
    field_names = ['label','% feat','resample','classifier','params','n-feat','features',
                   'shape train','shape sel','shape test']
    for metric in metric_names:
        field_names += ['cv_mean_'+metric, 'cv_std_'+metric]
    field_names += ['test_'+metric for metric in metric_names]
    field_names.append('test_cm')

    # The source files do not depend on the label or the feature fraction: read them once
    df_all = load_dataframe(database)

    for prediction in labels:
        if save:
            path = path_main +database+'/classicalSVM_FeatLong/'
            path_save = path+database+'_classicalSVM_FeatLong_'+prediction+'.csv'

            # The folder has to exist before the summary file can be opened
            makedir(path)
            with open(path_save, 'w', newline='') as file:
                dw = DictWriter(file, delimiter=',', fieldnames=field_names)
                dw.writeheader()

        for percent_features in [0.25, 0.5, 0.75]:
            print('[INFO] Loading data.. (% features keep =',percent_features,') ',prediction)

            # Private copy, so the filtering below never touches the cached table
            df = df_all.copy()

            # Drop audios with disagreement (label == 10)
            if database == 'IEMOCAP' and prediction == 'categories':
                print('---------------- Convertir en int el 10!!')
                df[prediction] = df[prediction].replace(['10'], 10)
            df = df.loc[df[prediction]!=10].copy()
            df.reset_index(drop=True,inplace=True)

            # Separate partitions
            df_test = df.loc[df.split == 'test'].copy()
            df_test.reset_index(inplace=True, drop = True)
            df_develop = df.loc[df.split != 'test'].copy()
            df_develop.reset_index(inplace=True, drop = True)

            # Obtain X,y,groups
            X_develop, y_develop, groups_develop = separate_Xy(df_develop, prediction)
            X_test, y_test, groups_test = separate_Xy(df_test, prediction)
            print('Development:',X_develop.shape, Counter(y_develop))
            print('Test:',X_test.shape, Counter(y_test))

            # Feature normalization (statistics from the development set only)
            scalar = StandardScaler()
            scalar.fit(X_develop)
            X_develop_scaled = pd.DataFrame(scalar.transform(X_develop), columns = X_develop.columns.values)
            X_test_scaled = pd.DataFrame(scalar.transform(X_test), columns = X_test.columns.values)

            # Remove highly correlated features: of each pair above the threshold,
            # the one that appears later in the column order is dropped
            correlated_features = set()
            correlation_matrix = X_develop_scaled.corr()
            for i in range(len(correlation_matrix.columns)):
                for j in range(i):
                    if abs(correlation_matrix.iloc[i, j]) > 0.95:
                        colname = correlation_matrix.columns[i]
                        correlated_features.add(colname)
            X_develop_scaled.drop(labels=correlated_features, axis=1, inplace=True)
            X_test_scaled.drop(labels=correlated_features, axis=1, inplace=True)

            # Feature selection
            feat_sel = int(percent_features*X_develop_scaled.shape[1])
            selectK = SelectKBest(f_classif, k=feat_sel)
            selectK.fit(X_develop_scaled, y_develop)
            cols = selectK.get_support(indices=True)
            X_develop_scaled = X_develop_scaled.iloc[:,cols]
            X_test_scaled =  X_test_scaled.iloc[:,cols]
            selected_features = X_develop_scaled.columns.values

            X_develop_scaled = np.array(X_develop_scaled)
            X_test_scaled = np.array(X_test_scaled)

            if database == 'IEMOCAP':
                cv = StratifiedGroupKFold(n_splits=4, random_state=13, shuffle=True)
            else:
                cv = StratifiedGroupKFold(n_splits=5, random_state=13, shuffle=True)

            for classifier in dict_classifiers:
                # Only the no-resampling setting is active; the other branch needs the
                # module-level `dict_resample`, which is currently commented out.
                for resample in ['None']: # + list(dict_resample.keys()):

                    if resample == 'None':
                        params = transform_dict(dict_parameters[classifier])
                        grid_search = GridSearchCV(estimator=dict_classifiers[classifier],
                                                   param_grid=params,
                                                   scoring=scoring,
                                                   cv=cv,
                                                   refit='auc')
                    else:
                        pipeline = Pipeline(steps = [[resample, dict_resample[resample]],
                                                     [classifier, dict_classifiers[classifier]]])

                        grid_search = GridSearchCV(estimator=pipeline,
                                                   param_grid=dict_parameters[classifier],
                                                   scoring=scoring,
                                                   cv=cv,
                                                   refit='auc', error_score="raise")
                    grid_search.fit(X=X_develop_scaled, y=y_develop, groups=groups_develop)

                    # Cross-validation mean/std of every metric for the configuration chosen by 'auc'
                    best_idx = grid_search.best_index_
                    cv_scores = {}
                    for metric in metric_names:
                        cv_scores['cv_mean_'+metric] = grid_search.cv_results_['mean_test_'+metric][best_idx]
                        cv_scores['cv_std_'+metric] = grid_search.cv_results_['std_test_'+metric][best_idx]

                    if resample == 'None':
                        dict_params = grid_search.best_params_
                    else:
                        dict_params = transform_dict(grid_search.best_params_)

                    # Fresh estimator of the same class as the searched one, so every key
                    # of dict_classifiers is supported and a stale model is never reused
                    clf_test = type(dict_classifiers[classifier])(**dict_params)

                    clf_test.fit(X_develop_scaled, y_develop)
                    y_true, y_pred = y_test, clf_test.predict(X_test_scaled)
                    y_pred_prob = clf_test.predict_proba(X_test_scaled)

                    test_scores = {
                        'WAcc': accuracy_score(y_true, y_pred),
                        'UAcc': balanced_accuracy_score(y_true, y_pred),
                        'kappa': cohen_kappa_score(y_true, y_pred),
                        'auc': roc_auc_score(y_true, y_pred_prob, multi_class = 'ovr'),
                        'f1': f1_score(y_true, y_pred, average = 'weighted'),
                        # zero_division=0 as in the CV scorer: same value, without the
                        # undefined-metric warning when a class is never predicted
                        'precision': precision_score(y_true, y_pred, average = 'weighted', zero_division = 0),
                        'recall': recall_score(y_true, y_pred, average = 'weighted'),
                    }

                    # Row-normalised confusion matrix, flattened to one string for the CSV
                    cm_test = confusion_matrix(y_true, y_pred)
                    cm_test = cm_test.astype('float') / cm_test.sum(axis=1)[:, np.newaxis]
                    cm_string = ''
                    for i in range(len(cm_test)):
                        cm_string = cm_string + str(cm_test[i]) + ' '

                    row = {'label':prediction,
                           '% feat': percent_features,
                           'resample': resample,
                           'classifier':classifier,
                           'params':grid_search.best_params_,
                           'n-feat': feat_sel,'features':selected_features,
                           'shape train':X_develop.shape,'shape sel':X_develop_scaled.shape,'shape test':X_test_scaled.shape}
                    row.update(cv_scores)
                    for metric in metric_names:
                        row['test_'+metric] = test_scores[metric]
                    row['test_cm'] = cm_string
                    results.append(row)

                    if save:
                        # Same column order as the header; the with-block closes the file
                        with open(path_save, 'a', newline='') as f_object:
                            dictwriter_object = DictWriter(f_object, fieldnames=field_names)
                            dictwriter_object.writerow(row)

                        # Save file, subject, y_true and y_pred of the test split
                        folder = path + prediction + '/'
                        makedir(folder)
                        df_predictions = pd.DataFrame({'file': df_test['file'].values,
                                                        'subject': groups_test,
                                                        'y_true': y_true,
                                                        'y_pred': y_pred})
                        df_predictions.to_csv(folder + 'feat'+str(percent_features)+'_resample'+str(resample)+'_classsifier'+classifier+'.csv',index=False)

            print('Done!')

    toc = time.time()
    # Convert to minutes first, then round
    print('Duration:',round((toc-tic)/60,2),'min')

    df_resultados_concat = pd.DataFrame.from_records(results)

    return df_resultados_concat

def set_seed(seed_value=13):
    """Seed the global random generators of `random` and NumPy for reproducibility.

    Parameters
    ----------
    seed_value : int
        Seed handed to both generators (default 13).
    """
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed_value)
    # torch seeding is currently disabled:
#     torch.manual_seed(seed_value)
#     torch.cuda.manual_seed_all(seed_value)

def makedir(directory):
    """Create `directory`, including missing parent folders; do nothing if it already exists.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is no window
    between checking and creating in which another process could create the folder.

    Raises
    ------
    FileExistsError
        If `directory` exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)

## RAVDESS

In [106]:
# Run the classical-model pipeline on RAVDESS, one experiment per label, and write the
# result CSVs under results_final/RAVDESS/classicalSVM_FeatLong/.
database = 'RAVDESS'
labels = ['valence','arousal','emotion_code']
df_resultados = pipeline_classic_models(database, labels, path_main = 'results_final/', save = True)
# Duration: 15 min

[INFO] Loading data.. (% features keep = 0.25 )  valence
Development: (1140, 88) Counter({0: 608, 2: 304, 1: 228})
Test: (300, 88) Counter({0: 160, 2: 80, 1: 60})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  valence
Development: (1140, 88) Counter({0: 608, 2: 304, 1: 228})
Test: (300, 88) Counter({0: 160, 2: 80, 1: 60})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  valence
Development: (1140, 88) Counter({0: 608, 2: 304, 1: 228})
Test: (300, 88) Counter({0: 160, 2: 80, 1: 60})
Done!
[INFO] Loading data.. (% features keep = 0.25 )  arousal
Development: (1140, 88) Counter({2: 608, 0: 304, 1: 228})
Test: (300, 88) Counter({2: 160, 0: 80, 1: 60})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  arousal
Development: (1140, 88) Counter({2: 608, 0: 304, 1: 228})
Test: (300, 88) Counter({2: 160, 0: 80, 1: 60})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  arousal
Development: (1140, 88) Counter({2: 608, 0: 304, 1: 228})
Test: (300, 88) Counter({2: 160, 0: 80, 1:

## VOSOME

In [8]:
# Run the classical-model pipeline on VOSOME for each label set listed below and write
# the result CSVs under results_final/VOSOME/classicalSVM_FeatLong/.
database = 'VOSOME'
labels = ['valence_3r', 'arousal_3r', 
          'valence_psy', 'arousal_psy',
          'valencia_combined_v2', 'arousal_combined_v2',
          'emotion4']
df_resultados = pipeline_classic_models(database, labels, path_main = 'results_final/', save = True)
# Duration: 23 min

[INFO] Loading data.. (% features keep = 0.25 )  valence_3r
Development: (801, 88) Counter({1: 363, 2: 219, 0: 219})
Test: (192, 88) Counter({1: 70, 2: 66, 0: 56})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  valence_3r
Development: (801, 88) Counter({1: 363, 2: 219, 0: 219})
Test: (192, 88) Counter({1: 70, 2: 66, 0: 56})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  valence_3r
Development: (801, 88) Counter({1: 363, 2: 219, 0: 219})
Test: (192, 88) Counter({1: 70, 2: 66, 0: 56})
Done!
[INFO] Loading data.. (% features keep = 0.25 )  arousal_3r
Development: (731, 88) Counter({2: 411, 1: 244, 0: 76})
Test: (192, 88) Counter({2: 128, 1: 51, 0: 13})


  _warn_prf(average, modifier, msg_start, len(result))


Done!
[INFO] Loading data.. (% features keep = 0.5 )  arousal_3r
Development: (731, 88) Counter({2: 411, 1: 244, 0: 76})
Test: (192, 88) Counter({2: 128, 1: 51, 0: 13})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  arousal_3r
Development: (731, 88) Counter({2: 411, 1: 244, 0: 76})
Test: (192, 88) Counter({2: 128, 1: 51, 0: 13})


  _warn_prf(average, modifier, msg_start, len(result))


Done!
[INFO] Loading data.. (% features keep = 0.25 )  valence_psy
Development: (807, 88) Counter({2: 309, 0: 251, 1: 247})
Test: (192, 88) Counter({2: 87, 0: 59, 1: 46})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  valence_psy
Development: (807, 88) Counter({2: 309, 0: 251, 1: 247})
Test: (192, 88) Counter({2: 87, 0: 59, 1: 46})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  valence_psy
Development: (807, 88) Counter({2: 309, 0: 251, 1: 247})
Test: (192, 88) Counter({2: 87, 0: 59, 1: 46})
Done!
[INFO] Loading data.. (% features keep = 0.25 )  arousal_psy
Development: (807, 88) Counter({1: 347, 0: 265, 2: 195})
Test: (192, 88) Counter({2: 82, 1: 76, 0: 34})


  _warn_prf(average, modifier, msg_start, len(result))


Done!
[INFO] Loading data.. (% features keep = 0.5 )  arousal_psy
Development: (807, 88) Counter({1: 347, 0: 265, 2: 195})
Test: (192, 88) Counter({2: 82, 1: 76, 0: 34})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  arousal_psy
Development: (807, 88) Counter({1: 347, 0: 265, 2: 195})
Test: (192, 88) Counter({2: 82, 1: 76, 0: 34})


  _warn_prf(average, modifier, msg_start, len(result))


Done!
[INFO] Loading data.. (% features keep = 0.25 )  valencia_combined_v2
Development: (807, 88) Counter({1: 305, 2: 261, 0: 241})
Test: (192, 88) Counter({2: 73, 1: 62, 0: 57})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  valencia_combined_v2
Development: (807, 88) Counter({1: 305, 2: 261, 0: 241})
Test: (192, 88) Counter({2: 73, 1: 62, 0: 57})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  valencia_combined_v2
Development: (807, 88) Counter({1: 305, 2: 261, 0: 241})
Test: (192, 88) Counter({2: 73, 1: 62, 0: 57})
Done!
[INFO] Loading data.. (% features keep = 0.25 )  arousal_combined_v2
Development: (807, 88) Counter({2: 331, 1: 328, 0: 148})
Test: (192, 88) Counter({2: 109, 1: 70, 0: 13})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  arousal_combined_v2
Development: (807, 88) Counter({2: 331, 1: 328, 0: 148})
Test: (192, 88) Counter({2: 109, 1: 70, 0: 13})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  arousal_combined_v2
Development: (807, 88) Coun

  _warn_prf(average, modifier, msg_start, len(result))


Done!
[INFO] Loading data.. (% features keep = 0.5 )  emotion4
Development: (710, 88) Counter({0: 264, 2: 184, 1: 157, 3: 105})
Test: (174, 88) Counter({0: 78, 1: 42, 2: 41, 3: 13})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  emotion4
Development: (710, 88) Counter({0: 264, 2: 184, 1: 157, 3: 105})
Test: (174, 88) Counter({0: 78, 1: 42, 2: 41, 3: 13})
Done!
Duration: 22.943166666666666 min


## IEMOCAP

In [3]:
# Run the classical-model pipeline on IEMOCAP (only the categorical label is enabled
# here) and write the result CSVs under results_final/IEMOCAP/classicalSVM_FeatLong/.
database = 'IEMOCAP'
labels = ['categories'] # 'valence','arousal', 

df_resultados = pipeline_classic_models(database, labels, path_main = 'results_final/', save = True)
# Duration = 3h valence, 2.5h arousal, 45min categories

[INFO] Loading data.. (% features keep = 0.25 )  categories
---------------- Convertir en int el 10!!
Development: (4290, 88) Counter({2: 1324, 0: 1194, 1: 933, 3: 839})
Test: (1241, 88) Counter({0: 442, 2: 384, 3: 245, 1: 170})
Done!
[INFO] Loading data.. (% features keep = 0.5 )  categories
---------------- Convertir en int el 10!!
Development: (4290, 88) Counter({2: 1324, 0: 1194, 1: 933, 3: 839})
Test: (1241, 88) Counter({0: 442, 2: 384, 3: 245, 1: 170})
Done!
[INFO] Loading data.. (% features keep = 0.75 )  categories
---------------- Convertir en int el 10!!
Development: (4290, 88) Counter({2: 1324, 0: 1194, 1: 933, 3: 839})
Test: (1241, 88) Counter({0: 442, 2: 384, 3: 245, 1: 170})
Done!
Duration: 43.339000000000006 min
