Environment: huggingFace_gpu

In [38]:
import time
import os
import copy
import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedGroupKFold
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, cohen_kappa_score, confusion_matrix
from sklearn.metrics import make_scorer, accuracy_score, balanced_accuracy_score
from sklearn.metrics import roc_auc_score, recall_score, precision_score

# Functions

In [39]:
def binarizar(data):
    """Collapse labels from the scale [-2, -1, 0, 1, 2] to the scale [-1, 0, 1].

    Args:
        data: Iterable of labels on the 5-point scale.

    Returns:
        list: One entry per input value: -1 for -2/-1, 0 for 0 and 1 for 1/2.
        A value outside the scale is reported on stdout and mapped to NaN, so
        it can neither inherit the label of the previous item nor crash the
        loop when it happens to be the first item.
    """
    new_data = []
    for n in data:
        if n in (-2, -1):
            label = -1
        elif n == 0:
            label = 0
        elif n in (1, 2):
            label = 1
        else:
            print('Problemas en', n)
            # Explicit "missing" marker instead of a stale/unbound label
            label = float('nan')
        new_data.append(label)
    return new_data

def load_partitions(dataset):
    """Read the train/validation/test split tables of a dataset into one frame.

    The three tab-separated files under
    ``data/<dataset>/data_partitions/TrainValTest/`` are tagged with a
    ``split`` column ('train', 'val' or 'test') and stacked in that order.

    Dataset-specific handling:
      * IEMOCAP: the string '10' in ``categories`` is replaced by the int 10.
      * VOSOME: file ids are made non-negative, the rater columns are renamed
        to ``valence_3r``/``arousal_3r``, the psychologists' labels are merged
        in (``valence_psy``, ``arousal_psy``, ``emotion``), an ``emotion4``
        column is derived and the extra labels file is merged on ``file``.

    Args:
        dataset: Name of the dataset folder under ``data/``.

    Returns:
        pandas.DataFrame: The combined partition table.
    """
    split_dir = 'data/' + dataset + '/data_partitions/TrainValTest/'

    # Files are read in the order testing, validation, training
    frames = {}
    for csv_suffix, split_tag in (('testing', 'test'), ('validation', 'val'), ('training', 'train')):
        frame = pd.read_csv(split_dir + 'split_' + csv_suffix + '.csv', sep="\t")
        frame['split'] = split_tag
        frames[split_tag] = frame

    partitions = pd.concat([frames['train'], frames['val'], frames['test']], ignore_index=True)

    if dataset == 'IEMOCAP':
        partitions['categories'] = partitions['categories'].replace(['10'], 10)

    if dataset != 'VOSOME':
        return partitions.copy()

    # Include files with a negative id (those selected to be evaluated first)
    partitions['file'] = np.abs(partitions['file'].values)

    # Labels from the three raters keep their own column names
    partitions = partitions.rename(columns={'valence': 'valence_3r', 'arousal': 'arousal_3r'})

    # Load labels from psychologists; the file id is the filename minus its
    # last four characters
    df_gold = pd.read_excel('data/VOSOME/labels/labels_Psicologas.xlsx')
    df_gold['file'] = df_gold['Filename'].str[0:-4].astype('int')
    df_gold['arousal_binarize'] = binarizar(df_gold['Arousal'].values)
    df_gold['valencia_binarize'] = binarizar(df_gold['Valencia'].values)

    gold_columns = ['file', 'valencia_binarize', 'arousal_binarize', 'Emotion']
    merged_df = partitions.merge(df_gold[gold_columns], on='file', how='left')
    for target, source in (('valence_psy', 'valencia_binarize'),
                           ('arousal_psy', 'arousal_binarize'),
                           ('emotion', 'Emotion')):
        partitions[target] = merged_df[source]

    # Extra column where the listed emotions are replaced by the label 10
    categories_out = ['Tristeza', 'Temor', 'Asco']

    def _to_emotion4(value):
        return 10 if value in categories_out else value

    partitions['emotion4'] = partitions['emotion'].apply(_to_emotion4)

    # Add extra labels (full agreement among evaluators, raters + evaluators)
    df_extra = pd.read_excel('data/' + dataset + '/labels/labels_extra_v2.xlsx')
    return partitions.merge(df_extra, on='file', how='left')

def load_Xygroup(approach, dataset, path_emb, split, prediction, partitions):
    """Build the feature matrix, labels, subject groups and file ids of a split.

    Rows whose ``prediction`` label equals 10 are excluded. The filtering is
    done on a copy, so the caller's ``partitions`` frame is left untouched.

    Args:
        approach: Name of the approach; when it contains 'egemaps', the
            columns of ``data/<dataset>/features/eGeMAPS_functionals.xlsx``
            (except the first one) are appended to every embedding.
        dataset: Name of the dataset folder under ``data/``.
        path_emb: Path prefix of the embeddings; each one is loaded from
            ``path_emb + str(file) + '.npy'``.
        split: 'develop' (every row whose split is not 'test') or the exact
            value of the ``split`` column to select.
        prediction: Name of the label column.
        partitions: DataFrame with at least the columns ``file``, ``split``,
            ``sub_id`` and ``prediction``.

    Returns:
        tuple: ``(X, y, group, files)`` as numpy arrays; ``y``, ``group`` and
        ``files`` are reshaped to a single column.

    Raises:
        IndexError: If the eGeMAPS table has no row for a selected file.
    """
    # Discard rows labelled 10 without modifying the caller's frame
    kept = partitions.loc[partitions[prediction] != 10].reset_index(drop=True)

    use_egemaps = 'egemaps' in approach
    if use_egemaps:
        df_features = pd.read_excel('data/'+dataset+'/features/eGeMAPS_functionals.xlsx')
        df_features.rename(columns={"id": "file"}, inplace=True)
        feature_rows = df_features.values
        # Position of the first row of every file in the feature table
        feature_pos = {}
        for pos, name in enumerate(df_features['file'].values):
            feature_pos.setdefault(name, pos)

    if split == 'develop':
        files = kept.loc[kept['split'] != 'test', 'file'].values
    else:
        files = kept.loc[kept['split'] == split, 'file'].values

    # Position of the first row of every file, built once instead of running
    # a boolean-mask lookup over the whole table for each file
    first_pos = {}
    for pos, name in enumerate(kept['file'].values):
        first_pos.setdefault(name, pos)
    label_column = kept[prediction].values
    subject_column = kept['sub_id'].values

    X = []
    y = []
    group = []
    for file in files:
        embedding = np.load(path_emb+str(file)+'.npy')
        row = first_pos[file]
        if use_egemaps:
            if file not in feature_pos:
                raise IndexError('No eGeMAPS features found for file ' + str(file))
            # Skip the first column of the feature table
            egemaps = feature_rows[feature_pos[file]][1:]
            X.append(np.concatenate([embedding, egemaps]))
        else:
            X.append(embedding)
        y.append(label_column[row])
        group.append(subject_column[row])

    X = np.array(X)
    y = np.array(y).reshape(-1, 1)
    group = np.array(group).reshape(-1, 1)
    files = np.array(files).reshape(-1, 1)

    return X, y, group, files

def prepare_data(approach, path_emb, dataset, label, verbose = False):
    """Load the development and test sets and one-hot encode their labels.

    Args:
        approach: Name of the approach, forwarded to ``load_Xygroup``.
        path_emb: Path prefix of the embeddings, forwarded to ``load_Xygroup``.
        dataset: Name of the dataset folder under ``data/``.
        label: Name of the label column to predict.
        verbose: When True, print the encoder classes and the array shapes.

    Returns:
        tuple: ``(X_develop, X_test, y_develop, y_test, group_develop,
        group_test, files_develop, files_test)``; the ``y`` arrays are dense
        one-hot matrices whose columns follow the classes seen in the
        development set.
    """
    # Load X, y
    partitions = load_partitions(dataset)

    X_develop, y_develop, group_develop, files_develop = load_Xygroup(approach, dataset, path_emb, 'develop', label, partitions)
    X_test, y_test, group_test, files_test = load_Xygroup(approach, dataset, path_emb, 'test', label, partitions)

    # One-hot encoding labels. The dense-output keyword was renamed from
    # `sparse` to `sparse_output` in scikit-learn 1.2 and the old name was
    # later removed, so try the new name first and fall back to the old one.
    try:
        ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)
    ohe.fit(y_develop)
    y_develop = ohe.transform(y_develop)
    # Labels unseen during fit are encoded as all-zero rows (handle_unknown='ignore')
    y_test = ohe.transform(y_test)

    if verbose:
        print('Classes:',ohe.categories_)
        print('X_develop:',X_develop.shape, 'y_develop:',y_develop.shape, 'group_develop:',group_develop.shape)
        print('X_test:',X_test.shape, 'y_test:',y_test.shape, 'group_test:',group_test.shape)

    return X_develop, X_test, y_develop, y_test, group_develop, group_test, files_develop, files_test

def load_architecture(architecture, ini_shape, num_labels, verbose=False):
    """Build the classifier identified by ``architecture``.

    Args:
        architecture: Architecture id. Only ``1`` is defined: a single linear
            layer from the input features to the class logits.
        ini_shape: Number of input features (embedding dimension).
        num_labels: Number of output classes.
        verbose: When True, print the model.

    Returns:
        torch.nn.Sequential: The freshly initialised model.

    Raises:
        ValueError: If ``architecture`` is not a defined architecture id.
    """
    if architecture != 1:
        # Fail with an explicit message instead of an unbound-variable error
        raise ValueError('Unknown architecture: ' + repr(architecture))

    model = nn.Sequential(
        nn.Linear(ini_shape, num_labels),
    )
    if verbose:
        print(model)
    return model

def calculate_metrics(y_true, y_pred):
    """Compute the classification metrics of a batch of predictions.

    Args:
        y_true: Tensor of one-hot targets, one row per sample.
        y_pred: Tensor of raw model outputs (logits) with the same layout; a
            softmax over dimension 1 is applied here.

    Returns:
        tuple: ``(WAcc, UAcc, kappa, auc, f1, precision, recall)`` where WAcc
        is the plain accuracy, UAcc the balanced accuracy, auc the
        one-vs-rest ROC AUC and f1/precision/recall are weighted averages.
    """
    probabilities = torch.softmax(y_pred, dim=1).cpu()
    targets = y_true.cpu()

    # Class indices are shared by every index-based metric
    true_idx = torch.argmax(targets, 1)
    pred_idx = torch.argmax(probabilities, 1)

    WAcc = accuracy_score(true_idx, pred_idx)
    UAcc = balanced_accuracy_score(true_idx, pred_idx)
    kappa = cohen_kappa_score(true_idx, pred_idx)
    auc = roc_auc_score(
        targets.detach().numpy(),
        probabilities.detach().numpy(),
        multi_class = 'ovr',
    )
    f1 = f1_score(true_idx, pred_idx, average='weighted')
    precision = precision_score(true_idx, pred_idx, average='weighted', zero_division=0)
    recall = recall_score(true_idx, pred_idx, average='weighted')

    return WAcc, UAcc, kappa, auc, f1, precision, recall

def train_model(dataset, approach, batch_size, X_develop, y_develop, group_develop, files_develop, path_save):
    """Cross-validate the classifier on the development set with early stopping.

    The development set is split with a stratified group K-fold (4 folds for
    IEMOCAP, 5 otherwise). For every fold a new model is trained for up to
    3000 epochs and training stops after 50 epochs without improvement of the
    validation loss. Batches are consecutive slices of the training fold (no
    shuffling) and the trailing samples that do not fill a whole batch are
    not used.

    Files written under ``path_save``:
      * ``training_metrics_fold<k>.csv``: per-epoch train/validation history.
      * ``predictions_training_fold<k>.csv``: validation predictions of the
        epoch with the lowest validation loss.
      * ``training_metrics_bestCV.csv``: best-epoch validation metrics, one
        row per fold.

    Args:
        dataset: Dataset name; only used to choose the number of folds.
        approach: Name of the approach; when it contains 'egemaps' the
            features are standardised with statistics of the training fold.
        batch_size: Number of samples per training batch.
        X_develop: Feature matrix (numpy array, one row per sample).
        y_develop: One-hot label matrix (numpy array).
        group_develop: Subject id of every sample, used as CV group.
        files_develop: File id of every sample.
        path_save: Path prefix for the output files.

    Raises:
        ValueError: If a training fold has fewer samples than ``batch_size``.
        RuntimeError: If the validation loss never improves in a fold (for
            example because it is NaN), so there is no best epoch to report.
    """
    metric_names = ('WAcc', 'UAcc', 'kappa', 'auc', 'f1', 'precision', 'recall')
    tracked = ('loss',) + metric_names

    n_epochs = 3000
    loss_fn = nn.CrossEntropyLoss()
    patience = 50  # Number of epochs to wait for improvement
    num_labels = y_develop.shape[1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Cross validation
    n_splits = 4 if dataset == 'IEMOCAP' else 5
    cv = StratifiedGroupKFold(n_splits=n_splits, random_state=13, shuffle=True)
    fold_labels = y_develop.argmax(1).reshape(-1, 1)

    data_CV = []
    for fold, (train_idx, val_idx) in enumerate(cv.split(X_develop, fold_labels, groups=group_develop)):

        # Instantiate model
        model = load_architecture(architecture = 1, ini_shape = X_develop.shape[1], num_labels = num_labels, verbose=False)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        # With patience > n_epochs the scheduler never lowers the learning rate
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=n_epochs+1)

        X_train_fold, X_val_fold = X_develop[train_idx], X_develop[val_idx]
        y_train_fold, y_val_fold = y_develop[train_idx], y_develop[val_idx]
        groups_val_fold = group_develop[val_idx]
        files_val_fold = files_develop[val_idx]

        if 'egemaps' in approach:
            # Feature normalization fitted on the training fold only
            scalar = StandardScaler()
            scalar.fit(X_train_fold)
            X_train_fold = scalar.transform(X_train_fold)
            X_val_fold = scalar.transform(X_val_fold)

        n_batches = len(X_train_fold) // batch_size
        if n_batches == 0:
            # Otherwise the per-epoch averages below would divide by zero
            raise ValueError(
                'Fold ' + str(fold) + ' has ' + str(len(X_train_fold)) +
                ' training samples, fewer than batch_size=' + str(batch_size))

        # Convert numpy arrays to tensors and move everything to the device
        X_train_fold = torch.tensor(X_train_fold, dtype=torch.float32).to(device)
        X_val_fold = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
        y_train_fold = torch.tensor(y_train_fold, dtype=torch.float32).to(device)
        y_val_fold = torch.tensor(y_val_fold, dtype=torch.float32).to(device)
        model = model.to(device)

        # TRAINING ==========================================================
        # (with validation)
        # Early-stopping state is local to the fold so nothing leaks from the
        # previous one
        best_loss = np.inf
        best = None
        y_pred_save = None
        early_stop_counter = 0
        history = {prefix + name + '_hist': []
                   for prefix in ('train_', 'val_')
                   for name in tracked}

        for epoch in range(n_epochs):
            epoch_totals = dict.fromkeys(tracked, 0.0)

            # Set model in training mode
            model.train()

            # Run through each batch
            for i in range(n_batches):
                start = i * batch_size
                X_batch = X_train_fold[start:start+batch_size]
                y_batch = y_train_fold[start:start+batch_size]

                # Forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)

                # Backward pass and update weights
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Accumulate loss and metrics of the batch
                batch_metrics = calculate_metrics(y_batch, y_pred)
                epoch_totals['loss'] += loss.item()
                for name, value in zip(metric_names, batch_metrics):
                    epoch_totals[name] += value

            # Set model in evaluation mode and run through the validation set
            model.eval()
            with torch.no_grad():
                y_pred = model(X_val_fold)
                ce = loss_fn(y_pred, y_val_fold).item()
                val_metrics = calculate_metrics(y_val_fold, y_pred)

            # Training values are averages over the batches of the epoch
            for name in tracked:
                history['train_' + name + '_hist'].append(epoch_totals[name] / n_batches)
            history['val_loss_hist'].append(ce)
            for name, value in zip(metric_names, val_metrics):
                history['val_' + name + '_hist'].append(value)

            lr_scheduler.step(ce)  # Update learning rate based on validation loss

            if ce < best_loss:  # Validation loss improved
                best_loss = ce
                best = {'fold': fold, 'val_epoch': epoch, 'val_loss': ce}
                for name, value in zip(metric_names, val_metrics):
                    best['val_' + name] = value
                y_pred_save = y_pred
                early_stop_counter = 0  # Reset early stopping counter
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:
                    break

        # Save history from training
        df = pd.DataFrame(history)
        df.to_csv(path_save+'training_metrics_fold'+str(fold)+'.csv', index=False)

        if best is None:
            raise RuntimeError(
                'Validation loss never improved in fold ' + str(fold) +
                '; there is no best epoch to report')

        # Save the validation predictions of the best epoch; softmax turns the
        # stored logits into probabilities and does not change the argmax
        y_pred_prob = nn.Softmax(dim=1)(y_pred_save)
        df_predictions = pd.DataFrame({
            'file': files_val_fold.reshape(1,-1)[0],
            'subject': groups_val_fold.reshape(1,-1)[0],
            'y_true': torch.argmax(y_val_fold.to("cpu"), 1).detach().numpy(),
            'y_pred': torch.argmax(y_pred_save.to("cpu"), 1).detach().numpy(),
            'y_pred_prob': [str(i) for i in y_pred_prob.cpu().detach().numpy()],
        })
        df_predictions.to_csv(path_save+'predictions_training_fold'+str(fold)+'.csv',index=False)

        # Best-epoch metrics of the fold
        data_CV.append(best)

    df = pd.DataFrame.from_dict(data_CV)
    df.to_csv(path_save+'training_metrics_bestCV.csv', index=False)
    
        
def evaluate_model(approach, n_epochs, batch_size, X_develop, y_develop, X_test, y_test, group_test, files_test, path_save, verbose=False):
    """Train the classifier on the whole development set and score the test set.

    Training runs for exactly ``n_epochs`` epochs without validation. Batches
    are consecutive slices of the development set (no shuffling) and the
    trailing samples that do not fill a whole batch are not used.

    Files written under ``path_save``:
      * ``training_metrics_modelTest.csv``: per-epoch training history.
      * ``test_metrics.csv``: test loss, metrics and confusion matrix.
      * ``predictions_test.csv``: per-file test predictions.

    Args:
        approach: Name of the approach; when it contains 'egemaps' the
            features are standardised with statistics of the development set.
        n_epochs: Number of training epochs.
        batch_size: Number of samples per training batch.
        X_develop: Development feature matrix (numpy array).
        y_develop: Development one-hot label matrix (numpy array).
        X_test: Test feature matrix (numpy array).
        y_test: Test one-hot label matrix (numpy array).
        group_test: Subject id of every test sample.
        files_test: File id of every test sample.
        path_save: Path prefix for the output files.
        verbose: When True, print the test metrics.

    Returns:
        pandas.DataFrame: The test predictions that are also written to
        ``predictions_test.csv``.

    Raises:
        ValueError: If the development set has fewer samples than
            ``batch_size`` while ``n_epochs`` is positive.
    """
    metric_names = ('WAcc', 'UAcc', 'kappa', 'auc', 'f1', 'precision', 'recall')
    tracked = ('loss',) + metric_names

    if 'egemaps' in approach:
        # Feature normalization fitted on the development set only
        scalar = StandardScaler()
        scalar.fit(X_develop)
        X_develop = scalar.transform(X_develop)
        X_test = scalar.transform(X_test)

    n_batches = len(X_develop) // batch_size
    if n_epochs > 0 and n_batches == 0:
        # Otherwise the per-epoch averages below would divide by zero
        raise ValueError(
            'The development set has ' + str(len(X_develop)) +
            ' samples, fewer than batch_size=' + str(batch_size))

    loss_fn = nn.CrossEntropyLoss()
    num_labels = y_develop.shape[1]

    # Instantiate model
    model = load_architecture(architecture = 1, ini_shape = X_develop.shape[1], num_labels = num_labels, verbose=False)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Convert numpy arrays to tensors and move everything to the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X_develop = torch.tensor(X_develop, dtype=torch.float32).to(device)
    y_develop = torch.tensor(y_develop, dtype=torch.float32).to(device)
    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test = torch.tensor(y_test, dtype=torch.float32).to(device)
    model = model.to(device)

    # TRAINING ==========================================================
    # (without validation)
    history = {'train_' + name + '_hist': [] for name in tracked}
    for epoch in range(n_epochs):
        epoch_totals = dict.fromkeys(tracked, 0.0)

        # Set model in training mode
        model.train()

        # Run through each batch
        for i in range(n_batches):
            start = i * batch_size
            X_batch = X_develop[start:start+batch_size]
            y_batch = y_develop[start:start+batch_size]

            # Forward pass
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)

            # Backward pass and update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate loss and metrics of the batch
            batch_metrics = calculate_metrics(y_batch, y_pred)
            epoch_totals['loss'] += loss.item()
            for name, value in zip(metric_names, batch_metrics):
                epoch_totals[name] += value

        # Store the averages over the batches of the epoch
        for name in tracked:
            history['train_' + name + '_hist'].append(epoch_totals[name] / n_batches)

    # Save history from training
    df = pd.DataFrame(history)
    df.to_csv(path_save+'training_metrics_modelTest.csv', index=False)

    # TESTING ==========================================================
    model.eval()
    # No gradients are needed for inference
    with torch.no_grad():
        y_pred = model(X_test)
        ce = loss_fn(y_pred, y_test)

    # Compute metrics
    WAcc, UAcc, kappa, auc, f1, precision, recall = calculate_metrics(y_test, y_pred)
    cm = confusion_matrix(torch.argmax(y_test.to("cpu"), 1), torch.argmax(y_pred.to("cpu"), 1))

    # Create a dictionary to hold the metrics
    data = {'test_loss': [ce.cpu().detach().numpy()],
            'test_WAcc': [WAcc],
            'test_UAcc': [UAcc],
            'test_kappa': [kappa],
            'test_auc': [auc],
            'test_f1': [f1],
            'test_precision': [precision],
            'test_recall': [recall],
            'test_cm':[cm]
        }
    df = pd.DataFrame(data)
    df.to_csv(path_save+'test_metrics.csv', index=False)

    # Save the predictions; softmax turns the logits into probabilities and
    # does not change the argmax
    y_pred_prob = nn.Softmax(dim=1)(y_pred)
    df_predictions = pd.DataFrame({
        'file': files_test.reshape(1,-1)[0],
        'subject': group_test.reshape(1,-1)[0],
        'y_true': torch.argmax(y_test.to("cpu"), 1).detach().numpy(),
        'y_pred': torch.argmax(y_pred.to("cpu"), 1).detach().numpy(),
        'y_pred_prob': [str(i) for i in y_pred_prob.cpu().detach().numpy()],
    })
    df_predictions.to_csv(path_save+'predictions_test.csv',index=False)

    if verbose:
        print('Loss:',ce.cpu().detach().numpy())
        print('WAcc:',WAcc)
        print('UAcc:',UAcc)
        print('kappa:',kappa)
        print('AUC:',auc)
        print('F1 score:',f1)
        print('precision:',precision)
        print('recall:',recall)
        print('CM:',cm)

    return df_predictions

def makedir(directory):
    """Create ``directory`` and any missing parents; do nothing if it exists.

    ``exist_ok=True`` replaces the separate existence check, so the call
    cannot fail when the directory appears between the check and the
    creation.

    Args:
        directory: Path of the directory to create.
    """
    os.makedirs(directory, exist_ok=True)
        
def set_seed(seed_value=13):
    """Seed every random number generator used here for reproducibility.

    The same value is given, in this order, to Python's ``random``, NumPy,
    PyTorch and PyTorch's CUDA generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed_value)

# Run models

In [62]:
# EMBEDDINGS
# Number of training epochs for every dataset / embedding model / pooling
# strategy. Each row is (model id, 'sum' epochs, 'max' epochs, 'mean' epochs).
_POOLED_EPOCHS = {
    'RAVDESS': [
        ('facebook-hubert-large-ll60k', 100, 1000, 1000),
        ('facebook/wav2vec2-xls-r-300m', 150, 2000, 3000),
        ('facebook/wav2vec2-large-xlsr-53', 250, 2000, 3000),
        ('facebook/wav2vec2-large-robust', 100, 3000, 3000),
        ('facebook/wav2vec2-large-xlsr-53-spanish', 100, 1000, 3000),
        ('microsoft/unispeech-sat-large', 100, 1000, 1000),
    ],
    'VOSOME': [
        ('facebook-hubert-large-ll60k', 100, 1000, 500),
        ('facebook/wav2vec2-xls-r-300m', 150, 1000, 3000),
        ('facebook/wav2vec2-large-xlsr-53', 100, 1000, 3000),
        ('facebook/wav2vec2-large-robust', 100, 1000, 3000),
        ('facebook/wav2vec2-large-xlsr-53-spanish', 150, 1000, 1500),
        ('microsoft/unispeech-sat-large', 100, 1000, 500),
    ],
    'IEMOCAP': [
        ('facebook-hubert-large-ll60k', 150, 150, 150),
        ('facebook/wav2vec2-xls-r-300m', 150, 1000, 3000),
        ('facebook/wav2vec2-large-xlsr-53', 150, 1000, 3000),
        ('facebook/wav2vec2-large-robust', 150, 3000, 3000),
        ('facebook/wav2vec2-large-xlsr-53-spanish', 150, 250, 1000),
        ('microsoft/unispeech-sat-large', 150, 150, 150),
    ],
}

# The x-vector model only has an entry for the 'mean' strategy and is listed
# last in every dataset.
define_epochs = {
    corpus: {
        **{
            model: {'sum': n_sum, 'max': n_max, 'mean': n_mean}
            for model, n_sum, n_max, n_mean in rows
        },
        'speechbrain-x-vector': {'mean': 150},
    }
    for corpus, rows in _POOLED_EPOCHS.items()
}

# Label columns available for each dataset
labels = {
    "RAVDESS": ['valence', 'arousal', 'emotion_code'],
    "IEMOCAP": ['valence', 'arousal', 'categories'],
    "VOSOME": [
        'valence_3r',
        'arousal_3r',
        'valence_psy',
        'arousal_psy',
        'valencia_combined_v2',
        'arousal_combined_v2',
        'emotion4',
    ],
}

# Pooling strategies of the embeddings
strategies = ['mean', 'sum', 'max']

# Embedding models
model_ids = [
    "facebook-hubert-large-ll60k",
    "facebook/wav2vec2-xls-r-300m",
    "facebook/wav2vec2-large-xlsr-53",
    "facebook/wav2vec2-large-robust",
    "facebook/wav2vec2-large-xlsr-53-spanish",
    "microsoft/unispeech-sat-large",
    "speechbrain-x-vector",
]

datasets = ['RAVDESS', 'VOSOME', 'IEMOCAP']

# Other approaches: 'embeddings', 'embeddings30s', 'embeddings_egemaps'
approaches = ['embeddings_egemaps', 'embeddings']

In [None]:
# ===========================================
# Overrides restricting this run to a subset of the configuration above
datasets = ['IEMOCAP']
labels = {"IEMOCAP": ['categories']}
# model_ids = ["microsoft/unispeech-sat-large"] # "microsoft/unispeech-sat-large"
# strategies = ['max','sum']
# approaches = ['embeddings']
# ===========================================
batch_size = 128  # 64, 256
path_folder_results = 'results_final/'

tic_all = time.time()
set_seed(seed_value=13)
for approach in approaches:
    for dataset in datasets:
        for label in labels[dataset]:
            label_dir = path_folder_results + dataset + '/' + approach + '/' + label + '/'
            makedir(label_dir)
            for model_name in model_ids:
                # Model ids contain '/', which is not usable in a folder name
                model_tag = model_name.replace("/", "-")
                model_dir = label_dir + model_tag + '/'
                makedir(model_dir)
                for emb_strategy in strategies:
                    # The x-vector model is only run with the 'mean' strategy
                    if model_name == "speechbrain-x-vector" and emb_strategy in ('sum', 'max'):
                        continue

                    # Prepare params
                    path_save = model_dir + emb_strategy + '/'
                    makedir(path_save)
                    path_emb = 'data/' + dataset + '/embeddings/' + model_tag + '/audio_embeddings_' + emb_strategy + '/'
                    if approach == 'embeddings_egemaps':
                        n_epochs = 50
                    else:
                        n_epochs = define_epochs[dataset][model_name][emb_strategy]

                    # Train and test models
                    tic = time.time()
                    (X_develop, X_test, y_develop, y_test,
                     group_develop, group_test,
                     files_develop, files_test) = prepare_data(approach, path_emb, dataset, label, verbose = False)
                    # Cross-validation step, currently disabled:
                    # train_model(dataset, approach, batch_size, X_develop, y_develop, group_develop, files_develop, path_save)
                    df_predictions = evaluate_model(
                        approach, n_epochs, batch_size,
                        X_develop, y_develop, X_test, y_test,
                        group_test, files_test, path_save, verbose=True,
                    )
                    toc = time.time()
                    print('======> ',approach,dataset,label,model_name,emb_strategy,' --> Duration:',round((toc-tic)/60,2),'min')

toc_all = time.time()
print('COMPLETED! Duration:',round((toc_all-tic_all)/60,2),'min')