---



# <font color='#ECA702'>**Análisis Multimodal de Síntomas en la Enfermedad de Parkinson**</font>

In [2]:
#@title **Importar librerías**

# Manipulación de datos
import os
import cv2
import numpy as np
import librosa
import pandas as pd

# Utils
from tqdm import tqdm

In [2]:
#@title **Cargamos los datos desde Drive**

from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/data_parkinson')
print(os.getcwd())

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1XRruCPRodR4OhRCRZYKsyODqa_GuBkfC/data_parkinson


# <font color='#4C5FDA'> **1. Creación del DataFrame**</color> <a name="tema2">


In [3]:
data = pd.read_csv("../data/data.csv", index_col='patient')

def string2array(string: str):
  """ Convierte un string con datos a numpy array """
  string = string.strip('[]')
  return np.fromstring(string, sep=',')

data['flatten_log_mel_spectogram'] = data['flatten_log_mel_spectogram'].apply(string2array)
data.head()

Unnamed: 0_level_0,audio_path,frames_path,label,flatten_log_mel_spectogram
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.66636657714844, -19.715749740600586, -24...."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-61.8838005065918, -28.279430389404297, -28.8..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-59.091400146484375, -25.001014709472656, -28..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.556915283203125, -20.145050048828125, -20..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-69.82708740234375, -20.313282012939453, -26...."


In [4]:
data['flatten_log_mel_spectogram'][0].shape

(900,)

In [5]:
#@title **Función para obtener una muestra**

def get_sample(tipo:float):

  sample = data[(data['label']==tipo)].sample(1)

  if tipo == 1.0:
    tipo = 'Parkinson'
  else:
    tipo = 'Control'
  sample = [sample['audio_path'].item(), sample['frames_path'].item(), tipo]
  return sample

#  <font color='#4C5FDA'> **2. Preprocesamiento de los datos 🧐**</color> <a name="tema7">


## <font color='#EB9A54'>**2.1 Extracción de características para la hipomimia**</font> <a name = "tema8">

### <font color="52F17F">**Cantidad promedio de frames por vídeo**</font>

In [5]:
def get_mean_video_duration(df):
    frames = []

    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            n_frames = len(os.listdir(record['frames_path']))
            frames.append(n_frames)

        except EOFError:
            print(f'Error reading video: {i}')
            pass

    return sum(frames) / len(frames)

In [6]:
FRAMES_PROMEDIO = get_mean_video_duration(data)

  0%|          | 0/1091 [00:00<?, ?it/s]

100%|██████████| 1091/1091 [00:00<00:00, 6358.68it/s]


In [7]:
print(f"{FRAMES_PROMEDIO*0.1:.0f}")

14


### <font color="52F17F">**Ejemplo con un sample y función de preprocesamiento**</font>

In [5]:
"""
Constante sacada de la media de cantidad de frames de todos los vídeos multiplicada por 0.1
para tener el 10%
"""

FRAMES_PROMEDIO = 14

In [6]:
def preprocess_frame(frame_path: str):
  frame = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE) # Leemos la imagen desde la ruta en escala de grises
  # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # La pasamos de BGR a RGB
  frame = cv2.resize(frame, (112, 112)) # Resize a 112x112
  frame = frame / 255. # Normalizamos entre 0 y 1
  return frame.flatten()
  # return frame

Cuando aplicamos concatenate(), todos los valores de los píxeles de los frames se concatenan en una única secuencia lineal. El orden es: primero todos los valores del primer frame, seguidos por todos los valores del segundo frame, y así sucesivamente.

In [7]:
def preprocess_video(video_path: str, n_frames = FRAMES_PROMEDIO):
  frames = []
  for frame in sorted(os.listdir(video_path))[:n_frames]:
    frames.append(preprocess_frame(f"{video_path}/{frame}"))
  return np.concatenate(frames)
  # return frames

In [8]:
sample_preprocessed_video = get_sample(1.0)[1] # Obtengo la ruta de un vídeo
sample_preprocessed_video = preprocess_video(sample_preprocessed_video) # Aplico la función de preprocesamiento

In [9]:
sample_preprocessed_video.shape #14×112×112×1=175616

(175616,)

### <font color="52F17F">**Aplicar el preprocesamiento a todos los vídeos**</font>

In [10]:
# Preprocesar todos los audios y almacenar los espectrogramas mel
data['flatten_frame_sequence'] = data['frames_path'].apply(preprocess_video)

# Verificar los datos
data.head()

Unnamed: 0_level_0,audio_path,frames_path,label,flatten_log_mel_spectogram,flatten_frame_sequence
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.66636657714844, -19.715749740600586, -24....","[0.44313725490196076, 0.4470588235294118, 0.44..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-61.8838005065918, -28.279430389404297, -28.8...","[0.4627450980392157, 0.4627450980392157, 0.458..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-59.091400146484375, -25.001014709472656, -28...","[0.4666666666666667, 0.4666666666666667, 0.474..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.556915283203125, -20.145050048828125, -20...","[0.44313725490196076, 0.44313725490196076, 0.4..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-69.82708740234375, -20.313282012939453, -26....","[0.4627450980392157, 0.45098039215686275, 0.45..."


In [11]:
data['flatten_frame_sequence'][0].shape

(175616,)

## <font color='#EB9A54'>**2.2 Fusión de características**</font> <a name = "tema8">

### <font color="52F17F">**Ejemplo con un sample y función de preprocesamiento**</font>

In [12]:
def preprocess_and_fuse(flatten_frame_sequence, flatten_log_mel_spectogram):
    """
    Esta función concatena los frames aplanados y el espectrograma mel aplanado.

    Args:
        flatten_frame_sequence: array de frames aplanados.
        flatten_log_mel_spectogram: array de espectrograma mel aplanado.

    Returns:
        fused_features: Un array que contiene la concatenación de los frames y el espectrograma mel.
    """
    # Concatenar los frames y el espectrograma mel
    return np.concatenate([flatten_frame_sequence, flatten_log_mel_spectogram])

In [13]:
random_idx = np.random.choice(range(1, len(data)))
sample_fusioned = data[['flatten_frame_sequence', 'flatten_log_mel_spectogram']].iloc[random_idx].values
sample_fusioned.shape

In [15]:
sample_fusioned = preprocess_and_fuse(sample_fusioned[0], sample_fusioned[1])
sample_fusioned.shape # Debería ser (14 * 112 * 112 + 900,)

### <font color="52F17F">**Aplicar la fusión a todos los registros**</font>

In [17]:
# Fusionar todas las características aplanadas
data['fused_flattened_features'] = data.apply(
    lambda row: preprocess_and_fuse(row['flatten_frame_sequence'], row['flatten_log_mel_spectogram']),
    axis=1
)

# Verificar los datos
data.head()

Unnamed: 0_level_0,audio_path,frames_path,label,flatten_log_mel_spectogram,flatten_frame_sequence,fused_flattened_features
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.66636657714844, -19.715749740600586, -24....","[0.44313725490196076, 0.4470588235294118, 0.44...","[0.44313725490196076, 0.4470588235294118, 0.44..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-61.8838005065918, -28.279430389404297, -28.8...","[0.4627450980392157, 0.4627450980392157, 0.458...","[0.4627450980392157, 0.4627450980392157, 0.458..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-59.091400146484375, -25.001014709472656, -28...","[0.4666666666666667, 0.4666666666666667, 0.474...","[0.4666666666666667, 0.4666666666666667, 0.474..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-60.556915283203125, -20.145050048828125, -20...","[0.44313725490196076, 0.44313725490196076, 0.4...","[0.44313725490196076, 0.44313725490196076, 0.4..."
C2,AudioVisualData_v7/Control/C2/Vowels/audio/C2-...,AudioVisualData_v7/Control/C2/Vowels/frames/C2...,0.0,"[-69.82708740234375, -20.313282012939453, -26....","[0.4627450980392157, 0.45098039215686275, 0.45...","[0.4627450980392157, 0.45098039215686275, 0.45..."


In [18]:
data['fused_flattened_features'][0].shape

(176516,)

# <font color='#4C5FDA'> **3. State-of-the-art ML 🦾**</color> <a name="tema8">

In [19]:
# Sklearn for datasets.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold

# Sklearn metrics.
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score)

# Sklearn models
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
# from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [20]:
models = [
    ('GNB', GaussianNB()),
    ('DT DEFAULT', DecisionTreeClassifier()),
    ('DT2', DecisionTreeClassifier(max_depth=2)),
    ('DT40', DecisionTreeClassifier(max_depth=40)),
    ('RF50', RandomForestClassifier(n_estimators=50)),
    ('RF100', RandomForestClassifier(n_estimators=100)),
    ('RF200', RandomForestClassifier(n_estimators=200)),
    ('SVM LINEAR', SVC(kernel='linear')),
    ('SVM RBF', SVC(kernel='rbf')),
    ('SVM POLY DEGREE 3', SVC(kernel='poly')),
    ('LR', LogisticRegression(max_iter=2000)),
]

<font color="52F17F">**Escalando las características con sklearn Standard Scaler**</font>

Con esto conseguimos [estandarizar](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) las características eliminando la media (es decir, $\mu = 0$) y escalando a la varianza unitaria (es decir $\sigma = 1$).

<center>
  <img src="https://imgs.search.brave.com/6nO8SCp_NlJdLQRnJRJDgmmNWZjgzK-BvBoRgq6HZfw/rs:fit:860:0:0/g:ce/aHR0cHM6Ly9taXJv/Lm1lZGl1bS5jb20v/bWF4LzE0MDAvMSpH/QWNFajM3c21DT0Na/TXJxcC1yampBLnBu/Zw" width="600" height="400">
</center>

The fit(data) method is used to compute the mean and std dev for a given feature to be used further for scaling. The transform(data) method is used to perform scaling using mean and std dev calculated using the . fit() method. The fit_transform() method does both fits and transform.

<font color="52F17F">**Partición de los datos**</font>

Dado que tenemos 14 pacientes, si hacemos 5 divisiones, cada división intentará mantener aproximadamente el mismo número de pacientes en cada conjunto. Cada división entrena con 11 pacientes y prueba con 3 pacientes. Esto asegura que los datos de un mismo paciente no se mezclen entre los conjuntos de entrenamiento y prueba. [Fuente](https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data).


In [82]:
# Crear el objeto GroupKFold
group_kfold = GroupKFold(n_splits=7)
X = np.ones((1091, 1))
y = np.ones((1091, 1))

groups = data.index.values

# Realizar la validación cruzada por grupos
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups), 1):
    # X_train, X_test = X[train_index], X[test_index]
    # y_train, y_test = y[train_index], y[test_index]
    train_groups = groups[train_index]
    test_groups = groups[test_index]

    print(f"División {i}:")
    print("  Pacientes en entrenamiento:", np.unique(train_groups))
    print("  Pacientes en prueba:", np.unique(test_groups))
    print("  Número de pacientes en entrenamiento:", len(np.unique(train_groups)))
    print("  Número de pacientes en prueba:", len(np.unique(test_groups)))
    print()

División 1:
  Pacientes en entrenamiento: ['C0' 'C1' 'C2' 'C3' 'C4' 'C6' 'P0' 'P1' 'P4' 'P5' 'P6' 'P7']
  Pacientes en prueba: ['C5' 'P8']
  Número de pacientes en entrenamiento: 12
  Número de pacientes en prueba: 2

División 2:
  Pacientes en entrenamiento: ['C0' 'C1' 'C2' 'C3' 'C5' 'C6' 'P0' 'P1' 'P4' 'P5' 'P6' 'P8']
  Pacientes en prueba: ['C4' 'P7']
  Número de pacientes en entrenamiento: 12
  Número de pacientes en prueba: 2

División 3:
  Pacientes en entrenamiento: ['C0' 'C1' 'C2' 'C4' 'C5' 'C6' 'P0' 'P1' 'P4' 'P5' 'P7' 'P8']
  Pacientes en prueba: ['C3' 'P6']
  Número de pacientes en entrenamiento: 12
  Número de pacientes en prueba: 2

División 4:
  Pacientes en entrenamiento: ['C0' 'C1' 'C3' 'C4' 'C5' 'C6' 'P0' 'P1' 'P4' 'P6' 'P7' 'P8']
  Pacientes en prueba: ['C2' 'P5']
  Número de pacientes en entrenamiento: 12
  Número de pacientes en prueba: 2

División 5:
  Pacientes en entrenamiento: ['C0' 'C2' 'C3' 'C4' 'C5' 'C6' 'P0' 'P1' 'P5' 'P6' 'P7' 'P8']
  Pacientes en prueba: [

<font color="52F17F">**Métricas de evaluación**</font>

En el caso del Parkinson, es más importante saber cuándo mi modelo identifica correctamente a una persona con Parkinson (True Positive), ya que si mi modelo indica que no tiene Parkinson y en realidad sí lo tiene (False Negative), se afecta significativamente su calidad de vida.

<center>
  <img src="https://www.tutorialexample.com/wp-content/uploads/2022/01/how-to-compute-accuracy-precision-recall-and-f1-score-in-machine-learning.png" width="600" height="400">
</center>

Por ende, la métrica que nos interesa es el <font color= "52F17F"> **recall.** </font>




## <font color='#EB9A54'>**3.1 Solo audio features**</font> <a name = "tema10">

In [89]:
X = np.array(data['flatten_log_mel_spectogram'].tolist())
y = data['label'].values

In [91]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [92]:
# Inicializar un diccionario para almacenar las métricas de evaluación
results = {name: {'accuracy': [], 'f1': [], 'precision': [], 'sensitivity': []} for name, _ in models}

# Crear el objeto GroupKFold
group_kfold = GroupKFold(n_splits=7)

groups = data.index.values

# Realizar la validación cruzada por grupos
for (train_index, test_index) in group_kfold.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    train_groups = groups[train_index]
    test_groups = groups[test_index]

    # Entrenar y evaluar cada modelo
    for name, model in models:
        clf = model
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        sensitivity = recall_score(y_test, y_pred)

        results[name]['accuracy'].append(accuracy)
        results[name]['f1'].append(f1)
        results[name]['precision'].append(precision)
        results[name]['sensitivity'].append(sensitivity)

In [93]:
# Calcular las métricas promedio para cada modelo
avg_results = {name: {metric: np.mean(scores) for metric, scores in metrics.items()} for name, metrics in results.items()}

# Mostrar resultados promedio
for name, metrics in avg_results.items():
    print(f"{11*'='} Resultados para {name} {11*'='}")
    for metric, score in metrics.items():
        print(f"  {metric}: {score:.4f}")
    print()

  accuracy: 0.4765
  f1: 0.4620
  precision: 0.4758
  sensitivity: 0.5186

  accuracy: 0.5141
  f1: 0.4925
  precision: 0.5071
  sensitivity: 0.4860

  accuracy: 0.5022
  f1: 0.3943
  precision: 0.4901
  sensitivity: 0.4181

  accuracy: 0.5206
  f1: 0.5004
  precision: 0.5149
  sensitivity: 0.4951

  accuracy: 0.5407
  f1: 0.4600
  precision: 0.5164
  sensitivity: 0.4546

  accuracy: 0.5581
  f1: 0.4799
  precision: 0.5366
  sensitivity: 0.4728

  accuracy: 0.5379
  f1: 0.4439
  precision: 0.5131
  sensitivity: 0.4398

  accuracy: 0.5087
  f1: 0.4881
  precision: 0.4973
  sensitivity: 0.4861

  accuracy: 0.5773
  f1: 0.4470
  precision: 0.5688
  sensitivity: 0.4124

  accuracy: 0.5490
  f1: 0.2690
  precision: 0.6482
  sensitivity: 0.1797

  accuracy: 0.5105
  f1: 0.4796
  precision: 0.4957
  sensitivity: 0.4732



## <font color='#EB9A54'>**3.2 Solo video features**</font> <a name = "tema9">

In [18]:
X = np.array(data['flatten_frame_sequence'].tolist())
y = data['label'].values

In [19]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [20]:
# Inicializar un diccionario para almacenar las métricas de evaluación
results = {name: {'accuracy': [], 'f1': [], 'precision': [], 'sensitivity': []} for name, _ in models}

# Crear el objeto GroupKFold
group_kfold = GroupKFold(n_splits=7)

groups = data.index.values

# Realizar la validación cruzada por grupos
for (train_index, test_index) in group_kfold.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    train_groups = groups[train_index]
    test_groups = groups[test_index]

    # Entrenar y evaluar cada modelo
    for name, model in models:
        clf = model
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        sensitivity = recall_score(y_test, y_pred)

        results[name]['accuracy'].append(accuracy)
        results[name]['f1'].append(f1)
        results[name]['precision'].append(precision)
        results[name]['sensitivity'].append(sensitivity)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
# Calcular las métricas promedio para cada modelo
avg_results = {name: {metric: np.mean(scores) for metric, scores in metrics.items()} for name, metrics in results.items()}

# Mostrar resultados promedio
for name, metrics in avg_results.items():
    print(f"{11*'='} Resultados para {name} {11*'='}")
    for metric, score in metrics.items():
        print(f"  {metric}: {score:.4f}")
    print()

  accuracy: 0.4766
  f1: 0.4328
  precision: 0.4571
  sensitivity: 0.5018

  accuracy: 0.4626
  f1: 0.4515
  precision: 0.4691
  sensitivity: 0.5136

  accuracy: 0.5759
  f1: 0.5692
  precision: 0.5408
  sensitivity: 0.6538

  accuracy: 0.4369
  f1: 0.4359
  precision: 0.4409
  sensitivity: 0.4597

  accuracy: 0.4831
  f1: 0.3917
  precision: 0.4556
  sensitivity: 0.4377

  accuracy: 0.4803
  f1: 0.3924
  precision: 0.4471
  sensitivity: 0.4341

  accuracy: 0.4785
  f1: 0.3715
  precision: 0.4275
  sensitivity: 0.4194

  accuracy: 0.7545
  f1: 0.6472
  precision: 0.7879
  sensitivity: 0.6300

  accuracy: 0.7779
  f1: 0.6830
  precision: 0.7697
  sensitivity: 0.7179

  accuracy: 0.4892
  f1: 0.1119
  precision: 0.4343
  sensitivity: 0.0660

  accuracy: 0.8019
  f1: 0.7029
  precision: 0.8754
  sensitivity: 0.6490



## <font color='#EB9A54'>**3.2 Fusion of features**</font> <a name = "tema9">

In [21]:
X = np.array(data['fused_flattened_features'].tolist())
y = data['label'].values

In [22]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [25]:
# Inicializar un diccionario para almacenar las métricas de evaluación
results = {name: {'accuracy': [], 'f1': [], 'precision': [], 'sensitivity': []} for name, _ in models}

# Crear el objeto GroupKFold
group_kfold = GroupKFold(n_splits=7)

groups = data.index.values

# Realizar la validación cruzada por grupos
for (train_index, test_index) in group_kfold.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    train_groups = groups[train_index]
    test_groups = groups[test_index]

    # Entrenar y evaluar cada modelo
    for name, model in models:
        clf = model
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        sensitivity = recall_score(y_test, y_pred)

        results[name]['accuracy'].append(accuracy)
        results[name]['f1'].append(f1)
        results[name]['precision'].append(precision)
        results[name]['sensitivity'].append(sensitivity)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [26]:
# Calcular las métricas promedio para cada modelo
avg_results = {name: {metric: np.mean(scores) for metric, scores in metrics.items()} for name, metrics in results.items()}

# Mostrar resultados promedio
for name, metrics in avg_results.items():
    print(f"{11*'='} Resultados para {name} {11*'='}")
    for metric, score in metrics.items():
        print(f"  {metric}: {score:.4f}")
    print()

  accuracy: 0.4757
  f1: 0.4310
  precision: 0.4553
  sensitivity: 0.5000

  accuracy: 0.4094
  f1: 0.3870
  precision: 0.3817
  sensitivity: 0.4231

  accuracy: 0.4441
  f1: 0.4420
  precision: 0.4164
  sensitivity: 0.5256

  accuracy: 0.4766
  f1: 0.5213
  precision: 0.4909
  sensitivity: 0.6132

  accuracy: 0.4309
  f1: 0.3134
  precision: 0.3378
  sensitivity: 0.3736

  accuracy: 0.4501
  f1: 0.3690
  precision: 0.4206
  sensitivity: 0.4139

  accuracy: 0.4565
  f1: 0.3736
  precision: 0.4078
  sensitivity: 0.4249

  accuracy: 0.7462
  f1: 0.6393
  precision: 0.7813
  sensitivity: 0.6188

  accuracy: 0.7779
  f1: 0.6831
  precision: 0.7681
  sensitivity: 0.7198

  accuracy: 0.4827
  f1: 0.1116
  precision: 0.4336
  sensitivity: 0.0660

  accuracy: 0.7945
  f1: 0.6908
  precision: 0.8659
  sensitivity: 0.6380



# <a name="tema2"> <font color='#4C5FDA'> **4. Bibliografía**</font> </a>


* GroupKFold. (s. f.). Scikit-learn. https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GroupKFold.html
* StandardScaler. (s. f.). Scikit-learn. https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html







