# Etapa 2 - Paso 1: Carga y exploración multi-sujeto (WESAD)


```mermaid
    graph LR
        A[1 <br> Detectar carpetas de sujetos automáticamente]-->B[2 <br> Cargar y contar etiquetas por sujeto]
        B-->C[3 <br> Guardar tabla de conteos 0-7 y 1,2,3]
        C-->D[4 <br> Graficar barras apiladas 0..7 por sujeto]
        D-->E[5 <br> Gráfico normalizado para etiquetas 1,2,3]
        E-->F[6 <br> Impresión de resumen por consola]
```

In [None]:
import os
import re
import pickle
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import csv

# 0) Initial configuration
# Root folder of the WESAD dataset. Set the WESAD_PATH environment variable to
# point the notebook at another copy of the data; when it is unset (or empty)
# the original hard-coded location is used, so existing setups keep working.
PATH_BASE = os.environ.get("WESAD_PATH") or (
    r"C:/Users/nat27/Desktop/Desktop/Proyectos/CienciaDatos/wesad-stress-classifier/data/WESAD"
)

# Exclude subjects that have no data
EXCLUDE = {"S1", "S12"}

# 1) Detect subject folders automatically
# An entry of PATH_BASE is accepted as a subject only when its name is
# "S<digits>", it is not listed in EXCLUDE, and <name>/<name>.pkl exists.
subject_dirs = []
if os.path.isdir(PATH_BASE):
    subject_dirs = sorted(
        (
            name
            for name in os.listdir(PATH_BASE)
            if re.fullmatch(r"S\d+", name)
            and name not in EXCLUDE
            and os.path.isfile(os.path.join(PATH_BASE, name, f"{name}.pkl"))
        ),
        # Sort by the numeric part so that S10 follows S9 instead of S1.
        key=lambda s: int(s[1:]),
    )

# Fail early with a clear message instead of letting later cells run on an
# empty subject list (also covers the case where PATH_BASE does not exist).
if not subject_dirs:
    raise FileNotFoundError(
        f"No se encontraron sujetos válidos en {PATH_BASE}. "
        "Verifica la ruta PATH_BASE o la estructura de carpetas."
    )

print(f"Sujetos detectados: {subject_dirs}")

Sujetos detectados: ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S13', 'S14', 'S15', 'S16', 'S17']


In [None]:
# 2) Load and count labels per subject

all_labels = list(range(8))  # 0..7 according to WESAD
labels_distribution = {}     # { 'S2': Counter({...}), ... }

for subj in subject_dirs:
    pkl_path = os.path.join(PATH_BASE, subj, f"{subj}.pkl")
    # NOTE(security): pickle.load can execute arbitrary code while unpickling;
    # only point PATH_BASE at a trusted copy of the dataset.
    # NOTE(review): encoding='latin1' is presumably required because the files
    # were pickled under Python 2 — confirm against the dataset documentation.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
    labels = data['label']  # assumed to be a NumPy integer array — TODO confirm

    # Count inside NumPy instead of Counter(labels): iterating a large array
    # element by element in Python is slow and yields NumPy scalars as keys.
    # tolist() converts both values and counts to plain Python ints, so the
    # resulting Counter behaves exactly like the element-wise one for .get().
    values, counts = np.unique(labels, return_counts=True)
    labels_distribution[subj] = Counter(dict(zip(values.tolist(), counts.tolist())))


# 3) Save the count tables (labels 0-7 and labels 1,2,3)

csv_all = "labels_por_sujeto_0a7.csv"
csv_123 = "labels_por_sujeto_123.csv"

# Full table: one row per subject, one column per label in all_labels,
# plus the row total. Labels a subject never has are written as 0.
header_all = ["subject", *(f"label_{i}" for i in all_labels), "total"]
with open(csv_all, "w", newline="", encoding="utf-8") as out_all:
    table = csv.writer(out_all)
    table.writerow(header_all)
    for subj in subject_dirs:
        per_label = [labels_distribution[subj].get(i, 0) for i in all_labels]
        table.writerow([subj, *per_label, sum(per_label)])

# Reduced table: only labels 1, 2 and 3 and their combined total.
with open(csv_123, "w", newline="", encoding="utf-8") as out_123:
    table = csv.writer(out_123)
    table.writerow(["subject", "label_1", "label_2", "label_3", "total_123"])
    for subj in subject_dirs:
        picked = [labels_distribution[subj].get(k, 0) for k in (1, 2, 3)]
        table.writerow([subj, *picked, sum(picked)])

print(f"Tablas guardadas: {csv_all}, {csv_123}")

Tablas guardadas: labels_por_sujeto_0a7.csv, labels_por_sujeto_123.csv


In [None]:
# 4) Stacked bar chart of labels 0..7 per subject

# Count matrix: one row per subject, one column per label in all_labels;
# labels a subject does not have are filled with 0.
label_matrix = np.array(
    [[labels_distribution[subj].get(i, 0) for i in all_labels] for subj in subject_dirs],
    dtype=float,
)

fig, ax = plt.subplots(figsize=(12, 6))
bottom = np.zeros(len(subject_dirs))

# Draw one bar segment per label; `bottom` accumulates the height already
# drawn for each subject so the next segment is stacked on top of it.
for lbl, column in zip(all_labels, label_matrix.T):
    ax.bar(subject_dirs, column, bottom=bottom, label=f"Etiqueta {lbl}")
    bottom += column

ax.set(
    title="Distribución de etiquetas (0–7) por sujeto - WESAD",
    xlabel="Sujeto",
    ylabel="Cantidad de muestras",
)
ax.legend(ncol=4, fontsize=9)
plt.xticks(rotation=45)
fig.tight_layout()
fig.savefig("labels_por_sujeto_0a7.png", dpi=160)
# The figure is only written to disk; close it so it is not kept in memory.
plt.close(fig)

print("Gráfico guardado: labels_por_sujeto_0a7.png")

Gráfico guardado: labels_por_sujeto_0a7.png


In [None]:
# 5) Normalised chart for labels 1, 2, 3

labels_123 = [1, 2, 3]
# Count matrix restricted to labels_123: one row per subject.
mat_123 = np.array(
    [[labels_distribution[subj].get(lbl, 0) for lbl in labels_123] for subj in subject_dirs],
    dtype=float,
)
row_sums = mat_123.sum(axis=1, keepdims=True)
# Replace zero totals with 1 to avoid division by zero; such rows stay all-zero.
totals_123 = np.where(row_sums == 0, 1.0, row_sums)

# Each row now holds the share of labels 1, 2 and 3 for that subject.
mat_norm = mat_123 / totals_123

fig2, ax2 = plt.subplots(figsize=(12, 6))
bottom = np.zeros(len(subject_dirs))

# Stack the three proportions; `bottom` tracks the height drawn so far.
for lbl, share in zip(labels_123, mat_norm.T):
    ax2.bar(subject_dirs, share, bottom=bottom, label=f"Etiqueta {lbl}")
    bottom += share

ax2.set(
    title="Proporción de etiquetas 1 (baseline), 2 (estrés), 3 (diversión) por sujeto",
    xlabel="Sujeto",
    ylabel="Proporción",
)
ax2.legend(ncol=3, fontsize=9)
plt.xticks(rotation=45)
fig2.tight_layout()
fig2.savefig("labels_por_sujeto_123_prop.png", dpi=160)
# The figure is only written to disk; close it so it is not kept in memory.
plt.close(fig2)

print("Gráfico guardado: labels_por_sujeto_123_prop.png")

Gráfico guardado: labels_por_sujeto_123_prop.png


In [None]:
# 6) Print a console summary

print("\nResumen rápido (etiquetas 1/2/3):")
for subj in subject_dirs:
    dist = labels_distribution[subj]
    # Counts for labels 1, 2 and 3; a label the subject lacks counts as 0.
    c1, c2, c3 = (dist.get(k, 0) for k in (1, 2, 3))
    total = c1 + c2 + c3
    print(f"  {subj}: 1={c1:>6} | 2={c2:>6} | 3={c3:>6} | total_123={total:>7}")


Resumen rápido (etiquetas 1/2/3):
  S2: 1=800800 | 2=430500 | 3=253400 | total_123=1484700
  S3: 1=798000 | 2=448000 | 3=262500 | total_123=1508500
  S4: 1=810601 | 2=444500 | 3=260400 | total_123=1515501
  S5: 1=838600 | 2=451500 | 3=261800 | total_123=1551900
  S6: 1=826000 | 2=455000 | 3=260400 | total_123=1541400
  S7: 1=830200 | 2=448000 | 3=260401 | total_123=1538601
  S8: 1=818300 | 2=469000 | 3=258999 | total_123=1546299
  S9: 1=826000 | 2=451500 | 3=260400 | total_123=1537900
  S10: 1=826000 | 2=507500 | 3=260400 | total_123=1593900
  S11: 1=826000 | 2=476000 | 3=257600 | total_123=1559600
  S13: 1=826001 | 2=464800 | 3=267400 | total_123=1558201
  S14: 1=826000 | 2=472500 | 3=260401 | total_123=1558901
  S15: 1=822500 | 2=480200 | 3=260400 | total_123=1563100
  S16: 1=826000 | 2=471101 | 3=257600 | total_123=1554701
  S17: 1=826700 | 2=506100 | 3=260400 | total_123=1593200
