# Construcción de Grafos y Análisis de Conectividad

(Proyecto 1.b - Análisis de Grafos de Conectividad)

**Objetivo**: Construir grafos de conectividad ponderados a partir de correlaciones entre electrodos

**Pipeline**:

1. Cargar datos preprocesados
2. Calcular correlaciones por pares entre electrodos
3. Convertir correlaciones a matrices de distancia
4. Crear series temporales de grafos
5. Visualizar patrones de conectividad
6. Guardar datos de grafos para TDA

**Salida**: Series temporales de matrices de distancia listas para análisis topológico


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from pathlib import Path
from tqdm import tqdm
np.random.seed(42)



In [None]:
# Frequency bands for which connectivity graphs are built
FREQ_BANDS = ["delta", "theta", "alpha", "beta", "gamma"]

# Number of electrodes
N_ELECTRODES = 47

# Input (preprocessed windows) and output (graph matrices) locations
PREPROCESSED_DIR = Path("preprocessed")
GRAPHS_DIR = Path("graphs")

# Create the output root first, then one sub-directory per audio condition
for _out_dir in (GRAPHS_DIR, GRAPHS_DIR / "slow", GRAPHS_DIR / "fast"):
    _out_dir.mkdir(exist_ok=True)

# Echo the configuration so the notebook output records what was used
for _line in (
    f"Configuración:",
    f"Datos preprocesados: {PREPROCESSED_DIR}",
    f"Directorio de salida: {GRAPHS_DIR}",
    f"Bandas de frecuencia: {FREQ_BANDS}",
    f"Número de electrodos: {N_ELECTRODES}",
):
    print(_line)



### Funciones de Correlación y Matriz de Distancia


In [None]:
def compute_correlation_matrix(window_data):
    """
    Compute the Pearson correlation matrix between the rows of one window.

    Parameters:
    -----------
    window_data : array-like
        2-D data with one row per signal (electrode) and one column per
        sample, as expected by ``np.corrcoef``.

    Returns:
    --------
    corr_matrix : np.ndarray
        Correlation matrix between rows. Entries that ``np.corrcoef``
        leaves undefined (NaN) are replaced by 0.0; for a constant row this
        includes its own diagonal entry.
    """
    # A constant signal has zero variance, so corrcoef divides 0 by 0 for its
    # row/column. That case is handled right below, so the floating-point
    # warning it would emit is only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr_matrix = np.corrcoef(window_data)

    # Handle any NaN values (can occur if signal is constant)
    return np.nan_to_num(corr_matrix, nan=0.0)


def correlation_to_distance(corr_matrix, method="euclidean"):
    """
    Turn a correlation matrix into a distance matrix.

    Correlations are first clipped to [-1, 1]. Supported methods:

    - "euclidean" (default): d = sqrt(2 * (1 - r)), a metric distance that is
      Euclidean for standardized vectors.
    - "abs": d = 1 - |r| (not a strict metric distance).
    - "standard": d = 1 - r.
    - "sqrt": d = sqrt(1 - r**2).

    The result is floored at 0 and its diagonal is set to 0. The input
    array is not modified.

    Raises:
    -------
    ValueError
        If ``method`` is not one of the names above.
    """
    clipped = np.clip(corr_matrix, -1, 1)

    # One conversion rule per supported method name
    converters = {
        "euclidean": lambda r: np.sqrt(2 * (1 - r)),
        "abs": lambda r: 1 - np.abs(r),
        "standard": lambda r: 1 - r,
        "sqrt": lambda r: np.sqrt(1 - r**2),
    }

    convert = converters.get(method) if isinstance(method, str) else None
    if convert is None:
        raise ValueError(f"Unknown method: {method}")

    # Guard against tiny negative values, then force exact self-distance 0
    result = np.maximum(convert(clipped), 0)
    np.fill_diagonal(result, 0)
    return result




# Sanity check on synthetic data: random signals, one row per electrode
test_window = np.random.randn(N_ELECTRODES, 500)
corr_test = compute_correlation_matrix(test_window)
dist_test = correlation_to_distance(corr_test, method="euclidean")

# Gather the properties to report before printing them
corr_low, corr_high = corr_test.min(), corr_test.max()
corr_is_symmetric = np.allclose(corr_test, corr_test.T)
dist_low, dist_high = dist_test.min(), dist_test.max()
dist_is_symmetric = np.allclose(dist_test, dist_test.T)
dist_diagonal_is_zero = np.allclose(np.diag(dist_test), 0)

# Correlation matrix report
print(f"Forma de la matriz de correlación: {corr_test.shape}")
print(f"Rango de correlación: [{corr_low:.3f}, {corr_high:.3f}]")
print(f"Es simétrica: {corr_is_symmetric}")

# Distance matrix report
print(f"Forma de la matriz de distancia: {dist_test.shape}")
print(f"Rango de distancia: [{dist_low:.3f}, {dist_high:.3f}]")
print(f"Es simétrica: {dist_is_symmetric}")
print(f"La diagonal es cero: {dist_diagonal_is_zero}")




## Serie de tiempo de Grafos de Conectividad EEG


In [None]:
def process_file_graphs(file_dir, output_dir, freq_bands, distance_method="euclidean"):
    """
    Process one file: compute correlation and distance matrices for all windows and bands.

    For every band in ``freq_bands`` the file ``<file_dir>/<band>.npy`` is
    loaded, one correlation matrix and one distance matrix are computed per
    window, and both stacks are saved to ``<output_dir>/<file_dir.name>/`` as
    ``<band>_correlations.npy`` and ``<band>_distances.npy``. Bands whose
    input file does not exist are skipped and do not appear in the metadata.

    Parameters:
    -----------
    file_dir : Path
        Directory containing preprocessed data for one file
    output_dir : Path
        Directory to save graph data (created if missing)
    freq_bands : list
        List of frequency band names
    distance_method : str
        Method to convert correlation to distance

    Returns:
    --------
    metadata : dict
        ``{"filename": str, "bands": {band: {"n_windows": int,
        "n_electrodes": int}}}`` for the bands that were processed

    Raises:
    -------
    ValueError
        If a band file does not hold a 3-D array.
    """
    file_name = file_dir.name
    file_output_dir = output_dir / file_name
    # parents=True: do not fail just because the output root is missing
    file_output_dir.mkdir(parents=True, exist_ok=True)

    metadata = {"filename": file_name, "bands": {}}

    for band_name in freq_bands:
        # Load windowed data
        band_file = file_dir / f"{band_name}.npy"
        if not band_file.exists():
            continue

        windows = np.load(band_file)  # shape: (n_windows, n_electrodes, window_samples)
        if windows.ndim != 3:
            raise ValueError(
                f"{band_file}: expected a 3-D array "
                f"(n_windows, n_electrodes, window_samples), got shape {windows.shape}"
            )

        # Size the outputs from the data itself instead of a global constant,
        # so recordings with a different electrode count are handled too.
        n_windows, n_electrodes, _ = windows.shape

        # Preallocate arrays for time series of matrices
        correlation_matrices = np.zeros((n_windows, n_electrodes, n_electrodes))
        distance_matrices = np.zeros_like(correlation_matrices)

        # Process each window; window_data has shape (n_electrodes, window_samples)
        for i, window_data in enumerate(windows):
            corr_matrix = compute_correlation_matrix(window_data)
            correlation_matrices[i] = corr_matrix
            distance_matrices[i] = correlation_to_distance(
                corr_matrix, method=distance_method
            )

        # Save matrices
        np.save(file_output_dir / f"{band_name}_correlations.npy", correlation_matrices)
        np.save(file_output_dir / f"{band_name}_distances.npy", distance_matrices)

        metadata["bands"][band_name] = {
            "n_windows": n_windows,
            "n_electrodes": n_electrodes,
        }

    return metadata


# Test on one file.
# Take the first sub-directory in sorted order: iterdir() yields entries in
# arbitrary order and may also return stray files (e.g. ".DS_Store"), which
# would make this cell non-reproducible or silently process nothing.
test_file_dir = sorted(
    d for d in (PREPROCESSED_DIR / "slow").iterdir() if d.is_dir()
)[0]

print(f"Testing graph construction on: {test_file_dir.name}")

metadata_test = process_file_graphs(
    test_file_dir, GRAPHS_DIR / "slow", FREQ_BANDS, distance_method="euclidean"
)

# Report which bands were found and how many windows each one produced
print("\n✓ Graph construction test successful!")
print(f"  File: {metadata_test['filename']}")
print(f"  Bands processed: {list(metadata_test['bands'])}")
for band, info in metadata_test["bands"].items():
    print(f"    {band}: {info['n_windows']} windows")



In [None]:
# Example recording for the "fast" condition: first sub-directory in sorted
# order, so the choice is reproducible and never a stray non-directory entry.
test_file_dir2 = sorted(
    d for d in (PREPROCESSED_DIR / "fast").iterdir() if d.is_dir()
)[0]



In [None]:
def batch_process_graphs(input_dir, output_dir, freq_bands, distance_method="euclidean"):
    """
    Process all files in a directory to create graph time series.

    Every sub-directory of ``input_dir`` is handled in sorted order. A file
    that raises is reported and recorded, and processing continues with the
    next one.

    Parameters:
    -----------
    input_dir : Path
        Directory containing preprocessed files (one sub-directory per file)
    output_dir : Path
        Directory to save graph data (created if missing)
    freq_bands : list
        List of frequency band names
    distance_method : str
        Distance conversion method

    Returns:
    --------
    all_metadata : list
        List of metadata dicts, one per successfully processed file
    failed_files : list
        Names of the sub-directories whose processing raised an exception
    """
    file_dirs = sorted(d for d in input_dir.iterdir() if d.is_dir())
    all_metadata = []
    failed_files = []

    # Make sure the output root exists so individual files do not all fail
    # for the same avoidable reason.
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Processing {len(file_dirs)} files from {input_dir.name}...")

    for file_dir in tqdm(file_dirs, desc="Building graphs"):
        try:
            metadata = process_file_graphs(
                file_dir, output_dir, freq_bands, distance_method
            )
        except Exception as e:
            # Deliberately broad: this is a best-effort batch, one bad
            # recording must not abort the remaining ones.
            print(f"\nError processing {file_dir.name}: {e}")
            failed_files.append(file_dir.name)
        else:
            all_metadata.append(metadata)

    print("\n✓ Graph construction complete!")
    print(f"  Successfully processed: {len(all_metadata)} files")
    print(f"  Failed: {len(failed_files)} files")

    return all_metadata, failed_files


# Horizontal rule used to frame each section header in the output
_rule = "=" * 60

# Slow-audio condition
print(_rule)
print("BUILDING GRAPHS FOR SLOW AUDIO FILES")
print(_rule)
metadata_slow_graphs, failed_slow_graphs = batch_process_graphs(
    input_dir=PREPROCESSED_DIR / "slow",
    output_dir=GRAPHS_DIR / "slow",
    freq_bands=FREQ_BANDS,
    distance_method="euclidean",
)

# Fast-audio condition (blank line first to separate it from the previous run)
print(f"\n{_rule}")
print("BUILDING GRAPHS FOR FAST AUDIO FILES")
print(_rule)
metadata_fast_graphs, failed_fast_graphs = batch_process_graphs(
    input_dir=PREPROCESSED_DIR / "fast",
    output_dir=GRAPHS_DIR / "fast",
    freq_bands=FREQ_BANDS,
    distance_method="euclidean",
)



In [None]:
# Load example graph data for visualization: one "slow" and one "fast" recording
ruta_archivo_ejemplo = GRAPHS_DIR / "slow" / test_file_dir.name

ruta_archivo_fasr = GRAPHS_DIR / "fast" / test_file_dir2.name


# Load alpha-band correlations and distances for both recordings
correlaciones_alpha = np.load(ruta_archivo_ejemplo / "alpha_correlations.npy")
distancias_alpha = np.load(ruta_archivo_ejemplo / "alpha_distances.npy")

correlaciones_alpha2 = np.load(ruta_archivo_fasr / "alpha_correlations.npy")
distancias_alpha2 = np.load(ruta_archivo_fasr / "alpha_distances.npy")

print("Se cargaron datos del grafo:", ruta_archivo_ejemplo.name)
print("  Correlaciones de la banda alpha shape:", correlaciones_alpha.shape)
print("  Distancias de la banda alpha shape:", distancias_alpha.shape)

print("Se cargaron datos del grafo:", ruta_archivo_fasr.name)
print("  Correlaciones de la banda alpha shape:", correlaciones_alpha2.shape)
print("  Distancias de la banda alpha shape:", distancias_alpha2.shape)

# Plot the correlation matrix of one window for each recording.
# Window 30 is preferred, but it is clamped to the last window available in
# both recordings so that short files do not raise an IndexError.
indice_ventana = min(
    30,
    correlaciones_alpha.shape[0] - 1,
    correlaciones_alpha2.shape[0] - 1,
)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: slow recording (fixed [-1, 1] color scale so panels are comparable)
im1 = axes[0].imshow(
    correlaciones_alpha[indice_ventana], cmap="RdBu_r", vmin=-1, vmax=1
)
axes[0].set_title(
    "Matriz de Correlaciones - Ventana slow banda alpha",
    fontsize=12,
)
axes[0].set_xlabel("Electrodo")
axes[0].set_ylabel("Electrodo")
plt.colorbar(im1, ax=axes[0], label="Correlación")

# Right panel: fast recording, same color scale
im2 = axes[1].imshow(
    correlaciones_alpha2[indice_ventana], cmap="RdBu_r", vmin=-1, vmax=1
)
axes[1].set_title(
    "Matriz de Correlaciones - Ventana fast banda alfa",
    fontsize=12,
)
axes[1].set_xlabel("Electrodo")
axes[1].set_ylabel("Electrodo")
plt.colorbar(im2, ax=axes[1], label="Correlación")

plt.tight_layout()
plt.show()




### Cambios en Conectividad


In [None]:
# Analyze how connectivity changes over time:
# mean and spread of the pairwise correlations in each window.

# Index the strict upper triangle (diagonal excluded) so every electrode pair
# is counted once. The matrix size is read from the data rather than from a
# global constant, so this also works for other electrode counts.
n_nodes = correlaciones_alpha.shape[-1]
tri_rows, tri_cols = np.triu_indices(n_nodes, k=1)

# Shape (n_windows, n_pairs): one row of pairwise correlations per window
pair_correlations = correlaciones_alpha[:, tri_rows, tri_cols]

mean_corr_per_window = pair_correlations.mean(axis=1)
std_corr_per_window = pair_correlations.std(axis=1)

# Plot temporal evolution
fig, axes = plt.subplots(2, 1, figsize=(15, 8))

# Mean correlation over time
axes[0].plot(mean_corr_per_window, linewidth=2, color="steelblue")
axes[0].set_title("Mean Correlation Strength Over Time", fontweight="bold", fontsize=14)
axes[0].set_xlabel("Window Index")
axes[0].set_ylabel("Mean Correlation")
axes[0].grid(True, alpha=0.3)
# Zero reference line: separates net positive from net negative correlation
axes[0].axhline(y=0, color="r", linestyle="--", alpha=0.5)

# Std correlation over time
axes[1].plot(std_corr_per_window, linewidth=2, color="coral")
axes[1].set_title("Correlation Variability Over Time", fontweight="bold", fontsize=14)
axes[1].set_xlabel("Window Index")
axes[1].set_ylabel("Std of Correlation")
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"Mean correlation across all windows: {mean_corr_per_window.mean():.3f}")
print(f"Std of mean correlations: {mean_corr_per_window.std():.3f}")



## Resumen

1. Computed pairwise Pearson correlations between all electrode pairs
2. Converted correlations to distance matrices using the `euclidean` method, $d = \sqrt{2\,(1 - r)}$, where $r$ is the Pearson correlation
3. Created time series of graphs for each file and frequency band
4. Saved correlation and distance matrices for TDA analysis
5. Visualized connectivity patterns and temporal evolution

**Output Structure:**

```
graphs/
├── slow/
│   ├── bb01_ut01/
│   │   ├── delta_correlations.npy
│   │   ├── delta_distances.npy
│   │   ├── theta_correlations.npy
│   │   ├── theta_distances.npy
│   │   ├── alpha_correlations.npy
│   │   ├── alpha_distances.npy
│   │   ├── beta_correlations.npy
│   │   ├── beta_distances.npy
│   │   ├── gamma_correlations.npy
│   │   └── gamma_distances.npy
│   └── ...
└── fast/
    └── ...
```

- Correlation matrices are symmetric and properly normalized
- Distance matrices are non-negative with zero diagonal
- Connectivity patterns vary over time (temporal dynamics captured)
- Ready for topological analysis with Ripser
