# EEG Recordings in pediatric patients with an epilepsy diagnosis based on the 10-20 system
https://openneuro.org/datasets/ds003555/versions/1.0.1

## Librerías

In [14]:
import mne
import numpy as np
import pandas as pd
from scipy.signal import coherence, hilbert
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import scipy.io
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
from matplotlib import colormaps
from networkx.algorithms.community import greedy_modularity_communities, modularity
import os

## Funciones

In [15]:
# creamos una función para leer los archivos
def leer_archivos_de_folder(carpeta_path):
    eeg_files_path = []
    for root, dirs, files in os.walk(carpeta_path):
        for file in files:
            if file.endswith(".edf"):
                eeg_files_path.append(os.path.join(root, file))
    return eeg_files_path

In [16]:
#16 mujeres
lista_paths_mujeres= leer_archivos_de_folder(r'C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\proyecto final\task-hfo-eeg\task-hfo-eeg-mujeres')
#14 hombres
lista_paths_hombres= leer_archivos_de_folder(r'C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\proyecto final\task-hfo-eeg\task-hfo-eeg-hombres')

In [44]:
ch_names= ['Fp1','A2','Fp2','F7','F3','Fz','F4','F8',
 'T3','C3','Cz','C4','T4','T5','P3','Pz',
 'P4','T6','O1','A1','O2','T1','T2']
n_channels= len(ch_names)

In [29]:
# creamos una función para procesar archivos
def cargayprocesamiento(eeg_file_path, fmin=4, fmax=7, duracion_seg=90):
    raw = mne.io.read_raw_edf(eeg_file_path, preload=False)
    raw.crop(tmin=0, tmax=duracion_seg)
    raw.load_data()
    raw.apply_function(lambda x: x.astype('float64'), picks=None)
    raw.filter(1., 45., fir_design='firwin')
    return raw.get_data()

In [33]:
eeg_files_procesados_mujeres = []
for path in lista_paths_mujeres:
    eeg_files_procesados_mujeres.append(cargayprocesamiento(path, fmin=4, fmax=7, duracion_seg=60))

Extracting EDF parameters from C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\proyecto final\task-hfo-eeg\task-hfo-eeg-mujeres\sub-01_ses-01_task-hfo_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 61440  =      0.000 ...    60.000 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 3381 samples (3.302 s)

Extracting EDF parameters from C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\pr

In [34]:
eeg_files_procesados_hombres = []
for path in lista_paths_hombres:
    eeg_files_procesados_hombres.append(cargayprocesamiento(path, fmin=4, fmax=7, duracion_seg=60))

Extracting EDF parameters from C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\proyecto final\task-hfo-eeg\task-hfo-eeg-hombres\sub-03_ses-01_task-hfo_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 61440  =      0.000 ...    60.000 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 3381 samples (3.302 s)

Extracting EDF parameters from C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\pr

ValueError: second must be in 0..59, not 60

In [35]:
#Creamos una función para obtener las matrices de adyacencia
def phase_locking_value(data, ch_names):
    n_channels = data.shape[0]
    analytic_signal = hilbert(data)
    phase_data = np.angle(analytic_signal)
    plv_matrix = np.zeros((n_channels, n_channels))
    for i in range(n_channels):
        for j in range(n_channels):
            phase_diff = phase_data[i] - phase_data[j]
            plv_matrix[i, j] = np.abs(
                np.sum(np.exp(1j * phase_diff)) / phase_diff.size)
    plv_df = pd.DataFrame(plv_matrix, index=ch_names, columns=ch_names)
    np.fill_diagonal(plv_df.values, 0)
    return plv_df

In [36]:
#Creamos una funcion para hacer un stack de los df
def stacks(lista_dfs_, th, porcentaje_):
    stack = np.stack([df.values for df in lista_dfs_])
    above = stack > th
    N = len(lista_dfs_) # = 109
    count_above = above.sum(axis=0)
    min_requerido = int(np.ceil(porcentaje_*N)) 
    result_bool = count_above >= min_requerido
    result_df = pd.DataFrame(result_bool, 
                             index=lista_dfs_[0].index, 
                             columns=lista_dfs_[0].columns).astype(int)
    return result_df

In [37]:
#Definir una función para crear un dataframe con los estadísticos: th25, th50, th75, mu, var, mu_sigma
def df_estadisticos(ensayos_dfs):
    th25, th50, th75, promedio, varianza, mu_sigma = [], [], [], [], [], []
    for sujeto in ensayos_dfs:
        data = sujeto.stack().values
        th25.append(np.percentile(data, 25))
        th50.append(np.percentile(data, 50))
        th75.append(np.percentile(data, 75))
        promedio.append(np.mean(data))
        varianza.append(np.var(data))
        mu_sigma.append(np.mean(data) + np.var(data))
    columnas = np.array([th25, th50, th75, promedio, varianza, mu_sigma]).T
    estadisticos_df = pd.DataFrame(
        index=['min', 'max', 'mediana', 'median_abs_deviation'],
        columns=['th25', 'th50', 'th75', 'promedio', 'varianza', 'mu_sigma']
    )
    estadisticos_df.loc['min'] = columnas.min(axis=0)
    estadisticos_df.loc['max'] = columnas.max(axis=0)
    estadisticos_df.loc['mediana'] = np.median(columnas, axis=0)
    estadisticos_df.loc['median_abs_deviation'] = stats.median_abs_deviation(columnas, axis=0)
    return estadisticos_df

In [38]:
#Crear una función para obtener las métricas del grafo
def metricas_grafo(G):
    clust_coeff = nx.average_clustering(G)
    try:
        path_length = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        path_length = np.nan 
    G_rand = nx.gnm_random_graph(n=G.number_of_nodes(), m=G.number_of_edges())
    clust_rand = nx.average_clustering(G_rand)
    path_rand = nx.average_shortest_path_length(G_rand)
    small_world_sigma = (clust_coeff / clust_rand) / (path_length / path_rand)
    from networkx.algorithms import community
    communities = community.greedy_modularity_communities(G)
    modularity = community.modularity(G, communities)
    degree_dict = dict(G.degree())
    betwenness = nx.betweenness_centrality(G)
    betwenness = sorted(betwenness.items(), key=lambda x: x[1], reverse=True)
    global_eff = nx.global_efficiency(G)
    local_eff = nx.local_efficiency(G)
    return (clust_coeff, path_length, small_world_sigma, communities, 
            modularity, betwenness, global_eff, local_eff, degree_dict)

In [39]:
# Grafo 2D
def grafo2D(df, pos):
    ensayo_grafo = nx.from_pandas_adjacency(df)
    nx.draw_circular(ensayo_grafo, with_labels=True, font_size=7, ax=pos)
    return ensayo_grafo

In [40]:
def grafo3D(coords, Hub, pos):
    x, y, z = coords['x'].values, coords['y'].values, coords['z'].values
    
    nodes_size = [30 if idx != Hub[0] else 200 for idx in coords.index]
    pos.scatter(x, y, z, alpha=0.5, s=nodes_size)
    for idx, (x_, y_, z_) in enumerate(zip(x, y, z)):
        pos.text(x_, y_, z_, coords.index[idx], fontsize=5)
        if coords.index[idx] == Hub[0]:
            pos.text(x_, y_, z_, 'HUB', color='red', fontweight='bold', fontsize=10)

In [41]:
def grafo_comunidades(comunidades, Hub, coords, pos):
    x, y, z = coords['x'].values, coords['y'].values, coords['z'].values
    
    nodes_size = [30 if idx != Hub[0] else 200 for idx in coords.index]
    pos.scatter(x, y, z, alpha=0.5, s=nodes_size)
    for idx, (x_, y_, z_) in enumerate(zip(x, y, z)):
        pos.text(x_, y_, z_, coords.index[idx], fontsize=5)
        if coords.index[idx] == Hub[0]:
            pos.text(x_, y_, z_, 'HUB', color='red', fontweight='bold', fontsize=10)
    
    colores = ['red', 'green', 'blue', 'black', 'orange']
    for n_comunidad, comunidad in enumerate(comunidades):
        for idx in range(len(comunidad)-1):
            n1, n2 = list(comunidad)[idx], list(comunidad)[idx+1]
            x_ = [coords.loc[n1, 'x'], coords.loc[n2, 'x']]
            y_ = [coords.loc[n1, 'y'], coords.loc[n2, 'y']]
            z_ = [coords.loc[n1, 'z'], coords.loc[n2, 'z']]
            pos.plot(x_, y_, z_, linewidth=3, alpha=0.4, color=colores[n_comunidad])

## Análisis para grupo mujeres

In [45]:
#Creamos una lista de las matrices de adyacencia para el grupo de mujeres
dfs_mujeres=[]
for data in eeg_files_procesados_mujeres:
    dfs_mujeres.append(phase_locking_value(data,ch_names))

ValueError: Shape of passed values is (24, 24), indices imply (23, 23)

In [46]:
raw = mne.io.read_raw_edf(lista_paths_mujeres[0], preload=False)
print("N canales EDF:", len(raw.ch_names))
print(raw.ch_names)

Extracting EDF parameters from C:\Users\jumma\OneDrive\Documentos\GitHub\Neurociencias-2026-1\S03_datasets\proyecto final\task-hfo-eeg\task-hfo-eeg-mujeres\sub-01_ses-01_task-hfo_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
N canales EDF: 23
['Fp1', 'A2', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz', 'C4', 'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'A1', 'O2', 'T1', 'T2']


In [None]:
#Análisis para grupo mujeres
ensayos_dfs = dfs_mujeres
th25, th50, th75, promedio, varianza = [], [], [], [], []
mu_sigma = []

fig, axes = plt.subplots(2, 2, figsize=(12, 7))
for sujeto in ensayos_dfs:
# Calculo de estadísticos
    data = sujeto.stack().values
    th25.append(np.percentile(data, 25))
    th50.append(np.percentile(data, 50))
    th75.append(np.percentile(data, 75))
    promedio.append(np.mean(data))
    varianza.append(np.var(data))
    mu_sigma.append(np.mean(data) + np.var(data))
    # Plots
    sns.histplot(sujeto.stack().values, ax=axes[0][0], kde=True)
    sns.ecdfplot(sujeto.stack().values, ax=axes[0][1])
axes[1][0].plot(th25, label='Percentil al 25%')
axes[1][0].plot(th50, label='Percentil al 50%')
axes[1][0].plot(th75, label='Percentil al 75%')
axes[1][0].plot(promedio, label='Media')
axes[1][0].plot(varianza, label='Varianza')
axes[1][0].plot(mu_sigma, label='mu+sigma')
axes[1][0].legend()

In [None]:
 df_estadisticos(dfs_mujeres)

In [None]:
# Análisis para valor de umbral al 60% de la población en el grupo mujeres
resultado_mujeres_df1 = stacks(dfs_mujeres, 0.2, 0.6)
resultado_mujeres_df2 = stacks(dfs_mujeres, 0.3, 0.6)
resultado_mujeres_df3 = stacks(dfs_mujeres, 0.5, 0.6)
resultado_mujeres_df4 = stacks(dfs_mujeres, 0.6, 0.6)

fig, axes = plt.subplots(2, 2, figsize=(8, 6))

sns.heatmap(resultado_mujeres_df1, ax=axes[0, 0])
sns.heatmap(resultado_mujeres_df2, ax=axes[0, 1])
sns.heatmap(resultado_mujeres_df3, ax=axes[1, 0])
sns.heatmap(resultado_mujeres_df4, ax=axes[1, 1])
axes[0, 0].set_title('umbral=0.2, poblacion>60%', fontsize=10)
axes[0, 1].set_title('umbral=0.3, poblacion>60%', fontsize=10)
axes[1, 0].set_title('umbral=0.5, poblacion>60%', fontsize=10)
axes[1, 1].set_title('umbral=0.6, poblacion>60%', fontsize=10)
fig.suptitle('Análisis para valor de umbral al 60% de la población en el grupo mujeres')
plt.tight_layout()

# AQUI YA NO CORRER

In [None]:
# Convertir de dataframe a grafo
mujeres_grafo_ = nx.from_pandas_adjacency(resultado_mujeres_df4)
# Cálculo de métricas por ensayo
(clust_coeff_m, path_length_m, small_world_sigma_m, communities_m, 
 modularity_m, betwenness_m, global_eff_m, local_eff_m, degree_dict_m) = metricas_grafo(mujeres_grafo_)
# Grafo de resultados para ensayo 3
hub = betwenness_m[0]
fig = plt.figure(figsize=(10, 8))
axes = [fig.add_subplot(2, 2, 1, projection='3d'), 
        fig.add_subplot(2, 2, 2, projection='3d'), 
        fig.add_subplot(2, 2, 3)]
grafo_comunidades(communities_m, hub, eeg_coords, axes[0])
axes[0].set_title('comunidades mujeres')
grafo3D(eeg_coords, hub, axes[1])
axes[1].set_title('hub mujeres')
ensayo_grafo = grafo2D(resultado_mujeres_df4, axes[2])
axes[2].set_title('grafo 2D mujeres')

## Análisis para grupo hombres

In [None]:
#Creamos una lista de las matrices de adyacencia para el grupo 2
dfs_hombres=[]
for data in eeg_files_procesados_hombres:
    dfs_hombres.append(phase_locking_value(data,23))

In [None]:
#Análisis para grupo hombres
ensayos_dfs = dfs_hombres
th25, th50, th75, promedio, varianza = [], [], [], [], []
mu_sigma = []

fig, axes = plt.subplots(2, 2, figsize=(12, 7))
for sujeto in ensayos_dfs:
# Calculo de estadísticos
    data = sujeto.stack().values
    th25.append(np.percentile(data, 25))
    th50.append(np.percentile(data, 50))
    th75.append(np.percentile(data, 75))
    promedio.append(np.mean(data))
    varianza.append(np.var(data))
    mu_sigma.append(np.mean(data) + np.var(data))
    # Plots
    sns.histplot(sujeto.stack().values, ax=axes[0][0], kde=True)
    sns.ecdfplot(sujeto.stack().values, ax=axes[0][1])
axes[1][0].plot(th25, label='Percentil al 25%')
axes[1][0].plot(th50, label='Percentil al 50%')
axes[1][0].plot(th75, label='Percentil al 75%')
axes[1][0].plot(promedio, label='Media')
axes[1][0].plot(varianza, label='Varianza')
axes[1][0].plot(mu_sigma, label='mu+sigma')
axes[1][0].legend()

In [None]:
df_estadisticos(dfs_hombres)

In [None]:
# Análisis para valor de umbral al 60% de la población en el grupo hombres
resultado_hombres_df1 = stacks(dfs_hombres, 0.2, 0.6)
resultado_hombres_df2 = stacks(dfs_hombres, 0.3, 0.6)
resultado_hombres_df3 = stacks(dfs_hombres, 0.5, 0.6)
resultado_hombres_df4 = stacks(dfs_hombres, 0.6, 0.6)

fig, axes = plt.subplots(2, 2, figsize=(8, 6))

sns.heatmap(resultado_hombres_df1, ax=axes[0, 0])
sns.heatmap(resultado_hombres_df2, ax=axes[0, 1])
sns.heatmap(resultado_hombres_df3, ax=axes[1, 0])
sns.heatmap(resultado_hombres_df4, ax=axes[1, 1])
axes[0, 0].set_title('umbral=0.2, poblacion>60%', fontsize=10)
axes[0, 1].set_title('umbral=0.3, poblacion>60%', fontsize=10)
axes[1, 0].set_title('umbral=0.5, poblacion>60%', fontsize=10)
axes[1, 1].set_title('umbral=0.6, poblacion>60%', fontsize=10)
fig.suptitle('Análisis para valor de umbral al 60% de la población en el grupo hombres')
plt.tight_layout()

In [None]:
# Convertir de dataframe a grafo
hombres_grafo_ = nx.from_pandas_adjacency(resultado_hombres_df4)
# Cálculo de métricas por ensayo
(clust_coeff_h, path_length_h, small_world_sigma_h, communities_h, 
 modularity_h, betwenness_h, global_eff_h, local_eff_h, degree_dict_h) = metricas_grafo(hombres_grafo_)
# Grafo de resultados para ensayo 3
hub = betwenness_h[0]
fig = plt.figure(figsize=(10, 8))
axes = [fig.add_subplot(2, 2, 1, projection='3d'), 
        fig.add_subplot(2, 2, 2, projection='3d'), 
        fig.add_subplot(2, 2, 3)]
grafo_comunidades(communities_h, hub, eeg_coords, axes[0])
axes[0].set_title('comunidades hombres')
grafo3D(eeg_coords, hub, axes[1])
axes[1].set_title('hub hombres')
ensayo_grafo = grafo2D(resultado_hombres_df4, axes[2])
axes[2].set_title('grafo 2D hombres
')