In [12]:
# 扩展：遍历每位病人，打印通道名列表（含标准化：T8-P8-0/1 -> T8-P8）
from pathlib import Path
import pandas as pd
import re
import mne

ROOT = Path('D:\陈教授组')
DATA_DIR = ROOT / 'CHB-MIT'
assert DATA_DIR.exists(), f'未找到数据目录: {DATA_DIR}'

# 通道名标准化：将破折号统一为 '-'，并去除结尾的 -数字 后缀
_dash_regex = re.compile(r'[–—−]')
_tail_digits_regex = re.compile(r'-\d+$')

def normalize_channel_name(name: str) -> str:
	s = (name or '').strip()
	s = _dash_regex.sub('-', s)
	s = _tail_digits_regex.sub('', s)
	return s

# 读取某病人的一个代表性 EDF 获取通道名（原始与标准化）
def get_patient_channels(patient_dir: Path):
	edf_files = sorted([p for p in patient_dir.glob('*.edf') if p.is_file()])
	if not edf_files:
		return [], []
	try:
		raw = mne.io.read_raw_edf(str(edf_files[0]), preload=False, verbose='ERROR')
		ch_names = list(raw.info.get('ch_names', []))
		norm_names = [normalize_channel_name(ch) for ch in ch_names]
		return ch_names, norm_names
	except Exception as e:
		print(f'读取失败 {patient_dir.name}: {e}')
		return [], []

patient_to_channels = {}
patient_to_channels_norm = {}
for pdir in sorted([d for d in DATA_DIR.iterdir() if d.is_dir() and d.name.startswith('chb')]):
	pid = pdir.name[:5] if len(pdir.name) >= 5 and pdir.name[3:5].isdigit() else pdir.name
	raw_chs, norm_chs = get_patient_channels(pdir)
	patient_to_channels[pid] = raw_chs
	patient_to_channels_norm[pid] = norm_chs
	print(f'{pid}: 原始通道数={len(raw_chs)} | 规范化后通道数={len(set(norm_chs))}')
	print(norm_chs)

# 汇总为长表（每行一个通道），包含原始与标准化
rows = []
for pid, raw_chs in patient_to_channels.items():
	norm_chs = patient_to_channels_norm.get(pid, [])
	for ch_raw, ch_norm in zip(raw_chs, norm_chs):
		rows.append({'patient_id': pid, 'channel_raw': ch_raw, 'channel_norm': ch_norm})
per_patient_channels_df = pd.DataFrame(rows)
per_patient_channels_df


chb01: 原始通道数=23 | 规范化后通道数=22
['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8']
chb02: 原始通道数=23 | 规范化后通道数=22
['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8']
chb03: 原始通道数=23 | 规范化后通道数=22
['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8']
chb04: 原始通道数=23 | 规范化后通道数=22
['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8']
chb05: 原始通道数

Unnamed: 0,patient_id,channel_raw,channel_norm
0,chb01,FP1-F7,FP1-F7
1,chb01,F7-T7,F7-T7
2,chb01,T7-P7,T7-P7
3,chb01,P7-O1,P7-O1
4,chb01,FP1-F3,FP1-F3
...,...,...,...
593,chb24,P7-T7,P7-T7
594,chb24,T7-FT9,T7-FT9
595,chb24,FT9-FT10,FT9-FT10
596,chb24,FT10-T8,FT10-T8


In [None]:
# 遍历所有 EDF：统计每文件原始通道数，并按病人汇总（用于验证 chb15 通道数）
from collections import Counter

per_file_counts_rows = []

for pdir in sorted([d for d in DATA_DIR.iterdir() if d.is_dir() and d.name.startswith('chb')]):
	pid = pdir.name[:5] if len(pdir.name) >= 5 and pdir.name[3:5].isdigit() else pdir.name
	for edf_path in sorted(pdir.glob('*.edf')):
		try:
			raw = mne.io.read_raw_edf(str(edf_path), preload=False, verbose='ERROR')
			ch_raw = list(raw.info.get('ch_names', []))
			per_file_counts_rows.append({
				'patient_id': pid,
				'edf_file': edf_path.name,
				'channel_raw_count': len(ch_raw)
			})
		except Exception as e:
			print(f'读取失败 {edf_path.name}: {e}')

per_file_counts_df = pd.DataFrame(per_file_counts_rows).sort_values(['patient_id','edf_file']).reset_index(drop=True)
print('按文件统计的原始通道数（前20行）:')
print(per_file_counts_df.head(20).to_string(index=False))

# 按病人汇总：最大/最小/众数通道数
agg_rows = []
for pid, g in per_file_counts_df.groupby('patient_id'):
	counts = g['channel_raw_count'].tolist()
	cnt = Counter(counts)
	mode_count = cnt.most_common(1)[0][0] if cnt else None
	agg_rows.append({
		'patient_id': pid,
		'num_files': len(g),
		'channel_raw_min': int(min(counts)) if counts else None,
		'channel_raw_max': int(max(counts)) if counts else None,
		'channel_raw_mode': int(mode_count) if mode_count is not None else None,
	})

per_patient_counts_summary_df = pd.DataFrame(agg_rows).sort_values('patient_id').reset_index(drop=True)
print('\n每位病人的原始通道数汇总（最小/最大/众数）:')
print(per_patient_counts_summary_df.to_string(index=False))



按文件统计的原始通道数（前20行）:
patient_id     edf_file  channel_raw_count
     chb01 chb01_01.edf                 23
     chb01 chb01_02.edf                 23
     chb01 chb01_03.edf                 23
     chb01 chb01_04.edf                 23
     chb01 chb01_05.edf                 23
     chb01 chb01_06.edf                 23
     chb01 chb01_07.edf                 23
     chb01 chb01_08.edf                 23
     chb01 chb01_09.edf                 23
     chb01 chb01_10.edf                 23
     chb01 chb01_11.edf                 23
     chb01 chb01_12.edf                 23
     chb01 chb01_13.edf                 23
     chb01 chb01_14.edf                 23
     chb01 chb01_15.edf                 23
     chb01 chb01_16.edf                 23
     chb01 chb01_17.edf                 23
     chb01 chb01_18.edf                 23
     chb01 chb01_19.edf                 23
     chb01 chb01_20.edf                 23

每位病人的原始通道数汇总（最小/最大/众数）:
patient_id  num_files  channel_raw_min  channel_raw_m

In [None]:
# 在删除通道数不对的edf文件后，检查每一个chb文件的通道数是否统一
keyword = "Channel 1:"
for i in range(1, 25):
    channel_change = -1
    summary_file = f"D:\陈教授组\CHB-MIT\chb{i:02d}\chb{i:02d}-summary.txt"
    with open(summary_file, 'r', encoding='utf-8') as file:
        for line in file:
            if keyword in line:
                channel_change += 1
    if channel_change == 0:
        print(f"chb{i} 没有通道变化")
    else:
        print(f"chb{i} 有 {channel_change} 个通道变化")

chb1 没有通道变化
chb2 没有通道变化
chb3 没有通道变化
chb4 没有通道变化
chb5 没有通道变化
chb6 没有通道变化
chb7 没有通道变化
chb8 没有通道变化
chb9 没有通道变化
chb10 没有通道变化
chb11 没有通道变化
chb12 有 3 个通道变化
chb13 有 5 个通道变化
chb14 没有通道变化
chb15 没有通道变化
chb16 没有通道变化
chb17 没有通道变化
chb18 没有通道变化
chb19 没有通道变化
chb20 没有通道变化
chb21 没有通道变化
chb22 没有通道变化
chb23 没有通道变化
chb24 没有通道变化


In [None]:
import pathlib
import mne
from collections import Counter
import warnings

# 遍历病人编号 chb01 到 chb24
for i in range(1, 25):
    patient_dir = pathlib.Path(f"D:\\陈教授组\\CHB-MIT\\chb{i:02d}")

    if not patient_dir.exists():
        print(f"路径不存在: {patient_dir}")
        continue

    edf_files = list(patient_dir.glob("*.edf"))
    if not edf_files:
        print(f"未找到 .edf 文件: {patient_dir}")
        continue

    file_channels = []

    for file in edf_files:
        try:
            # 👇 使用 warnings.filterwarnings 临时屏蔽 MNE 的通道重复警告
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", message="Channel names are not unique", category=RuntimeWarning)
                raw = mne.io.read_raw_edf(file, preload=True)

            # ✅ 检查原始 ch_names 是否有重复
            ch_names = raw.ch_names
            counter = Counter(ch_names)
            duplicates = [ch for ch, cnt in counter.items() if cnt > 1]

            if duplicates:
                print(f"文件 {file.name} 存在重复通道: {duplicates}，正在删除多余的...")
                # 保留第一次出现，删除后续重复
                seen = set()
                drop_list = []
                for ch in ch_names:
                    if ch in seen:
                        drop_list.append(ch)
                    else:
                        seen.add(ch)
                raw.drop_channels(drop_list)
                print(f"已删除通道: {drop_list}")

            # ✅ 确保现在无重复
            assert len(raw.ch_names) == len(set(raw.ch_names)), "去重失败"

            # 添加通道集合用于一致性比较
            file_channels.append(frozenset(raw.ch_names))

        except Exception as e:
            print(f"处理文件失败 {file}: {e}")

    # === 最终判断所有文件通道是否一致 ===
    if len(set(file_channels)) == 1:
        print(f"chb{i:02d} 所有文件的通道名称一致 ✅")
    else:
        print(f"chb{i:02d} 存在通道名称不一致的文件 ❌")
        # 可选：输出差异
        unique_sets = set(file_channels)
        print(f"  共有 {len(unique_sets)} 种不同的通道组合")

In [1]:
import pathlib
import mne
import warnings

# === 在读取每个文件时局部屏蔽警告 ===
intersection = None

for i in range(1, 25):
    if i in (12, 13):
        continue

    patient_dir = pathlib.Path(f"D:\\陈教授组\\CHB-MIT\\chb{i:02d}")
    edf_files = list(patient_dir.glob("*.edf"))
    if not edf_files:
        continue

    # 局部屏蔽警告
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", 
                                message="Channel names are not unique", 
                                category=RuntimeWarning)
        raw = mne.io.read_raw_edf(edf_files[0], preload=True)

    common_ch_names = set(raw.ch_names)

    if intersection is None:
        intersection = common_ch_names
    else:
        intersection &= common_ch_names

print("所有病人共有的通道：")
print(sorted(intersection))



Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb01\chb01_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb02\chb02_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb03\chb03_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb04\chb04_07.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1918975  =      0.000 ...  7495.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb05\chb05_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 .

--0, --1, --2, --3, --4
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb15\chb15_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


--0, --1, --2, --3, --4, --5
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb16\chb16_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


--0, --1, --2, --3, --4
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb17\chb17a_03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


--0, --1, --2, --3, --4
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb18\chb18_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb19\chb19_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


--0, --1, --2, --3, --4
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb20\chb20_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 923135  =      0.000 ...  3605.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb21\chb21_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


--0, --1, --2, --3, --4
  raw = mne.io.read_raw_edf(edf_files[0], preload=True)


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb22\chb22_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb23\chb23_06.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 1916415  =      0.000 ...  7485.996 secs...
Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb24\chb24_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
所有病人共有的通道：
['C3-P3', 'C4-P4', 'CZ-PZ', 'F3-C3', 'F4-C4', 'F7-T7', 'F8-T8', 'FP1-F3', 'FP1-F7', 'FP2-F4', 'FP2-F8', 'FT10-T8', 'FT9-FT10', 'FZ-CZ', 'P3-O1', 'P4-O2', 'P7-O1', 'P7-T7', 'P8-O2', 'T7-FT9', 'T7-P7', 'T8-P8-0', 'T8-P8-1']


In [23]:
print(len(sorted(intersection)))

23


In [1]:
import mne
import numpy as np

file_path = r"D:\陈教授组\CHB-MIT\chb01\chb01_01.edf"
raw = mne.io.read_raw_edf(file_path, preload=True)
data = raw.get_data()
ch_names = raw.ch_names

print(f"原始通道数量: {len(ch_names)}")

# 检查是否有信号数据完全相同的通道
duplicates_to_remove = []
for i in range(len(ch_names)):
    for j in range(i + 1, len(ch_names)):
        if np.array_equal(data[i], data[j]):
            print(f"发现信号完全相同的通道: {ch_names[i]} 和 {ch_names[j]}")
            duplicates_to_remove.append(ch_names[j])  # 记录下标j的通道（第二个出现的）

# 去重，只保留第一个
unique_channels_to_keep = [ch for ch in ch_names if ch not in duplicates_to_remove]

if duplicates_to_remove:
    print(f"将移除以下重复通道: {duplicates_to_remove}")
    raw_clean = raw.copy().pick_channels(unique_channels_to_keep)
    print(f"最终通道数量: {len(raw_clean.ch_names)}")
    sig = raw_clean.get_data()
else:
    print("未发现信号完全重复的通道。")
    sig = data

print(f"最终数据形状: {sig.shape}")
'''
# 取两个通道的数据
sig0 = raw.get_data(picks=["T8-P8-0"])
sig1 = raw.get_data(picks=["T8-P8-1"])

# 判断是否完全相同
print(np.array_equal(sig0, sig1))   # True 表示完全相同
'''


Extracting EDF parameters from D:\陈教授组\CHB-MIT\chb01\chb01_01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 921599  =      0.000 ...  3599.996 secs...


  raw = mne.io.read_raw_edf(file_path, preload=True)


原始通道数量: 23
发现信号完全相同的通道: T8-P8-0 和 T8-P8-1
将移除以下重复通道: ['T8-P8-1']
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
最终通道数量: 22
最终数据形状: (22, 921600)


'\n# 取两个通道的数据\nsig0 = raw.get_data(picks=["T8-P8-0"])\nsig1 = raw.get_data(picks=["T8-P8-1"])\n\n# 判断是否完全相同\nprint(np.array_equal(sig0, sig1))   # True 表示完全相同\n'