In [10]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, AveragePooling2D, Flatten, Dense
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import tensorflow as tf

In [11]:
# Turn on memory growth for every GPU that TensorFlow reports.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # Request memory growth on each physical device in turn.
        for physical_device in gpus:
            tf.config.experimental.set_memory_growth(physical_device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        n_physical, n_logical = len(gpus), len(logical_gpus)
        print(n_physical, "Physical GPUs,", n_logical, "Logical GPUs")
    except RuntimeError as err:
        # Report the error and carry on instead of aborting the notebook.
        print(err)

1 Physical GPUs, 1 Logical GPUs


In [17]:
# Folders holding the epoch files of the ADHD and Control groups.
# The original machine-specific folders remain the defaults, so behaviour is
# unchanged when nothing is configured. Set the ADHD_EPOCHS_PATH and/or
# CONTROL_EPOCHS_PATH environment variables to run the notebook against data
# stored elsewhere without editing this cell.
adhd_epochs_path = os.environ.get(
    "ADHD_EPOCHS_PATH",
    r"C:\Users\dlwld\Desktop\comprehensive_design\DataSet Files\ADHD_epochs_modify",
)
control_epochs_path = os.environ.get(
    "CONTROL_EPOCHS_PATH",
    r"C:\Users\dlwld\Desktop\comprehensive_design\DataSet Files\Control_epochs",
)

# Group epochs by subject so that data from a subject used for training
# never ends up in the validation data.
def load_epochs_by_subject(folder_path, label):
    """Load every ``.npy`` epoch file in a folder, grouped by subject ID.

    The subject ID is the part of the file name before the first underscore
    (e.g. ``'v1p_epoch_1.npy'`` -> ``'v1p'``).

    File names are visited in sorted order: ``os.listdir`` returns entries in
    arbitrary order, so without sorting the order of the returned subjects
    (and of each subject's epochs) could differ between machines, which in
    turn would change any seeded split performed on the subject list.

    Args:
        folder_path: Directory containing the ``.npy`` epoch files.
        label: Class label attached to every epoch loaded from this folder.

    Returns:
        dict mapping subject ID -> list of ``(epoch_data, label)`` tuples.
        Files that do not end in ``.npy`` are ignored.
    """
    subject_epochs = {}
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".npy"):
            continue
        # Subject ID is everything before the first underscore.
        subject_id = file_name.split('_')[0]
        epoch_data = np.load(os.path.join(folder_path, file_name))
        subject_epochs.setdefault(subject_id, []).append((epoch_data, label))
    return subject_epochs

# Load both groups subject by subject (ADHD -> label 1, Control -> label 0).
adhd_subject_epochs = load_epochs_by_subject(adhd_epochs_path, label=1)
control_subject_epochs = load_epochs_by_subject(control_epochs_path, label=0)

# Subject ID lists; iterating a dict yields its keys.
adhd_subjects = list(adhd_subject_epochs)
control_subjects = list(control_subject_epochs)

# Split the subjects of each group 80% train / 20% validation with a fixed
# seed. Splitting subjects (not epochs) keeps each subject on one side only.
_subject_split_options = {"test_size": 0.2, "random_state": 42}
adhd_train_subjects, adhd_test_subjects = train_test_split(
    adhd_subjects, **_subject_split_options
)
control_train_subjects, control_test_subjects = train_test_split(
    control_subjects, **_subject_split_options
)

# Builds the feature / label arrays for a chosen set of subjects.
def create_dataset(subject_epochs, subjects):
    """Stack the epochs of the given subjects into feature and label arrays.

    Args:
        subject_epochs: dict mapping subject ID -> list of
            ``(epoch_data, label)`` tuples.
        subjects: Iterable of subject IDs to include, in output order.

    Returns:
        ``(X, y)`` where ``X`` is ``np.array`` of the epoch data and ``y`` is
        ``np.array`` of the matching labels, ordered subject by subject.
    """
    # Flatten subject -> epochs into one ordered list of (epoch, label) pairs.
    pairs = [pair for subject in subjects for pair in subject_epochs[subject]]
    features = [epoch_data for epoch_data, _ in pairs]
    targets = [label for _, label in pairs]
    return np.array(features), np.array(targets)

# Per-group arrays for the training subjects and for the held-out subjects.
X_train_adhd, y_train_adhd = create_dataset(adhd_subject_epochs, adhd_train_subjects)
X_train_control, y_train_control = create_dataset(control_subject_epochs, control_train_subjects)
X_test_adhd, y_test_adhd = create_dataset(adhd_subject_epochs, adhd_test_subjects)
X_test_control, y_test_control = create_dataset(control_subject_epochs, control_test_subjects)

# Stack ADHD first, then Control, along the epoch axis.
X_train = np.concatenate((X_train_adhd, X_train_control), axis=0)
X_test = np.concatenate((X_test_adhd, X_test_control), axis=0)
y_train = np.concatenate((y_train_adhd, y_train_control), axis=0)
y_test = np.concatenate((y_test_adhd, y_test_control), axis=0)

# Reshape to (number of epochs, channels, samples, filters) = (N, 19, 15360, 1).
X_train = X_train.reshape((len(X_train), 19, 15360, 1))
X_test = X_test.reshape((len(X_test), 19, 15360, 1))

# Report the overall dataset sizes.
print(f"Training set size: {X_train.shape[0]} epochs, Labels: {y_train.shape[0]}")
print(f"Validation set size: {X_test.shape[0]} epochs, Labels: {y_test.shape[0]}")

# Report the training / validation sizes of each group separately.
print(f"ADHD Training set size: {X_train_adhd.shape[0]} epochs")
print(f"Control Training set size: {X_train_control.shape[0]} epochs")
print(f"ADHD Validation set size: {X_test_adhd.shape[0]} epochs")
print(f"Control Validation set size: {X_test_control.shape[0]} epochs")

Training set size: 352 epochs, Labels: 352
Validation set size: 88 epochs, Labels: 88
ADHD Training set size: 172 epochs
Control Training set size: 180 epochs
ADHD Validation set size: 48 epochs
Control Validation set size: 40 epochs


In [18]:
# Per-repeat evaluation metrics; every training run appends one value to each list.
train_accuracy_list = []
accuracy_list = []
precision_adhd_list = []
precision_control_list = []
recall_adhd_list = []
recall_control_list = []
f1_adhd_list = []
f1_control_list = []
auc_list = []

# Number of independent training runs.
num_repeats = 10

# Training hyper-parameters (values unchanged; named so they can be tuned in one place).
NUM_EPOCHS = 30
BATCH_SIZE = 16
LEARNING_RATE = 0.001
# NOTE(review): patience equals NUM_EPOCHS, so EarlyStopping can never cut a
# run short with these values. Also, `validation_data` below is the same
# held-out set the reported metrics are computed on, so any epoch selection
# driven by `val_loss` would be tuned on the evaluation data. A subject-level
# validation split carved out of the training subjects would be needed before
# lowering the patience -- confirm the intended protocol.
EARLY_STOPPING_PATIENCE = 30


def _build_cnn_model(input_shape=(19, 15360, 1), learning_rate=LEARNING_RATE):
    """Build and compile a fresh CNN binary classifier.

    Two convolution blocks act along the first (channel) axis, two along the
    second (time-sample) axis, followed by dense layers and a sigmoid output.
    The shape comments below hold for the default ``input_shape``.

    Args:
        input_shape: Shape of one input example, ``(channels, samples, 1)``.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # First spatial block
    model.add(Conv2D(16, (10, 1), activation='relu', input_shape=input_shape, padding='valid'))  # Output Shape: (10, 15360, 16)
    model.add(BatchNormalization())
    model.add(AveragePooling2D(pool_size=(2, 1)))  # Output Shape: (5, 15360, 16)

    # Second spatial block
    model.add(Conv2D(16, (4, 1), activation='relu', padding='valid'))  # Output Shape: (2, 15360, 16)
    model.add(BatchNormalization())
    model.add(AveragePooling2D(pool_size=(2, 1)))  # Output Shape: (1, 15360, 16)

    # First temporal block
    model.add(Conv2D(32, (1, 128), activation='relu', padding='valid'))  # Output Shape: (1, 15233, 32)
    model.add(BatchNormalization())
    model.add(AveragePooling2D(pool_size=(1, 64)))  # Output Shape: (1, 238, 32)

    # Second temporal block
    model.add(Conv2D(32, (1, 64), activation='relu', padding='valid'))  # Output Shape: (1, 175, 32)
    model.add(BatchNormalization())
    model.add(AveragePooling2D(pool_size=(1, 32)))  # Output Shape: (1, 5, 32)

    # Feature selection / classification head
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Output layer

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    return model


def _mean_std(values):
    """Return ``(np.mean(values), np.std(values))`` for a list of per-repeat scores."""
    return np.mean(values), np.std(values)


# Train and evaluate the CNN `num_repeats` times.
for i in range(num_repeats):
    print(f"Training iteration {i + 1}/{num_repeats}")

    # Release the Keras global state left by the previous repeat; otherwise
    # every rebuilt model adds to it and memory use grows across the loop.
    tf.keras.backend.clear_session()

    # Fresh model and callback for this repeat.
    model = _build_cnn_model()
    early_stopping = EarlyStopping(monitor='val_loss', patience=EARLY_STOPPING_PATIENCE, restore_best_weights=True)

    # Fit on the training epochs, monitoring the held-out epochs.
    model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    # Predictions on the training data (probabilities thresholded at 0.5).
    y_train_pred_probs = model.predict(X_train).flatten()
    y_train_pred = (y_train_pred_probs > 0.5).astype(int)

    # Training accuracy
    train_acc = accuracy_score(y_train, y_train_pred)
    train_accuracy_list.append(train_acc)

    # Predictions on the held-out data.
    y_pred_probs = model.predict(X_test).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # Accuracy
    acc = accuracy_score(y_test, y_pred)
    accuracy_list.append(acc)

    # Precision, with each class in turn treated as the positive label
    precision_adhd = precision_score(y_test, y_pred, pos_label=1)
    precision_control = precision_score(y_test, y_pred, pos_label=0)
    precision_adhd_list.append(precision_adhd)
    precision_control_list.append(precision_control)

    # Recall
    recall_adhd = recall_score(y_test, y_pred, pos_label=1)
    recall_control = recall_score(y_test, y_pred, pos_label=0)
    recall_adhd_list.append(recall_adhd)
    recall_control_list.append(recall_control)

    # F1-score
    f1_adhd = f1_score(y_test, y_pred, pos_label=1)
    f1_control = f1_score(y_test, y_pred, pos_label=0)
    f1_adhd_list.append(f1_adhd)
    f1_control_list.append(f1_control)

    # AUC is computed from the predicted probabilities, not the thresholded labels.
    auc = roc_auc_score(y_test, y_pred_probs)
    auc_list.append(auc)

# Mean and standard deviation of every metric across the repeats.
train_accuracy_mean, train_accuracy_std = _mean_std(train_accuracy_list)
accuracy_mean, accuracy_std = _mean_std(accuracy_list)
precision_adhd_mean, precision_adhd_std = _mean_std(precision_adhd_list)
precision_control_mean, precision_control_std = _mean_std(precision_control_list)
recall_adhd_mean, recall_adhd_std = _mean_std(recall_adhd_list)
recall_control_mean, recall_control_std = _mean_std(recall_control_list)
f1_adhd_mean, f1_adhd_std = _mean_std(f1_adhd_list)
f1_control_mean, f1_control_std = _mean_std(f1_control_list)
auc_mean, auc_std = _mean_std(auc_list)

# Print the summary (accuracies as percentages, other metrics as fractions).
print(f"\nCNN Classification Results Over {num_repeats} Repeated Training Times")
print(f"Train Accuracy(%): {train_accuracy_mean*100:.2f} ± {train_accuracy_std*100:.2f}")
print(f"Accuracy(%): {accuracy_mean*100:.2f} ± {accuracy_std*100:.2f}")
print(f"Precision ADHD: {precision_adhd_mean:.2f} ± {precision_adhd_std:.2f}")
print(f"Precision Control: {precision_control_mean:.2f} ± {precision_control_std:.2f}")
print(f"Recall ADHD: {recall_adhd_mean:.2f} ± {recall_adhd_std:.2f}")
print(f"Recall Control: {recall_control_mean:.2f} ± {recall_control_std:.2f}")
print(f"F1-score ADHD: {f1_adhd_mean:.2f} ± {f1_adhd_std:.2f}")
print(f"F1-score Control: {f1_control_mean:.2f} ± {f1_control_std:.2f}")
print(f"AUC: {auc_mean:.2f} ± {auc_std:.2f}")

Training iteration 1/10
Training iteration 2/10
Training iteration 3/10
Training iteration 4/10
Training iteration 5/10
Training iteration 6/10
Training iteration 7/10
Training iteration 8/10
Training iteration 9/10
Training iteration 10/10

CNN Classification Results Over 10 Repeated Training Times
Train Accuracy: 0.99 ± 0.02
Accuracy: 0.79 ± 0.04
Precision ADHD: 0.88 ± 0.06
Precision Control: 0.72 ± 0.05
Recall ADHD: 0.71 ± 0.08
Recall Control: 0.88 ± 0.07
F1-score ADHD: 0.78 ± 0.05
F1-score Control: 0.79 ± 0.03
AUC: 0.90 ± 0.03


In [23]:
# Number of completed repeats, taken from the length of the accuracy list.
num_repeats = len(accuracy_list)

# Print the metrics of every repeat, followed by a blank separator line.
for i in range(num_repeats):
    report_lines = [
        f"--- 반복 {i + 1} ---",
        f"Train Accuracy: {train_accuracy_list[i]*100:.2f}",
        f"Test Accuracy: {accuracy_list[i]*100:.2f}",
        f"Precision ADHD: {precision_adhd_list[i]:.4f}",
        f"Precision Control: {precision_control_list[i]:.4f}",
        f"Recall ADHD: {recall_adhd_list[i]:.4f}",
        f"Recall Control: {recall_control_list[i]:.4f}",
        f"F1-score ADHD: {f1_adhd_list[i]:.4f}",
        f"F1-score Control: {f1_control_list[i]:.4f}",
        f"AUC: {auc_list[i]:.4f}",
    ]
    print("\n".join(report_lines))
    print()

--- 반복 1 ---
Train Accuracy: 100.00
Test Accuracy: 77.27
Precision ADHD: 0.8333
Precision Control: 0.7174
Recall ADHD: 0.7292
Recall Control: 0.8250
F1-score ADHD: 0.7778
F1-score Control: 0.7674
AUC: 0.8745

--- 반복 2 ---
Train Accuracy: 100.00
Test Accuracy: 76.14
Precision ADHD: 0.8857
Precision Control: 0.6792
Recall ADHD: 0.6458
Recall Control: 0.9000
F1-score ADHD: 0.7470
F1-score Control: 0.7742
AUC: 0.9089

--- 반복 3 ---
Train Accuracy: 91.76
Test Accuracy: 73.86
Precision ADHD: 1.0000
Precision Control: 0.6349
Recall ADHD: 0.5208
Recall Control: 1.0000
F1-score ADHD: 0.6849
F1-score Control: 0.7767
AUC: 0.9260

--- 반복 4 ---
Train Accuracy: 100.00
Test Accuracy: 76.14
Precision ADHD: 0.7755
Precision Control: 0.7436
Recall ADHD: 0.7917
Recall Control: 0.7250
F1-score ADHD: 0.7835
F1-score Control: 0.7342
AUC: 0.8547

--- 반복 5 ---
Train Accuracy: 99.72
Test Accuracy: 81.82
Precision ADHD: 0.9444
Precision Control: 0.7308
Recall ADHD: 0.7083
Recall Control: 0.9500
F1-score ADHD: 0.