In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Conv2D, BatchNormalization, AveragePooling2D, Flatten, Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import os


In [9]:
# Directories holding the per-epoch .npy files of the ADHD and Control groups.
# These are raw strings, so path separators are written as single backslashes:
# with the r-prefix an escaped "\\" stays as two literal backslash characters
# and ends up as a doubled separator in the path.
adhd_epochs_path = r"C:\Users\dlwld\Desktop\comprehensive_design\DataSet Files\ADHD_epochs_modify"
control_epochs_path = r"C:\Users\dlwld\Desktop\comprehensive_design\DataSet Files\Control_epochs"

# Helper that builds the training / validation arrays
def create_dataset(subject_epochs, subjects):
    """Collect the epochs of the selected subjects into two arrays.

    Args:
        subject_epochs: Mapping of subject ID -> iterable of
            ``(epoch_data, label)`` pairs.
        subjects: Subject IDs to include; epochs are emitted in this order.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds the epoch data and ``y``
        the matching labels, one entry per epoch.

    Raises:
        KeyError: If a requested subject is missing from ``subject_epochs``.
    """
    # Flatten the per-subject lists into one sequence of (data, label) pairs.
    pairs = [pair for subject_id in subjects for pair in subject_epochs[subject_id]]
    features = [epoch for epoch, _ in pairs]
    targets = [target for _, target in pairs]
    return np.array(features), np.array(targets)

# Group epochs by subject so that data from a training subject never ends up
# in the validation set
def load_epochs_by_subject(folder_path, label):
    """Load every ``.npy`` epoch file in a folder, grouped by subject ID.

    The subject ID is the part of the file name before the first underscore
    (e.g. ``'v1p_epoch_1.npy'`` -> ``'v1p'``). Files without the ``.npy``
    suffix are ignored.

    Args:
        folder_path: Directory to scan (not recursive).
        label: Class label attached to every epoch loaded from this folder.

    Returns:
        dict mapping subject ID -> list of ``(epoch_data, label)`` tuples.
        Files are visited in sorted file-name order, so both the key order
        and the per-subject epoch order are deterministic.
    """
    subject_epochs = {}
    # os.listdir() returns entries in arbitrary order. Sorting keeps the key
    # order stable, which matters because callers feed list(keys()) into a
    # seeded train_test_split: a different input order gives a different split.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".npy"):
            continue
        # Subject ID, e.g. 'v1p' from 'v1p_epoch_1.npy'
        subject_id = file_name.split('_')[0]
        epoch_data = np.load(os.path.join(folder_path, file_name))
        subject_epochs.setdefault(subject_id, []).append((epoch_data, label))
    return subject_epochs

In [10]:
# Load each group's epochs keyed by subject (ADHD -> label 1, Control -> label 0)
adhd_subject_epochs = load_epochs_by_subject(adhd_epochs_path, label=1)
control_subject_epochs = load_epochs_by_subject(control_epochs_path, label=0)

# Subject ID lists, in the insertion order of the dictionaries
adhd_subjects = [*adhd_subject_epochs]
control_subjects = [*control_subject_epochs]

# Split at subject level (80 % training, 20 % validation), separately per group,
# so that no subject contributes epochs to both sides
adhd_train_subjects, adhd_test_subjects = train_test_split(
    adhd_subjects, test_size=0.2, random_state=42
)
control_train_subjects, control_test_subjects = train_test_split(
    control_subjects, test_size=0.2, random_state=42
)

# Turn the subject lists into epoch / label arrays
X_train_adhd, y_train_adhd = create_dataset(adhd_subject_epochs, adhd_train_subjects)
X_test_adhd, y_test_adhd = create_dataset(adhd_subject_epochs, adhd_test_subjects)
X_train_control, y_train_control = create_dataset(control_subject_epochs, control_train_subjects)
X_test_control, y_test_control = create_dataset(control_subject_epochs, control_test_subjects)

# Merge both groups: ADHD epochs first, Control epochs after
X_train = np.concatenate((X_train_adhd, X_train_control), axis=0)
y_train = np.concatenate((y_train_adhd, y_train_control), axis=0)
X_test = np.concatenate((X_test_adhd, X_test_control), axis=0)
y_test = np.concatenate((y_test_adhd, y_test_control), axis=0)

# Reshape for Conv2D: (number of epochs, channels, samples, filters)
X_train = X_train.reshape((len(X_train), 19, 15360, 1))
X_test = X_test.reshape((len(X_test), 19, 15360, 1))

# Define and train the CNN
model = Sequential()

# First spatial block (kernel spans the channel axis only)
model.add(Conv2D(16, (10, 1), activation='relu', input_shape=(19, 15360, 1), padding='valid'))  # Output Shape: (10, 15360, 16)
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 1)))  # Output Shape: (5, 15360, 16)

# Second spatial block
model.add(Conv2D(16, (4, 1), activation='relu', padding='valid'))  # Output Shape: (2, 15360, 16)
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(2, 1)))  # Output Shape: (1, 15360, 16)

# First temporal block (kernel spans the sample axis only)
model.add(Conv2D(32, (1, 128), activation='relu', padding='valid'))  # Output Shape: (1, 15233, 32)
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(1, 64)))  # Output Shape: (1, 238, 32)

# Second temporal block
model.add(Conv2D(32, (1, 64), activation='relu', padding='valid'))  # Output Shape: (1, 175, 32)
model.add(BatchNormalization())
model.add(AveragePooling2D(pool_size=(1, 32)))  # Output Shape: (1, 5, 32)

# Feature-selection head.
# The layer names are pinned explicitly because the evaluation cells look the
# layers up by name ('flatten_1', 'dense_3', 'dense_4'). Left unnamed, Keras
# numbers layers with a per-session counter, so the names would depend on how
# many layers the kernel had created before and the lookups would fail after a
# kernel restart. The output layer is named as well so that an auto-numbered
# name can never collide with the pinned ones.
model.add(Flatten(name='flatten_1'))  # Flatten layer
model.add(Dense(64, activation='relu', name='dense_3'))  # Dense 1 layer
model.add(Dense(32, activation='relu', name='dense_4'))  # Dense 2 layer
model.add(Dense(1, activation='sigmoid', name='output'))  # Output layer

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the held-out subjects are used as validation data for monitoring
model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x233a0d72860>

In [11]:
# Feature-map extraction and evaluation
def evaluate_feature_map(model, layer_name, X_train, X_test, y_train, y_test, random_state=42):
    """Score classical classifiers on the activations of one layer of ``model``.

    The output of ``layer_name`` is computed for the training and test inputs,
    flattened to one feature vector per sample, standardised with statistics
    fitted on the training features only, and then used to fit and evaluate
    five scikit-learn classifiers (RBF SVM, logistic regression, random
    forest, Gaussian naive Bayes, k-NN). The metrics of each classifier are
    printed; nothing is returned.

    Args:
        model: Keras model that contains a layer called ``layer_name``.
        layer_name: Name of the layer whose output is used as the feature map.
        X_train: Training inputs passed to the model.
        X_test: Test inputs passed to the model.
        y_train: Binary training labels (1 = ADHD, 0 = Control).
        y_test: Binary test labels (1 = ADHD, 0 = Control).
        random_state: Seed for the random forest so repeated calls on the same
            features give the same scores. Pass ``None`` for an unseeded forest.

    Raises:
        ValueError: Propagated from ``model.get_layer`` when no layer is named
            ``layer_name``.
    """
    # Sub-model that maps the network input to the requested layer's output
    feature_extractor = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

    # Extract the feature maps for the training and test data
    train_features = feature_extractor.predict(X_train)
    test_features = feature_extractor.predict(X_test)

    # The scaler and classifiers need 2-D input (samples, features). Collapse
    # every non-batch axis so convolution / pooling layers can be evaluated as
    # well; outputs that are already 2-D keep their shape.
    train_features = train_features.reshape(
        train_features.shape[0], int(np.prod(train_features.shape[1:]))
    )
    test_features = test_features.reshape(
        test_features.shape[0], int(np.prod(test_features.shape[1:]))
    )

    # Standardise; the scaler is fitted on the training features only
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    test_features = scaler.transform(test_features)

    # Classifiers to compare
    classifiers = {
        'NLSVM': SVC(kernel='rbf', gamma='scale'),
        'LR': LogisticRegression(),
        'RF': RandomForestClassifier(n_estimators=100, random_state=random_state),
        'GNB': GaussianNB(),
        'KNN': KNeighborsClassifier(n_neighbors=5)
    }

    # Per-classifier list of metric dictionaries (one entry per evaluation run)
    results = {clf_name: [] for clf_name in classifiers.keys()}

    # Fit and evaluate each classifier
    for clf_name, clf in classifiers.items():
        clf.fit(train_features, y_train)
        y_pred = clf.predict(test_features)
        # Continuous scores for the AUC: use the decision function when the
        # estimator has one, otherwise the predicted probability of class 1.
        y_pred_probs = clf.decision_function(test_features) if hasattr(clf, "decision_function") else clf.predict_proba(test_features)[:, 1]

        # Compute the evaluation metrics (per class where applicable)
        acc = accuracy_score(y_test, y_pred)
        precision_adhd = precision_score(y_test, y_pred, pos_label=1)
        precision_control = precision_score(y_test, y_pred, pos_label=0)
        recall_adhd = recall_score(y_test, y_pred, pos_label=1)
        recall_control = recall_score(y_test, y_pred, pos_label=0)
        f1_adhd = f1_score(y_test, y_pred, pos_label=1)
        f1_control = f1_score(y_test, y_pred, pos_label=0)
        auc = roc_auc_score(y_test, y_pred_probs)

        # Store the results
        results[clf_name].append({
            'Accuracy': acc,
            'Precision_ADHD': precision_adhd,
            'Precision_Control': precision_control,
            'Recall_ADHD': recall_adhd,
            'Recall_Control': recall_control,
            'F1_ADHD': f1_adhd,
            'F1_Control': f1_control,
            'AUC': auc
        })

    # Print mean and standard deviation per classifier. Each list holds a
    # single run here, so the reported standard deviation is always 0.
    for clf_name, metrics_list in results.items():
        print(f"\nResults for {clf_name} using {layer_name} feature map:")
        metrics = list(metrics_list[0].keys())
        for metric in metrics:
            metric_values = [m[metric] for m in metrics_list]
            mean = np.mean(metric_values)
            std = np.std(metric_values)
            print(f"{metric}: {mean:.2f} ± {std:.2f}")

In [13]:
# Evaluate the features of the Flatten layer.
# The layer is resolved by type rather than by a hard-coded name: auto-generated
# Keras layer names carry a numeric suffix that depends on how many layers were
# created earlier in the session, so a fixed name can break after a kernel restart.
flatten_layer_name = next(layer.name for layer in model.layers if isinstance(layer, Flatten))
evaluate_feature_map(model, flatten_layer_name, X_train, X_test, y_train, y_test)


Results for NLSVM using flatten_1 feature map:
Accuracy: 0.76 ± 0.00
Precision_ADHD: 0.83 ± 0.00
Precision_Control: 0.70 ± 0.00
Recall_ADHD: 0.71 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.76 ± 0.00
F1_Control: 0.76 ± 0.00
AUC: 0.82 ± 0.00

Results for LR using flatten_1 feature map:
Accuracy: 0.75 ± 0.00
Precision_ADHD: 0.82 ± 0.00
Precision_Control: 0.69 ± 0.00
Recall_ADHD: 0.69 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.75 ± 0.00
F1_Control: 0.75 ± 0.00
AUC: 0.86 ± 0.00

Results for RF using flatten_1 feature map:
Accuracy: 0.72 ± 0.00
Precision_ADHD: 0.81 ± 0.00
Precision_Control: 0.65 ± 0.00
Recall_ADHD: 0.62 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.71 ± 0.00
F1_Control: 0.73 ± 0.00
AUC: 0.85 ± 0.00

Results for GNB using flatten_1 feature map:
Accuracy: 0.81 ± 0.00
Precision_ADHD: 0.84 ± 0.00
Precision_Control: 0.77 ± 0.00
Recall_ADHD: 0.79 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.82 ± 0.00
F1_Control: 0.80 ± 0.00
AUC: 0.85 ± 0.00

Results for KNN using flatten_1

In [15]:
# Evaluate the features of the first Dense layer (Dense 1).
# The layer is resolved by position among the model's Dense layers rather than
# by a hard-coded name: auto-generated Keras layer names carry a numeric suffix
# that depends on how many layers were created earlier in the session.
dense_layer_names = [layer.name for layer in model.layers if isinstance(layer, Dense)]
evaluate_feature_map(model, dense_layer_names[0], X_train, X_test, y_train, y_test)


Results for NLSVM using dense_3 feature map:
Accuracy: 0.72 ± 0.00
Precision_ADHD: 0.81 ± 0.00
Precision_Control: 0.65 ± 0.00
Recall_ADHD: 0.62 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.71 ± 0.00
F1_Control: 0.73 ± 0.00
AUC: 0.82 ± 0.00

Results for LR using dense_3 feature map:
Accuracy: 0.72 ± 0.00
Precision_ADHD: 0.81 ± 0.00
Precision_Control: 0.65 ± 0.00
Recall_ADHD: 0.62 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.71 ± 0.00
F1_Control: 0.73 ± 0.00
AUC: 0.84 ± 0.00

Results for RF using dense_3 feature map:
Accuracy: 0.74 ± 0.00
Precision_ADHD: 0.82 ± 0.00
Precision_Control: 0.67 ± 0.00
Recall_ADHD: 0.67 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.74 ± 0.00
F1_Control: 0.74 ± 0.00
AUC: 0.81 ± 0.00

Results for GNB using dense_3 feature map:
Accuracy: 0.69 ± 0.00
Precision_ADHD: 0.80 ± 0.00
Precision_Control: 0.62 ± 0.00
Recall_ADHD: 0.58 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.67 ± 0.00
F1_Control: 0.71 ± 0.00
AUC: 0.73 ± 0.00

Results for KNN using dense_3 feature m

In [16]:
# Evaluate the features of the second Dense layer (Dense 2).
# The layer is resolved by position among the model's Dense layers rather than
# by a hard-coded name: auto-generated Keras layer names carry a numeric suffix
# that depends on how many layers were created earlier in the session.
dense_layer_names = [layer.name for layer in model.layers if isinstance(layer, Dense)]
evaluate_feature_map(model, dense_layer_names[1], X_train, X_test, y_train, y_test)


Results for NLSVM using dense_4 feature map:
Accuracy: 0.74 ± 0.00
Precision_ADHD: 0.82 ± 0.00
Precision_Control: 0.67 ± 0.00
Recall_ADHD: 0.67 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.74 ± 0.00
F1_Control: 0.74 ± 0.00
AUC: 0.82 ± 0.00

Results for LR using dense_4 feature map:
Accuracy: 0.74 ± 0.00
Precision_ADHD: 0.82 ± 0.00
Precision_Control: 0.67 ± 0.00
Recall_ADHD: 0.67 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.74 ± 0.00
F1_Control: 0.74 ± 0.00
AUC: 0.84 ± 0.00

Results for RF using dense_4 feature map:
Accuracy: 0.76 ± 0.00
Precision_ADHD: 0.83 ± 0.00
Precision_Control: 0.70 ± 0.00
Recall_ADHD: 0.71 ± 0.00
Recall_Control: 0.82 ± 0.00
F1_ADHD: 0.76 ± 0.00
F1_Control: 0.76 ± 0.00
AUC: 0.82 ± 0.00

Results for GNB using dense_4 feature map:
Accuracy: 0.69 ± 0.00
Precision_ADHD: 0.82 ± 0.00
Precision_Control: 0.62 ± 0.00
Recall_ADHD: 0.56 ± 0.00
Recall_Control: 0.85 ± 0.00
F1_ADHD: 0.67 ± 0.00
F1_Control: 0.72 ± 0.00
AUC: 0.72 ± 0.00

Results for KNN using dense_4 feature m