In [1]:
import pandas as pd
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import (
    roc_curve, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, precision_recall_curve, auc
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import random

warnings.filterwarnings('ignore')

In [2]:
# Load the train/test extracts for the selected percentage.

per = 1

data_dir = "/home/watercar99/project/MIMIC-III/Data Extract/Tabular Data/FINAL Data Extract/DATA/train_test_data/"
train_data = pd.read_csv(f"{data_dir}Final_train_data({per}%)_down.csv")
test_data = pd.read_csv(f"{data_dir}Final_test_data({per}%)_down.csv")

# Columns removed from both splits before modelling.
dropped_cols = ["SUBJECT_ID", "HADM_ID", "DOA", "ETHNICITY", "TLOS", "LOS"]
train_data = train_data.drop(columns=dropped_cols)
test_data = test_data.drop(columns=dropped_cols)

# Columns imputed with the mode and later one-hot encoded; every other column is imputed with the median.
categorical_cols = ['GENDER', 'Ventilator', 'Anisocytosis', 'Macrocytes', 'Poikilocytosis', 'Bacteria', 'Bilirubin', 'Urine Appearance', 'Urine Color']

# Imputation statistics are computed on the training split only and reused for the test split.
mode_values = train_data[categorical_cols].mode().iloc[0]
median_values = train_data.drop(columns=categorical_cols).median()

# Fill missing categorical values with the training mode.
train_data[categorical_cols] = train_data[categorical_cols].fillna(mode_values)
test_data[categorical_cols] = test_data[categorical_cols].fillna(mode_values)

# Fill the remaining missing values with the training median.
train_data = train_data.fillna(median_values)
test_data = test_data.fillna(median_values)

# One-hot encode the categorical columns; each split is encoded independently,
# so the resulting dummy columns may differ between train and test.
train_data = pd.get_dummies(train_data, columns=categorical_cols)
test_data = pd.get_dummies(test_data, columns=categorical_cols)

# Drop any column that still contains missing values after imputation.
train_data = train_data.dropna(axis=1)
test_data = test_data.dropna(axis=1)

In [3]:
# Columns that exist in only one of the two splits (the splits were one-hot encoded separately).
missing_columns_in_test = set(train_data.columns) - set(test_data.columns)
missing_columns_in_train = set(test_data.columns) - set(train_data.columns)

# Add the train-only columns to test_data, filled with 0.
# Iterate in sorted order: set iteration order is not stable between runs,
# which would otherwise make the resulting column order irreproducible.
for col in sorted(missing_columns_in_test, key=str):
    test_data[col] = 0

# Add the test-only columns to train_data, filled with 0.
for col in sorted(missing_columns_in_train, key=str):
    train_data[col] = 0

# Both frames now hold the same column set, but the padding columns were appended
# at the end of each frame, so their order can differ. Reorder test_data to the
# train layout so both feature matrices present features in the same positions.
test_data = test_data[train_data.columns]

# Features and target
X_train = train_data.drop('y', axis=1)
y_train = train_data['y']
X_test = test_data.drop('y', axis=1)

print("train shape\n",X_train.shape,"\n")
# Label corrected: this line reports the test matrix, not the train matrix.
print("test shape\n",X_test.shape,"\n")

target = "y"
features = [f for f in train_data.columns if f not in [target]]

# Class balance of each split
print("train value\n",train_data['y'].value_counts())
print("test value\n",test_data['y'].value_counts())

train shape
 (978, 98) 

train shape
 (457, 98) 

train value
 1    489
0    489
Name: y, dtype: int64
test value
 0    341
1    116
Name: y, dtype: int64


In [4]:
def objective(trial):
    """Optuna objective: mean 3-fold cross-validated F1 of an SVC on the training data.

    Parameters
    ----------
    trial : optuna trial object
        Used to sample ``c`` (log-uniform in [1e-8, 10.0]) and ``kernel``
        (a single-choice categorical, ``"poly"``).

    Returns
    -------
    float
        Mean of the three per-fold F1 scores on ``X_train`` / ``y_train``.
    """
    c = trial.suggest_float("c", 1e-8, 10.0, log=True)
    kernel = trial.suggest_categorical("kernel", ["poly"])

    # probability=True is intentionally not set here: the 'f1' scorer only calls
    # predict(), and enabling probability estimates makes every fit run an extra
    # internal cross-validation whose result this objective never reads.
    model = SVC(C=c, kernel=kernel, decision_function_shape="ovo", random_state=42)

    # Score the candidate with cross-validation on the training split only.
    scores = cross_val_score(model, X_train, y_train, cv=3, scoring='f1')
    return scores.mean()

In [5]:
# Per-iteration test metrics (percentages rounded to two decimals) and raw predictions.
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
auroc_list = []
y_pred_list = []

# Ground-truth test labels; constant across iterations, so looked up once.
y_test = test_data['y']

# A named loop variable is used because its value is printed below; `_` is also
# IPython's last-output variable and should not be rebound by user code.
for iteration in range(10):
    # Run a fresh Optuna study (30 trials, all available workers).
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=30, gc_after_trial=True, n_jobs=-1)

    # Best hyperparameters found by this study.
    best_trial = study.best_trial
    best_c = best_trial.params["c"]
    best_kernel = best_trial.params["kernel"]

    # Refit on the full training split with the best hyperparameters.
    # probability=True is required here because predict_proba is used for AUROC.
    best_model = SVC(C=best_c, kernel=best_kernel, random_state=42, probability=True)
    best_model.fit(X_train, y_train)

    # Evaluate on the held-out test split.
    y_pred = best_model.predict(X_test)
    y_score = best_model.predict_proba(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Column 1 of predict_proba is used as the score for the positive label.
    auroc = roc_auc_score(y_test, y_score[:, 1])

    # Express the metrics as percentages with two decimals.
    rounded_accuracy = round(accuracy * 100, 2)
    rounded_precision = round(precision * 100, 2)
    rounded_recall = round(recall * 100, 2)
    rounded_f1 = round(f1 * 100, 2)
    rounded_auroc = round(auroc * 100, 2)

    # Record this iteration's results and predictions.
    accuracy_list.append(rounded_accuracy)
    precision_list.append(rounded_precision)
    recall_list.append(rounded_recall)
    f1_list.append(rounded_f1)
    auroc_list.append(rounded_auroc)
    y_pred_list.append(y_pred)

    print(f"Iteration {iteration + 1} Results:")
    print("Best Trial Parameters:")
    print("c:", best_c)
    print("kernel:", best_kernel)
    print("Test Accuracy:", rounded_accuracy)
    print("Precision:", rounded_precision)
    print("Recall:", rounded_recall)
    print("F1-score:", rounded_f1)
    print("AUROC:", rounded_auroc)
    print("")

# Averages over all iterations (means of the already-rounded per-iteration values).
print("Mean Accuracy:", sum(accuracy_list) / len(accuracy_list))
print("Mean Precision:", sum(precision_list) / len(precision_list))
print("Mean Recall:", sum(recall_list) / len(recall_list))
print("Mean F1-score:", sum(f1_list) / len(f1_list))
print("Mean AUROC:", sum(auroc_list) / len(auroc_list))

# Print the prediction vector of every iteration.
for i, y_pred in enumerate(y_pred_list):
    print(f"Iteration {i + 1} Predictions:")
    print(y_pred)


[32m[I 2023-10-07 02:05:11,865][0m A new study created in memory with name: no-name-f430af26-a4d4-4ffd-a0f0-2d5072dd2391[0m
[32m[I 2023-10-07 02:05:13,760][0m Trial 4 finished with value: 0.4382904592372627 and parameters: {'c': 6.220042632397288e-06, 'kernel': 'poly'}. Best is trial 4 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:14,172][0m Trial 1 finished with value: 0.4382904592372627 and parameters: {'c': 1.984894617589791e-08, 'kernel': 'poly'}. Best is trial 4 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:14,374][0m Trial 26 finished with value: 0.4331137207335333 and parameters: {'c': 0.012387789727913517, 'kernel': 'poly'}. Best is trial 4 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:14,520][0m Trial 3 finished with value: 0.4382904592372627 and parameters: {'c': 4.1090035430570067e-07, 'kernel': 'poly'}. Best is trial 4 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:14,528][0m Trial 9 finished with value: 0.

Iteration 1 Results:
Best Trial Parameters:
c: 2.900070621981807
kernel: poly
Test Accuracy: 64.11
Precision: 36.21
Recall: 54.31
F1-score: 43.45
AUROC: 65.88



[32m[I 2023-10-07 02:05:21,606][0m Trial 25 finished with value: 0.4382904592372627 and parameters: {'c': 4.3425811005789847e-05, 'kernel': 'poly'}. Best is trial 25 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:21,864][0m Trial 1 finished with value: 0.5141330044197759 and parameters: {'c': 0.5391915602147472, 'kernel': 'poly'}. Best is trial 1 with value: 0.5141330044197759.[0m
[32m[I 2023-10-07 02:05:22,130][0m Trial 19 finished with value: 0.4354781749032392 and parameters: {'c': 0.0032533707577510796, 'kernel': 'poly'}. Best is trial 1 with value: 0.5141330044197759.[0m
[32m[I 2023-10-07 02:05:22,147][0m Trial 24 finished with value: 0.5055569387244646 and parameters: {'c': 0.17760881105300774, 'kernel': 'poly'}. Best is trial 1 with value: 0.5141330044197759.[0m
[32m[I 2023-10-07 02:05:22,303][0m Trial 23 finished with value: 0.4382904592372627 and parameters: {'c': 7.566246866844753e-07, 'kernel': 'poly'}. Best is trial 1 with value: 0.5141330044197759

Iteration 2 Results:
Best Trial Parameters:
c: 3.465761828374132
kernel: poly
Test Accuracy: 64.55
Precision: 36.78
Recall: 55.17
F1-score: 44.14
AUROC: 66.61



[32m[I 2023-10-07 02:05:29,687][0m Trial 1 finished with value: 0.4057845011562539 and parameters: {'c': 0.040464881492539635, 'kernel': 'poly'}. Best is trial 1 with value: 0.4057845011562539.[0m
[32m[I 2023-10-07 02:05:29,945][0m Trial 0 finished with value: 0.4320412634154802 and parameters: {'c': 0.007972573934118028, 'kernel': 'poly'}. Best is trial 0 with value: 0.4320412634154802.[0m
[32m[I 2023-10-07 02:05:29,949][0m Trial 15 finished with value: 0.4382904592372627 and parameters: {'c': 3.93582968353685e-06, 'kernel': 'poly'}. Best is trial 15 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:30,087][0m Trial 19 finished with value: 0.5892366959885208 and parameters: {'c': 5.780119020516111, 'kernel': 'poly'}. Best is trial 19 with value: 0.5892366959885208.[0m
[32m[I 2023-10-07 02:05:30,097][0m Trial 7 finished with value: 0.4324880829004083 and parameters: {'c': 0.00850553980209529, 'kernel': 'poly'}. Best is trial 19 with value: 0.5892366959885208.[0m

Iteration 3 Results:
Best Trial Parameters:
c: 5.780119020516111
kernel: poly
Test Accuracy: 64.77
Precision: 36.84
Recall: 54.31
F1-score: 43.9
AUROC: 66.88



[32m[I 2023-10-07 02:05:37,842][0m Trial 18 finished with value: 0.39869469673391245 and parameters: {'c': 0.043480321871736165, 'kernel': 'poly'}. Best is trial 18 with value: 0.39869469673391245.[0m
[32m[I 2023-10-07 02:05:38,247][0m Trial 1 finished with value: 0.4382904592372627 and parameters: {'c': 6.764224524395913e-08, 'kernel': 'poly'}. Best is trial 1 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:38,465][0m Trial 12 finished with value: 0.4382904592372627 and parameters: {'c': 3.35322046476786e-08, 'kernel': 'poly'}. Best is trial 1 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:38,481][0m Trial 17 finished with value: 0.4382904592372627 and parameters: {'c': 1.8790634036692887e-05, 'kernel': 'poly'}. Best is trial 1 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:38,989][0m Trial 0 finished with value: 0.4382904592372627 and parameters: {'c': 1.5387436528854848e-08, 'kernel': 'poly'}. Best is trial 1 with value: 0.43829045923

Iteration 4 Results:
Best Trial Parameters:
c: 6.273339756702253
kernel: poly
Test Accuracy: 64.99
Precision: 37.06
Recall: 54.31
F1-score: 44.06
AUROC: 66.88



[32m[I 2023-10-07 02:05:46,130][0m Trial 23 finished with value: 0.4382904592372627 and parameters: {'c': 8.461442864820761e-05, 'kernel': 'poly'}. Best is trial 23 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:46,618][0m Trial 0 finished with value: 0.4382904592372627 and parameters: {'c': 1.7583956954023713e-06, 'kernel': 'poly'}. Best is trial 23 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:46,626][0m Trial 3 finished with value: 0.5803708160257681 and parameters: {'c': 4.653495220213619, 'kernel': 'poly'}. Best is trial 3 with value: 0.5803708160257681.[0m
[32m[I 2023-10-07 02:05:46,774][0m Trial 15 finished with value: 0.3886566796783515 and parameters: {'c': 0.0503638933657618, 'kernel': 'poly'}. Best is trial 3 with value: 0.5803708160257681.[0m
[32m[I 2023-10-07 02:05:46,791][0m Trial 2 finished with value: 0.4382904592372627 and parameters: {'c': 5.944325249149704e-07, 'kernel': 'poly'}. Best is trial 3 with value: 0.5803708160257681.[

Iteration 5 Results:
Best Trial Parameters:
c: 4.653495220213619
kernel: poly
Test Accuracy: 65.43
Precision: 37.5
Recall: 54.31
F1-score: 44.37
AUROC: 66.81



[32m[I 2023-10-07 02:05:54,290][0m Trial 6 finished with value: 0.4382904592372627 and parameters: {'c': 4.176135154610138e-07, 'kernel': 'poly'}. Best is trial 6 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:54,574][0m Trial 29 finished with value: 0.43542576400177163 and parameters: {'c': 0.004141695266118961, 'kernel': 'poly'}. Best is trial 6 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:05:55,090][0m Trial 0 finished with value: 0.5127524330406651 and parameters: {'c': 0.2906100492525435, 'kernel': 'poly'}. Best is trial 0 with value: 0.5127524330406651.[0m
[32m[I 2023-10-07 02:05:55,464][0m Trial 21 finished with value: 0.5455060946586371 and parameters: {'c': 1.4213585152078299, 'kernel': 'poly'}. Best is trial 21 with value: 0.5455060946586371.[0m
[32m[I 2023-10-07 02:05:55,678][0m Trial 15 finished with value: 0.4382904592372627 and parameters: {'c': 1.5443480143399468e-07, 'kernel': 'poly'}. Best is trial 21 with value: 0.5455060946586371.

Iteration 6 Results:
Best Trial Parameters:
c: 3.449796440260045
kernel: poly
Test Accuracy: 64.55
Precision: 36.78
Recall: 55.17
F1-score: 44.14
AUROC: 66.61



[32m[I 2023-10-07 02:06:00,761][0m Trial 3 finished with value: 0.4382904592372627 and parameters: {'c': 1.6780243990988597e-06, 'kernel': 'poly'}. Best is trial 3 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:01,006][0m Trial 5 finished with value: 0.4382904592372627 and parameters: {'c': 0.002107553722581183, 'kernel': 'poly'}. Best is trial 3 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:01,015][0m Trial 27 finished with value: 0.4382904592372627 and parameters: {'c': 2.0837135551724383e-05, 'kernel': 'poly'}. Best is trial 3 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:01,023][0m Trial 1 finished with value: 0.4140934364830615 and parameters: {'c': 0.0353464676655069, 'kernel': 'poly'}. Best is trial 3 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:01,028][0m Trial 4 finished with value: 0.5108917085779257 and parameters: {'c': 0.20981771094230575, 'kernel': 'poly'}. Best is trial 4 with value: 0.5108917085779257.[0

Iteration 7 Results:
Best Trial Parameters:
c: 8.812805891884018
kernel: poly
Test Accuracy: 65.43
Precision: 37.79
Recall: 56.03
F1-score: 45.14
AUROC: 67.17



[32m[I 2023-10-07 02:06:05,261][0m Trial 2 finished with value: 0.43542576400177163 and parameters: {'c': 0.003998721131865386, 'kernel': 'poly'}. Best is trial 2 with value: 0.43542576400177163.[0m
[32m[I 2023-10-07 02:06:05,517][0m Trial 13 finished with value: 0.4132989978365987 and parameters: {'c': 0.09013791052506069, 'kernel': 'poly'}. Best is trial 2 with value: 0.43542576400177163.[0m
[32m[I 2023-10-07 02:06:05,519][0m Trial 18 finished with value: 0.5438866226716473 and parameters: {'c': 1.5774239828535075, 'kernel': 'poly'}. Best is trial 18 with value: 0.5438866226716473.[0m
[32m[I 2023-10-07 02:06:05,522][0m Trial 1 finished with value: 0.4382904592372627 and parameters: {'c': 0.0001055734528774339, 'kernel': 'poly'}. Best is trial 18 with value: 0.5438866226716473.[0m
[32m[I 2023-10-07 02:06:05,636][0m Trial 26 finished with value: 0.4382904592372627 and parameters: {'c': 9.573933292161499e-08, 'kernel': 'poly'}. Best is trial 18 with value: 0.54388662267164

Iteration 8 Results:
Best Trial Parameters:
c: 8.246723147835368
kernel: poly
Test Accuracy: 65.65
Precision: 38.15
Recall: 56.9
F1-score: 45.67
AUROC: 67.18



[32m[I 2023-10-07 02:06:09,917][0m Trial 11 finished with value: 0.5317300274219544 and parameters: {'c': 0.9971835310399134, 'kernel': 'poly'}. Best is trial 11 with value: 0.5317300274219544.[0m
[32m[I 2023-10-07 02:06:10,198][0m Trial 1 finished with value: 0.4382904592372627 and parameters: {'c': 3.79709921464914e-07, 'kernel': 'poly'}. Best is trial 11 with value: 0.5317300274219544.[0m
[32m[I 2023-10-07 02:06:10,199][0m Trial 7 finished with value: 0.4382904592372627 and parameters: {'c': 1.5084096559840823e-05, 'kernel': 'poly'}. Best is trial 11 with value: 0.5317300274219544.[0m
[32m[I 2023-10-07 02:06:10,304][0m Trial 22 finished with value: 0.4382904592372627 and parameters: {'c': 4.270400715740467e-05, 'kernel': 'poly'}. Best is trial 11 with value: 0.5317300274219544.[0m
[32m[I 2023-10-07 02:06:10,312][0m Trial 19 finished with value: 0.4382904592372627 and parameters: {'c': 0.000769261706333807, 'kernel': 'poly'}. Best is trial 11 with value: 0.5317300274219

Iteration 9 Results:
Best Trial Parameters:
c: 8.05521112929954
kernel: poly
Test Accuracy: 65.43
Precision: 37.93
Recall: 56.9
F1-score: 45.52
AUROC: 67.18



[32m[I 2023-10-07 02:06:14,405][0m Trial 5 finished with value: 0.4382904592372627 and parameters: {'c': 1.4981427549654625e-05, 'kernel': 'poly'}. Best is trial 5 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:14,655][0m Trial 6 finished with value: 0.4382904592372627 and parameters: {'c': 1.1426595283209574e-05, 'kernel': 'poly'}. Best is trial 5 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:14,771][0m Trial 1 finished with value: 0.4382904592372627 and parameters: {'c': 0.0003371401660670085, 'kernel': 'poly'}. Best is trial 5 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:14,775][0m Trial 7 finished with value: 0.4382904592372627 and parameters: {'c': 8.476213944286455e-08, 'kernel': 'poly'}. Best is trial 5 with value: 0.4382904592372627.[0m
[32m[I 2023-10-07 02:06:14,974][0m Trial 8 finished with value: 0.4382904592372627 and parameters: {'c': 7.788645997472902e-08, 'kernel': 'poly'}. Best is trial 5 with value: 0.438290459237262

Iteration 10 Results:
Best Trial Parameters:
c: 6.964062217703718
kernel: poly
Test Accuracy: 65.21
Precision: 37.43
Recall: 55.17
F1-score: 44.6
AUROC: 66.99

Mean Accuracy: 65.01200000000001
Mean Precision: 37.247
Mean Recall: 55.257999999999996
Mean F1-score: 44.499
Mean AUROC: 66.819
Iteration 1 Predictions:
[1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0
 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 1
 1 1 0 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 1 1 1
 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0
 0 1 0 0 1 1 1 0 1 1 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1
 0 1 0 1 1 0 0 0 0 1 1 0 1 0 1 1 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 0 0 0 0 1
 1 0 0 1 0 0 1 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 0 1 0 1 0 0 0
 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 1 0 0 1 1
 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 1 0 0 1
 0 0 0 0 1 

In [6]:
# Pair each result column name with the list collected during the evaluation loop.
data = dict(
    zip(
        ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUROC', 'Predictions'],
        [accuracy_list, precision_list, recall_list, f1_list, auroc_list, y_pred_list],
    )
)

# Convert the dictionary into a DataFrame (one row per iteration) and display it.
df = pd.DataFrame(data)
df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score,AUROC,Predictions
0,64.11,36.21,54.31,43.45,65.88,"[1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
1,64.55,36.78,55.17,44.14,66.61,"[1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
2,64.77,36.84,54.31,43.9,66.88,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
3,64.99,37.06,54.31,44.06,66.88,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
4,65.43,37.5,54.31,44.37,66.81,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
5,64.55,36.78,55.17,44.14,66.61,"[1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
6,65.43,37.79,56.03,45.14,67.17,"[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
7,65.65,38.15,56.9,45.67,67.18,"[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
8,65.43,37.93,56.9,45.52,67.18,"[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."
9,65.21,37.43,55.17,44.6,66.99,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."


In [7]:
# Write the results table to CSV without the index column.
df.to_csv(f'~/project/MIMIC-III/Model/Output/SVM/SVM{per}%_output.csv', index=False)

### DataFrame을 CSV 파일로 저장

In [8]:
# df1 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM/SVM(50%).csv")

# df2 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM(10%)_1.csv")
# # df3 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM(50%)_3.csv")
# # df4 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM(50%)_4.csv")
# # df5 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM(50%)_5.csv")
# # df6 = pd.read_csv("~/project/MIMIC-III/Model/Output/SVM(50%)_6.csv")
# merged_df = pd.concat([df1, df2], ignore_index=True)
# merged_df


In [9]:
# # 제거할 행의 조건을 설정
# condition = merged_df['Recall'] == 24.14  # selects the rows whose 'Recall' value equals 24.14

# # 조건을 만족하는 행 제거
# merged_df = merged_df[~condition]
# merged_df = merged_df.reset_index(drop=True)
# merged_df

In [10]:
# merged_df.to_csv('~/project/MIMIC-III/Model/Output/SVM/SVM_50%_output.csv', index=False)  # index를 저장하지 않으려면 index=False로 설정
