# load libraries and data

In [12]:
from torch import nn
import torch
from torch import tensor 
from torch.autograd import Variable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
import torch
import itertools
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from tqdm.notebook import tqdm
from pathlib import Path
import os

# Feature groups contained in these CSV files:
# psychosocial + rsfMRI + structural MRI + diffusion MRI

train_out = Path('./data/si_ppc_rsf_s_dMRI_train.csv')
test_out =Path('./data/si_ppc_rsf_s_dMRI_test.csv')

train_data= pd.read_csv(train_out)
test_data= pd.read_csv(test_out)

# Label column, and columns that are excluded when the predictor list is built.
target ='Suicidalideation'
unused_feat = ['subjectkey', 'abcd_site']

# Find the column index at which each feature group starts.
# np.where returns a tuple of index arrays, so [0][0] picks the first matching position.
start_psycho_index = np.where(test_data.columns.values == "race.ethnicity_1")[0][0]
start_rsfmri_index = np.where(test_data.columns.values == "rsfmri_var_cort.destrieux_g.and.s.frontomargin.lh")[0][0]
start_structmri_index = np.where(test_data.columns.values == "lh_bankssts_area._.1")[0][0]
start_diffmri_index = np.where(test_data.columns.values == "con_L.BSTS_L.CACG_count")[0][0]

# Slice the column names into the four groups; each group ends where the next one starts,
# and the diffusion group runs to the last column.
psychosocial = list(test_data.columns[start_psycho_index:start_rsfmri_index])
rsf_mri = list(test_data.columns[start_rsfmri_index:start_structmri_index])
structural_mri = list(test_data.columns[start_structmri_index:start_diffmri_index])
diffusion_mri = list(test_data.columns[start_diffmri_index:])

# Number of cross-validation folds.
Num_FOLDS  = 5
# Number of top-ranked features to report.
Num_feat = 20

# Sanity check: training-set shape followed by the size of each feature group.
print(len(train_data), len(train_data.columns))
print(len(psychosocial))
print(len(rsf_mri))
print(len(structural_mri))
print(len(diffusion_mri))

5510 4750
129
148
984
3485


In [13]:
# Display the subject identifiers of the training set.
# NOTE(review): the rendered output lists participant IDs — consider clearing it before sharing the notebook.
train_data['subjectkey']
#test_data['subjectkey']

0       NDARINV003RTV85
1       NDARINV00BD7VDC
2       NDARINV00R4TXET
3       NDARINV00U4FTRU
4       NDARINV00UMK5VC
             ...       
5505    NDARINVZZJ3A7BK
5506    NDARINVZZLZCKAY
5507    NDARINVZZPKBDAC
5508    NDARINVZZZ2ALR6
5509    NDARINVZZZNB0XC
Name: subjectkey, Length: 5510, dtype: object

# Filter out object-typed features

In [14]:
# Fill missing values with 0 and rebuild the index; drop=True discards the old index
# instead of keeping it as a column.
train_data_processed = train_data.fillna(0).reset_index(drop=True)

# Keep every column that is not an excluded column, the target, or part of an MRI group,
# and whose dtype is not object.
# NOTE(review): the dtype check reads test_data while the loop iterates the training columns,
# and the printed list below starts with 'Unnamed: 0' (presumably a saved CSV index) — confirm
# that both are intended.
features = [col for col in train_data_processed.columns 
            if col not in unused_feat + [target] + rsf_mri + structural_mri + diffusion_mri and test_data[col].dtypes != 'object']
print(features)

['Unnamed: 0', 'race.ethnicity_1', 'race.ethnicity_2', 'race.ethnicity_3', 'race.ethnicity_4', 'race.ethnicity_5', 'sex_1', 'sex_2', 'high.educ_1', 'high.educ_2', 'high.educ_3', 'high.educ_4', 'high.educ_5', 'high.educ_6', 'high.educ_7', 'high.educ_8', 'high.educ_9', 'high.educ_10', 'high.educ_11', 'high.educ_12', 'high.educ_13', 'high.educ_14', 'high.educ_15', 'high.educ_16', 'high.educ_17', 'high.educ_18', 'high.educ_19', 'income_1', 'income_2', 'income_3', 'income_4', 'income_5', 'income_6', 'income_7', 'income_8', 'income_9', 'income_10', 'married_1', 'married_2', 'married_3', 'married_4', 'married_5', 'married_6', 'fes_q1_1', 'fes_q1_2', 'fes_q2_1', 'fes_q2_2', 'fes_q3_1', 'fes_q3_2', 'fes_q4_1', 'fes_q4_2', 'fes_q5_1', 'fes_q5_2', 'fes_q6_1', 'fes_q6_2', 'fes_q7_1', 'fes_q7_2', 'fes_q8_1', 'fes_q8_2', 'fes_q9_1', 'fes_q9_2', 'height', 'weight', 'BMI', 'vol', 'age', 'nihtbx_totalcomp_uncorrected', 'nihtbx_fluidcomp_uncorrected', 'nihtbx_pattern_uncorrected', 'nihtbx_picture_uncorr

# For getting feature importance 

In [15]:
def feature(Num_feat, clf, test_data_processed, features):
    """Return the ``Num_feat`` most important features of a fitted classifier.

    Parameters
    ----------
    Num_feat : int
        Number of rows to return. Values larger than the number of available
        importances are clamped instead of raising ``IndexError``.
    clf : object
        Fitted model exposing ``feature_importances_`` (one score per entry of
        ``features``, in the same order).
    test_data_processed : pandas.DataFrame
        Frame containing the ``features`` columns; only the column names are used.
    features : list of str
        Column names the model was trained on.

    Returns
    -------
    pandas.DataFrame
        Columns ``'feature name'`` and ``'ratio'``, sorted by descending
        importance, with a fresh 0..k-1 index.
    """
    importance = np.asarray(clf.feature_importances_)

    # Positions of the scores from most to least important.
    ranking = importance.argsort()[::-1]
    # Slicing clamps automatically; the max() guards against a negative request.
    top = ranking[:max(int(Num_feat), 0)]

    names = test_data_processed[features].columns[top]

    # Build the table in one go instead of concatenating one-row frames in a loop.
    return pd.DataFrame({'feature name': list(names), 'ratio': importance[top]})

# Define function for preprocessing for Cross validation

In [16]:
def preprocessing (train_data, test_data, NUM_FOLDS):
    """Prepare the train/test frames and assign every training row to a CV fold.

    Both frames get their missing values replaced by 0, a fresh index, and a
    ``kfold`` column initialised to -1. The training frame is then shuffled with
    a fixed seed and each row's ``kfold`` is set to the index of the fold in
    which that row belongs to the validation split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_data_processed, test_data_processed)``; the test frame keeps
        ``kfold == -1`` everywhere.
    """
    held_out = test_data.fillna(0).reset_index(drop=True)
    held_out["kfold"] = -1

    shuffled = train_data.fillna(0).reset_index(drop=True)
    shuffled["kfold"] = -1

    # frac=1 returns every row, so this only permutes the row order;
    # the fixed random_state makes the permutation reproducible.
    shuffled = shuffled.sample(frac=1,random_state=2020).reset_index(drop=True)

    splitter = KFold(n_splits=NUM_FOLDS)
    # enumerate yields the fold number together with that fold's (train, validation) row positions.
    for fold_id, (_, valid_rows) in enumerate(splitter.split(X=shuffled, y=shuffled)):
        shuffled.loc[valid_rows, 'kfold'] = fold_id

    print("done preprocessing")
    return shuffled, held_out

# Finding best parameters

In [17]:
# Augmented
import torch
import itertools
from sklearn.metrics import confusion_matrix
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from tqdm.notebook import tqdm


def find_bestpar(fold, train_data_processed, test_data_processed, features, h_space=None):
    """Grid-search TabNet hyperparameters with k-fold cross-validation.

    For every hyperparameter combination a fresh ``TabNetClassifier`` is trained
    once per fold (rows with ``kfold == i`` form the validation split, all other
    rows the training split). The combination with the highest mean validation
    score (``clf.best_cost``) is returned.

    Test-set AUC and accuracy are still computed and printed for monitoring, but
    they are deliberately NOT used for selection: picking hyperparameters on the
    test set leaks it into model selection and makes the reported test
    performance optimistic.

    Parameters
    ----------
    fold : int
        Number of folds; the ``kfold`` column is expected to hold 0..fold-1.
    train_data_processed : pandas.DataFrame
        Training frame with the ``features`` columns, the module-level ``target``
        column and a ``kfold`` column.
    test_data_processed : pandas.DataFrame
        Held-out frame with the ``features`` columns and the ``target`` column.
    features : list of str
        Predictor column names.
    h_space : iterable of tuple, optional
        Combinations ``(n, lr, weight_decay, gamma, step_size)`` to evaluate,
        where ``n`` is used for both ``n_a`` and ``n_d``. Defaults to the full
        3 x 6 x 3 x 3 x 3 grid; pass a smaller list for a quick run.

    Returns
    -------
    tuple or list
        The best combination, or an empty list when no combination produced a
        comparable score (e.g. ``h_space`` is empty).
    """
    # Held-out test matrices: reported for monitoring only.
    X_test = test_data_processed[features].values
    Y_test = test_data_processed[target].values

    if h_space is None:
        n_ = [4,8,16]                              # width of the decision / attention layers
        lr_ = [2e-2, 1e-2, 5e-3, 2e-3, 1e-3, 1e-4] # learning rate
        w_ = [0.01, 0.001, 0.0001]                 # weight decay
        g_ = [0.95, 0.99, 0.9]                     # scheduler params - gamma
        ss_ = [10, 20, 30]                         # scheduler params - step_size
        h_space = itertools.product(n_, lr_, w_, g_, ss_)
    h_space = list(h_space)

    # Start below any reachable score so the first valid combination is always kept.
    best_valid = -np.inf
    max_hy = []

    for hy in tqdm(h_space):
        # Per-fold results for this combination.
        valid_res = []
        test_auc_res = []
        test_acc_res = []

        for i in range(fold):
            df_train = train_data_processed[train_data_processed.kfold != i]  # all folds but i
            df_valid = train_data_processed[train_data_processed.kfold == i]  # fold i only

            X_train = df_train[features].values
            Y_train = df_train[target].values

            X_valid = df_valid[features].values
            Y_valid = df_valid[target].values

            clf = TabNetClassifier(n_a = hy[0],
                                   n_d = hy[0],
                                   optimizer_params = dict(lr=hy[1], weight_decay=hy[2]),
                                   scheduler_params={"step_size":hy[4], "gamma":hy[3]},
                                   scheduler_fn=torch.optim.lr_scheduler.StepLR,
                                   verbose=0)

            # The last eval_set entry ('valid') drives early stopping.
            clf.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
                    eval_name=['train', 'valid'], eval_metric=['auc'],
                    max_epochs=200 , patience=20)

            preds_acc = clf.predict(X_test)
            preds_prob = clf.predict_proba(X_test)
            test_auc = roc_auc_score(y_score=preds_prob[:,1], y_true=Y_test)
            test_acc = accuracy_score(preds_acc, Y_test)

            valid_res.append(clf.best_cost)
            test_auc_res.append(test_auc)
            test_acc_res.append(test_acc)
            # Scores are fractions in [0, 1], so they are printed without a percent sign.
            print('[%3d/%4d] '%(i+1, fold),'Valid score: %2f'% clf.best_cost, 'Test AUC: %.3f'%test_auc, 'Test ACC: %.3f'%test_acc)

        # Mean validation / test scores for this combination.
        mean_valid = np.mean(valid_res)
        print("=====parameter별 valid, test score=====")
        print("Validation 평균: %3f"%mean_valid, "Test AUC 평균: %3f"%np.mean(test_auc_res), "Test ACC 평균: %3f"%np.mean(test_acc_res))

        # Model selection uses the validation score only; the test set stays untouched.
        if mean_valid > best_valid:
            print("Find new maximum AUC!!")
            max_hy = hy
            best_valid = mean_valid

    return max_hy

# Train with best parameter

In [18]:
def bestpar_tuning(fold, train_data_processed, test_data_processed, max_hy, features):
    """Re-train TabNet with the selected hyperparameters on every CV fold.

    Parameters
    ----------
    fold : int
        Number of folds; the ``kfold`` column is expected to hold 0..fold-1.
    train_data_processed : pandas.DataFrame
        Training frame with ``features``, the module-level ``target`` column and ``kfold``.
    test_data_processed : pandas.DataFrame
        Held-out frame with ``features`` and the ``target`` column.
    max_hy : sequence
        ``(n, lr, weight_decay, gamma, step_size)``; ``n`` is used for both ``n_a`` and ``n_d``.
    features : list of str
        Predictor column names.

    Returns
    -------
    tuple
        ``(test_auc, clf, preds_prob, sensitivity, specificity, PPV, NPV, Accuracy)``.
        All eight values come from the LAST fold's model (fold ``fold - 1``);
        the per-fold means are only printed.
    """
    hy = max_hy
    print("Max hy:" ,hy)

    # Build the test matrices from the argument instead of relying on notebook globals.
    X_test = test_data_processed[features].values
    Y_test = test_data_processed[target].values

    # Per-fold validation / test results.
    valid_res = []
    test_auc_res = []
    test_acc_res = []

    for i in range(fold):
        df_train = train_data_processed[train_data_processed.kfold != i]  # all folds but i
        df_valid = train_data_processed[train_data_processed.kfold == i]  # fold i only

        X_train = df_train[features].values
        Y_train = df_train[target].values

        X_valid = df_valid[features].values
        Y_valid = df_valid[target].values

        clf = TabNetClassifier(n_a = hy[0],
                               n_d = hy[0],
                               optimizer_params = dict(lr=hy[1], weight_decay=hy[2]),
                               scheduler_params={"step_size":hy[4], "gamma":hy[3]},
                               scheduler_fn=torch.optim.lr_scheduler.StepLR,
                               verbose=0)

        clf.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
                eval_name=['train', 'valid'], eval_metric=['auc'],
                max_epochs=200 , patience=20)

        preds_acc = clf.predict(X_test)
        preds_prob = clf.predict_proba(X_test)
        test_auc = roc_auc_score(y_score=preds_prob[:,1], y_true=Y_test)
        test_acc = accuracy_score(preds_acc, Y_test)

        # Reuse the predictions computed above rather than predicting a second time.
        # The four-way unpacking assumes a binary target with both classes present.
        TN, FP, FN, TP = confusion_matrix(y_true=Y_test, y_pred=preds_acc).ravel()
        sensitivity = TP/(TP+FN)
        specificity = TN/(TN+FP)
        PPV = TP/(TP+FP)
        NPV = TN/(TN+FN)
        # Accuracy = correct predictions / all predictions.
        Accuracy = (TP+TN)/(TN+FP+FN+TP)

        valid_res.append(clf.best_cost)
        test_auc_res.append(test_auc)
        test_acc_res.append(test_acc)
        # Scores are fractions in [0, 1], so they are printed without a percent sign.
        print('[%3d/%4d] '%(i+1, fold),'Valid score: %2f'% clf.best_cost, 'Test AUC: %.3f'%test_auc, 'Test ACC: %.3f'%test_acc)

    # Mean validation / test scores across folds.
    print("Validation 평균: %3f"%np.mean(valid_res), "Test AUC 평균: %3f"%np.mean(test_auc_res), "Test ACC 평균: %3f"%np.mean(test_acc_res))

    return test_auc, clf, preds_prob, sensitivity, specificity, PPV, NPV, Accuracy

# Run function: split data and cross-validate. This needs to be modified

In [19]:
def run(train_data_processed, test_data_processed, fold, Num_feat, features):
    """Search hyperparameters, re-train with the best set and collect the results.

    Parameters
    ----------
    train_data_processed, test_data_processed : pandas.DataFrame
        Frames produced by ``preprocessing`` (the training frame carries ``kfold``).
    fold : int
        Number of CV folds.
    Num_feat : int
        Number of top features to report.
    features : list of str
        Predictor column names.

    Returns
    -------
    tuple
        ``(test_auc, Y_test, X_test, clf, preds_prob, import_feat, Y_valid, preds_val_prob)``.
        ``clf`` is the model ``bestpar_tuning`` trained in its final loop
        iteration, so the validation outputs refer to fold ``fold - 1``.
    """
    print("-------------------------------Training Begining-------------------------------")

    # Start training
    max_hy = find_bestpar(fold, train_data_processed, test_data_processed, features)

    # To smoke-test the pipeline without the full grid search, skip the call above and use
    # the first grid point instead:
    # max_hy = (4, 2e-2, 0.01, 0.95, 10)
    print("Found maximum hyperparmeter, now work with that")

    print("-------------------------------Testing Begining-------------------------------")
    test_auc, clf, preds_prob, *_ = bestpar_tuning(fold, train_data_processed, test_data_processed, max_hy, features)

    #print("-------------------------------Important Feature-------------------------------")
    import_feat=feature(Num_feat, clf, test_data_processed, features)

    # These arrays were previously read from undefined names; build them from the arguments.
    X_test = test_data_processed[features].values
    Y_test = test_data_processed[target].values

    # Validation split that belongs to the returned classifier (the last fold).
    df_valid = train_data_processed[train_data_processed.kfold == fold - 1]
    X_valid = df_valid[features].values
    Y_valid = df_valid[target].values
    preds_val_prob = clf.predict_proba(X_valid)

    return test_auc, Y_test, X_test, clf, preds_prob, import_feat, Y_valid, preds_val_prob


# Final training function

In [20]:
def hyperparametertuning_CV (train_data, test_data, target, unused_feat, Num_FOLDS, Num_feat):
    """Run the full pipeline on the psychosocial + resting-state fMRI features.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame
        Raw frames as loaded from disk.
    target, unused_feat
        Accepted for interface compatibility; this function does not read them.
        NOTE(review): the training helpers read the module-level ``target`` instead.
    Num_FOLDS : int
        Number of CV folds, used both for the split and for the training loops.
    Num_feat : int
        Number of top features to report.

    Returns
    -------
    tuple
        The eight values returned by ``run``, unchanged and in the same order.
    """
    # Data processing
    train_data_processed, test_data_processed = preprocessing (train_data, test_data, Num_FOLDS)

    # Build the lookup once instead of concatenating the two lists for every column.
    wanted = set(psychosocial) | set(rsf_mri)
    features_rsf_only = [col for col in train_data_processed.columns if col in wanted]

    print("rsf mri only")
    # Pass Num_FOLDS (not a literal 5) so training iterates exactly the folds created above.
    (test_auc_rsf_only, Y_test_rsf_only, X_test_rsf_only, clf_rsf_only,
     preds_prob_rsf_only, import_feat_rsf_only, Y_valid_rsf_only,
     preds_val_prob_rsf_only) = run(train_data_processed,
                                    test_data_processed,
                                    Num_FOLDS,
                                    Num_feat,
                                    features_rsf_only)

    return test_auc_rsf_only, Y_test_rsf_only, X_test_rsf_only, clf_rsf_only, preds_prob_rsf_only, import_feat_rsf_only, Y_valid_rsf_only, preds_val_prob_rsf_only

# Main code 

In [None]:
# Run the whole pipeline (preprocessing, hyperparameter search, final training) on the
# psychosocial + rsfMRI features. The progress bar below counts 486 hyperparameter
# combinations, each trained once per fold, so this cell is long-running.
test_auc_rsf_only, Y_test_rsf_only, X_test_rsf_only, clf_rsf_only, preds_prob_rsf_only, import_feat_rsf_only, Y_valid_rsf_only, preds_val_prob_rsf_only = hyperparametertuning_CV (train_data, test_data, target, unused_feat, Num_FOLDS, Num_feat)


done preprocessing
rsf mri only
-------------------------------Training Begining-------------------------------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=486.0), HTML(value='')))

Device used : cuda

Early stopping occured at epoch 50 with best_epoch = 30 and best_valid_auc = 0.81279
Best weights from best epoch are automatically used!
[  1/   5]  Valid score: 0.812789 Test AUC: 0.729% Test ACC: 0.502%
Device used : cuda

Early stopping occured at epoch 69 with best_epoch = 49 and best_valid_auc = 0.7932
Best weights from best epoch are automatically used!
[  2/   5]  Valid score: 0.793199 Test AUC: 0.718% Test ACC: 0.502%
Device used : cuda

Early stopping occured at epoch 52 with best_epoch = 32 and best_valid_auc = 0.74143
Best weights from best epoch are automatically used!
[  3/   5]  Valid score: 0.741429 Test AUC: 0.725% Test ACC: 0.502%
Device used : cuda

Early stopping occured at epoch 68 with best_epoch = 48 and best_valid_auc = 0.79811
Best weights from best epoch are automatically used!
[  4/   5]  Valid score: 0.798110 Test AUC: 0.708% Test ACC: 0.502%
Device used : cuda

Early stopping occured at epoch 62 with best_epoch = 42 and best_valid_auc = 

# Getting results

In [None]:
# Validation AUC of each model, computed from the probability of the positive class (column 1).
# NOTE(review): the *_all / *_pheno / *_mri inputs are not produced by any cell visible in this
# notebook (the main cell only creates *_rsf_only names) — confirm where they are defined.
valid_auc_all = roc_auc_score (Y_valid_all, preds_val_prob_all[:,1])
valid_auc_pheno = roc_auc_score (Y_valid_pheno, preds_val_prob_pheno[:,1])
valid_auc_mri = roc_auc_score (Y_valid_mri, preds_val_prob_mri[:,1])

def classify (preds_prob_all, Y_test, line):
    """Threshold the positive-class probabilities and score the result.

    A sample is labelled 1 when its column-1 probability is strictly greater
    than ``line`` and 0 otherwise.

    Returns
    -------
    tuple
        ``(y_pred_all, test_acc)``: the array of 0/1 labels and their accuracy
        against ``Y_test``.
    """
    positive_scores = preds_prob_all[:,1]
    y_pred_all = np.array([1 if score > line else 0 for score in positive_scores])
    test_acc = accuracy_score(y_pred_all, Y_test)
    return y_pred_all, test_acc

def find_best_boundary(preds_prob_all,Y_test):
    """Return the probability cut-off that gives the highest accuracy.

    Candidate cut-offs are ``np.arange(0.01, 0.35, 0.01)``; each is scored with
    ``classify``. When several cut-offs tie, the smallest one is returned.

    NOTE(review): the cut-off is tuned on whatever labels are passed in; tuning
    it on test labels makes the resulting test accuracy optimistic.
    """
    candidates = np.arange(0.01, 0.35, 0.01)
    accuracies = [classify(preds_prob_all, Y_test, cutoff)[1] for cutoff in candidates]
    # argmax returns the first maximum, i.e. the smallest cut-off among ties.
    return candidates[int(np.argmax(accuracies))]

def confusion_stuff (Y_test, preds_prob_all, threshold=0.15):
    """Compute confusion-matrix metrics at a probability cut-off.

    Parameters
    ----------
    Y_test : array-like
        True binary labels.
    preds_prob_all : array-like
        Predicted probabilities; column 1 is the positive class.
    threshold : float or None, optional
        Cut-off passed to ``classify``. The default 0.15 is the value that was
        previously hard-coded. Pass ``None`` to use the accuracy-maximising
        cut-off from ``find_best_boundary`` (note that this tunes the cut-off on
        the very labels being evaluated).

    Returns
    -------
    tuple
        ``(sensitivity, specificity, PPV, NPV, Accuracy)`` as fractions.
    """
    if threshold is None:
        threshold = find_best_boundary(preds_prob_all, Y_test)

    y_pred_all, test_acc = classify(preds_prob_all, Y_test, threshold)
    # The four-way unpacking assumes a binary problem with both classes present.
    TN, FP, FN, TP = confusion_matrix(y_true=Y_test, y_pred = y_pred_all).ravel()
    print(TN,FP)
    print(FN,TP)

    sensitivity = TP/(TP+FN)
    specificity = TN/(TN+FP)
    PPV = TP/(TP+FP)
    NPV = TN/(TN+FN)
    Accuracy = test_acc
    return sensitivity, specificity, PPV, NPV, Accuracy

def makeround(sensitivity_all, specificity_all, PPV_all, NPV_all, Accuracy_all):
    """Convert five fractional metrics to percentages rounded to two decimals.

    The values are returned as a 5-tuple in the same order as the arguments.
    """
    metrics = (sensitivity_all, specificity_all, PPV_all, NPV_all, Accuracy_all)
    as_percent = [round(value*100,2) for value in metrics]
    return as_percent[0], as_percent[1], as_percent[2], as_percent[3], as_percent[4]
        
# Report sensitivity / specificity / PPV / NPV / accuracy plus validation and test AUC
# for each of the three models, all as percentages.
# NOTE(review): the Y_test_* / preds_prob_* / test_auc_* inputs are not produced by any cell
# visible in this notebook — confirm where they are defined.
print("<<<<All>>>>")
sensitivity_all, specificity_all, PPV_all, NPV_all, Accuracy_all = confusion_stuff(Y_test_all, preds_prob_all)
sensitivity_all, specificity_all, PPV_all, NPV_all, Accuracy_all = makeround(sensitivity_all, specificity_all, PPV_all, NPV_all, Accuracy_all)
# Each label now prints its own metric (specificity and PPV used to repeat the sensitivity value).
print(f"sensitivity {sensitivity_all}% specificity {specificity_all}% PPV {PPV_all}% NPV {NPV_all}% Accuracy {Accuracy_all}% Valid_AUC {round(valid_auc_all*100,2)}% Test_AUC {round(test_auc_all*100,2)}% " )

print("<<<<psychosocial>>>>")
sensitivity_pheno, specificity_pheno, PPV_pheno, NPV_pheno, Accuracy_pheno = confusion_stuff(Y_test_pheno, preds_prob_pheno)
sensitivity_pheno, specificity_pheno, PPV_pheno, NPV_pheno, Accuracy_pheno= makeround(sensitivity_pheno, specificity_pheno, PPV_pheno, NPV_pheno, Accuracy_pheno)
print(f"sensitivity {sensitivity_pheno}% specificity {specificity_pheno}% PPV {PPV_pheno}% NPV {NPV_pheno}% Accuracy {Accuracy_pheno}% Valid_AUC {round(valid_auc_pheno*100,2)}% Test_AUC {round(test_auc_pheno*100,2)}%")

print("<<<<MRI>>>>")
sensitivity_mri, specificity_mri, PPV_mri, NPV_mri, Accuracy_mri = confusion_stuff(Y_test_mri, preds_prob_mri)
sensitivity_mri, specificity_mri, PPV_mri, NPV_mri, Accuracy_mri = makeround(sensitivity_mri, specificity_mri, PPV_mri, NPV_mri, Accuracy_mri )
print(f"sensitivity {sensitivity_mri}% specificity {specificity_mri}% PPV {PPV_mri}% NPV {NPV_mri}% Accuracy {Accuracy_mri}% Valid_AUC {round(valid_auc_mri*100,2)}% Test_AUC {round(test_auc_mri*100,2)}%")

# For drawing ROC curve graph

In [None]:
def rocvis(true , prob , label ) :
    """Add one ROC curve to the current matplotlib axes.

    Parameters
    ----------
    true : sequence
        True labels. When the first element is a string (including ``str``
        subclasses such as ``numpy.str_``), the labels are integer-encoded with
        ``LabelEncoder`` before the curve is computed.
    prob : array-like
        Scores for the positive class.
    label : str
        Legend entry for this curve.
    """
    from sklearn.metrics import roc_curve
    # isinstance also matches str subclasses, which an exact type comparison would miss.
    if isinstance(true[0], str):
        from sklearn.preprocessing import LabelEncoder
        true = LabelEncoder().fit_transform(true)
    fpr, tpr, _ = roc_curve(true, prob)
    plt.plot(fpr, tpr, marker='.', label = label  )

# Draw ROC curve

In [None]:
# One large canvas; the dashed diagonal is the chance-level reference line.
fig , ax = plt.subplots(figsize= (20,10))
plt.plot([0, 1], [0, 1], linestyle='--')
# One ROC curve per model, with the test AUC (as a percentage) in the legend label.
# NOTE(review): the *_all / *_pheno / *_mri inputs are not produced by any cell visible in this
# notebook — confirm where they are defined.
rocvis(Y_test_all , preds_prob_all[:,1] , f"Combined:{round(test_auc_all*100, 2)}%")
rocvis(Y_test_pheno , preds_prob_pheno[:,1] , f"psychosocial:{round(test_auc_pheno*100, 2)}%")
rocvis(Y_test_mri , preds_prob_mri[:,1] , f"MRI:{round(test_auc_mri*100, 2)}%")
    #rocvis(caty_Test , catprob[:,1] , "CatBoost")
# Used for the plot title and the output file name; later cells also read it when naming CSV files.
dataset_name="enbackfMRI"

plt.legend(fontsize = 40)
plt.title(dataset_name, fontsize= 50)
plt.xlabel("FP rate", fontsize =30)
plt.ylabel("TP rate", fontsize =30)
plt.xticks(size = 30)
plt.yticks(size = 30)

# Tighten the layout before writing the figure to disk.
plt.tight_layout()
plt.savefig(f'{dataset_name}_ROC.png')

# For model interpretation, the following data (with subject keys) are needed

name_test_all, name_train_all, name_valid_all,
name_test_pheno, name_train_pheno, name_valid_pheno, 
name_test_mri, name_train_mri, name_valid_mri, 
Y_test_all , preds_prob_all,
Y_test_pheno , preds_prob_pheno, 
Y_test_mri , preds_prob_mri, 
Y_valid_all, preds_val_prob_all, 
Y_valid_pheno, preds_val_prob_pheno,
Y_valid_mri, preds_val_prob_mri

In [None]:
def save_prob_with_true(Y_test,preds_prob, subjectkey, testORvalid, modeltype):
    """Write subject keys, true labels and positive-class probabilities to a CSV.

    The three column names carry the ``testORvalid`` and ``modeltype`` suffixes,
    and the file name is built from the module-level ``dataset_name`` plus the
    same two tags. The assembled frame is also returned.
    """
    columns = {
        f"subjectkey_{testORvalid}_{modeltype}": subjectkey,
        f"Y_{testORvalid}_{modeltype}":Y_test,
        f"preds_prob_{testORvalid}_{modeltype}" :preds_prob[:,1],
    }
    combined_model = pd.DataFrame(columns)
    out_path = f"{dataset_name}_combined_forROC_{modeltype}_{testORvalid}.csv"
    combined_model.to_csv(out_path)
    return combined_model

In [None]:
# Export test and validation predictions for each of the three models (one CSV per call).
# NOTE(review): the name_* subject-key variables and the *_all / *_pheno / *_mri inputs are not
# produced by any cell visible in this notebook — confirm where they are defined.
result_test_all = save_prob_with_true(Y_test_all, preds_prob_all, name_test_all, "test", "all")
result_valid_all = save_prob_with_true(Y_valid_all, preds_val_prob_all, name_valid_all, "valid", "all")

result_test_pheno = save_prob_with_true(Y_test_pheno, preds_prob_pheno, name_test_pheno, "test", "psychosocial")
result_valid_pheno = save_prob_with_true(Y_valid_pheno, preds_val_prob_pheno, name_valid_pheno, "valid", "psychosocial")

result_test_mri = save_prob_with_true(Y_test_mri, preds_prob_mri, name_test_mri, "test", "mri")
result_valid_mri = save_prob_with_true(Y_valid_mri, preds_val_prob_mri, name_valid_mri, "valid", "mri")

# save feature importance

In [None]:
# Stack the three per-model importance tables. The original row indices are kept, and the
# result carries no column saying which model a row came from.
# NOTE(review): import_feat_all / _pheno / _mri are not produced by any cell visible here.
import_feat_every=pd.concat([import_feat_all,import_feat_pheno, import_feat_mri])

import_feat_every.to_csv(f"{dataset_name}_features.csv")

# Display the combined table.
import_feat_every

# Draw AUC per epochs and loss per epochs

In [None]:
# NOTE(review): clf_all / clf_pheno / clf_mri are not produced by any cell visible in this
# notebook — confirm where they are defined.
feature_names=["Combined", "psychosocial", "MRI"]
clfs = [clf_all, clf_pheno, clf_mri ]
simple_name = "enbackfMRI"

def error_plot(clf, metrics=("loss",)):
    """Plot the per-epoch history of ``clf`` for each requested metric.

    Parameters
    ----------
    clf : object
        Fitted model whose ``history`` can be indexed by metric name.
    metrics : sequence of str, optional
        History keys to draw, one line each, in the given order. The default
        draws the training loss only.
    """
    for metric in metrics:
        plt.plot(clf.history[metric])

# --- AUC per epoch: a train and a validation curve for each model ---
for i in range(len(clfs)):
    error_plot(clfs[i], metrics=("train_auc", "valid_auc"))

# Legend order matches the plotting order: train then valid, model by model.
plt.legend([f'train_{feature_names[0]}', f'valid_{feature_names[0]}',
           f'train_{feature_names[1]}', f'valid_{feature_names[1]}',
           f'train_{feature_names[2]}', f'valid_{feature_names[2]}'])
plt.title(f'{simple_name}')
plt.xlabel('epochs')
plt.ylabel('AUC')
plt.tight_layout()

plt.savefig(f'AUC_per_epochs {simple_name}.png')

plt.show()


# --- Loss per epoch: one curve for each model ---
feature_names=["combined", "psychosocial", "MRI"]
clfs = [clf_all, clf_pheno, clf_mri ]

for i in range(len(clfs)):
    error_plot(clfs[i])

plt.legend([f'{feature_names[0]}',
           f'{feature_names[1]}',
           f'{feature_names[2]}'])
plt.title(f'{simple_name}')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.tight_layout()

plt.savefig(f'loss_per_epochs {simple_name}.png')

plt.show()

# Model save 

In [None]:
def saveclf(clf,name):
    """Save ``clf`` via its ``save_model`` method under ``<simple_name>_<name>``.

    ``simple_name`` is read from the module namespace.
    """
    clf.save_model(f'{simple_name}_{name}')

saveclf(clf_all,"all")
# The psychosocial model was previously saved under the label "combined", which the rest of
# the notebook uses for the all-features model.
saveclf(clf_pheno,"psychosocial")
saveclf(clf_mri,"mri")