In [16]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as spy
import prince
import pickle
import plotly.express as px
from tqdm import tqdm
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix

# 결과 확인을 용이하게 하기 위한 코드
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = 'all'

In [2]:
from matplotlib import font_manager, rc

font_path = "C:/Windows/Fonts/NGULIM.TTF"
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)

# 데이터 불러오기

In [6]:
with open('df_FwithS_mat.pickle', 'rb') as f:
    FwithS_mat = pickle.load(f)

In [7]:
with open('df_FwoutS_mat.pickle', 'rb') as f:
    FwoutS_mat = pickle.load(f)

# 데이터 전처리

## icd9 : 4019, 4280/ item 제거

In [8]:
FwithS_mat = FwithS_mat.drop(['icd9 : 4019', 'icd9 : 4280'], axis=1)
FwithS_mat = FwithS_mat[FwithS_mat.columns.drop(
    list(FwithS_mat.filter(regex='item')))]

FwithS_mat

Unnamed: 0,HADM_ID,GENDER,age,icd9 : 0389,icd9 : 5849,icd9 : 486,icd9 : 51881,icd9 : 78552,icd9 : 2866,icd9 : 496,...,icd9 : 61179,icd9 : E9399,icd9 : 99939,icd9 : 56731,icd9 : 41404,icd9 : 30183,icd9 : 30928,icd9 : 5933,icd9 : 71697,icd9 : 7469
0,192123.0,F,66.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,112906.0,M,71.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,196896.0,M,76.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,193975.0,F,83.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,151459.0,M,52.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,103030.0,F,78.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
636,145414.0,F,61.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
637,186076.0,M,69.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
638,165352.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
FwoutS_mat = FwoutS_mat.drop('icd9 : 4019', axis=1)
FwoutS_mat = FwoutS_mat[FwoutS_mat.columns.drop(
    list(FwoutS_mat.filter(regex='item')))]

FwoutS_mat

Unnamed: 0,HADM_ID,GENDER,age,icd9 : 1570,icd9 : 57410,icd9 : 9971,icd9 : 4275,icd9 : 99811,icd9 : 5680,icd9 : 55321,...,icd9 : 33721,icd9 : 34691,icd9 : 60781,icd9 : 40591,icd9 : 1838,icd9 : 4374,icd9 : 86122,icd9 : 86113,icd9 : E9654,icd9 : 88013
0,112213.0,M,72.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,143045.0,F,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,188822.0,M,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,109235.0,M,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,157681.0,F,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13075,101083.0,M,66.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13076,167228.0,M,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13077,117390.0,M,78.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13078,197084.0,F,65.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 모든 feature를 binary로 변환

In [10]:
# Gender가 F면 0, M이면 1로 변환

FwithS_mat.replace({'F': 0, 'M': 1}, inplace=True)
FwithS_mat

FwoutS_mat.replace({'F': 0, 'M': 1}, inplace=True)
FwoutS_mat

Unnamed: 0,HADM_ID,GENDER,age,icd9 : 0389,icd9 : 5849,icd9 : 486,icd9 : 51881,icd9 : 78552,icd9 : 2866,icd9 : 496,...,icd9 : 61179,icd9 : E9399,icd9 : 99939,icd9 : 56731,icd9 : 41404,icd9 : 30183,icd9 : 30928,icd9 : 5933,icd9 : 71697,icd9 : 7469
0,192123.0,0.0,66.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,112906.0,1.0,71.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,196896.0,1.0,76.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,193975.0,0.0,83.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,151459.0,1.0,52.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,103030.0,0.0,78.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
636,145414.0,0.0,61.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
637,186076.0,1.0,69.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
638,165352.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,HADM_ID,GENDER,age,icd9 : 1570,icd9 : 57410,icd9 : 9971,icd9 : 4275,icd9 : 99811,icd9 : 5680,icd9 : 55321,...,icd9 : 33721,icd9 : 34691,icd9 : 60781,icd9 : 40591,icd9 : 1838,icd9 : 4374,icd9 : 86122,icd9 : 86113,icd9 : E9654,icd9 : 88013
0,112213.0,1.0,72.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,143045.0,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,188822.0,1.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,109235.0,1.0,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,157681.0,0.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13075,101083.0,1.0,66.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13076,167228.0,1.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13077,117390.0,1.0,78.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13078,197084.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
FwithS_mat['age'] = FwithS_mat['age'].apply(lambda x: 0 if x <= 60 else 1)
FwithS_mat

FwoutS_mat['age'] = FwoutS_mat['age'].apply(lambda x: 0 if x <= 60 else 1)
FwoutS_mat

Unnamed: 0,HADM_ID,GENDER,age,icd9 : 0389,icd9 : 5849,icd9 : 486,icd9 : 51881,icd9 : 78552,icd9 : 2866,icd9 : 496,...,icd9 : 61179,icd9 : E9399,icd9 : 99939,icd9 : 56731,icd9 : 41404,icd9 : 30183,icd9 : 30928,icd9 : 5933,icd9 : 71697,icd9 : 7469
0,192123.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,112906.0,1.0,1,1.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,196896.0,1.0,1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,193975.0,0.0,1,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,151459.0,1.0,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,103030.0,0.0,1,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
636,145414.0,0.0,1,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
637,186076.0,1.0,1,1.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
638,165352.0,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,HADM_ID,GENDER,age,icd9 : 1570,icd9 : 57410,icd9 : 9971,icd9 : 4275,icd9 : 99811,icd9 : 5680,icd9 : 55321,...,icd9 : 33721,icd9 : 34691,icd9 : 60781,icd9 : 40591,icd9 : 1838,icd9 : 4374,icd9 : 86122,icd9 : 86113,icd9 : E9654,icd9 : 88013
0,112213.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,143045.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,188822.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,109235.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,157681.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13075,101083.0,1.0,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13076,167228.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13077,117390.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13078,197084.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# xgboost로 feature importance 확인

In [17]:
FwithS_mat['target'] = 1
FwithS_mat

FwoutS_mat['target'] = 0
FwoutS_mat

Unnamed: 0,HADM_ID,GENDER,age,icd9 : 0389,icd9 : 5849,icd9 : 486,icd9 : 51881,icd9 : 78552,icd9 : 2866,icd9 : 496,...,icd9 : E9399,icd9 : 99939,icd9 : 56731,icd9 : 41404,icd9 : 30183,icd9 : 30928,icd9 : 5933,icd9 : 71697,icd9 : 7469,target
0,192123.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,112906.0,1.0,1,1.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,196896.0,1.0,1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,193975.0,0.0,1,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,151459.0,1.0,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,103030.0,0.0,1,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
636,145414.0,0.0,1,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1
637,186076.0,1.0,1,1.0,1.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1
638,165352.0,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


Unnamed: 0,HADM_ID,GENDER,age,icd9 : 1570,icd9 : 57410,icd9 : 9971,icd9 : 4275,icd9 : 99811,icd9 : 5680,icd9 : 55321,...,icd9 : 34691,icd9 : 60781,icd9 : 40591,icd9 : 1838,icd9 : 4374,icd9 : 86122,icd9 : 86113,icd9 : E9654,icd9 : 88013,target
0,112213.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,143045.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,188822.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,109235.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,157681.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13075,101083.0,1.0,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
13076,167228.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
13077,117390.0,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
13078,197084.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [63]:
df_fi = pd.DataFrame()

for i in tqdm(range(30)):

    FwoutS_mat = FwoutS_mat.sample(n=len(FwithS_mat))
    fmerge = pd.merge(FwithS_mat, FwoutS_mat, how='outer')
    fmerge = fmerge.fillna(0)
    cols = fmerge.columns.tolist()
    cols = [col for col in fmerge if col != 'target'] + ['target']
    fmerge = fmerge[cols]

    X_train, X_test, y_train, y_test = train_test_split(
        fmerge.iloc[:, 1:-1], fmerge['target'])

    xgb = XGBClassifier(n_estimators=500, learning_rate=0.1, max_depth=4)

    xgb_model = xgb.fit(X_train, y_train)

    xgb_y_pred = xgb_model.predict(X_test)
    xgb_y_pred_prob = xgb_model.predict_proba(X_test)

    df_fi.index = cols[1:-1]
    df_fi.loc[:, i] = xgb_model.feature_importances_





  3%|██▊                                                                                | 1/30 [00:19<09:13, 19.10s/it]



  7%|█████▌                                                                             | 2/30 [00:42<10:06, 21.66s/it]



 10%|████████▎                                                                          | 3/30 [01:06<10:09, 22.59s/it]



 13%|███████████                                                                        | 4/30 [01:30<09:59, 23.06s/it]



 17%|█████████████▊                                                                     | 5/30 [01:53<09:40, 23.21s/it]



 20%|████████████████▌                                                                  | 6/30 [02:17<09:26, 23.59s/it]



 23%|███████████████████▎                                                               | 7/30 [02:41<09:03, 23.64s/it]



 27%|██████████████████████▏                                                            | 8/30 [03:06<08:51, 24.16s/it]



 30%|████████████████████████▉                                                          | 9/30 [03:31<08:32, 24.43s/it]



 33%|███████████████████████████▎                                                      | 10/30 [03:56<08:07, 24.36s/it]



 37%|██████████████████████████████                                                    | 11/30 [04:20<07:40, 24.26s/it]



 40%|████████████████████████████████▊                                                 | 12/30 [04:44<07:16, 24.23s/it]



 43%|███████████████████████████████████▌                                              | 13/30 [05:08<06:51, 24.23s/it]



 47%|██████████████████████████████████████▎                                           | 14/30 [05:32<06:27, 24.19s/it]



 50%|█████████████████████████████████████████                                         | 15/30 [05:56<06:02, 24.18s/it]



 53%|███████████████████████████████████████████▋                                      | 16/30 [06:20<05:38, 24.15s/it]



 57%|██████████████████████████████████████████████▍                                   | 17/30 [06:44<05:13, 24.10s/it]



 60%|█████████████████████████████████████████████████▏                                | 18/30 [07:08<04:48, 24.04s/it]



 63%|███████████████████████████████████████████████████▉                              | 19/30 [07:32<04:25, 24.10s/it]



 67%|██████████████████████████████████████████████████████▋                           | 20/30 [07:57<04:01, 24.14s/it]



 70%|█████████████████████████████████████████████████████████▍                        | 21/30 [08:21<03:37, 24.16s/it]



 73%|████████████████████████████████████████████████████████████▏                     | 22/30 [08:45<03:13, 24.16s/it]



 77%|██████████████████████████████████████████████████████████████▊                   | 23/30 [09:09<02:48, 24.13s/it]



 80%|█████████████████████████████████████████████████████████████████▌                | 24/30 [09:33<02:24, 24.08s/it]



 83%|████████████████████████████████████████████████████████████████████▎             | 25/30 [09:57<02:00, 24.05s/it]



 87%|███████████████████████████████████████████████████████████████████████           | 26/30 [10:22<01:36, 24.19s/it]



 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [10:46<01:12, 24.18s/it]



 93%|████████████████████████████████████████████████████████████████████████████▌     | 28/30 [11:10<00:48, 24.14s/it]



 97%|███████████████████████████████████████████████████████████████████████████████▎  | 29/30 [11:34<00:24, 24.14s/it]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [11:58<00:00, 23.95s/it]


In [65]:
df_fi

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
GENDER,0.004444,0.004965,0.004260,0.004453,0.002649,0.004270,0.004172,0.003037,0.002578,0.003349,...,0.003595,0.004613,0.003709,0.003079,0.003676,0.004261,0.004241,0.003156,0.003386,0.003111
age,0.004958,0.003711,0.004195,0.004685,0.002582,0.004255,0.003618,0.003962,0.004537,0.003318,...,0.002960,0.003716,0.002872,0.003565,0.004505,0.003468,0.005127,0.005145,0.005106,0.004528
icd9 : 0389,0.001564,0.004545,0.007020,0.002024,0.002493,0.001955,0.001335,0.000736,0.000000,0.011886,...,0.012273,0.001486,0.013565,0.002179,0.001648,0.000896,0.000808,0.012315,0.003222,0.005582
icd9 : 5849,0.054865,0.072253,0.055870,0.058289,0.048258,0.062488,0.082383,0.039310,0.100400,0.061463,...,0.033646,0.052148,0.068100,0.031506,0.057280,0.062363,0.061179,0.041708,0.053127,0.034365
icd9 : 486,0.014156,0.013665,0.008804,0.011781,0.011643,0.013788,0.012246,0.016192,0.014473,0.016708,...,0.012528,0.013668,0.014766,0.018446,0.011857,0.011273,0.012586,0.012430,0.010594,0.015471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
icd9 : 4374,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
icd9 : 86122,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
icd9 : 86113,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
icd9 : E9654,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [72]:
df_mean = df_fi.mean(axis=1)
df_mean = df_mean.sort_values(ascending=False)
df_fi_30 = pd.DataFrame(df_mean[:30], columns=['importance_mean'])
df_fi_30

Unnamed: 0,importance_mean
icd9 : 5849,0.05439
icd9 : 42832,0.028653
icd9 : 42833,0.028416
icd9 : 42822,0.027484
icd9 : V4581,0.024923
icd9 : 5990,0.019886
icd9 : 42821,0.019099
icd9 : 5859,0.018609
icd9 : 42820,0.017484
icd9 : 41071,0.016778


# 결과 저장

In [74]:
# with open('df_fi.pickle', 'wb') as f:
#     pickle.dump(df_fi, f)

In [73]:
# with open('df_fi_30.pickle', 'wb') as f:
#     pickle.dump(df_fi_30, f)