# 의료와 데이터사이언스 - 11주차
## 비정형 데이터 생체신호 분석 2: 생체신호 인공지능 모델링 2 - 3
- Author : **Hyun-Lim Yang, Ph.D.**<br>
Assistant Professor @ Office of Hospital Information, Seoul National University Hospital <br>
Adjunct Assistant Professor @ Department of Medicine, Seoul National University College of Medicine
- E-mail : hlyang{_at_}snu{_dot_}ac{_dot_}kr
***

In [1]:
from IPython.display import HTML

# CSS rule for the `div.warn` call-out boxes used by the markdown cells.
style_warn = (
    "<style>div.warn { "
    "background-color: #fcf2f2;"
    "border-color: #dFb5b4; "
    "border-left: 5px solid #dfb5b4; "
    "padding: 0.5em;"
    "}</style>"
)
# Left as the cell's last expression so the notebook displays the HTML object.
HTML(style_warn)

## Preprocessing for Stroke volume prediction

### Import packages

<div class="warn"><b>Warning!</b> Set <code>download_directory</code> to your own working directory.</div>


> **Warning** <br>
> colab 환경을 위한 google drive import 코드가 포함되어 있습니다. <br>
> 로컬 환경에서 실행 시 colab을 위한 import function들을 comment out 한 뒤 실행하세요. 

In [None]:
!pip install vitaldb

In [None]:
from google.colab import drive  # for colab

# Mount Google Drive at the path the later cells read the course data from.
gdrive_mount_point = '/content/gdrive/'
drive.mount(gdrive_mount_point)  # for colab

In [None]:
import os

# Course data folder inside the mounted Google Drive (Colab only).
cloud_directory = '/content/gdrive/My Drive/KOHI_2022_CNN_data_open/'
# Left as the last expression so the notebook shows the folder listing,
# which confirms that the drive mount worked.
os.listdir(path=cloud_directory)

In [None]:
import sys

# Local runs: comment out the Colab line at the bottom and enable these two instead.
#download_directory = os.getcwd() # for local environments
#sys.path.append(download_directory) # for local environments

# Colab: add the Drive data folder to the import search path.
sys.path.extend([cloud_directory])  # for colab

In [2]:
import os
import sys

# for local environments: work from the current directory and add it to the
# import search path.
download_directory = os.getcwd()
sys.path.extend([download_directory])

In [3]:
import numpy as np
import pandas as pd
import glob
import kohi_preprocessor as pre
import vitaldb
from tqdm import tqdm
import warnings

# Install an "ignore" filter with no category or module restriction,
# i.e. every warning is suppressed from here on.
warnings.filterwarnings('ignore')

import os

# Point download_directory at the Colab drive folder only when that folder was
# actually configured. On a local run (Colab cells skipped) cloud_directory
# does not exist, so keep the directory chosen earlier, or fall back to the
# current working directory if none was set.
if "cloud_directory" in globals():
    download_directory = cloud_directory  # for colab
elif "download_directory" not in globals():
    download_directory = os.getcwd()  # for local environments

### Data loading
샘플 파일을 vitaldb 서버로부터 직접 다운로드하여 로드

> **TODO:** `00019.vital`을 100hz로 변환하여 `chart_pd_01`에 DataFrame으로 저장하기

In [4]:
track_names = ["EV1000/SV", "SNUADC/ART"]
### =========== Your code here ====================

# Open case 19 with only the two requested tracks, then resample them onto a
# common grid with one row every 1/100 s (100 Hz).
sampling_interval = 0.01
vitalfile = vitaldb.VitalFile(19, track_names=track_names)
chart_pd_01 = vitalfile.to_pandas(track_names, interval=sampling_interval)

### ===============================================

# Show which columns the resulting DataFrame holds.
print(chart_pd_01.columns)

Index(['EV1000/SV', 'SNUADC/ART'], dtype='object')


In [5]:
# Column names of the stroke-volume track and the arterial-pressure waveform track.
col_svs, col_art = 'EV1000/SV', 'SNUADC/ART'

### Stroke volume 데이터 추출

In [6]:
# Extract the stroke-volume samples
### =========== Your code here ====================

# Keep only the rows where a stroke-volume value is present; the surviving
# index labels mark where each value sits in the chart.
svs_data_pd = chart_pd_01[col_svs].dropna()
svs_index = svs_data_pd.index.to_numpy()

### ===============================================
print(svs_data_pd.head())

53845    61.0
54008    61.0
54208    61.0
54408    61.0
54607    61.0
Name: EV1000/SV, dtype: float32


### arterial wave 데이터 추출 및 nan 채우기

In [7]:
# Extract the whole arterial waveform and fill its missing values
### =========== Your code here ====================

# Missing samples become 0 rather than being dropped, so the row count is unchanged.
art_full_pd = chart_pd_01[col_art].fillna(0)

### ===============================================

print(art_full_pd.head())

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: SNUADC/ART, dtype: float32


### 데이터셋 만들기
필요한 파라미터들 정의

In [8]:
# Parameters used by the dataset-building cells
srate, length = 100, 20  # samples per second, window length in seconds
max_limit_svs, min_limit_svs = 200, 20  # svs max : 200, svs min : 20

입력 데이터 길이가 20s이니, svs가 20s 이내에 등장하는 것은 무시함

In [9]:
# Drop the stroke-volume samples that occur at or before index length*srate
# (the first 20 s), i.e. within one input-window length of the recording start.
### =========== Your code here ====================

first_valid = length * srate
svs_points = svs_index[np.greater(svs_index, first_valid)]

### ===============================================

print(svs_points)

[  53845   54008   54208 ... 2700200 2700400 2700600]


### Arterial wave segment 추출

In [10]:
# Extract the arterial segment that precedes each stroke-volume sample
### =========== Your code here ====================
# Convert the waveform to a NumPy array once: slicing the pandas Series inside
# the loop would build a new Series object for every window.
# NOTE(review): like the original positional slicing, this assumes the chart
# index is the default 0..N-1 range so that labels equal positions -- confirm.
seg_len = length * srate
art_full_np = art_full_pd.to_numpy()
# svs_points are index labels of svs_data_pd, so fetch them in one .loc call.
svs_values_list = list(svs_data_pd.loc[svs_points].to_numpy())
# Each window holds the seg_len samples ending just before position idx.
art_seg_list = [art_full_np[idx - seg_len:idx] for idx in svs_points]

### ===============================================

svs_values_np = np.array(svs_values_list)
art_seg_np = np.array(art_seg_list)

print(svs_values_np)
print(art_seg_np)

[61. 61. 61. ... 63. 63. 63.]
[[ 62.2872   62.2872   58.3374  ...  62.2872   65.2496   66.2371 ]
 [ 54.3876   56.3625   62.2872  ...  53.4001   51.4252   50.4377 ]
 [ 65.2496   64.2621   63.2747  ...  75.1242   70.1869   67.2245 ]
 ...
 [  6.00217 -13.747    30.6886  ...  -9.79714  -5.84731  19.8266 ]
 [-26.5839   23.7764   10.9395  ...   2.05234 -16.7093   28.7137 ]
 [-24.609    21.8015    4.02726 ...  -3.8724  -10.7846   20.814  ]]


### 조건에 따라 filter들 정의

In [11]:
# Filter declarations
# svs min-max filter: keep labels strictly between min_limit_svs and max_limit_svs

### =========== Your code here ====================

svs_max_filter = np.less(svs_values_np, max_limit_svs)
svs_min_filter = np.greater(svs_values_np, min_limit_svs)
svs_filter = np.logical_and(svs_max_filter, svs_min_filter)

### ===============================================


# abp range filter

### =========== Your code here ====================

# Keep a segment only when every one of its samples lies strictly between
# 25.0 and 250.0. The test is evaluated on the whole 2-D array at once instead
# of one Python iteration (with two array copies) per segment.
art_filter = ((art_seg_np > 25.0) & (art_seg_np < 250.0)).all(axis=1)
# List form kept so any cell that still reads art_filter_list keeps working.
art_filter_list = list(art_filter)

### ===============================================


# mstds filter

### =========== Your code here ====================

def _mstd_or_zero(segment):
    """Score one segment: 0.0 if it has a negative sample, else the first
    value returned by pre.process_beat (its second value is discarded)."""
    if np.any(np.asarray(segment) < 0.):
        return float(0.)
    score, _unused = pre.process_beat(segment)
    return score


mstds_values_list = [_mstd_or_zero(seg) for seg in tqdm(art_seg_np)]
# A segment passes only when its score is strictly positive.
mstds_filter = np.array(mstds_values_list) > 0.

### ===============================================


100%|██████████| 13217/13217 [02:46<00:00, 79.35it/s] 


전체 필터 하나로 만들기

In [12]:
### =========== Your code here ====================

# A sample is kept only when it passes all three filters.
all_filters = np.logical_and.reduce((svs_filter, art_filter, mstds_filter))

### ===============================================


### 필터 적용하여 데이터 추출

In [13]:
# Apply the combined filter to labels and segments

### =========== Your code here ====================

# The same boolean mask selects matching rows from both arrays, so label i
# still belongs to segment i afterwards.
svs_filtered, art_filtered = (
    arr[all_filters] for arr in (svs_values_np, art_seg_np)
)

### ===============================================

for kept in (svs_filtered, art_filtered):
    print(kept.shape)

(12363,)
(12363, 2000)


데이터셋 정의

In [14]:
# Model inputs are the filtered arterial segments; targets are the matching
# stroke-volume values.
x_data, y_label = art_filtered, svs_filtered

In [15]:
# Print the input shape and the label shape, one per line.
print(x_data.shape, y_label.shape, sep='\n')

(12363, 2000)
(12363,)
