In [6]:
import pandas as pd
from scipy.stats import zscore
import numpy as np
import warnings
import os

pd.set_option('mode.chained_assignment',  None)
warnings.filterwarnings('ignore')

In [7]:
obs_col = ['patientid', 'datetime','value', 'variableid']
pharm_col = ['patientid', 'givenat','fluidamount_calc', 'pharmaid', 'infusionid', 'recordstatus']

In [8]:
observation_path = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/observation_tables/csv/part-'
pharma_path = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/pharma_records/csv/part-'
general_path = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/reference_data/general_table.csv'

obs_feature_path = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/reference_data/hirid_vital_lab_id.csv' #will be inserted features folder
pharma_feature_path = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/reference_data/hirid_pharma_id.csv' #will be inserted features folder

# save_pth = 

parts = 134

start_los = 24 # hour
end_los = 96 # hour

In [9]:
# ready

observation = pd.read_csv(observation_path+str(parts)+'.csv', usecols=obs_col)
observation = observation[obs_col]

pharma = pd.read_csv(pharma_path+str(parts)+'.csv', usecols=pharm_col)[pharm_col].rename(columns={'givenat':'datetime',
                                                                                       'pharmaid':'variableid', 
                                                                                       'fluidamount_calc': 'value'})

general = pd.read_csv(general_path)
general = general[(general['age']>18) & (general['age']<100)]

feature_obs = pd.read_csv(obs_feature_path)
feature_pha = pd.read_csv(pharma_feature_path)

In [10]:
# feature selection

def feature_selection(df, item):
    data = df.copy()
    result = data[data['variableid'].isin(item.item_id.unique())]
    
    #feature name mapping
    rename_dict = dict(zip(item.item_id, item.Name))
    result['variableid'] = result['variableid'].map(rename_dict)
    return result

selected_pharma = feature_selection(pharma, feature_pha)
selected_chart = feature_selection(observation, feature_obs)

In [11]:
# outlier control (vital/lab-zscore or IQR, pharm-non negative)

# threshold = 3.0

# selected_chart['zscore'] = (
#     selected_chart.groupby('variableid')['value']
#     .transform(lambda x: (x - x.mean()) / x.std(ddof=0))
# )

# chart = selected_chart[selected_chart['zscore'].abs() <= threshold].copy()
# chart = chart.drop(columns=['zscore'])

iqr_factor = 1.5

def remove_outliers_iqr(x):
    q1 = x.quantile(0.25)
    q3 = x.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - iqr_factor * iqr
    upper = q3 + iqr_factor * iqr
    return x.between(lower, upper)

# variableid별 IQR 필터링
mask = selected_chart.groupby("variableid")["value"].transform(remove_outliers_iqr)

# 이상치 제거 후 DataFrame 생성
chart = selected_chart[mask].copy()
idx = chart[chart['value']==0].index
chart = chart.drop(index = idx)


pharm = selected_pharma.copy()
pharm.loc[pharm['value'] < 0, 'value'] = 0

# del selected_pharma

In [12]:
# resampling



target_patient = general[general['patientid'].isin(chart.patientid.unique())]
chart=chart.merge(target_patient[['patientid', 'admissiontime']], how='left', left_on='patientid', right_on='patientid')
pharm=pharm.merge(target_patient[['patientid', 'admissiontime']], how='left', left_on='patientid', right_on='patientid')

def resampling(df, start_los, end_los):
    df['datetime']=pd.to_datetime(df['datetime'])
    df['admissiontime'] = pd.to_datetime(df['admissiontime'])
    df['Time_since_ICU_admission'] = df['datetime'] - df['admissiontime']

    del df['admissiontime']
    del df['datetime']

    df['Anchor_Time'] = (
        df['Time_since_ICU_admission'].dt.total_seconds() / 60
    ).round(2)

    df=df[df['Anchor_Time']>=0]
    los_df = df.groupby('patientid').max()[['Anchor_Time']].reset_index()
    filter_los = los_df[(los_df['Anchor_Time'] >= start_los*60)&(los_df['Anchor_Time'] <= end_los*60)].patientid.unique()

    filtered_selected = df[df['patientid'].isin(filter_los)]

    filtered_selected=filtered_selected.sort_values(by=['patientid', 'Anchor_Time'])
    
    return filtered_selected


pharm = resampling(pharm, start_los, end_los)
chart = resampling(chart, start_los, end_los)

# sanity
pharm = pharm[pharm['patientid'].isin(chart.patientid.unique())]

In [13]:
# pharma processing

interest_code = [524, 8, 520, 780, 776]

pharm = pharm[pharm['recordstatus'].isin(interest_code)]

def adm_duration(pharm):
    global refine, inf
    # identify bolus and infusion including start time, end time of administration of pharma

    df = pharm.copy()

    df = df.sort_values('Anchor_Time')
    result = []

    for inf in df.infusionid.unique():
        
        interest = df[df['infusionid']==inf]
        interest['start_time'] = (pd.to_numeric(interest['Anchor_Time'])//30)*30 + 30
        
        # bolus
        if (len(interest)==1) | all(interest.recordstatus.isin([780])):
            interest['recordstatus'] = 780
            result.append(interest)
        
        # infuse
        elif any(interest.recordstatus.isin([524, 776]))&(len(interest)!=1):
            refine = interest[interest.value > 0]
            try:
                refine.iloc[0, 4] = 524
            except:
                pass
            
            result.append(refine)
            
        else:
            print(inf)
        
    return pd.concat(result).sort_values(['patientid', 'start_time']).reset_index(drop=True)

selected_pharm = adm_duration(pharm)

In [14]:
chart['start_time'] = (pd.to_numeric(chart['Anchor_Time'])//30)*30 + 30

In [15]:
chart.drop(['Time_since_ICU_admission'], axis = 1, inplace=True)
selected_pharm.drop(['Time_since_ICU_admission'], axis = 1, inplace=True)

chart['infusionid'] = 'dummy'
chart['recordstatus'] = 'dummy'

# Specify the item_ids we are interested in
required_item_ids = {'HR', 'SBP', 'DBP', 'Lactate'}
valid_stay_ids = chart[chart['variableid'].isin(required_item_ids)].groupby('patientid')['variableid'].nunique()
valid_stay_ids = valid_stay_ids[valid_stay_ids == len(required_item_ids)].index

chart = chart[chart['patientid'].isin(valid_stay_ids)].sort_values(by='Anchor_Time')
pharm = selected_pharm[selected_pharm['patientid'].isin(valid_stay_ids)].sort_values(by='Anchor_Time')

In [16]:
# Time bucket 안에서 치료 시점으로 다시 구분 -> 더 irregular 한 시간 간격이 생성됨

chart_copy = chart.copy()
pharm_copy = pharm.copy()

pharm_copy.reset_index(drop=True, inplace=True)
chart_copy.reset_index(drop=True, inplace=True)

chart_copy['pivot_time'] = chart_copy['start_time'].copy()
pharm_copy['pivot_time'] = pharm_copy['start_time'].copy()

for id in valid_stay_ids:
    
    pharm_ind = pharm_copy[pharm_copy['patientid']==id]
    chart_ind = chart_copy[chart_copy['patientid']==id]

    for bk_id in pharm_ind.start_time.unique():

        ph_bucket = pharm_ind[pharm_ind['start_time']==bk_id]
        ch_bucket = chart_ind[chart_ind['start_time']==bk_id]

        times = sorted(ph_bucket['Anchor_Time'].dropna().unique())

        if len(times) > 1:
            interval = [(0, times[0])]
            interval += [(times[i], times[i+1]) for i in range(len(times)-1)]
        elif len(times) == 1:
            interval = [(np.nan, times[0])]
        else:
            interval = []

        for i, (st, ed) in enumerate(interval):
            
            tr_bucket = ch_bucket[(ch_bucket['Anchor_Time']>=st)&(ch_bucket['Anchor_Time']<ed)]
                
            idx = tr_bucket.index
            p_idx = ph_bucket.index[i]
            
            if len(tr_bucket)==0:
                pharm_copy.drop(index = p_idx, inplace = True)
                pass
            
            chart_copy.loc[idx, 'pivot_time'] = bk_id + i
            pharm_copy.loc[p_idx, 'pivot_time'] = bk_id + i
            
            if i == len(interval)-1:
                tr_bucket = ch_bucket[(ch_bucket['Anchor_Time']>=ed)]
                idx = tr_bucket.index
                
                chart_copy.loc[idx, 'pivot_time'] = bk_id + i

pharm_copy.dropna(axis=0, inplace = True) # 삭제한 행에 다시 접근해서 발생한 문제 -> 삭제가 맞음

In [17]:
pharm_copy['variableid'] = 'vasopressor'
column = ['patientid', 'value', 'variableid', 'pivot_time']

merged = pd.concat([chart_copy[column],pharm_copy[column]], axis = 0)

In [18]:
feat = set(feature_obs.Name.unique()) | set(['vasopressor'])
part_csv = pd.DataFrame() 


for hid in valid_stay_ids:

    df2 = merged[merged['patientid'] == hid]
    val = df2.pivot_table(index='pivot_time', columns='variableid', values='value').reset_index()
    val = val.rename(columns={'pivot_time': 'Time'})
    val['patientid'] = hid

    if part_csv.empty:
        part_csv = val
    else:
        part_csv = pd.concat([part_csv, val], axis=0)

    feat_df=pd.DataFrame(columns=list(set(feat)-set(part_csv.columns)))
    part_csv=pd.concat([part_csv,feat_df],axis=1)

    # Additional processing

    # 1. vasopressor
    part_csv['vasopressor'].fillna(0, inplace=True)
    idx = part_csv[part_csv['vasopressor'] > 0].index
    part_csv.loc[idx, 'vasopressor'] = 1

    # 2. MAP
    part_csv['MAP'] = (part_csv['DBP']*2 + part_csv['SBP'])/3

    #[ ====== Save temporal data to csv ====== ]
    if not os.path.exists(save_pth):
        os.makedirs(save_pth)
    file_path = os.path.join(save_pth, f"part-{part_num}.csv")
    part_csv.to_csv(file_path,index=False)

NameError: name 'save_pth' is not defined

In [211]:
save_pth = '/Users/DAHS/Desktop/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/tabular_records/20251008/'
parts = 140
# try except pass
part_example = pd.read_csv(save_pth+f"part-{parts}.csv")

In [None]:
def interpolate_lactate_hirid(df, time_col="Time", lactate_col="Lactate"):
    """
    HiRID 논문 정의에 기반한 제한적 선형보간(Lactate):
    - Δt < 6h(=360min): 전체 구간 선형 보간
    - Δt ≥ 6h: 앞뒤 3h(=180min)만 forward/backward fill
    - 2 mmol/L 경계 통과 시점 이후 보간 중단
    효율적 벡터 연산 기반으로 구현.
    """

    df = df.sort_values(time_col).reset_index(drop=True).copy()
    t = df[time_col].to_numpy()
    l = df[lactate_col].to_numpy()
    n = len(df)
    interp = np.copy(l)

    # 유효 측정 인덱스
    valid_idx = np.where(~np.isnan(l))[0]

    if len(valid_idx) == 0:
        df["Lactate_interp"] = np.nan
        return df
    elif len(valid_idx) == 1:
        # 측정이 1개뿐이면 앞뒤 3시간만 동일값으로 복제
        t0, l0 = t[valid_idx[0]], l[valid_idx[0]]
        interp[:] = np.nan
        interp[(t >= t0 - 180) & (t <= t0 + 180)] = l0
        df["Lactate_interp"] = interp
        return df

    # ---- 구간별 처리 ----
    for i in range(len(valid_idx) - 1):
        i1, i2 = valid_idx[i], valid_idx[i+1]
        t1, t2 = t[i1], t[i2]
        L1, L2 = l[i1], l[i2]
        dt = t2 - t1

        # (1) 상태가 threshold 2 mmol/L을 넘었는지 확인
        
        precede = (L1 >= 2 and L2 < 2)
        follow = (L1 < 2 and L2 >= 2)
        
        cross_2 = follow or precede
        
        if not cross_2:

            # (2) 6시간 이내면 선형보간
            if dt <= 360:
                mask = (t > t1) & (t < t2)
                interp[mask] = L1 + (L2 - L1) * (t[mask] - t1) / (t2 - t1)
            
            # (3) 6시간 초과면 3시간까지만 ffill/bfill
            elif dt > 360:
                mask_f = (t > t1) & (t <= t1 + 180)
                interp[mask_f] = L1
                
                mask_b = (t < t2) & (t >= t2 - 180)
                interp[mask_b] = L2
                
        # (4) lactate 가 2mmol 이상인 경우        
        if cross_2:
            
            if follow: # 두 측정 값 중 후행 값이 넘는 경우: 선행 lactate forward 3시간
                mask_f = (t > t1) & (t <= t1 + 180)
                interp[mask_f] = L1
            else: # 두 측정 값 중 선행 값이 넘는 경우: 후행 lactate forward 3시간
                mask_b = (t < t2) & (t >= t2 - 180)
                interp[mask_b] = L2
                
        first_idx, last_idx = valid_idx[0], valid_idx[-1]
        if l[first_idx] <= 2:  # 정상
            interp[t <= t[first_idx]] = l[first_idx]  # 무제한 backward fill
        else:
            interp[(t >= t[first_idx]) & (t <= t[first_idx] + 180)] = l[first_idx]  # 3시간

        if l[last_idx] <= 2:  # 정상
            interp[t >= t[last_idx]] = l[last_idx]  # 무제한 forward fill
        else:
            interp[(t <= t[last_idx]) & (t >= t[last_idx] - 180)] = l[last_idx]  # 3시간

    df["Lactate_interp"] = interp
    return df

# annotated = []

def AnnotationEpisodes(part_example):

    episodes = []

    for pid, df in part_example.groupby("patientid"):
        one_p = part_example[part_example['patientid']==pid]
        # [['Time', 'MAP', 'Lactate', 'vasopressor']]
        
        one_p['vasopressor_prev'] = one_p['vasopressor'].shift(1).fillna(0)

        df = one_p.sort_values("Time").reset_index(drop=True)
        df = interpolate_lactate_hirid(df, time_col="Time", lactate_col="Lactate")
        
        
        labels = []
        for i in range(len(df)):
            t = df.loc[i, "Time"]

            # 45 min window
            window = df[(df["Time"] >= t - 22.5) & (df["Time"] <= t + 22.5)]

            if window.empty:
                labels.append("unknown")
                continue

            # condition
            cond_nonshock = (window["MAP"] > 65) & (window["vasopressor_prev"] == 0) & (window["Lactate_interp"] <= 2)
            cond_shock = (window["MAP"] <= 65) | ((window["vasopressor_prev"] == 1) & (window["Lactate_interp"] >= 2))

            # duration
            def duration(cond):
                valid_times = window.loc[cond, "Time"].values
                return np.sum(np.diff(valid_times)) if len(valid_times) > 1 else 0

            dur_nonshock = duration(cond_nonshock)
            dur_shock = duration(cond_shock)

            # annotation
            if dur_nonshock >= 30:
                labels.append("non-shock")
            elif dur_shock >= 30:
                labels.append("shock")
            else:
                # unknown: missing or ambiguous
                if window["MAP"].isna().any() or window["Lactate_interp"].isna().any():
                    labels.append("unknown")
                elif (((window["MAP"] <= 65) | (window["vasopressor_prev"] == 1)) & (window["Lactate_interp"] <= 2)).any():
                    labels.append("unknown")
                else:
                    labels.append("unknown")

        df["ShockLabel"] = labels
        df.drop(['vasopressor_prev', 'Lactate_interp'], axis = 1, inplace = True)
        
        
        df = df[df['ShockLabel'] != "unknown"].reset_index(drop=True)
        labels = df['ShockLabel'].values

        start_idx = 0
        ep_idx = 0
        
        for i in range(1, len(labels)):
            prev, curr = labels[i - 1], labels[i]

            # (case A) non-shock -> shock : generate episode
            if prev == "non-shock" and curr == "shock":
                ep_idx += 1
                end_idx = i
                ep = df.iloc[start_idx:end_idx + 1].copy()
                nonshock_count = (ep['ShockLabel'] == 'non-shock').sum()

                # at least 2 non-shock sample (minimum seq)
                if (nonshock_count >= 2) and (len(ep)<=96): # 2(args.min_seq_len - 1), 60 -> args.min_seq_len, args.max_seq_len, args.prediction_window, args.label_save_pth
                    ep['episodeid'] = str(ep_idx) + '-' + str(pid)
                    episodes.append(ep)

                # regardless of condition, next start
                start_idx = end_idx + 1

            # (case B) shock -> shock 유지: skip
            elif prev == "shock" and curr == "shock":
                continue

            # (case C) shock -> non-shock: update next start point of episode
            elif prev == "shock" and curr == "non-shock":
                start_idx = i

        # (case D) last non-shock samples
        if start_idx < len(df):
            last_ep = df.iloc[start_idx:].copy()
            nonshock_count = (last_ep['ShockLabel'] == 'non-shock').sum()
            if (nonshock_count >= 2) and (len(last_ep)<=96):
                ep_idx += 1
                last_ep['episodeid'] = str(ep_idx) + '-' + str(pid)
                episodes.append(last_ep)
                
    return pd.concat(episodes, axis = 0)

In [None]:
labeled_part = AnnotationEpisodes(part_example)

In [253]:
def Labeling(labeled_part):

    labeled_part['shock_next_8h'] = np.nan
    labeled_part['is_mask'] = 0
    dyn_csv = pd.DataFrame()

    for ep in labeled_part.episodeid.unique():
        
        episode = labeled_part[labeled_part['episodeid']==ep]
        
        if any(episode['ShockLabel'].isin(['shock'])):
            episode['endpoint_window'] = episode['Time'] + 480
        
            for idx, row in episode.iterrows():
                current_time = row['Time']
                endpoint_window = row['endpoint_window']

                future_rows = episode[(episode['Time'] > current_time) & (episode['Time'] <= endpoint_window)]

                if any(future_rows['ShockLabel'] == 'shock'):
                    episode.loc[idx, 'shock_next_8h'] = 1
                else:
                    episode.loc[idx, 'shock_next_8h'] = 0
                    
            episode = episode.drop(['endpoint_window'], axis = 1)
        
        else:
            pass

        if len(episode) < 96:
            pad_length = 96 - len(episode)
            pad_rows = pd.DataFrame(0, index = range(pad_length), columns = episode.columns)
            pad_rows['is_mask'] = 1
            pad_rows['patientid'] = episode.patientid.unique()[0]
            
            episode = pd.concat([episode, pad_rows], axis = 0)

        else:
            episode = episode.iloc[:96]
            
        dyn_csv = pd.concat([dyn_csv, episode], axis = 0)
    
    # dir = f'tabular_records/causal_action_bucket_label/part-{parts}.csv'
    # dyn_csv.to_csv(local+dir,index=False)

    return dyn_csv

In [254]:
shocking = Labeling(labeled_part)

In [245]:
episode = labeled_part[labeled_part['episodeid']==labeled_part.episodeid.unique()[50]]

In [261]:
shocking[shocking['episodeid']==labeled_part.episodeid.unique()[79]]

Unnamed: 0,Time,BaseExcess,Bilirubin_total,Chloride,Creatinine,DBP,EtCO2,FiO2,Glucose,HCO3,...,Bilirubin_direct,Calcium,TroponinT,AST,Alkalinephos,MAP,ShockLabel,episodeid,shock_next_8h,is_mask
44,755.0,,,,,41.0,,,4.9,,...,,,,,,71.666667,non-shock,3-14978,0.0,0
45,1015.0,,,,,48.5,,,,,...,,,,,,78.833333,non-shock,3-14978,1.0,0
46,1315.0,,,,,31.0,,,,,...,,,,,,58.444444,shock,3-14978,0.0,0


In [252]:
episode['patientid']

98     4019
99     4019
100    4019
101    4019
Name: patientid, dtype: int64

In [None]:


if any(episode['ShockLabel'].isin(['shock'])):
    print('d')
    episode['endpoint_window'] = episode['Time'] + 480
    
    for idx, row in episode.iterrows():
        current_time = row['Time']
        endpoint_window = row['endpoint_window']

        future_rows = episode[(episode['Time'] > current_time) & (episode['Time'] <= endpoint_window)]

        if any(future_rows['ShockLabel'] == 'shock'):
            episode.loc[idx, 'shock_next_8h'] = 1
        else:
            episode.loc[idx, 'shock_next_8h'] = 0
            
    episode = episode.drop(['endpoint_window'], axis = 1)
    
else:
    pass

if len(episode) < 96:
    pad_length = 96 - len(episode)
    pad_rows = pd.DataFrame(0, index = range(pad_length), columns = episode.columns)
    pad_rows['is_mask'] = 1
    pad_rows['patientid'] = episode.patientid.unique[0]
    
    episode = pd.concat([episode, pad_rows], axis = 0)

else:
    episode = episode.iloc[:96]
    
dyn_csv = pd.concat([dyn_csv, episode], axis = 0

d


In [72]:
# result = []
# for p in part_example.patientid.unique():

#     one_p = part_example[part_example['patientid']==p][['Time', 'MAP', 'Lactate', 'vasopressor']].sort_values('Time')
#     one_p['vasopressor_prev'] = one_p['vasopressor'].shift(1).fillna(0)
    
#     shock = (one_p['MAP']<=65)|((one_p['Lactate']>=2)&(one_p['vasopressor_prev']==1))
    
#     if shock.any():
#         first_shock_idx = one_p.index[shock][0]
#         t0 = one_p.loc[first_shock_idx, 'Time']
#         kept_one = one_p[one_p['Time'] <= t0].copy()
#         kept_one['Annotation'] = np.where(
#                                             (kept_one['MAP']<=65)|((kept_one['Lactate']>=2)&(kept_one['vasopressor_prev']==1)),
#                                             'shock','non-shock'
#                                         )
        
#         if len(kept_one) < 3: # at least 1 hour records is ensured
#             continue
        
#     else:
#         kept_one = one_p.copy()
#         kept_one['Annotation'] = 'non-shock'
        
#     result.append(kept_one)



result = []

for p in part_example.patientid.unique()[:10]:
    one_p = (
        part_example[part_example['patientid'] == p]
        [['Time', 'MAP', 'Lactate', 'vasopressor']]
        .sort_values('Time')
        .reset_index(drop=True)
    )

    one_p['vasopressor_prev'] = one_p['vasopressor'].shift(1).fillna(0)

    cond_shock = (one_p['MAP'] <= 65) | (
        (one_p['Lactate'] >= 2) & (one_p['vasopressor_prev'] == 1)
    )

    cond_nonshock = (one_p['MAP'] > 65) & (
        (one_p['Lactate'] <= 2) & (one_p['vasopressor_prev'] == 0)
    )

    one_p['Annotation'] = np.select(
        [cond_shock, cond_nonshock],
        ['shock', 'non-shock'],
        default='undefined'
    )

    if cond_shock.any():
        first_shock_idx = cond_shock.idxmax()  # True 중 첫 index 반환
        t0 = one_p.loc[first_shock_idx, 'Time']
        kept_one = one_p[one_p['Time'] <= t0].copy()
        
        if len(kept_one) < 3:
            continue
        
        if len(kept_one) == 3:
            if 'undefined' in kept_one['Annotation'].unique():
                continue

    else:
        kept_one = one_p.copy()

    result.append(kept_one)

In [59]:
one_p = one_p[['Time', 'MAP', 'Lactate', 'vasopressor']].sort_values('Time')

In [None]:
one_p['vasopressor_prev'] = one_p['vasopressor'].shift(1).fillna(0)

Unnamed: 0,Time,MAP,Lactate,vasopressor,vasopressor_prev
2625,30.0,,,0.0,0.0
2626,60.0,98.541502,0.9,1.0,0.0
2627,90.0,87.500000,,1.0,1.0
2628,120.0,95.211111,,1.0,1.0
2629,150.0,87.655556,,1.0,1.0
...,...,...,...,...,...
2809,5550.0,84.777778,,1.0,1.0
2810,5580.0,85.022222,,1.0,1.0
2811,5610.0,88.544444,,1.0,1.0
2812,5640.0,89.603175,,1.0,1.0


In [65]:
shock = (one_p['MAP']<=65)|((one_p['Lactate']>=2)&(one_p['vasopressor_prev']==1))


first_shock_idx = one_p.index[shock][0]
t0 = one_p.loc[first_shock_idx, 'Time']
kept_one = one_p[one_p['Time'] <= t0].copy()
kept_one['Annotation'] = np.where(
                                    (kept_one['MAP']<=65)|((kept_one['Lactate']>=2)&(kept_one['vasopressor_prev']==1)),
                                    'shock','non-shock'
                                )

In [66]:
kept_one

Unnamed: 0,Time,MAP,Lactate,vasopressor,vasopressor_prev,Annotation
2625,30.0,,,0.0,0.0,non-shock
2626,60.0,98.541502,0.9,1.0,0.0,non-shock
2627,90.0,87.500000,,1.0,1.0,non-shock
2628,120.0,95.211111,,1.0,1.0,non-shock
2629,150.0,87.655556,,1.0,1.0,non-shock
...,...,...,...,...,...,...
2723,2970.0,71.588889,,1.0,1.0,non-shock
2724,3000.0,78.688889,,1.0,1.0,non-shock
2725,3030.0,88.522222,,1.0,1.0,non-shock
2726,3060.0,84.356322,,1.0,1.0,non-shock


In [78]:
result[3]

IndexError: list index out of range

In [64]:
shock.any()

np.True_

In [None]:
from tabularization import *

parts_list=[]
for i in range(0, 250):
    parts_list.append(i)

start_los = 24 
end_los = 48
resample_time = 10
resample_mode = 'action_bucket_new'
mode = 'causal'

HiRID_Pipeline(parts_list, resample_mode, resample_time, start_los, end_los, mode)