In [1]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
import warnings
from tqdm import tqdm
import random
from sklearn.utils import resample
import time
import gc

# Suppress every warning raised after this point (applies to all categories and modules).
warnings.filterwarnings('ignore')

In [65]:
# Read the MIMIC-IV and eICU tables from their gzip-compressed CSV files.
mimic_data_dir = '/Users/DAHS/Desktop/ECP_CONT/ECP_SCL/Case Labeling/MIMIC-IV.csv.gz'
eicu_data_dir = '/Users/DAHS/Desktop/ECP_CONT/ECP_SCL/Case Labeling/eICU.csv.gz'

# The generator is consumed left to right, so MIMIC-IV is read before eICU.
mimic, eicu = (
    pd.read_csv(csv_path, compression='gzip')
    for csv_path in (mimic_data_dir, eicu_data_dir)
)

In [66]:
def sampling_target_cohort(target, event='circ', mode = 'mimic'):
    """Truncate every stay at its first occurrence of ``event``.

    Rows are grouped by the stay identifier. For each stay, the rows up to and
    including the first row whose ``Annotation`` equals ``event`` are kept and
    all later rows of that stay are discarded. Stays in which the event never
    occurs are kept whole.

    Parameters
    ----------
    target : pandas.DataFrame
        Must contain an ``Annotation`` column and the stay-identifier column
        selected by ``mode``. The frame is copied and never modified.
    event : str, default 'circ'
        Annotation value that ends a stay. ``'amb_circ'`` annotations are
        relabelled to ``'circ'`` before matching, and that relabelling is
        visible in the returned frames.
    mode : str, default 'mimic'
        ``'mimic'`` groups by ``stay_id``; any other value groups by
        ``patientunitstayid``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per stay, ordered by the first appearance of the stay in
        ``target``, with the original row labels preserved. Rows whose stay
        identifier is missing are skipped. An empty input yields ``[]``.
    """
    data = target.copy()
    data['Annotation'] = data['Annotation'].replace({'amb_circ': 'circ'})

    stay_col = 'stay_id' if mode == 'mimic' else 'patientunitstayid'

    split_data = []
    # sort=False keeps the stays in order of first appearance.
    for _, stay_rows in data.groupby(stay_col, sort=False):
        is_event = (stay_rows['Annotation'] == event).to_numpy(dtype=bool, na_value=False)
        if is_event.any():
            # The cut-off is computed independently for each stay, so an event
            # on the last row of one stay cannot cause the next stay to be
            # dropped (the previous shared flag leaked across stays).
            stop = int(is_event.argmax()) + 1
            stay_rows = stay_rows.iloc[:stop]
        # Copy so callers get independent frames rather than views of `data`.
        split_data.append(stay_rows.copy())

    return split_data

In [67]:
# Split each database into per-stay pieces, then stack the pieces back into a
# single frame with a fresh 0..n-1 index.
part_mimic = sampling_target_cohort(mimic, event='circ', mode='mimic')
new_mimic = pd.concat(part_mimic, ignore_index=True)

part_eicu = sampling_target_cohort(eicu, event='circ', mode='eicu')
new_eicu = pd.concat(part_eicu, ignore_index=True)

In [81]:
# Fold class labels 5 and 6 into class 4 in both cohorts.
new_mimic['classes'] = new_mimic['classes'].replace([5, 6], 4)
new_eicu['classes'] = new_eicu['classes'].replace([5, 6], 4)

In [97]:
# Write both cohorts as gzip-compressed CSV files into the current working
# directory. The row index is written as the first column (to_csv default).
for cohort_frame, out_name in (
    (new_mimic, 'MIMIC-IV-COHORT.csv.gz'),
    (new_eicu, 'eICU-COHORT.csv.gz'),
):
    cohort_frame.to_csv(out_name, compression='gzip')