In [1]:
# NOTE: the block below is a disabled R environment setup, kept for reference.
# The R_HOME path is machine-specific and must be adapted before re-enabling.
# import os

# # Set R environment variables using the conda environment path
# r_home = '/sfs/gpfs/tardis/home/jq2uw/llm_nicu_vitalsigns/clip_env/lib/R'
# os.environ['R_HOME'] = r_home
# os.environ['R_LIBS'] = f"{r_home}/library"
# os.environ['R_LIBS_USER'] = os.path.expanduser('~/R/goolf/4.3')
# os.environ['LD_LIBRARY_PATH'] = f"{r_home}/lib:" + os.environ.get('LD_LIBRARY_PATH', '')

import torch
# When a CUDA device is available, ask PyTorch to release its cached GPU
# memory and collect CUDA IPC handles before anything else is loaded.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

In [2]:
# Project-local modules, star-imported. Names used later in the notebook
# without a visible definition (e.g. `device`, `text_config`, `update_config`,
# `get_config_dict`) presumably come from these modules -- TODO confirm which
# module defines each, and prefer explicit imports.
from config import *
from encoder import *
from decoder import *
from data import *
from vital import *
from train import *
from eval import *
from augmentor import *
from describer import *
from masker import *
# Show which compute device was selected (`device` is not defined in this cell).
print("using device: ", device)
import pandas as pd
import os
from sklearn.model_selection import train_test_split

# Disabled version check for the python-calamine package.
# import pkg_resources
# print(pkg_resources.get_distribution('python-calamine').version)



Random seed set to 333
using device:  cuda


## Customize Configuration

In [8]:
# (customize) configs
# `overwrite` gates the model/optimizer (re)build cell further down.
overwrite = True
model_name = 'testtest'

# Update text_config here if needed: switch on the text components for this run.
text_config['cl']['die7d'] = True
text_config['split'] = True
text_config['demo']['gre'] = True
text_config['demo']['apgar_mage'] = True

# Final model name = base name + "___" + the enabled text components.
model_name = f"{model_name}___{'_'.join(get_true_components(text_config))}"

update_config(
    model_name=model_name,
    # --- data settings ---
    ts_aug=False,
    ts_subseq=False,
    ts_augsub=False,
    downsample_size=10,
    balance=False,
    block_target=False,
    embedded_dim=32,
    # --- data loader settings ---
    batch_size=2048,
    ts_global_normalize=True,
    ts_local_normalize=False,  # True,
    # --- training settings ---
    patience=100,
    num_saves=20,
    num_epochs=10000,
    init_lr=1e-4,
    text_config=text_config,
    text_col_ls=[
        'cl_event',
        'ts_description',
        'demo_ga',
        'demo_weight',
        'demo_apgar',
        'demo_mother',
    ],
)
# Snapshot of the effective configuration, read by the cells below.
config_dict = get_config_dict()

In [4]:
# run preprocess.py to ready the data
# The script is executed in this notebook's namespace, so the names it defines
# stay available to later cells (presumably e.g. `df_train` and the
# dataloaders -- they are used below but not defined in the notebook itself).
# It is compiled with its real filename so tracebacks point at preprocess.py
# line numbers (exec on a bare source string reports "<string>"), and it is
# read as UTF-8, Python's default source encoding, rather than the locale's.
with open('preprocess.py', 'r', encoding='utf-8') as file:
    preprocess_source = file.read()
exec(compile(preprocess_source, 'preprocess.py', 'exec'))


Sample of patients with positive labels:
VitalID
1018    8
5170    8
1464    8
2361    8
2791    8
dtype: int64


[Parallel(n_jobs=36)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=36)]: Done 142 tasks      | elapsed:    4.5s
[Parallel(n_jobs=36)]: Done 728 tasks      | elapsed:    6.6s
[Parallel(n_jobs=36)]: Done 3323 tasks      | elapsed:   12.0s
[Parallel(n_jobs=36)]: Done 6832 tasks      | elapsed:   15.0s
[Parallel(n_jobs=36)]: Done 11232 tasks      | elapsed:   17.9s
[Parallel(n_jobs=36)]: Done 16432 tasks      | elapsed:   21.4s
[Parallel(n_jobs=36)]: Done 22432 tasks      | elapsed:   25.7s
[Parallel(n_jobs=36)]: Done 29232 tasks      | elapsed:   30.1s
[Parallel(n_jobs=36)]: Done 36832 tasks      | elapsed:   35.3s
[Parallel(n_jobs=36)]: Done 45232 tasks      | elapsed:   40.8s
[Parallel(n_jobs=36)]: Done 54432 tasks      | elapsed:   46.9s
[Parallel(n_jobs=36)]: Done 64432 tasks      | elapsed:   54.1s
[Parallel(n_jobs=36)]: Done 65353 out of 65353 | elapsed:   54.8s finished


This infant will survive.  This infant has gestational age 24 weeks. Birth weight is 360 grams. This infant is Female Black non-Hispanic. The Apgar5 scores 6. Mother is 21 years old.    Moderate variability.  Very low amount of consecutive increases. 

Available text columns:
['cl_event', 'ts_description', 'demo_ga', 'demo_weight', 'demo_gender', 'demo_race', 'demo_ethnicity', 'demo_apgar', 'demo_mother', 'cl_die7d']

Sample of patients with positive labels:
TestID
817     8
1903    8
801     8
508     8
2518    8
dtype: int64


[Parallel(n_jobs=36)]: Using backend LokyBackend with 36 concurrent workers.
[Parallel(n_jobs=36)]: Done 184 tasks      | elapsed:    0.2s
[Parallel(n_jobs=36)]: Done 1800 tasks      | elapsed:    1.2s
[Parallel(n_jobs=36)]: Done 4600 tasks      | elapsed:    2.9s
[Parallel(n_jobs=36)]: Done 8200 tasks      | elapsed:    5.3s
[Parallel(n_jobs=36)]: Done 12600 tasks      | elapsed:    8.2s
[Parallel(n_jobs=36)]: Done 17800 tasks      | elapsed:   11.4s
[Parallel(n_jobs=36)]: Done 23800 tasks      | elapsed:   15.2s
[Parallel(n_jobs=36)]: Done 30600 tasks      | elapsed:   19.5s
[Parallel(n_jobs=36)]: Done 38200 tasks      | elapsed:   24.7s
[Parallel(n_jobs=36)]: Done 46600 tasks      | elapsed:   30.2s
[Parallel(n_jobs=36)]: Done 55800 tasks      | elapsed:   35.8s
[Parallel(n_jobs=36)]: Done 61570 out of 61570 | elapsed:   39.6s finished


This infant will survive.  This infant has gestational age 33 weeks. Birth weight is 2630 grams. This infant is Male non-Black non-Hispanic. The Apgar5 scores 9. Mother is 26 years old.    High variability.  Low amount of consecutive increases. 

Available text columns:
['cl_event', 'ts_description', 'demo_ga', 'demo_weight', 'demo_gender', 'demo_race', 'demo_ethnicity', 'demo_apgar', 'demo_mother', 'cl_die7d']
After downsampling:
cl_event
This infant will die in 7 days.     384
This infant will survive.            10
Name: count, dtype: int64
After downsampling:
cl_event
This infant will die in 7 days.     241
This infant will survive.            10
Name: count, dtype: int64


  from tqdm.autonotebook import tqdm, trange


cl_event
This infant will die in 7 days.     384
This infant will survive.            10
Name: count, dtype: int64
cl_event
This infant will die in 7 days.     241
This infant will survive.            10
Name: count, dtype: int64


## Customize VITAL Model

In [5]:
%load_ext autoreload
%autoreload 1
%aimport vital
%autoreload 1
%aimport data

In [9]:
# customize model
# Builds the model, optimizer, LR scheduler and KL annealer, resets the loss /
# metric histories, and recreates the output directory. Runs only when
# `overwrite` is True.
if overwrite:
    import shutil  # used at the end of this cell to reset the output directory

    # Feature dimensions are probed from a single training row and reused on
    # re-runs of this cell. The cache check covers every name the active branch
    # needs: checking only `ts_f_dim` led to a NameError further down when
    # config_dict['3d'] was toggled between runs, because only the other
    # branch's text dimensions were in memory.
    if config_dict['3d']:
        required_dims = ('ts_f_dim', 'tx_f_dim_ls')
    else:
        required_dims = ('ts_f_dim', 'tx_f_dim')
    if any(dim_name not in globals() for dim_name in required_dims):
        # get the dimension out
        if config_dict['3d']:
            ts_f_dim, tx_f_dim_ls, labels_dim = get_features3d(df_train.iloc[:1,:], 
                                                                config_dict['text_encoder_name'], 
                                                                config_dict['ts_normalize_mean'],
                                                                config_dict['ts_normalize_std'],
                                                                text_col_ls = config_dict['text_col_ls'])
        else:
            ts_f_dim, tx_f_dim, labels_dim = get_features(df_train.iloc[:1,:], 
                                                            config_dict['text_encoder_name'], 
                                                            config_dict['ts_normalize_mean'],
                                                            config_dict['ts_normalize_std'])

    # None is passed through to the model constructor for both encoder and
    # decoder; uncomment the blocks below to plug in custom ones instead.
    ts_encoder = None
    ts_decoder = None
    # #--- custom ts encoder in encoder.py ---
    # e = MLPEncoder(
    #     ts_dim=ts_f_dim.shape[1], 
    #     output_dim=config_dict['embedded_dim']
    # )
    # ts_encoder = TSVAEEncoderWrapper(e)
    # # --- custom ts decoder in decoder.py ---
    # d = MLPDecoder(
    #     ts_dim=ts_f_dim.shape[1], 
    #     output_dim=config_dict['embedded_dim']
    # )
    # ts_decoder = TSVAEDecoderWrapper(d)

    if config_dict['3d']:
        # One text feature tensor per text column; all are assumed to share the
        # width of the first one -- TODO confirm.
        model = VITAL3D(
                    ts_dim=ts_f_dim.shape[1],
                    text_dim=tx_f_dim_ls[0].shape[1],
                    n_text=len(tx_f_dim_ls),
                    output_dim=config_dict['embedded_dim'],
                    ts_encoder=ts_encoder,
                    ts_decoder=ts_decoder
                )
    else:
        model = VITAL(
                    ts_dim=ts_f_dim.shape[1],
                    text_dim=tx_f_dim.shape[1],
                    output_dim=config_dict['embedded_dim'],
                    ts_encoder=ts_encoder,
                    ts_decoder=ts_decoder
                )
    # Register the freshly built model in the config and refresh the snapshot.
    update_config(model_init = model)
    config_dict = get_config_dict()

    # ------------------------- ready training -------------------------
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config_dict['init_lr'],
        weight_decay=1e-4
    )

    # Multiply the LR by 0.9 when the monitored loss stops improving.
    # `verbose=True` was dropped: the argument is deprecated since PyTorch 2.2
    # and rejected by newer releases.
    # NOTE(review): this reuses config_dict['patience'] for the scheduler --
    # confirm the same value is intended here and for any early stopping.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.9,
        patience=config_dict['patience'],
        min_lr=1e-10,
        threshold=1e-4,
        cooldown=20
    )

    # start == end, so no annealing is actually configured: beta is presumably
    # held at 1.0 for the whole schedule (the training log prints "beta: 1.0").
    # Lower `start` to weight reconstruction more during the early epochs.
    kl_annealer = KLAnnealer(start=1.0,
                             end=1.0,
                             epochs=10000)

    # Fresh histories for this run.
    train_eval_metrics_list = []
    test_eval_metrics_list = []
    train_losses = []
    test_losses = []

    # ------------------------- ready output directory -------------------------
    # WARNING: destructive -- everything already under output_dir is deleted
    # before the directory is recreated empty.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    # torch.save(config_dict, config_path)
    # overwrite = False # reset overwrite to False
   

Layer (type:depth-idx)                                                 Param #
VITAL3D                                                                1
├─TSVAEEncoder: 1-1                                                    --
│    └─Sequential: 2-1                                                 --
│    │    └─Linear: 3-1                                                77,056
│    │    └─LeakyReLU: 3-2                                             --
│    │    └─Linear: 3-3                                                32,896
│    │    └─LeakyReLU: 3-4                                             --
│    └─Linear: 2-2                                                     4,128
│    └─Linear: 2-3                                                     4,128
├─TextEncoderWithAttention: 1-2                                        32
│    └─ModuleList: 2-4                                                 --
│    │    └─Sequential: 3-5                                            1,595,552
│    │    └─S

## Test CLIP

In [None]:
# Train with train_type='clip', using the model, optimizer, scheduler and KL
# annealer prepared in the previous cell. The two returned values are kept in
# *_tmp variables.
# NOTE(review): num_epochs=1000 here differs from the num_epochs=10000 set in
# the configuration cell -- confirm which one is intended.
train_losses_tmp, test_losses_tmp = train_vital(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    scheduler,
    kl_annealer,
    num_epochs=1000,
    train_type='clip',
)

beta: 1.0
Epoch [1/1000]
	Training Loss: 27.625423
	Testing Loss: 22.341518
	Learning Rate: 0.000100000
beta: 1.0
Epoch [2/1000]
	Training Loss: 23.361652
	Testing Loss: 21.267193
	Learning Rate: 0.000100000
beta: 1.0
