# Example usage

To use `mvtsbuilder` in a project:

In [1]:
import pandas as pd
import json
import warnings
# Ignore all Python warnings from this point on so the example output stays compact.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings; consider
# narrowing the filter to specific categories or modules.
warnings.filterwarnings('ignore')

In [2]:
# Import the package and print its version so readers can compare it with
# the version installed in their own environment.
import mvtsbuilder

print(mvtsbuilder.__version__)

2022-10-07 09:20:16.034035: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


0.1.0


# Initiate mvtsbuilder project

In [3]:
# set working directory to your project folder
# NOTE(review): this is an absolute path on the author's machine; point it at your own project folder before running
work_dir = '/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project'
# create a mvtsbuilder project instance rooted at work_dir, and keep it as the `prj` object
prj = mvtsbuilder.Project(work_dir)



Project begins at: 2022-10-07 09:20:19.852567


Working directory: /Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project


Meta_data directory: /Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data


--- Project variable_dict.json not exist. ---
You can put a previous 'variable_dict.json' file in path '/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data'.
Or, You can use function .new_demo_variable_dict() to create one. Please modify the newly created file '/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/demo_variable_dict_TIMESTAMP.json' and save it as '/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json';


--- Project csv_source_dict.json not exist. ---
You can put a previous 'csv_source_dict.json' file in path '/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data'.
Or, You can use function .new_demo_csv_sou

In [4]:
# print project info: printing the project object emits a JSON-like status summary
# (project info, episode definition, MVTS, MVTS for ML)
print(prj)

{
  "Project Info": {
    "datetime": "2022-10-07 09:20:19.852567",
    "working_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project",
    "meta_data_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data",
    "dictionary": {
      "variable_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json not found",
      "csv_source_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/csv_source_dict.json not found"
    },
    "ml_var": {
      "inputs": "None",
      "outputs": "None",
      "others": "None"
    }
  },
  "Episode Definition": null,
  "MVTS": null,
  "MVTS for ML": null
}
Project Status


# Build dictionaries in meta_data folder

## Initiate demo variable dictionary

In [5]:
# Create a demo variable dictionary in the meta_data folder; the returned value
# is used in the next cell as the path of the newly created JSON file.
path = prj.new_demo_variable_dict()

A new demo of variable dictionary is ready for you, using path: 

/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/demo_variable_dict_20221007-092019.json


In [6]:
# Parse the demo JSON file located at `path` into a Python dictionary
with open(path, 'r') as demo_file:
    demo_variable_dict = json.loads(demo_file.read())
# Pretty-print the parsed dictionary using a two-space indent
print(json.dumps(demo_variable_dict, indent=2))

{
  "__uid": {
    "src_names": [
      "id",
      "subjectnbr",
      "ID"
    ],
    "label": "unique study subject id",
    "unique_per_sbj": true
  },
  "__time": {
    "src_names": [
      "hoursincebirth"
    ],
    "label": "time since birth",
    "unique_per_sbj": false,
    "unit": "hour"
  },
  "__anchor": {
    "src_names": [
      "y"
    ],
    "label": "anchor of an episode",
    "unique_per_sbj": false,
    "factor": {
      "levels": {
        "__1": [
          "1",
          "1.0"
        ],
        "__2": [
          "2",
          "2.0"
        ],
        "__3": [
          "3",
          "3.0"
        ]
      }
    }
  },
  "log_BI": {
    "output": true,
    "src_names": [
      "log_burden_index"
    ],
    "label": "log of episode burden index",
    "unique_per_sbj": false,
    "numeric": {
      "scaler": "none",
      "unit": "log(percentage of daily episode duration +0.0001)",
      "cutoff": {
        "quantile_min": 0.0001,
        "quantile_max": 0.9999,


## Editing variable dictionary

Now you can open the JSON file in any editor and modify its content. Instructions can be found here. After building the variable dictionary for the current project, save it as "variable_dict.json" in the same meta_data directory. <br>
Let's take a look at the final variable dictionary JSON file for the example project by calling the `load_variable_dict()` function.

In [7]:
# Load the project's variable dictionary; the recorded run reports it as loaded
# and prints its content.
prj.load_variable_dict()
# # To verify, you can also load the "variable_dict.json" file from the meta_data folder directly.
# # NOTE(review): the project object in this notebook is `prj`; the `meta_dir`
# # attribute name is not shown anywhere in this notebook, so confirm it before un-commenting.
# # open "variable_dict.json" file directly
# with open(str(prj.meta_dir)+'/variable_dict.json', 'r') as f:
#     var_dict = json.load(f)
# # print it as dictionary
# print(json.dumps(var_dict, indent=2))

Project variable dictionary loaded;
{
  "__uid": {
    "src_names": [
      "id",
      "ID",
      "subject_id",
      "subject_id",
      "oldid"
    ],
    "label": "subject id",
    "unique_per_sbj": true
  },
  "__time": {
    "src_names": [
      "tsa",
      "timeMinutes"
    ],
    "label": "Time since admission",
    "unit": "minute"
  },
  "__anchor": {
    "src_names": [
      "True_positive",
      "True positive",
      "label"
    ],
    "label": "episode anchor",
    "unique_per_sbj": false,
    "shuffle": [
      "__nbc"
    ],
    "factor": {
      "levels": {
        "__nbc": [
          "nan"
        ],
        "__ctm": [
          "contaminant"
        ],
        "__neg": [
          "0",
          "0.0",
          "negative"
        ],
        "__pos": [
          "1",
          "1.0",
          "true_positive"
        ]
      },
      "impute_per_sbj": {
        "nan_level": "__nbc"
      }
    }
  },
  "y": {
    "output": true,
    "src_names": [
      "True_pos

In [8]:
# print project info again: after loading the variable dictionary the summary
# now lists the variable_dict.json path and the ML input variables
print(prj)

{
  "Project Info": {
    "datetime": "2022-10-07 09:20:19.852567",
    "working_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project",
    "meta_data_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data",
    "dictionary": {
      "variable_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json",
      "csv_source_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/csv_source_dict.json not found"
    },
    "ml_var": {
      "inputs": "['age', 'albumin', 'alp', 'alt', 'ast', 'bicarbonate', 'bun', 'calcium', 'chloride', 'co2', 'creatinine', 'dbp', 'fio2_pct', 'glucose', 'hematocrit', 'hemoglobin', 'heart_rate', 'lactic_acid', 'magnesium', 'o2_flow', 'pco2', 'peep', 'ph_arterial', 'phosphorus', 'po2', 'potassium', 'protime_inr', 'ptt', 'platelet_count', 'resp_rate', 'sbp', 'sodium', 'spo2', 'temp', 'total_bilirubin', 'total_protein', 'txp___no

# Define Episode

In [9]:
# Define the episode layout for this project.
# NOTE(review): the values are presumably expressed in the `__time` unit of the
# loaded variable dictionary (minutes) — confirm against the def_episode docs.
prj.def_episode(
    # 4*24*60 = 5760, i.e. 4 days if the unit is minutes
    input_time_len=4*24*60,
    # 1*24*60 = 1440, i.e. 1 day if the unit is minutes
    output_time_len=1*24*60, 
    # 60, i.e. 1 hour if the unit is minutes
    time_resolution=60, 
    time_lag=0, 
    # 7*24*60 = 10080, i.e. 7 days if the unit is minutes
    anchor_gap=7*24*60)

Project variable dictionary loaded;
Success! Project has updated attributes --- episode. 


In [10]:
# Print the project summary again now that the episode has been defined
print(prj)

{
  "Project Info": {
    "datetime": "2022-10-07 09:20:19.852567",
    "working_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project",
    "meta_data_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data",
    "dictionary": {
      "variable_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json",
      "csv_source_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/csv_source_dict.json not found"
    },
    "ml_var": {
      "inputs": "['age', 'albumin', 'alp', 'alt', 'ast', 'bicarbonate', 'bun', 'calcium', 'chloride', 'co2', 'creatinine', 'dbp', 'fio2_pct', 'glucose', 'hematocrit', 'hemoglobin', 'heart_rate', 'lactic_acid', 'magnesium', 'o2_flow', 'pco2', 'peep', 'ph_arterial', 'phosphorus', 'po2', 'potassium', 'protime_inr', 'ptt', 'platelet_count', 'resp_rate', 'sbp', 'sodium', 'spo2', 'temp', 'total_bilirubin', 'total_protein', 'txp___no

# Build MVTS DataFrame

In [11]:
# Source the data from an external DataFrame object: read only the first 200,000 rows of the CSV.
# NOTE(review): this is an absolute path on the author's machine; replace it with your own data file.
df = pd.read_csv("/Users/jiaxingqiu/Documents/CAMA_projects/BSI/2016_2021/data_ml/bsi_new_deidentified_bc.csv", nrows=200000)

In [12]:
# Build the episode-wise MVTS DataFrame from `df`, sampling 20 subjects with
# replacement (see help(mvtsbuilder.Project.build_mvts) for `nsbj` and `replace`).
# NOTE(review): no random seed is set in this notebook, so the sampled subjects
# may differ between runs — confirm whether build_mvts offers a way to fix them.
prj.build_mvts(source=df,nsbj=20,replace=True)

--- MVTSbuilder Project is engineering customized table format data ---
20 out of 326 is sampled!
-- __time fixed
-- __anchor fixed
-- y fixed
--- fix upper boundary for age by 84.33
--- fix lower boundary for age by 29.14
-- age fixed
--- fix upper boundary for albumin by 4.0
--- fix lower boundary for albumin by 1.8
-- albumin fixed
--- fix upper boundary for alp by 197.0
--- fix lower boundary for alp by 36.0
-- alp fixed
--- fix upper boundary for alt by 610.0
--- fix lower boundary for alt by 7.0
-- alt fixed
--- fix upper boundary for ast by 1267.0
--- fix lower boundary for ast by 11.0
-- ast fixed
--- fix upper boundary for bicarbonate by 39.6
--- fix lower boundary for bicarbonate by 5.8
-- bicarbonate fixed
--- fix upper boundary for bun by 150.0
--- fix lower boundary for bun by 5.0
-- bun fixed
--- fix upper boundary for calcium by 13.8945500000031
--- fix lower boundary for calcium by 5.2
-- calcium fixed
--- fix upper boundary for chloride by 124.0
--- fix lower boundary 

In [13]:
# Print the project summary again after building the MVTS DataFrame
print(prj)

{
  "Project Info": {
    "datetime": "2022-10-07 09:20:19.852567",
    "working_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project",
    "meta_data_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data",
    "dictionary": {
      "variable_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json",
      "csv_source_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/csv_source_dict.json not found"
    },
    "ml_var": {
      "inputs": "['age', 'albumin', 'alp', 'alt', 'ast', 'bicarbonate', 'bun', 'calcium', 'chloride', 'co2', 'creatinine', 'dbp', 'fio2_pct', 'glucose', 'hematocrit', 'hemoglobin', 'heart_rate', 'lactic_acid', 'magnesium', 'o2_flow', 'pco2', 'peep', 'ph_arterial', 'phosphorus', 'po2', 'potassium', 'protime_inr', 'ptt', 'platelet_count', 'resp_rate', 'sbp', 'sodium', 'spo2', 'temp', 'total_bilirubin', 'total_protein', 'txp___no

In [14]:
# Show the docstring of build_mvts to review its signature and parameters
help(mvtsbuilder.Project.build_mvts)

Help on function build_mvts in module mvtsbuilder.classes.project:

build_mvts(self, source=None, nsbj=None, frac=0.3, replace=True, stratify_by=None, skip_uid=None, keep_uid=None, return_episode=True, topn_eps=None, dummy_na=False, sep='---', viz=False, viz_ts=False)
    Build episode-wise Multi-Variable Time Series DataFrame.
    
    
    Parameters
    ----------
    source: string or pandas.core.frame.DataFrame
        the source of data, supporting formats include a pandas dataframe, a string of the path to the csv_pool directory.
    nsbj: int
        number of subjects to sample from the source, it can be None.
    frac: float
        sampling rate or fraction of subjects from the source, not work if nsbj is given.
    replace: bool
        sampling subjects from source data with or without replacement, True means with replacement.
    stratify_by: list
        list of final variable names by which, sampling should be stratefied by.
    skip_uid: list
        list of subject ID

# Prepare for ML

In [15]:
# Split the MVTS data into train/validation/test sets; the recorded run reports
# new project attributes for the raw splits, the imputed splits, and the tfds datasets.
# NOTE(review): the recorded run reports 'mask' predictor imputation and 'mode'
# response imputation ("too few subjects are sampled"), which differs from the
# 'constant' / 'none' arguments requested here — confirm with the split_mvts docs.
prj.split_mvts(valid_frac=0.2, test_frac=0.1, byepisode=False, batch_size=64, impute_input='constant', impute_output='none', fill_value=-333)

Success! Project has updated attributes --- train_df, valid_df and test_df. 
Using 'mask' for predictor imputation (constant value -333) because too few subjects are sampled.
Using 'mode' for response imputation because too few subjects are sampled.
Success! Project has updated attributes --- train_df_imputed, valid_df_imputed and test_df_imputed. 


2022-10-07 09:21:06.616369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Success! Project has updated attributes --- train_tfds, valid_tfds and test_tfds. 


In [16]:
# Print the project summary one last time after preparing the data for ML
print(prj)

{
  "Project Info": {
    "datetime": "2022-10-07 09:20:19.852567",
    "working_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project",
    "meta_data_dir": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data",
    "dictionary": {
      "variable_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/variable_dict.json",
      "csv_source_dict": "/Users/jiaxingqiu/Documents/CAMA_projects/mvtsbuilder_example_project/meta_data/csv_source_dict.json not found"
    },
    "ml_var": {
      "inputs": "['age', 'albumin', 'alp', 'alt', 'ast', 'bicarbonate', 'bun', 'calcium', 'chloride', 'co2', 'creatinine', 'dbp', 'fio2_pct', 'glucose', 'hematocrit', 'hemoglobin', 'heart_rate', 'lactic_acid', 'magnesium', 'o2_flow', 'pco2', 'peep', 'ph_arterial', 'phosphorus', 'po2', 'potassium', 'protime_inr', 'ptt', 'platelet_count', 'resp_rate', 'sbp', 'sodium', 'spo2', 'temp', 'total_bilirubin', 'total_protein', 'txp___no

# That's it!