# Table of Contents
1. Create chartevents_trust for trust.ipynb
2. Create noteevents_trust for trust.ipynb

In [None]:
import gc
import numpy as np
import polars as pl

In [None]:
# Directory (relative to the working directory) that holds the input CSV files
# read by this notebook (CHARTEVENTS.csv, D_ITEMS.csv, NOTEEVENTS.csv) and that
# receives the parquet files it writes.
DATA_LOCATION = 'Mimic3_Data'

In [None]:
# Lazy scan of the chart events. VALUE is forced to a string column, and
# ignore_errors=True lets the scan continue past values that fail to parse
# instead of raising. Only the distinct (HADM_ID, ITEMID, VALUE) rows are kept.
chartevents = (
    pl.scan_csv(
        f'{DATA_LOCATION}/CHARTEVENTS.csv',
        schema_overrides={'VALUE': pl.String()},
        infer_schema_length=20000,
        ignore_errors=True,
    )
    .select(['HADM_ID', 'ITEMID', 'VALUE'])
    .unique()
)

# Lazy scan of the item dictionary, reduced to distinct (ITEMID, LABEL) pairs.
d_items = (
    pl.scan_csv(
        f'{DATA_LOCATION}/D_ITEMS.csv',
        infer_schema_length=20000,
        ignore_errors=True,
    )
    .select(['ITEMID', 'LABEL'])
    .unique()
)


In [None]:
# Read interpersonal interaction variables from chartevents

# One label per line. Leading/trailing whitespace and blank lines are ignored
# when the list below is built, so the indentation here is purely cosmetic.
relevant_labels = '''
    Family Communication
    Follows Commands
    Education Barrier
    Education Learner
    Education Method
    Education Readiness
    Education Topic #1
    Education Topic #2
    Pain
    Pain Level
    Pain Level (Rest)
    Pain Assess Method
    Restraint
    Restraint Type
    Restraint (Non-violent)
    Restraint Ordered (Non-violent)
    Restraint Location
    Reason For Restraint
    Spiritual Support
    Support Systems
    State
    Behavior
    Behavioral State
    Reason For Restraint
    Stress
    Safety
    Safety Measures_U_1
    Family
    Patient/Family Informed
    Pt./Family Informed
    Health Care Proxy
    BATH                
    bath                
    Bath                
    Bed Bath            
    bed bath            
    bed bath            
    Bedbath             
    CHG Bath            
    Skin Care           
    Judgement           
    Family Meeting held 
    Emotional / physical / sexual harm by partner or close relation
    Verbal Response
    Side Rails
    Orientation
    RSBI Deferred
    Richmond-RAS Scale
    Riker-SAS Scale
    Status and Comfort
    Teaching directed toward
    Consults
    Social work consult
    Sitter
    security
    safety
    headache
    hairwashed
    observer
'''

# Stripped, non-empty labels in their original order. The text above repeats a
# few entries verbatim; dict.fromkeys drops those exact repeats while keeping
# the first occurrence's position (entries differing only in case are kept).
labels_only = list(dict.fromkeys(
    label for label in map(str.strip, relevant_labels.split('\n')) if label
))

In [None]:
# creates chartevents_trust for trust.ipynb

# Dictionary items whose LABEL contains any of the collected labels as a
# substring (ASCII case-insensitive).
d_items_trust = d_items.filter(
    pl.col('LABEL').str.contains_any(labels_only, ascii_case_insensitive=True)
)

# Attach the label to every chart event of a matching item, keep the three
# columns written out below, and materialize the lazy query.
chartevents_trust = (
    chartevents
    .join(d_items_trust, left_on='ITEMID', right_on='ITEMID', how='inner')
    .select(['HADM_ID', 'LABEL', 'VALUE'])
    .collect()
)
display(chartevents_trust.head())
chartevents_trust.write_parquet(f'{DATA_LOCATION}/chartevents_trust.parquet')

# Drop the materialized frame and run a collection pass now that it is saved.
del chartevents_trust
gc.collect()

In [None]:
# creates noteevents_trust for trust.ipynb

# Scan the notes lazily with TEXT forced to a string column, keep the columns
# written out below, retain only rows whose ISERROR is null, and materialize.
# NOTE(review): a null ISERROR presumably means the note was not flagged as
# erroneous - confirm against the dataset documentation.
noteevents_trust = (
    pl.scan_csv(
        f'{DATA_LOCATION}/NOTEEVENTS.csv',
        schema_overrides={'TEXT': pl.String()},
        infer_schema_length=20000,
        ignore_errors=True,
    )
    .select(['HADM_ID', 'CATEGORY', 'TEXT', 'CHARTDATE', 'CHARTTIME', 'ISERROR'])
    .filter(pl.col('ISERROR').is_null())
    .collect()
)
display(noteevents_trust.head())
noteevents_trust.write_parquet(f'{DATA_LOCATION}/noteevents_trust.parquet')

# Drop the materialized frame and run a collection pass now that it is saved.
del noteevents_trust
gc.collect()