# Atlas-Lightsaber Class Demo

In [None]:

from cohort_connector import *
from feature_extractor import *

#### Example Experiment Settings

In [2]:
# Name of this analysis; stored under 'analysis_name' in the feature extraction settings
analysis_name = "IHM"

In [3]:
# Database connection details.
# Every value is left blank in this demo and must be filled in before running.
dbms = ''
path_to_driver = ""
user = ""
pw = ""
hostname = ""
dbname = ""
port = ""

In [4]:
# Schema containing the OMOP CDM data
cdm_database_schema = "omop2"

# Schemas where the target / outcome cohorts will be generated
target_cohort_database_schema, outcome_cohort_database_schema = "results", "results"

# Tables from which the target / outcome cohorts will be retrieved
target_cohort_table, outcome_cohort_table = "cohort", "cohort"

# Ids of the target / outcome cohorts
target_cohort_id, outcome_cohort_id = 10001, 10002

# Sample size.
# NOTE: this name is reassigned to None in the feature extraction settings cell below.
sample_size = 100

In [5]:
# Tidy-covariate settings; stored under 'tidy_covariate_settings'
# in the feature extraction settings.
min_fraction, normalize, remove_redundancy = 0.1, False, True

In [6]:
# Concept ids of the covariates to include
included_covariate_concept_ids = [
    3012888, 3020716, 3016335, 3008223, 3032652,
    3009094, 3034962, 3027018, 3036277, 3027598,
    3016502, 3024171, 3004249, 3020891, 3025315,
]

# For each categorical covariate concept id: raw value -> label
categorical_covariate_concept_value_mappings = {
    3016335: {
        1: 'No Response',
        2: 'To pain',
        3: 'To speech',
        4: 'Spontaneously',
    },
    3009094: {
        1: 'No response',
        2: 'Incomprensible sounds',
        3: 'Inappropriate words',
        4: 'Confused',
        5: 'Oriented',
    },
    3008223: {
        1: 'No response',
        2: 'Abnormal extension',
        3: 'Abnormal flexion',
        4: 'Flex-withdraws',
        5: 'Localizes pain',
        6: 'Obeys commands',
    },
    # Values 3..15 map to the labels 'L3'..'L15'
    3032652: {level: 'L{}'.format(level) for level in range(3, 16)},
}

# Split of the included concept ids into categorical and numerical covariates
categorical_covariate_concept_ids = [3008223, 3009094, 3016335, 3032652]

numerical_covariate_concept_ids = [
    3004249, 3012888, 3016502, 3020716,
    3020891, 3024171, 3025315, 3027018,
    3027598, 3034962, 3036277,
]

# Per-concept "normal" value: a number for numerical covariates,
# a label for categorical ones.
normal_covariate_concept_values = {
    3036277: 170.0,
    3020891: 36.6,
    3025315: 81.0,
    3012888: 59.0,
    3032652: 'L15',
    3016335: 'Spontaneously',
    3008223: 'Obeys commands',
    3009094: 'Oriented',
    3034962: 128.0,
    3027018: 86,
    3020716: 0.21,
    3027598: 77.0,
    3016502: 98.0,
    3024171: 19,
    3004249: 118.0,
}

In [7]:
# Feature extraction settings; stored under 'model_training_settings'
# in the feature extraction settings.
# NOTE: this replaces the sample_size = 100 assigned in the cohort settings cell above.
sample_size = None
val_size = 0.25
path = ""

In [8]:
# Gather the connection and cohort parameters defined in the cells above into
# one nested dict with two sections: 'connection_details' and 'cohort_details'.
cohort_connection_settings = {
    'connection_details': {
        'dbms': dbms,
        'path_to_driver': path_to_driver,
        'hostname': hostname,
        'port': port,
        'dbname': dbname,
        'user': user,
        'password': pw,
    },
    'cohort_details': {
        'cdm_database_schema': cdm_database_schema,
        'target_cohort_database_schema': target_cohort_database_schema,
        'target_cohort_table': target_cohort_table,
        'target_cohort_id': target_cohort_id,
        'outcome_cohort_database_schema': outcome_cohort_database_schema,
        'outcome_cohort_table': outcome_cohort_table,
        'outcome_cohort_id': outcome_cohort_id,
        'oracle_temp_schema': None,
    },
}

# Persist the settings as pretty-printed JSON.
# NOTE: the password is written to this file in plain text.
with open('cohort_connection_settings.json', 'w') as f:
    json.dump(cohort_connection_settings, f, indent=4)

In [9]:
# Gather every feature extraction parameter defined in the cells above into
# one nested dict, one section per settings group.
feature_extraction_settings = {
    'analysis_name': analysis_name,
    'covariate_settings': {
        'use_demographics_gender': True,
        'use_demographics_age_group': True,
        'use_demographics_race': True,
        'use_demographics_ethnicity': True,
        'use_condition_occurrence_any_time_prior': True,
        'use_dcsi': True,
        'use_chads2': True,
        'use_chads2_vasc': True,
        'included_covariate_concept_ids': included_covariate_concept_ids,
    },
    'tidy_covariate_settings': {
        'min_fraction': min_fraction,
        'normalize': normalize,
        'remove_redundancy': remove_redundancy,
    },
    'model_training_settings': {
        'sample_size': sample_size,
        'val_size': val_size,
        'random_state': 10,
        'path': path,
    },
    'expt_config_settings': {
        'categorical_covariate_concept_ids': categorical_covariate_concept_ids,
        'numerical_covariate_concept_ids': numerical_covariate_concept_ids,
        'categorical_covariate_concept_value_mappings': categorical_covariate_concept_value_mappings,
        'normal_covariate_concept_values': normal_covariate_concept_values,
    },
}

In [10]:
# Persist the feature extraction settings as pretty-printed JSON (4-space indent).
# NOTE: JSON object keys are always strings, so the integer concept-id keys of
# the nested mappings are written out as strings.
with open('feature_extraction_settings.json', 'w') as f:
    json.dump(feature_extraction_settings, f, indent=4)

In [11]:
# Read settings from json files
def read_settings(path):
    """Return the parsed contents of the JSON file at *path*."""
    with open(path) as settings_file:
        settings = json.load(settings_file)
    return settings
    
# Reload both settings dicts from the JSON files written by the earlier cells,
# replacing the in-memory versions.
# NOTE: after the JSON round trip, dict keys that were integers (e.g. concept
# ids) come back as strings.
cohort_connection_settings = read_settings('cohort_connection_settings.json')
feature_extraction_settings = read_settings('feature_extraction_settings.json')

## Cohort Connector

In [None]:
# Option 1: construct the connector from the JSON settings file written above.
cohort_connector = CohortConnector(file_path = 'cohort_connection_settings.json')

In [None]:
# Option 2: construct the connector by passing every setting as a keyword
# argument. This rebinds `cohort_connector`, replacing the file-based instance.
cohort_connector = CohortConnector(
    # Database connection
    dbms=dbms,
    path_to_driver=path_to_driver,
    hostname=hostname,
    port=port,
    dbname=dbname,
    user=user,
    password=pw,
    # Cohort location
    cdm_database_schema=cdm_database_schema,
    target_cohort_database_schema=target_cohort_database_schema,
    target_cohort_table=target_cohort_table,
    target_cohort_id=target_cohort_id,
    outcome_cohort_database_schema=outcome_cohort_database_schema,
    outcome_cohort_table=outcome_cohort_table,
    outcome_cohort_id=outcome_cohort_id,
)

In [None]:
# Display the connector's `db_connection_details` attribute as the cell output.
cohort_connector.db_connection_details

In [None]:
# Display the connector's `cohort_details` attribute as the cell output.
cohort_connector.cohort_details

## Feature Extraction

In [None]:
# Option 1: construct the feature extractor from the connector plus the JSON
# settings file written above.
feature_extractor = FeatureExtractor(cohort_connector =  cohort_connector,
                                     file_path = 'feature_extraction_settings.json')


In [None]:
# Option 2: construct the feature extractor by passing every setting as a
# keyword argument. This rebinds `feature_extractor`, replacing the file-based
# instance created above.
# NOTE(review): the JSON settings also carry 'random_state': 10 under
# 'model_training_settings'; no random_state is passed here -- confirm whether
# the constructor's default matches.
feature_extractor = FeatureExtractor(cohort_connector =  cohort_connector,
                                    analysis_name = analysis_name,

                                    # Covariate settings
                                    use_demographics_gender = True,
                                    use_demographics_age_group = True,
                                    use_demographics_race = True,
                                    use_demographics_ethnicity = True,
                                    use_condition_occurrence_any_time_prior = True,
                                    use_dcsi = True,
                                    use_chads2 = True,
                                    use_chads2_vasc = True,

                                    # Tidy covariate settings: pass each flag its own
                                    # setting (previously `normalize` and
                                    # `remove_redundancy` were both given `min_fraction`).
                                    min_fraction = min_fraction,
                                    normalize = normalize,
                                    remove_redundancy = remove_redundancy,

                                    # Model training settings
                                    sample_size = sample_size, 
                                    val_size = val_size, 
                                    path = path,

                                    # Experiment config settings
                                    included_covariate_concept_ids = included_covariate_concept_ids,
                                    categorical_covariate_concept_ids = categorical_covariate_concept_ids,
                                    numerical_covariate_concept_ids = numerical_covariate_concept_ids,
                                    categorical_covariate_concept_value_mappings = categorical_covariate_concept_value_mappings,
                                    normal_covariate_concept_values = normal_covariate_concept_values,
                                   )

In [None]:
import time

# Run feature extraction in the 'train' setup and print the elapsed
# time.time() difference (seconds).
start_time = time.time()
feature_extractor.extract_features(setup='train')
duration = time.time() - start_time
print(duration)

In [None]:
# Load the training features CSV for this target/outcome cohort pair
# (presumably produced by the 'train' extraction above -- confirm) and display it.
X_train = pd.read_csv(
    f'data/IHM_T{target_cohort_id}_O{outcome_cohort_id}_FEAT_TRAIN.csv'
)
X_train

In [None]:
# Load the validation features CSV for this target/outcome cohort pair
# (presumably produced by the 'train' extraction above -- confirm) and display it.
X_val = pd.read_csv(
    f'data/IHM_T{target_cohort_id}_O{outcome_cohort_id}_FEAT_VAL.csv'
)
X_val

In [None]:
# Load the training cohort-outcome CSV for this target/outcome cohort pair
# (presumably produced by the 'train' extraction above -- confirm) and display it.
y_train = pd.read_csv(
    f'data/IHM_T{target_cohort_id}_O{outcome_cohort_id}_COHORT_OUT_TRAIN.csv'
)
y_train

In [None]:
# Load the validation cohort-outcome CSV for this target/outcome cohort pair
# (presumably produced by the 'train' extraction above -- confirm) and display it.
y_val = pd.read_csv(
    f'data/IHM_T{target_cohort_id}_O{outcome_cohort_id}_COHORT_OUT_VAL.csv'
)
y_val

In [None]:
# Load the experiment YAML config for this target/outcome cohort pair from the
# working directory and print it back out as YAML.
with open(f"IHM_T{target_cohort_id}_O{outcome_cohort_id}_expt_config.yaml", 'r') as stream:
    yaml_config = yaml.safe_load(stream)
print(yaml.dump(yaml_config))

In [None]:
# Single patient: run extraction in the 'prediction' setup for one subject and
# display the result.
# NOTE(review): `subject_id` has no value below, so this cell is a syntax error
# until a subject id is filled in.
X_pred = feature_extractor.extract_features(setup = 'prediction', 
                                subject_id = )
X_pred

In [None]:
# Multiple patients: run extraction in the 'prediction' setup for a list of
# subjects and display the result.
# NOTE(review): the list is empty here -- presumably a placeholder to be filled
# with subject ids before running; confirm.
X_pred1 = feature_extractor.extract_features(setup = 'prediction', 
                                subject_id = [])
X_pred1

In [None]:
# Display the time.time() timestamp recorded at the start of the training
# extraction cell above.
start_time

In [None]:
# Time one 'prediction' extraction and display the elapsed time.time()
# difference (seconds) as the cell output.
# NOTE(review): `subject_id` has no value below, so this cell is a syntax error
# until a subject id is filled in.
start_time2 = time.time()
feature_extractor.extract_features(setup = 'prediction', 
                                subject_id = )
duration2 = time.time() - start_time2
duration2