# Synthea
Ambulatory Patients - First Encounter, 10k Patients

## Libraries and Constants

In [1]:
import subprocess

from pathlib import Path
import shutil
import pandas as pd

# Absolute paths to files and directories
BASE_DIRECTORY = (Path.cwd()).parent
SYNTHEA_PATH = Path(BASE_DIRECTORY / 'synthea-with-dependencies.jar')
RAW_DATA_PATH = Path(BASE_DIRECTORY / 'data/raw/analysis')
CSV_RAW_DATA_PATH = Path(RAW_DATA_PATH / 'csv')
PROCESSED_DATA_PATH = Path(BASE_DIRECTORY / 'data/processed/analysis')

# Shell command to run Synthea
RUN_SYNTHEA = f'java -jar {SYNTHEA_PATH}'

# CSV files that will be excluded from the generated data
EXCLUDED_CSV_FILES = [
    'careplans.csv',
    'claims.csv',
    'claims_transactions.csv',
    'organizations.csv',
    'patient_expenses.csv',
    'payer_transitions.csv',
    'payers.csv',
    'providers.csv',
    'supplies.csv'
]

# Number of patients to be generated
NUM_PATIENTS = 10_000

# If new Synthea data should be generated
GENERATE_DATA = False

# Data Generation

In [2]:
if(GENERATE_DATA):
    # Verify if the raw data directory exists
    if RAW_DATA_PATH.exists():
        # Delete the directory
        shutil.rmtree(RAW_DATA_PATH)

    # String containing the CSV files that will not be generated
    excluded_files = ','.join(EXCLUDED_CSV_FILES)

    # Define the shell command to generate the Synthea data
    command = RUN_SYNTHEA \
                + f' -p {NUM_PATIENTS}' \
                + f' --exporter.baseDirectory={RAW_DATA_PATH}' \
                + ' --exporter.csv.export=true' \
                + f' --exporter.csv.excluded_files={excluded_files}' \
                + ' --exporter.metadata.export=false' \
                + ' --exporter.fhir.export=false' \
                + ' --exporter.fhir.transaction_bundle=false' \
                + ' --exporter.hospital.fhir.export=false' \
                + ' --exporter.practitioner.fhir.export=false'

    # Run Synthea
    subprocess.run(command, shell=True)

## Data Processing

### Opening the Raw Data Files

In [3]:
# Get the raw data file names
raw_files = [file.name[:-4] for file in CSV_RAW_DATA_PATH.iterdir()]

# Load the DataFrames with raw data
dfs = dict()
for file in raw_files:
    dfs[file] = pd.read_csv(f'{CSV_RAW_DATA_PATH}/{file}.csv')

### Finding the Ambulatory Patients

In [4]:
# Drop unnecessary encounter columns
dfs['encounters'] = dfs['encounters'] \
    .drop(columns=[
        'ORGANIZATION', 'PROVIDER', 'PAYER', 'BASE_ENCOUNTER_COST',
        'TOTAL_CLAIM_COST', 'PAYER_COVERAGE'
    ])

# Filter only emergency encounters
dfs['encounters'] = dfs['encounters'] \
    .query('ENCOUNTERCLASS == "ambulatory"') \
    .query('REASONDESCRIPTION.notnull()') \
    .reset_index(drop=True)

# Get the identifiers (encounters and patients)
encounters_ids = dfs['encounters']['Id'].tolist()
patients_ids = dfs['encounters']['PATIENT'].tolist()

In [5]:
dfs['encounters']

Unnamed: 0,Id,START,STOP,PATIENT,ENCOUNTERCLASS,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,ca5babe8-6014-eaef-7189-1f09e5280886,2019-06-15T07:47:26Z,2019-06-15T08:02:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,ambulatory,185347001,Encounter for problem,24079001.0,Atopic dermatitis
1,32e6188c-8c10-d290-9774-cee197d5ad81,2019-06-18T20:47:26Z,2019-06-18T21:02:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,ambulatory,185347001,Encounter for problem,24079001.0,Atopic dermatitis
2,9ee6c40f-fa70-cdf3-ed78-896c75ccb5f4,2019-07-01T13:47:26Z,2019-07-01T14:16:43Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,ambulatory,185347001,Encounter for problem,419199007.0,Allergy to substance (finding)
3,05fa1752-d63b-92fe-bf6a-7aa1cd15690b,2015-02-28T07:57:10Z,2015-02-28T08:12:10Z,c1ceda31-3ade-9760-437f-8eb454821bfc,ambulatory,185349003,Encounter for check up,263102004.0,Fracture subluxation of wrist
4,8f4319ad-5a69-ecef-287a-d10504b5bd7d,2015-06-21T04:45:15Z,2015-06-21T05:00:15Z,bbd6deca-6ab7-9347-4a20-276331a8825f,ambulatory,185345009,Encounter for symptom,195662009.0,Acute viral pharyngitis (disorder)
...,...,...,...,...,...,...,...,...,...
419401,0ee31043-7e36-407d-f0c9-496f321ac59a,2014-04-03T03:23:31Z,2014-04-03T03:38:31Z,291d0eaf-3b39-ce15-db1d-a2c5e29b8bb3,ambulatory,390906007,Follow-up encounter (procedure),82423001.0,Chronic pain (finding)
419402,7d982820-cf8e-303c-f169-1e123ac45a09,2014-06-02T03:23:31Z,2014-06-02T03:38:31Z,291d0eaf-3b39-ce15-db1d-a2c5e29b8bb3,ambulatory,390906007,Follow-up encounter (procedure),82423001.0,Chronic pain (finding)
419403,6aae81a1-3087-e3fc-dcba-1dd71037cb61,2022-03-12T20:27:08Z,2022-03-12T20:42:08Z,291d0eaf-3b39-ce15-db1d-a2c5e29b8bb3,ambulatory,185347001,Encounter for problem,307731004.0,Injury of tendon of the rotator cuff of shoulder
419404,786fd7c0-a13c-c60b-d250-bcf964f9aca6,2022-06-09T20:27:08Z,2022-06-09T20:42:08Z,291d0eaf-3b39-ce15-db1d-a2c5e29b8bb3,ambulatory,185349003,Encounter for check up,307731004.0,Injury of tendon of the rotator cuff of shoulder


### Filtering the Data Associated with the Infarction Emergency Encounters

In [6]:
# Filter data from DataFrames with encounter foreign key
for file in raw_files:
    if (file != 'encounters') and (file != 'patients'):
        dfs[file] = dfs[file] \
            .query('ENCOUNTER in @encounters_ids') \
            .reset_index(drop=True)

# # Drop unnecessary columns from immunizations, medications and procedures DataFrames
dfs['immunizations'] = dfs['immunizations'].drop(columns=['BASE_COST'])
dfs['medications'] = dfs['medications'] \
    .drop(columns=['BASE_COST', 'PAYER_COVERAGE', 'TOTALCOST'])
dfs['procedures'] = dfs['procedures'].drop(columns=['BASE_COST'])

# Filter data and drop unnecessary columns from the patients DataFrame
dfs['patients'] = dfs['patients'] \
    .drop(columns=['HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE', 'INCOME']) \
    .query('Id in @patients_ids') \
    .reset_index(drop=True)

### Writing CSV Files with the Processed Data

In [7]:
# Verify if the processed data directory exists
if PROCESSED_DATA_PATH.exists():
    # Delete the directory
    shutil.rmtree(PROCESSED_DATA_PATH)

# Create the processed data directory
PROCESSED_DATA_PATH.mkdir(parents=True)

# Write CSV files with processed data
for file in raw_files:
    dfs[file].to_csv(f'{PROCESSED_DATA_PATH}/{file}.csv', index=False)

## Data Analysis

### Treating the Encounters DataFrame

In [8]:
# Open the encounters data file
df_encounters = pd.read_csv(f'{PROCESSED_DATA_PATH}/encounters.csv')

# Drop unnecessary encounter columns
df_encounters_description = df_encounters \
    .drop(columns=[
        'START', 'STOP', 'PATIENT', 'ENCOUNTERCLASS', 'CODE', 'REASONCODE'
    ]) \
    .rename(columns={'Id': 'ENCOUNTER'})

# Print some of the encounters data
df_encounters_description.head()

Unnamed: 0,ENCOUNTER,DESCRIPTION,REASONDESCRIPTION
0,ca5babe8-6014-eaef-7189-1f09e5280886,Encounter for problem,Atopic dermatitis
1,32e6188c-8c10-d290-9774-cee197d5ad81,Encounter for problem,Atopic dermatitis
2,9ee6c40f-fa70-cdf3-ed78-896c75ccb5f4,Encounter for problem,Allergy to substance (finding)
3,05fa1752-d63b-92fe-bf6a-7aa1cd15690b,Encounter for check up,Fracture subluxation of wrist
4,8f4319ad-5a69-ecef-287a-d10504b5bd7d,Encounter for symptom,Acute viral pharyngitis (disorder)


In [9]:
df_encounters \
    .groupby(by=['REASONDESCRIPTION'], as_index=False) \
    .agg(CASES=('REASONCODE', 'count')) \
    .sort_values(by=['CASES'], ascending=False) \
    .reset_index(drop=True)

Unnamed: 0,REASONDESCRIPTION,CASES
0,Chronic kidney disease stage 4 (disorder),168534
1,End-stage renal disease (disorder),81995
2,Normal pregnancy,30457
3,Allergy to substance (finding),19044
4,Dependent drug abuse (disorder),14319
...,...,...
107,Tricuspid valve stenosis (disorder),3
108,Fracture of the vertebral column with spinal c...,2
109,Excessive salivation (disorder),2
110,Preinfarction syndrome (disorder),2


### Verifying the Number of Conditions per Encounter

In [10]:
# Open the conditions data file
df_conditions = pd.read_csv(f'{PROCESSED_DATA_PATH}/conditions.csv')

# Print some of the conditions data
df_conditions.head()

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
0,2019-06-15,,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,ca5babe8-6014-eaef-7189-1f09e5280886,24079001,Atopic dermatitis
1,2015-06-21,2015-06-28,bbd6deca-6ab7-9347-4a20-276331a8825f,8f4319ad-5a69-ecef-287a-d10504b5bd7d,195662009,Acute viral pharyngitis (disorder)
2,2021-10-11,2021-11-06,e2335115-b5f4-8b93-81b1-cc51d3af1835,826ee4b0-8bec-a8c3-6b17-ed40cd7786bd,444814009,Viral sinusitis (disorder)
3,2021-12-07,2021-12-19,e2335115-b5f4-8b93-81b1-cc51d3af1835,e301defb-dcae-d3ee-5688-88b97c082acc,10509002,Acute bronchitis (disorder)
4,2019-10-28,2019-11-06,bbd6deca-6ab7-9347-4a20-276331a8825f,f50d4fd7-6d66-464a-829f-d152b16ff626,10509002,Acute bronchitis (disorder)


In [11]:
# Aggregate data using encounter codes
df_conditions_agg = df_conditions \
    .groupby(by=['ENCOUNTER'], as_index=False) \
    .agg(
        NUM_CONDITIONS=('CODE', pd.Series.nunique),
        CONDITIONS=('DESCRIPTION', 'unique')
    ) \
    .sort_values(by=['NUM_CONDITIONS'], ascending=False) \
    .reset_index(drop=True)

# Save a CSV file with the aggregation result
df_conditions_agg.to_csv(f'{PROCESSED_DATA_PATH}/agg_conditions.csv', index=False)

# Print some of the aggregation result
df_conditions_agg

Unnamed: 0,ENCOUNTER,NUM_CONDITIONS,CONDITIONS
0,c39ec22c-2a3f-b1b9-da87-69c3c17d2bd5,11,"[Headache (finding), Cough (finding), Sore thr..."
1,b9b5aaf9-ed48-207d-3be4-26bc61d43a57,11,"[Cerebral palsy (disorder), Pain (finding), Po..."
2,a3832f5d-fa19-8d56-1782-6a8681c24d5f,11,"[Headache (finding), Cough (finding), Dyspnea ..."
3,8a136b8c-d25d-0d7c-90b1-d55e413ef9c5,10,"[Cough (finding), Sore throat symptom (finding..."
4,17b19d1b-6b78-bd34-9542-bdf3853db573,10,"[Cerebral palsy (disorder), Dystonia (disorder..."
...,...,...,...
48621,58d5dedd-daa6-249b-6254-7bde357525a6,1,[Viral sinusitis (disorder)]
48622,58d7b0fa-07b4-acbf-9815-084d5ec0b3e6,1,[Idiopathic atrophic hypothyroidism]
48623,58d7e549-d4df-3c68-96d9-7f8277fed837,1,[Viral sinusitis (disorder)]
48624,58d7fbec-b353-489b-6c8c-e03b8ac5138a,1,[Viral sinusitis (disorder)]


### Verifying the Number of Observations per Encounter

In [12]:
# Open the observations data file
df_observations = pd.read_csv(f'{PROCESSED_DATA_PATH}/observations.csv')

# Print some of the observations data
df_observations.head()

Unnamed: 0,DATE,PATIENT,ENCOUNTER,CATEGORY,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,2019-06-18T20:47:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,32e6188c-8c10-d290-9774-cee197d5ad81,exam,66519-0,Percentage area affected by eczema Head and Ne...,22.1,%,numeric
1,2019-06-18T20:47:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,32e6188c-8c10-d290-9774-cee197d5ad81,exam,66529-9,Percentage area affected by eczema Trunk [PhenX],6.9,%,numeric
2,2019-06-18T20:47:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,32e6188c-8c10-d290-9774-cee197d5ad81,exam,66524-0,Percentage area affected by eczema Upper extre...,6.3,%,numeric
3,2019-06-18T20:47:26Z,17fdf07e-2189-49e3-3ff4-2fdcb99c062c,32e6188c-8c10-d290-9774-cee197d5ad81,exam,66534-9,Percentage area affected by eczema Lower extre...,6.0,%,numeric
4,2015-06-21T04:45:15Z,bbd6deca-6ab7-9347-4a20-276331a8825f,8f4319ad-5a69-ecef-287a-d10504b5bd7d,vital-signs,8310-5,Body temperature,37.5,Cel,numeric


In [13]:
# Aggregate data using encounter codes
df_observations_agg = df_observations \
    .groupby(by=['ENCOUNTER'], as_index=False) \
    .agg(
        NUM_CATEGORIES=('CATEGORY', pd.Series.nunique),
        CATEGORIES=('CATEGORY', 'unique'),
        NUM_OBSERVATIONS=('CODE', pd.Series.nunique),
        OBSERVATIONS=('DESCRIPTION', 'unique')
    ) \
    .sort_values(by=['NUM_CATEGORIES', 'NUM_OBSERVATIONS'], ascending=False) \
    .reset_index(drop=True)

# Save a CSV file with the aggregation result
df_observations_agg.to_csv(f'{PROCESSED_DATA_PATH}/agg_observations.csv', index=False)

# Print some of the aggregation result
df_observations_agg

Unnamed: 0,ENCOUNTER,NUM_CATEGORIES,CATEGORIES,NUM_OBSERVATIONS,OBSERVATIONS
0,a6a73ec4-74ad-3e1c-3086-f6160ef5b57b,4,"[procedure, survey, exam, laboratory]",29,"[US Guidance for biopsy of Prostate, Within th..."
1,35939b53-1566-bc14-340c-0c1687f77829,4,"[procedure, survey, exam, laboratory]",28,"[US Guidance for biopsy of Prostate, Within th..."
2,23ce5d3b-6d8b-859b-41d3-24da934908ee,4,"[procedure, survey, exam, laboratory]",27,"[US Guidance for biopsy of Prostate, Within th..."
3,e17c8ef5-cd6c-ba89-a0f7-e42ec0666a0d,4,"[procedure, survey, exam, laboratory]",27,"[US Guidance for biopsy of Prostate, Within th..."
4,136c8057-653c-980e-0154-8fae54960b53,4,"[procedure, survey, exam, laboratory]",26,"[US Guidance for biopsy of Prostate, Within th..."
...,...,...,...,...,...
298558,ffecaabb-0622-01e2-e060-b1dd71a35e79,1,[vital-signs],1,[Body temperature]
298559,fff40a75-b8db-65f5-ed5e-15ef4dc6fb1e,1,[vital-signs],1,[Body temperature]
298560,fff77bfd-9637-28ae-462a-9e27d4d6b8d2,1,[vital-signs],1,[Body temperature]
298561,fff84851-2899-8e94-7b96-88f6c4e27afa,1,[vital-signs],1,[Body temperature]


### Verifying the Number of Conditions and Observations per Encounter

In [14]:
# Merge aggregation result for conditions and observations
df_merged_agg = df_conditions_agg \
    .merge(
        right=df_observations_agg,
        how='left',
        on='ENCOUNTER'
    ) \
    .sort_values(
        by=['NUM_CONDITIONS', 'NUM_CATEGORIES', 'NUM_OBSERVATIONS'],
        ascending=False
    ) \
    .fillna({
        'NUM_CATEGORIES': 0,
        'CATEGORIES': '',
        'NUM_OBSERVATIONS': 0,
        'OBSERVATIONS': ''
    }) \
    .astype({'NUM_CATEGORIES': int, 'NUM_OBSERVATIONS': int})

# Save a CSV file with the merge result
df_merged_agg.to_csv(f'{PROCESSED_DATA_PATH}/agg_conditions_observations.csv', index=False)

# Print some of the merge result
df_merged_agg

Unnamed: 0,ENCOUNTER,NUM_CONDITIONS,CONDITIONS,NUM_CATEGORIES,CATEGORIES,NUM_OBSERVATIONS,OBSERVATIONS
0,c39ec22c-2a3f-b1b9-da87-69c3c17d2bd5,11,"[Headache (finding), Cough (finding), Sore thr...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
2,a3832f5d-fa19-8d56-1782-6a8681c24d5f,11,"[Headache (finding), Cough (finding), Dyspnea ...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
1,b9b5aaf9-ed48-207d-3be4-26bc61d43a57,11,"[Cerebral palsy (disorder), Pain (finding), Po...",0,,0,
3,8a136b8c-d25d-0d7c-90b1-d55e413ef9c5,10,"[Cough (finding), Sore throat symptom (finding...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
5,4ef07c79-a127-6985-2bb5-d6c1c31f2ad8,10,"[Cough (finding), Sputum finding (finding), Fa...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
...,...,...,...,...,...,...,...
48621,58d5dedd-daa6-249b-6254-7bde357525a6,1,[Viral sinusitis (disorder)],0,,0,
48622,58d7b0fa-07b4-acbf-9815-084d5ec0b3e6,1,[Idiopathic atrophic hypothyroidism],0,,0,
48623,58d7e549-d4df-3c68-96d9-7f8277fed837,1,[Viral sinusitis (disorder)],0,,0,
48624,58d7fbec-b353-489b-6c8c-e03b8ac5138a,1,[Viral sinusitis (disorder)],0,,0,


In [15]:
# Merge aggregation result for conditions and observations
df_merged_aggs = df_encounters_description \
    .merge(
        right=df_merged_agg,
        how='left',
        on='ENCOUNTER'
    ) \
    .sort_values(
        by=['NUM_CONDITIONS', 'NUM_CATEGORIES', 'NUM_OBSERVATIONS'],
        ascending=False
    ) \
    .fillna({
        'NUM_CONDITIONS': 0,
        'CONDITIONS': '',
        'NUM_CATEGORIES': 0,
        'CATEGORIES': '',
        'NUM_OBSERVATIONS': 0,
        'OBSERVATIONS': ''
    }) \
    .astype({'NUM_CATEGORIES': int, 'NUM_OBSERVATIONS': int})

# Save a CSV file with the merge result
# df_merged_aggs.to_csv(f'{PROCESSED_DATA_PATH}/agg_conditions_observations.csv', index=False)

# Print some of the merge result
df_merged_aggs

Unnamed: 0,ENCOUNTER,DESCRIPTION,REASONDESCRIPTION,NUM_CONDITIONS,CONDITIONS,NUM_CATEGORIES,CATEGORIES,NUM_OBSERVATIONS,OBSERVATIONS
221977,a3832f5d-fa19-8d56-1782-6a8681c24d5f,Encounter for symptom (procedure),Suspected COVID-19,11.0,"[Headache (finding), Cough (finding), Dyspnea ...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
223314,c39ec22c-2a3f-b1b9-da87-69c3c17d2bd5,Encounter for symptom (procedure),Suspected COVID-19,11.0,"[Headache (finding), Cough (finding), Sore thr...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
268772,b9b5aaf9-ed48-207d-3be4-26bc61d43a57,Physician visit with evaluation AND/OR managem...,Cerebral palsy (disorder),11.0,"[Cerebral palsy (disorder), Pain (finding), Po...",0,,0,
18468,4ef07c79-a127-6985-2bb5-d6c1c31f2ad8,Encounter for symptom (procedure),Suspected COVID-19,10.0,"[Cough (finding), Sputum finding (finding), Fa...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
49663,123ba070-6cab-ef43-deb4-b9e4cbf4cac3,Encounter for symptom (procedure),Suspected COVID-19,10.0,"[Cough (finding), Sputum finding (finding), Fa...",2,"[vital-signs, laboratory]",17,"[Body temperature, Respiratory rate, Heart rat..."
...,...,...,...,...,...,...,...,...,...
419397,64b1d627-a3da-dec5-685e-2ccfa8625d25,Encounter for check up,Screening for malignant neoplasm of colon (pro...,0.0,,0,,0,
419398,5ff3bc66-6d88-2bd7-6f93-1f2b411a041e,Office Visit,Alzheimer's disease (disorder),0.0,,0,,0,
419401,0ee31043-7e36-407d-f0c9-496f321ac59a,Follow-up encounter (procedure),Chronic pain (finding),0.0,,0,,0,
419402,7d982820-cf8e-303c-f169-1e123ac45a09,Follow-up encounter (procedure),Chronic pain (finding),0.0,,0,,0,


In [17]:
df_merged_aggs \
    .groupby(by=['REASONDESCRIPTION'], as_index=False) \
    .agg(
        CASES=('ENCOUNTER', 'count'),
        MEAN_CONDITIONS=('NUM_CONDITIONS', 'mean'),
        STD_CONDITIONS=('NUM_CONDITIONS', 'std'),
        MEAN_OBSERVATIONS=('NUM_OBSERVATIONS', 'mean'),
        STD_OBSERVATIONS=('NUM_OBSERVATIONS', 'std')
    ) \
    .sort_values(['MEAN_CONDITIONS', 'MEAN_OBSERVATIONS'], ascending=[False, True]) \
    .reset_index(drop=True) \
    .head(50)

Unnamed: 0,REASONDESCRIPTION,CASES,MEAN_CONDITIONS,STD_CONDITIONS,MEAN_OBSERVATIONS,STD_OBSERVATIONS
0,Suspected COVID-19,1048,5.057252,1.733033,14.541985,3.342893
1,Neoplasm of prostate,293,1.59727,1.073462,19.754266,26.654969
2,Bleeding from anus,60,1.216667,0.903696,0.0,0.0
3,Suspected prostate cancer (situation),207,1.169082,0.535543,5.516908,9.439046
4,Mitral valve regurgitation (disorder),59,1.016949,0.130189,0.0,0.0
5,Streptococcal sore throat (disorder),1641,1.007922,0.088679,0.996953,0.107592
6,Acute viral pharyngitis (disorder),6907,1.006515,0.085688,0.997828,0.112219
7,Localized primary osteoarthritis of the hand,243,1.004115,0.06415,0.255144,0.436842
8,Osteoarthritis of knee,542,1.00369,0.06069,0.328413,0.47007
9,Viral sinusitis (disorder),12168,1.001808,0.138669,0.003369,0.081523
