In [11]:
# One-time setup: install the PostgreSQL driver into the kernel's environment.
# %pip install psycopg2-binary==2.9.5

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 5.3 MB/s eta 0:00:01
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.5


In [40]:
import json
import psycopg2
import pandas as pd

In [41]:
# Read the database connection settings from a local JSON file. The context
# manager closes the file handle as soon as the content has been parsed.
with open(".credentials.json") as credentials_file:
    settings = json.load(credentials_file)

In [42]:
# Load the connection settings; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open(".credentials.json") as credentials_file:
    settings = json.load(credentials_file)

schema = "cds_cdm"

# Every key of `settings` is forwarded as a keyword argument to the driver.
# `options` sets the session's search_path so that unqualified table names
# resolve inside `schema`.
con = psycopg2.connect(**settings, options=f"-c search_path={schema}")

In [43]:
# Notebook-level cursor on the open connection; the `insert` helper defined
# below executes its statements through it.
cursor = con.cursor() 

In [44]:
def insert(table, data, cur=None):
    """Insert one row into ``table`` and return its generated primary key.

    The statement has the form
    ``INSERT INTO <table> (<columns>) VALUES (%s, ...) RETURNING <table>_id``,
    so the primary-key column is expected to be named ``<table>_id``.
    Column values are handed to the driver as bound parameters. Table and
    column names cannot be bound, so they are checked to be plain identifiers
    before being interpolated into the SQL text.

    Args:
        table: Name of the target table.
        data: Mapping of column name -> value for the new row. An empty
            mapping inserts a row made of column defaults only.
        cur: Optional DB-API cursor to execute on. Defaults to the
            notebook-level ``cursor``.

    Returns:
        The first column of the row produced by the ``RETURNING`` clause.

    Raises:
        ValueError: If ``table`` or a key of ``data`` is not a plain identifier.
    """
    # Reject anything that could change the structure of the statement
    # (whitespace, quotes, punctuation, ...) before building the SQL string.
    for name in [table, *data]:
        if not (isinstance(name, str) and name.isidentifier()):
            raise ValueError(f"Invalid SQL identifier: {name!r}")

    if data:
        columns = ', '.join(data)
        value_placeholder = ', '.join(['%s'] * len(data))
        sql = f"INSERT INTO {table} ({columns}) VALUES ({value_placeholder}) RETURNING {table}_id"
    else:
        # "INSERT INTO t () VALUES ()" is not valid SQL; use DEFAULT VALUES.
        sql = f"INSERT INTO {table} DEFAULT VALUES RETURNING {table}_id"

    # Fall back to the notebook-level cursor only when none was supplied.
    active_cursor = cursor if cur is None else cur
    active_cursor.execute(sql, list(data.values()))

    return active_cursor.fetchone()[0]

In [83]:
# Create the test patient. `with con:` commits when the insert succeeds and
# rolls back when it raises, so a failed run of this cell does not leave the
# connection stuck in an aborted transaction.
with con:
    person_id = insert('person', {
        "gender_concept_id": 8507, # "MALE"
        "year_of_birth": 1982,
        "month_of_birth": 5,
        "day_of_birth": 5,
        "race_concept_id": 8527,
        "ethnicity_concept_id": 38003564 # "Not Hispanic or Latino"
    })

# Show the generated id. NOTE(review): the cells below hard-code person_id 4;
# if this insert returns a different id (e.g. on a fresh database) they must
# be adjusted accordingly.
person_id

In [84]:
# Record a condition for the test patient. `with con:` commits on success and
# rolls back if the insert raises, keeping the connection usable afterwards.
with con:
    insert('condition_occurrence', {
        "person_id": 4,
        "condition_concept_id": 37311061, # "COVID-19"
        "condition_start_date": "1990-01-01",
        "condition_start_datetime": "1990-01-01 00:00:00",
        "condition_end_date": "1990-01-02",
        "condition_end_datetime": "1990-01-02 00:00:00",
        "condition_type_concept_id": 32817
    })

In [17]:
# Record an observation for the test patient. `with con:` commits on success
# and rolls back if the insert raises, keeping the connection usable afterwards.
with con:
    insert('observation', {
        "person_id": 4,
        "observation_concept_id": 4169185, # "Allergy to heparin"
        "observation_date": "1990-01-01",
        "observation_type_concept_id": 32817
    })

In [18]:
# Record a lab measurement for the test patient. `with con:` commits on
# success and rolls back if the insert raises.
# NOTE(review): the concepts table further down lists unit_concept_id 8842 for
# this measurement concept, while 8636 is used here - confirm the intended unit.
with con:
    insert('measurement', {
        "person_id": 4,
        "measurement_concept_id": 3048530, # "Fibrin D-dimer DDU [Mass/volume] in Platelet poor plasma"
        "measurement_date": "1990-01-01",
        "value_as_number": 2.1,
        "unit_concept_id": 8636,
        "range_high": 2.0, # TODO: should range_high be specified at all?
        "measurement_type_concept_id": 32817
    })

In [31]:
# Record a procedure for the test patient. `with con:` commits on success and
# rolls back if the insert raises, keeping the connection usable afterwards.
with con:
    insert('procedure_occurrence', {
        "person_id": 4,
        "procedure_concept_id": 4230167, # "Artificial respiration"
        "procedure_date": "1990-01-01",
        "procedure_type_concept_id": 32817
    })

In [29]:
# Record a hospital visit for the test patient. `with con:` commits on success
# and rolls back if the insert raises, keeping the connection usable afterwards.
with con:
    insert('visit_occurrence', {
        "person_id": 4,
        "visit_concept_id": 9201, # "Inpatient visit"
        "visit_start_date": "1990-01-01",
        "visit_end_date": "1990-02-01",
        "visit_type_concept_id": 32817
    })

In [30]:
# Record a visit detail inside an existing visit. `with con:` commits on
# success and rolls back if the insert raises.
# NOTE(review): visit_occurrence_id 1 is hard-coded - confirm it matches the id
# generated for the visit_occurrence row this detail belongs to.
with con:
    insert('visit_detail', {
        "person_id": 4,
        "visit_occurrence_id": 1,
        "visit_detail_concept_id": 32037, # "Intensive Care"
        "visit_detail_start_date": "1990-01-01",
        "visit_detail_end_date": "1990-01-15",
        "visit_detail_type_concept_id": 32817
    })

In [45]:
# Record a drug exposure for the test patient. `with con:` commits on success
# and rolls back if the insert raises, keeping the connection usable afterwards.
with con:
    insert('drug_exposure', {
        "person_id": 4,
        "drug_concept_id": 1315865, # "Fondaparinux"
        "drug_exposure_start_date": "1990-01-01",
        "drug_exposure_end_date": "1990-01-15",
        "quantity": 75,
        "drug_type_concept_id": 32817
    })

# Concepts

In [8]:
import requests
import pandas as pd

In [10]:
def _concept(concept_id, domain, name, **extra):
    """Build one concept record; `extra` holds the optional per-domain fields."""
    return {"id": concept_id, "domain": domain, "name": name, **extra}


# Concepts used in this notebook, in a fixed order (the DataFrame index follows
# it). Trailing comments keep the author's notes on typical values/thresholds.
concept_records = [
    _concept(37311061, "Condition", "COVID-19"),
    _concept(444247, "Condition", "Venous Thrombosis"),
    _concept(4009307, "Condition", "Heparin-induced thrombocytopenia with thrombosis"),
    _concept(4169185, "Observation", "Allergy to heparin"),
    _concept(4170358, "Observation", "Allergy to heparinoid"),
    _concept(432870, "Condition", "Thrombocytopenic disorder"),
    _concept(440417, "Condition", "Pulmonary embolism"),
    _concept(4195694, "Condition", "Acute respiratory distress syndrome"),
    # Measurements: unit, value range and number of measurements per day.
    _concept(3048530, "Measurement", "Fibrin D-dimer DDU [Mass/volume] in Platelet poor plasma",
             unit_concept_id=8842, range_low=0, range_high=2.5, freq_per_day=2),  # >= 2 mg/l
    _concept(3013466, "Measurement", "aPTT in Blood by Coagulation assay",
             unit_concept_id=8555, range_low=0, range_high=70, freq_per_day=1),  # 50 s
    _concept(3029943, "Measurement", "Horowitz index in Arterial blood",
             unit_concept_id=8876, range_low=0, range_high=200, freq_per_day=8),  # < 150 mmHg
    _concept(3020716, "Measurement", "Inhaled oxygen concentration",
             unit_concept_id=8554, range_low=0, range_high=1.0, freq_per_day=30),  # 0.3, ..., 0.9
    _concept(3017878, "Measurement", "Tidal volume.spontaneous+mechanical/Body weight [Volume/mass] --on ventilator",
             unit_concept_id=9571, range_low=0, range_high=8, freq_per_day=30),  # 6 ml/kg
    # NOTE(review): range_high is 2.5 although the note mentions 30 cm[H2O] - confirm.
    _concept(21490650, "Measurement", "Pressure max Respiratory system airway --during inspiration",
             unit_concept_id=44777590, range_low=0, range_high=2.5, freq_per_day=40),  # 30 cm[H2O]
    _concept(21490855, "Measurement", "PEEP Respiratory system --on ventilator",
             unit_concept_id=44777590, range_low=0, range_high=20, freq_per_day=2),  # 5 cm[H2O] ... > 18 cm[H2O]
    # Drugs: value range only.
    _concept(1367571, "Drug", "heparin", range_low=0, range_high=1),  # any (aPTT goal)
    _concept(1322207, "Drug", "argatroban", range_low=0, range_high=1),  # any (aPTT goal)
    # NOTE(review): range is 2000-4000 although the note mentions 5000 IU/d - confirm.
    _concept(1301065, "Drug", "dalteparin", range_low=2000, range_high=4000),  # 5000 IU/d
    _concept(1301025, "Drug", "enoxaparin", range_low=0, range_high=80),  # 40 mg/d
    _concept(19001014, "Drug", "nadroparin", range_low=0, range_high=8000),  # 3800 IU/d, 5700 IU/d (for body weight 70 kg)
    _concept(19016072, "Drug", "certoparin", range_low=0, range_high=6000),  # 3000 IU/d
    _concept(1315865, "Drug", "fondaparinux", range_low=0, range_high=5),  # 2.5 mg/day
    _concept(4230167, "Procedure", "Artificial respiration"),  # a couple of days? + on intensive care
    _concept(4196006, "Procedure", "Placing subject in prone position"),  # >= 16 h
    _concept(32037, "Visit", "Intensive Care"),  # a couple of days?
]

# One row per concept; fields missing from a record become NaN.
concepts = pd.DataFrame(concept_records)

concepts.iloc[1:10]

Unnamed: 0,id,domain,name,unit_concept_id,range_low,range_high,freq_per_day
1,444247,Condition,Venous Thrombosis,,,,
2,4009307,Condition,Heparin-induced thrombocytopenia with thrombosis,,,,
3,4169185,Observation,Allergy to heparin,,,,
4,4170358,Observation,Allergy to heparinoid,,,,
5,432870,Condition,Thrombocytopenic disorder,,,,
6,440417,Condition,Pulmonary embolism,,,,
7,4195694,Condition,Acute respiratory distress syndrome,,,,
8,3048530,Measurement,Fibrin D-dimer DDU [Mass/volume] in Platelet p...,8842.0,0.0,2.5,2.0
9,3013466,Measurement,aPTT in Blood by Coagulation assay,8555.0,0.0,70.0,1.0


In [85]:
# Base URL of the WebAPI instance used for vocabulary look-ups.
api_url = "http://192.168.200.128:9876/WebAPI"


def get_concept_info(concept_id, timeout=10):
    """Fetch the vocabulary record of one concept from the WebAPI.

    Args:
        concept_id: Concept id to look up.
        timeout: Seconds to wait for the server before giving up; without a
            timeout an unreachable server would block the cell indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(f"{api_url}/vocabulary/concept/{concept_id}", timeout=timeout)
    # Explicit status check instead of `assert`, which is skipped under `-O`
    # and reports no details about the failed request.
    response.raise_for_status()
    return response.json()

# Check every configured concept against the vocabulary service and adopt the
# service's name wherever the local one differs. Each assertion names the
# offending concept so a failure can be traced without re-running by hand.
for idx, row in concepts.iterrows():
    c = get_concept_info(row["id"])
    label = f"concept {row['id']} ({row['name']})"
    assert c["STANDARD_CONCEPT"] == "S", (
        f"{label}: STANDARD_CONCEPT is {c['STANDARD_CONCEPT']!r}, expected 'S'"
    )
    assert c["INVALID_REASON"] == "V", (
        f"{label}: INVALID_REASON is {c['INVALID_REASON']!r}, expected 'V'"
    )
    assert c["DOMAIN_ID"] == row["domain"], (
        f"{label}: DOMAIN_ID is {c['DOMAIN_ID']!r}, expected {row['domain']!r}"
    )
    if c["CONCEPT_NAME"] != row["name"]:
        print(f"Updating name: {row['name']} --> {c['CONCEPT_NAME']}")
        # `row` is a copy, so write the new name back through the frame itself.
        concepts.loc[idx, "name"] = c["CONCEPT_NAME"]


Updating name: Venous Thrombosis --> Venous thrombosis


In [86]:
# Display the concept table, including any names replaced by the validation loop.
concepts

Unnamed: 0,id,domain,name,unit_concept_id,range_low,range_high,freq_per_day
0,37311061,Condition,COVID-19,,,,
1,444247,Condition,Venous thrombosis,,,,
2,4009307,Condition,Heparin-induced thrombocytopenia with thrombosis,,,,
3,4169185,Observation,Allergy to heparin,,,,
4,4170358,Observation,Allergy to heparinoid,,,,
5,432870,Condition,Thrombocytopenic disorder,,,,
6,440417,Condition,Pulmonary embolism,,,,
7,4195694,Condition,Acute respiratory distress syndrome,,,,
8,3048530,Measurement,Fibrin D-dimer DDU [Mass/volume] in Platelet p...,8842.0,0.0,2.5,2.0
9,3013466,Measurement,aPTT in Blood by Coagulation assay,8555.0,0.0,70.0,1.0


In [None]:
# Create patient

# create visits (intensive care, non intensive care)

# add conditions rand(yes/no), timestamp within visits

# add observations rand(yes/no), timestamp within visits

# add measurements --> define range, distribution function

# add procedure --> within visit, duration random (define range)

# add drugs (rand(yes/no) per patient, then dosage (define range))