## Dependency Set-Up

In [1]:
using DrWatson
@quickactivate "CompositionalMLStudy"

using ACSets
using Catlab.CategoricalAlgebra
using DataFrames

import DBInterface:
    execute

import DrWatson:
  datadir

import IPUMS:
  load_ipums_extract,
  parse_ddi

import OMOPCDMCohortCreator:
    GenerateDatabaseDetails,
    GenerateTables

import SQLite:
    DB

# Data Set-Up

## OMOP CDM Data Directory

In [2]:
# OMOP CDM Data Directory
OMOPCDM_DIR = datadir("exp_raw", "OMOPCDM")

# OMOP CDM Example Data 
DATABASE_FILE = "eunomia.sqlite"

"eunomia.sqlite"

## IPUMS Data Directory

In [3]:
# IPUMS Data Directory
IPUMS_DIR = datadir("exp_raw", "IPUMS")

# DDI Data Dictionary
DDI_FILE = "cps_00097.xml"

# IPUMS CPS Example Data 
DAT_FILE = "cps_00097.dat"

"cps_00097.dat"

## Load Data

In [4]:
omop_db_conn = DB(joinpath(OMOPCDM_DIR, DATABASE_FILE))
GenerateDatabaseDetails(:sqlite, "omop");
omop_tables = GenerateTables(omop_db_conn, exported = true);

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mGlobal database dialect set to: sqlite
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mGlobal schema set to: omop
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mcdm_source table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mlocation table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mcare_site table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mconcept_ancestor table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mconcept table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mconcept_relationship table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mcondition_era table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mcost table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mdose_era table generated internally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mdrug_strength table generated inter

In [5]:
ddi = parse_ddi(joinpath(IPUMS_DIR, DDI_FILE));
ipums = load_ipums_extract(ddi, joinpath(IPUMS_DIR, DAT_FILE));

# ACSet-ifying Data

## OMOP CDM ACSets

### Person Table

In [6]:
omop_tables[:person].columns

OrderedCollections.OrderedDict{Symbol, FunSQL.SQLColumn} with 18 entries:
  :person_id                   => SQLColumn(:person_id)
  :gender_concept_id           => SQLColumn(:gender_concept_id)
  :year_of_birth               => SQLColumn(:year_of_birth)
  :month_of_birth              => SQLColumn(:month_of_birth)
  :day_of_birth                => SQLColumn(:day_of_birth)
  :birth_datetime              => SQLColumn(:birth_datetime)
  :race_concept_id             => SQLColumn(:race_concept_id)
  :ethnicity_concept_id        => SQLColumn(:ethnicity_concept_id)
  :location_id                 => SQLColumn(:location_id)
  :provider_id                 => SQLColumn(:provider_id)
  :care_site_id                => SQLColumn(:care_site_id)
  :person_source_value         => SQLColumn(:person_source_value)
  :gender_source_value         => SQLColumn(:gender_source_value)
  :gender_source_concept_id    => SQLColumn(:gender_source_concept_id)
  :race_source_value           => SQLColumn(:race_source_v

In [None]:
@present SchOMOPCDM(FreeSchema) begin
  (Numerical, Label)::AttrType

  Person::Ob
  person_id::Attr(Person, Numerical) 
  person_source_value::Attr(Person, Label)

  gender_concept_id::Attr(Person, Numerical) 
  gender_source_value::Attr(Person, Label)
  gender_source_concept_id::Attr(Person, Numerical)

  year_of_birth::Attr(Person, Numerical)
  month_of_birth::Attr(Person, Numerical)
  day_of_birth::Attr(Person, Numerical)
  birth_datetime::Attr(Person, Numerical)

  race_concept_id::Attr(Person, Numerical)
  race_source_value::Attr(Person, Label)
  race_source_concept_id::Attr(Person, Numerical)

  ethnicity_concept_id::Attr(Person, Numerical)
  ethnicity_source_value::Attr(Person, Label)
  ethnicity_source_concept_id::Attr(Person, Numerical)

  location_id::Attr(Person, Numerical)
  provider_id::Attr(Person, Numerical)
  care_site_id::Attr(Person, Numerical)

  Death::Ob
  death::Hom(Death, Person)
  # person_id::Attr(Person, Numerical)
  death_date::Attr(Person, Numerical)
  death_datetime::Attr(Person, Numerical)
  death_type_concept_id::Attr(Person, Numerical)
  cause_concept_id::Attr(Person, Numerical)
  cause_source_value::Attr(Person, Label)
  cause_source_concept_id::Attr(Person, Numerical)

  ObservationPeriod::Ob
  observation_period::Hom(ObservationPeriod, Person)
  observation_period_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  observation_period_start_date::Attr(Person, Numerical)
  observation_period_end_date::Attr(Person, Numerical)
  period_type_concept_id::Attr(Person, Numerical)

  VisitOccurrence::Ob
  visit_occurrence::Hom(VisitOccurrence, Person)
  visit_occurrence_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  visit_concept_id::Attr(Person, Numerical)
  visit_start_date::Attr(Person, Numerical)
  visit_start_datetime::Attr(Person, Numerical)
  visit_end_date::Attr(Person, Numerical)
  visit_end_datetime::Attr(Person, Numerical)
  visit_type_concept_id::Attr(Person, Numerical)
  # provider_id::Attr(Person, Numerical)
  # care_site_id::Attr(Person, Numerical)
  visit_source_value::Attr(Person, Label)
  visit_source_concept_id::Attr(Person, Numerical)
  admitted_from_concept_id::Attr(Person, Numerical)
  admitted_from_source_value::Attr(Person, Label)
  discharged_to_concept_id::Attr(Person, Numerical)
  discharged_to_source_value::Attr(Person, Label)
  preceding_visit_occurrence_id::Attr(Person, Numerical)

  ConditionOccurrence::Ob
  condition_occurrence::Hom(ConditionOccurrence, Person)
  condition_occurrence_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  condition_concept_id::Attr(Person, Numerical)
  condition_start_date::Attr(Person, Numerical)
  condition_start_datetime::Attr(Person, Numerical)
  condition_end_date::Attr(Person, Numerical)
  condition_end_datetime::Attr(Person, Numerical)
  condition_type_concept_id::Attr(Person, Numerical)
  condition_status_concept_id::Attr(Person, Numerical)
  stop_reason::Attr(Person, Label)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  condition_source_value::Attr(Person, Label)
  condition_source_concept_id::Attr(Person, Numerical)
  condition_status_source_value::Attr(Person, Label)

  DrugExposure::Ob
  drug_exposure::Hom(DrugExposure, Person)
  drug_exposure_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  drug_concept_id::Attr(Person, Numerical)
  drug_exposure_start_date::Attr(Person, Numerical)
  drug_exposure_start_datetime::Attr(Person, Numerical)
  drug_exposure_end_date::Attr(Person, Numerical)
  drug_exposure_end_datetime::Attr(Person, Numerical)
  verbatim_end_date::Attr(Person, Numerical)
  drug_type_concept_id::Attr(Person, Numerical)
  stop_reason::Attr(Person, Label)
  refills::Attr(Person, Numerical)
  quantity::Attr(Person, Numerical)
  days_supply::Attr(Person, Numerical)
  sig::Attr(Person, Label)
  route_concept_id::Attr(Person, Numerical)
  lot_number::Attr(Person, Label)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  drug_source_value::Attr(Person, Label)
  drug_source_concept_id::Attr(Person, Numerical)
  route_source_value::Attr(Person, Label)
  dose_unit_source_value::Attr(Person, Numerical)

  ProcedureOccurrence::Ob
  procedure_occurrence::Hom(ProcedureOccurrence, Person)
  procedure_occurrence_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  procedure_concept_id::Attr(Person, Numerical)
  procedure_date::Attr(Person, Numerical)
  procedure_datetime::Attr(Person, Numerical)
  procedure_end_date::Attr(Person, Numerical)
  procedure_end_datetime::Attr(Person, Numerical)
  procedure_type_concept_id::Attr(Person, Numerical)
  modifier_concept_id::Attr(Person, Numerical)
  quantity::Attr(Person, Numerical)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  procedure_source_value::Attr(Person, Label)
  procedure_source_concept_id::Attr(Person, Numerical)
  modifier_source_value::Attr(Person, Label)

  DeviceExposure::Ob
  device_occurrence::Hom(DeviceExposure, Person)
  device_exposure_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  device_concept_id::Attr(Person, Numerical)
  device_exposure_start_date::Attr(Person, Numerical)
  device_exposure_start_datetime::Attr(Person, Numerical)
  device_exposure_end_date::Attr(Person, Numerical)
  device_exposure_end_datetime::Attr(Person, Numerical)
  device_type_concept_id::Attr(Person, Numerical)
  unique_device_id::Attr(Person, Label)
  production_id::Attr(Person, Label)
  quantity::Attr(Person, Numerical)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  device_source_value::Attr(Person, Label)
  device_source_concept_id::Attr(Person, Numerical)
  unit_concept_id::Attr(Person, Numerical)
  unit_source_value::Attr(Person, Label)
  unit_source_concept_id::Attr(Person, Numerical)

  Measurement::Ob
  measurement::Hom(Measurement, Person)
  measurement_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  measurement_concept_id::Attr(Person, Numerical)
  measurement_date::Attr(Person, Numerical)
  measurement_datetime::Attr(Person, Numerical)
  measurement_time::Attr(Person, Label)
  measurement_type_concept_id::Attr(Person, Numerical)
  operator_concept_id::Attr(Person, Numerical)
  value_as_number::Attr(Person, Numerical)
  value_as_concept_id::Attr(Person, Numerical)
  unit_concept_id::Attr(Person, Numerical)
  range_low::Attr(Person, Numerical)
  range_high::Attr(Person, Numerical)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  measurement_source_value::Attr(Person, Label)
  measurement_source_concept_id::Attr(Person, Numerical)
  unit_source_value::Attr(Person, Label)
  unit_source_concept_id::Attr(Person, Numerical)
  value_source_value::Attr(Person, Label)
  measurement_event_id::Attr(Person, Numerical)
  meas_event_field_concept_id::Attr(Person, Numerical)

  Observation::Ob
  observation::Hom(Observation, Person)
  observation_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  observation_concept_id::Attr(Person, Numerical)
  observation_date::Attr(Person, Numerical)
  observation_datetime::Attr(Person, Numerical)
  observation_type_concept_id::Attr(Person, Numerical)
  value_as_number::Attr(Person, Numerical)
  value_as_string::Attr(Person, Label)
  value_as_concept_id::Attr(Person, Numerical)
  qualifier_concept_id::Attr(Person, Numerical)
  unit_concept_id::Attr(Person, Numerical)
  # provider_id::Attr(Person, Numerical)
  # visit_occurrence_id::Attr(Person, Numerical)
  # visit_detail_id::Attr(Person, Numerical)
  observation_source_value::Attr(Person, Label)
  observation_source_concept_id::Attr(Person, Numerical)
  unit_source_value::Attr(Person, Label)
  qualifier_source_value::Attr(Person, Label)
  value_source_value::Attr(Person, Label)
  observation_event_id::Attr(Person, Numerical)
  obs_event_field_concept_id::Attr(Person, Numerical)

  Note::Ob
  note::Hom(Note, Person)
  note_id::Attr(Person, Numerical)
  person_id::Attr(Person, Numerical)
  note_date::Attr(Person, Numerical)
  note_datetime::Attr(Person, Numerical)
  note_type_concept_id::Attr(Person, Numerical)
  note_class_concept_id::Attr(Person, Numerical)
  note_title::Attr(Person, Label)
  note_text::Attr(Person, Label)
  encoding_concept_id::Attr(Person, Numerical)
  language_concept_id::Attr(Person, Numerical)
  provider_id::Attr(Person, Numerical)
  visit_occurrence_id::Attr(Person, Numerical)
  visit_detail_id::Attr(Person, Numerical)
  note_source_value::Attr(Person, Label)
  note_event_id::Attr(Person, Numerical)
  note_event_field_concept_id::Attr(Person, Numerical)

  Episode::Ob
  episode::Hom(Episode, Person)
  episode_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  episode_concept_id::Attr(Person, Numerical)
  episode_start_date::Attr(Person, Numerical)
  episode_start_datetime::Attr(Person, Numerical)
  episode_end_date::Attr(Person, Numerical)
  episode_end_datetime::Attr(Person, Numerical)
  episode_parent_id::Attr(Person, Numerical)
  episode_number::Attr(Person, Numerical)
  episode_object_concept_id::Attr(Person, Numerical)
  episode_type_concept_id::Attr(Person, Numerical)
  episode_source_value::Attr(Person, Label)
  episode_source_concept_id::Attr(Person, Numerical)

  Specimen::Ob
  specimen::Hom(Specimen, Person)
  specimen_id::Attr(Person, Numerical)
  # person_id::Attr(Person, Numerical)
  specimen_concept_id::Attr(Person, Numerical)
  specimen_type_concept_id::Attr(Person, Numerical)
  specimen_date::Attr(Person, Numerical)
  specimen_datetime::Attr(Person, Numerical)
  quantity::Attr(Person, Numerical)
  unit_concept_id::Attr(Person, Numerical)
  anatomic_site_concept_id::Attr(Person, Numerical)
  disease_status_concept_id::Attr(Person, Numerical)
  specimen_source_id::Attr(Person, Label)
  specimen_source_value::Attr(Person, Label)
  unit_source_value::Attr(Person, Label)
  anatomic_site_source_value::Attr(Person, Label)
  disease_status_source_value::Attr(Person, Label)

  VisitDetail::Ob
  visit_detail::Hom(VisitDetail, Person)
  visit_detail_id::Attr(Person, Numerical)
  person_id::Attr(Person, Numerical)
  visit_detail_concept_id::Attr(Person, Numerical)
  visit_detail_start_date::Attr(Person, Numerical)
  visit_detail_start_datetime::Attr(Person, Numerical)
  visit_detail_end_date::Attr(Person, Numerical)
  visit_detail_end_datetime::Attr(Person, Numerical)
  visit_detail_type_concept_id::Attr(Person, Numerical)
  provider_id::Attr(Person, Numerical)
  care_site_id::Attr(Person, Numerical)
  visit_detail_source_value::Attr(Person, Label)
  visit_detail_source_concept_id::Attr(Person, Numerical)
  admitted_from_concept_id::Attr(Person, Numerical)
  admitted_from_source_value::Attr(Person, Label)
  discharged_to_source_value::Attr(Person, Label)
  discharged_to_concept_id::Attr(Person, Numerical)
  preceding_visit_detail_id::Attr(Person, Numerical)
  parent_visit_detail_id::Attr(Person, Numerical)
  visit_occurrence_id::Attr(Person, Numerical)

end

In [7]:
@acset_type OMOPCDMData(SchOMOPCDM, 
    index = [:death, :observation_period],
    unique_index = [:person_id]
)

OMOPCDMData

In [8]:
person_df = execute(omop_db_conn, "SELECT * FROM person;") |> DataFrame
death_df = execute(omop_db_conn, "SELECT * FROM death;") |> DataFrame
observation_period_df = execute(omop_db_conn, "SELECT * FROM observation_period;") |> DataFrame
omopcdm = OMOPCDMData{Any, Any}()

add_parts!(omopcdm, :Person, nrow(person_df), person_id = person_df.person_id, person_source_value = person_df.person_source_value, gender_concept_id = person_df.gender_concept_id, gender_source_value = person_df.gender_source_value, gender_source_concept_id = person_df.gender_source_concept_id, year_of_birth = person_df.year_of_birth, month_of_birth = person_df.month_of_birth, day_of_birth = person_df.day_of_birth, birth_datetime = person_df.birth_datetime, race_concept_id = person_df.race_concept_id, race_source_value = person_df.race_source_value, race_source_concept_id = person_df.race_source_concept_id, ethnicity_concept_id = person_df.ethnicity_concept_id, ethnicity_source_value = person_df.ethnicity_source_value, ethnicity_source_concept_id = person_df.ethnicity_source_concept_id, location_id = person_df.location_id, provider_id = person_df.provider_id, care_site_id = person_df.care_site_id)
add_parts!(omopcdm, :Death, nrow(death_df), death_date = death_df.death_date, death_datetime = death_df.death_datetime, death_type_concept_id = death_df.death_type_concept_id, cause_concept_id = death_df.cause_concept_id, cause_source_value = death_df.cause_source_value, cause_source_concept_id = death_df.cause_source_concept_id)
add_parts!(omopcdm, :ObservationPeriod, nrow(observation_period_df), observation_period_id = observation_period_df.observation_period_id, observation_period_start_date = observation_period_df.observation_period_start_date, observation_period_end_date = observation_period_df.observation_period_end_date, period_type_concept_id = observation_period_df.period_type_concept_id)

1:5343

In [9]:
omopcdm

Person,person_id,person_source_value,gender_concept_id,gender_source_value,gender_source_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,race_source_value,race_source_concept_id,ethnicity_concept_id,ethnicity_source_value,ethnicity_source_concept_id,location_id,provider_id,care_site_id,death_date,death_datetime,death_type_concept_id,cause_concept_id,cause_source_value,cause_source_concept_id,observation_period_id,observation_period_start_date,observation_period_end_date,period_type_concept_id
1,6.0,001f4a87-70d0-435c-a4b9-1425f6928d33,8532.0,F,0.0,1963.0,12.0,31.0,-189475000.0,8516.0,black,0.0,0.0,west_indian,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,6.0,-189475000.0,1170720000.0,44814700.0
2,123.0,052d9254-80e8-428f-b8b6-69518b0ef3f3,8507.0,M,0.0,1950.0,4.0,12.0,-622426000.0,8527.0,white,0.0,0.0,italian,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,13.0,1240700000.0,1555200000.0,44814700.0
3,129.0,054d32d5-904f-4df4-846b-8c08d165b4e9,8507.0,M,0.0,1974.0,10.0,7.0,150336000.0,8527.0,white,0.0,0.0,polish,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,27.0,1012350000.0,1542760000.0,44814700.0
4,16.0,00444703-f2c9-45c9-a247-f6317a43a929,8532.0,F,0.0,1971.0,10.0,13.0,56160000.0,8527.0,white,0.0,0.0,american,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,16.0,56246400.0,1509580000.0,44814700.0
5,65.0,02a3dad9-f9d5-42fb-8074-c16d45b4f5c8,8532.0,F,0.0,1967.0,3.0,31.0,-87004800.0,8516.0,black,0.0,0.0,dominican,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,55.0,1243640000.0,1553300000.0,44814700.0
6,74.0,02fbf1be-29b7-4da8-8bbd-14c7433f843f,8532.0,F,0.0,1972.0,1.0,5.0,63417600.0,8527.0,white,0.0,0.0,english,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,60.0,659146000.0,1548200000.0,44814700.0
7,42.0,0177d2e0-98f5-4f3d-bcfd-497b7a07b3f8,8532.0,F,0.0,1909.0,11.0,2.0,-1898640000.0,8527.0,white,0.0,0.0,irish,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,42.0,-1898550000.0,1552440000.0,44814700.0
8,187.0,07a1e14d-73ed-4d3a-9a39-d729745773fa,8507.0,M,0.0,1945.0,7.0,23.0,-771379000.0,8527.0,white,0.0,0.0,irish,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,33.0,516240000.0,1536540000.0,44814700.0
9,18.0,0084b0fe-e30f-4930-b6d1-5e1eff4b7dea,8532.0,F,0.0,1965.0,11.0,17.0,-130118000.0,8527.0,white,0.0,0.0,english,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,18.0,-130118000.0,1541550000.0,44814700.0
10,111.0,0478d6b3-bdb3-4574-9b93-cf448d725b84,8532.0,F,0.0,1975.0,5.0,2.0,168221000.0,8527.0,white,0.0,0.0,english,0.0,missing,missing,missing,nothing,nothing,nothing,nothing,nothing,nothing,25.0,1174180000.0,1554600000.0,44814700.0

ObservationPeriod,observation_period
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0
10,0
