In [12]:
import json
import os

import pandas as pd
from pycarrot import ConceptMapper

# Configure the concept mapper with the OMOP database connection settings.
# Values are read from the environment so real credentials never have to be
# committed with the notebook; the fallbacks are the local test-instance values
# this notebook was originally written against.
cmapper = ConceptMapper(
    username=os.environ.get("OMOP_DB_USER", "admin"),
    password=os.environ.get("OMOP_DB_PASSWORD", "test!"),
    hostname=os.environ.get("OMOP_DB_HOST", "localhost"),
    port=os.environ.get("OMOP_DB_PORT", "5432"),
    database_name=os.environ.get("OMOP_DB_NAME", "omop"),
)
cmapper

<pycarrot.concept_mapper.ConceptMapper at 0x12748c160>

In [13]:
# sheet_name=None asks pandas for every sheet in the workbook, returned as a
# dict of DataFrames keyed by sheet name.
df_smr01 = pd.read_excel(
    "./ScanReports/SMR01_ScanReport.xlsx",
    sheet_name=None,
)

In [14]:
# Hand the SMR01 scan report (all sheets) to the mapper.
# NOTE(review): presumably the map() calls below read their source values from
# this report — confirm against the pycarrot ConceptMapper implementation.
cmapper.set_scan_report(df_smr01)

In [15]:
# Explicit term map for the "sex" field: source codes "1"/"2" become concept
# ids 8507/8532 (presumably the OMOP male/female gender concepts — confirm
# against the vocabulary).
sex_term_map = {"sex": {"1": "8507", "2": "8532"}}

# Positional arguments, as reflected in the returned rules: source table,
# source field (here with its term map), person-id field, date field.
demo = cmapper.map("demographics.csv", sex_term_map, "encrypted_id", "dob")
demo

{'person': {'gender_concept_id': {'source_table': 'demographics.csv',
   'source_field': 'sex',
   'term_mapping': {'1': 8507, '2': 8532}},
  'gender_source_value': {'source_table': 'demographics.csv',
   'source_field': 'sex'},
  'birth_datetime': {'source_table': 'demographics.csv',
   'source_field': 'dob'},
  'person_id': {'source_table': 'demographics.csv',
   'source_field': 'encrypted_id'}}}

In [16]:
# Build the rules for the SMR01 conditions table. Only a field name is given
# (no explicit term map), and the returned rules carry a term_mapping for it.
tab_1 = cmapper.map(
    "smr01_conditions.csv",  # source table
    "newcondition",  # source field holding the condition codes
    "encrypted_id",  # presumably the person-id field, as in the demographics mapping
    "admission_date",  # presumably the event date field — confirm
)
tab_1

{'condition_occurrence': {'condition_concept_id': {'source_table': 'smr01_conditions.csv',
   'source_field': 'newcondition',
   'term_mapping': {'A00.0': 4344638,
    'A00.9': 198677,
    'A01.0': 192819,
    'A01.1': 195460,
    'A01.2': 193953,
    'A01.3': 442291,
    'A01.4': 195177,
    'A02.0': 196328,
    'A02.1': 40493039,
    'A02.2': 141209,
    'A02.8': 133685,
    'A02.9': 133685,
    'A03.0': 4282310,
    'A03.1': 4145763,
    'A03.2': 4185509,
    'A03.3': 4321384,
    'A03.8': 440938,
    'A03.9': 201780,
    'A04.0': 4345206,
    'A04.1': 4345354,
    'A04.2': 4345355,
    'A04.3': 4345207,
    'A04.4': 192815,
    'A04.5': 198334,
    'A04.6': 46273592,
    'A04.7': 193688,
    'A04.8': 193402,
    'A04.9': 193402,
    'A05.0': 197776,
    'A05.1': 443444,
    'A05.2': 196324,
    'A05.3': 438670,
    'A05.4': 4058528,
    'A05.8': 441500,
    'A05.9': 441500,
    'A06.0': 4085622,
    'A06.1': 4032877,
    'A06.2': 76022,
    'A06.3': 4051447,
    'A06.4': 194560,
  

In [17]:
# Build the rules for the SMR01 operations table. Only a field name is given
# (no explicit term map), and the returned rules carry a term_mapping for it.
tab_2 = cmapper.map(
    "smr01_operations.csv",  # source table
    "operation_new",  # source field holding the operation codes
    "encrypted_id",  # presumably the person-id field, as in the demographics mapping
    "date_operation",  # presumably the event date field — confirm
)
tab_2

{'condition_occurrence': {'condition_concept_id': {'source_table': 'smr01_operations.csv',
   'source_field': 'operation_new',
   'term_mapping': {'A01.1': 195460,
    'A01.2': 193953,
    'A01.3': 442291,
    'A02.1': 40493039,
    'A02.2': 141209,
    'A02.8': 133685,
    'A02.9': 133685,
    'A03.1': 4145763,
    'A03.2': 4185509,
    'A03.3': 4321384,
    'A03.8': 440938,
    'A03.9': 201780,
    'A04.1': 4345354,
    'A04.2': 4345355,
    'A04.3': 4345207,
    'A04.4': 192815,
    'A04.5': 198334,
    'A04.6': 46273592,
    'A04.8': 193402,
    'A04.9': 193402,
    'A05.1': 443444,
    'A05.2': 196324,
    'A05.3': 438670,
    'A05.4': 4058528,
    'A05.8': 441500,
    'A05.9': 441500,
    'A06.1': 4032877,
    'A06.2': 76022,
    'A06.3': 4051447,
    'A06.4': 194560,
    'A06.8': 438959,
    'A06.9': 438959,
    'A07.1': 437202,
    'A07.2': 194265,
    'A07.3': 4154124,
    'A07.4': 200017,
    'A07.8': 196325,
    'A07.9': 196325,
    'A08.1': 4345352,
    'A08.2': 193120,
   

In [18]:
# Combine the per-table mappings into one CDM rule set.
#
# Several source tables can map onto the same CDM table (tab_1 and tab_2 both
# produce "condition_occurrence"), so rules are accumulated per CDM table under
# unique "<cdm_table>_<n>" keys. Merging the dicts with ** instead would keep
# only the last mapping for a repeated CDM table and silently drop the others.
cdm_rules = {}
for mapping in (demo, tab_1, tab_2):
    for cdm_table, field_rules in mapping.items():
        table_rules = cdm_rules.setdefault(cdm_table, {})
        # len() of what is already stored gives the next free index for this table.
        table_rules[f"{cdm_table}_{len(table_rules)}"] = field_rules

rules = {"cdm": cdm_rules, "metadata": {"dataset": "PHS_SMR01"}}
with open("rules_smr01.json", "w") as f:
    json.dump(rules, f, indent=6)

print(json.dumps(rules, indent=6))

{
      "cdm": {
            "person": {
                  "person_0": {
                        "gender_concept_id": {
                              "source_table": "demographics.csv",
                              "source_field": "sex",
                              "term_mapping": {
                                    "1": 8507,
                                    "2": 8532
                              }
                        },
                        "gender_source_value": {
                              "source_table": "demographics.csv",
                              "source_field": "sex"
                        },
                        "birth_datetime": {
                              "source_table": "demographics.csv",
                              "source_field": "dob"
                        },
                        "person_id": {
                              "source_table": "demographics.csv",
                              "source_field": "encrypted_id"
            

In [19]:
# Load every sheet of the deaths scan report (sheet_name=None returns a dict of
# DataFrames keyed by sheet name) and make it the mapper's current scan report.
df_deaths = pd.read_excel(
    "./ScanReports/Deaths_ScanReport.xlsx",
    sheet_name=None,
)
cmapper.set_scan_report(df_deaths)

In [20]:
# Explicit term map for the "sex" field: source codes "1"/"2" become concept
# ids 8507/8532 (presumably the OMOP male/female gender concepts — confirm
# against the vocabulary).
sex_term_map = {"sex": {"1": "8507", "2": "8532"}}

# Re-run the demographics mapping now that the deaths scan report is active.
# Positional arguments, as reflected in the returned rules: source table,
# source field (here with its term map), person-id field, date field.
demo = cmapper.map("demographics.csv", sex_term_map, "encrypted_id", "dob")
demo

{'person': {'gender_concept_id': {'source_table': 'demographics.csv',
   'source_field': 'sex',
   'term_mapping': {'1': 8507, '2': 8532}},
  'gender_source_value': {'source_table': 'demographics.csv',
   'source_field': 'sex'},
  'birth_datetime': {'source_table': 'demographics.csv',
   'source_field': 'dob'},
  'person_id': {'source_table': 'demographics.csv',
   'source_field': 'encrypted_id'}}}

In [21]:
# Map the two deaths tables. deaths_a.csv maps the "causedeath" field without an
# explicit term map; deaths_b.csv maps "death" with source code "1" sent to
# concept id 4306655.
tab_1 = cmapper.map("deaths_a.csv", "causedeath", "encrypted_id", "date_of_death")
tab_2 = cmapper.map(
    "deaths_b.csv", {"death": {"1": "4306655"}}, "encrypted_id", "date_of_death"
)

# Combine the per-table mappings into one CDM rule set.
#
# If more than one mapping targets the same CDM table, each is stored under its
# own "<cdm_table>_<n>" key. Merging the dicts with ** instead would keep only
# the last mapping for a repeated CDM table and silently drop the others.
cdm_rules = {}
for mapping in (demo, tab_1, tab_2):
    for cdm_table, field_rules in mapping.items():
        table_rules = cdm_rules.setdefault(cdm_table, {})
        # len() of what is already stored gives the next free index for this table.
        table_rules[f"{cdm_table}_{len(table_rules)}"] = field_rules

rules = {"cdm": cdm_rules, "metadata": {"dataset": "PHS_deaths"}}

with open("rules_deaths.json", "w") as f:
    json.dump(rules, f, indent=6)

print(json.dumps(rules, indent=6))



{
      "cdm": {
            "person": {
                  "person_0": {
                        "gender_concept_id": {
                              "source_table": "demographics.csv",
                              "source_field": "sex",
                              "term_mapping": {
                                    "1": 8507,
                                    "2": 8532
                              }
                        },
                        "gender_source_value": {
                              "source_table": "demographics.csv",
                              "source_field": "sex"
                        },
                        "birth_datetime": {
                              "source_table": "demographics.csv",
                              "source_field": "dob"
                        },
                        "person_id": {
                              "source_table": "demographics.csv",
                              "source_field": "encrypted_id"
            