# Exploratory Data Analysis of CAPACITY mapping
This notebook explores the mapping created by Louise Ferrera in excel to see how we can use this for building our conversion module from ZIB to the CAPACITY registry.

In [2]:
import pandas as pd
import xlrd
import itertools

In [3]:
mapping_path = '../../capacity_mapping.xlsx'
mapping_sheet_name = 'CAPACITY - required variables'

# The field 'Variable/Field Name' contains the formal field name in REDCap
field_name_col = 'Variable/Field Name'

In [4]:
mapping_df = pd.read_excel(mapping_path, sheet_name=mapping_sheet_name)
display(mapping_df)

Unnamed: 0,Variable/Field Name,Field Label,Fiel Attributes,Zib,Zib concept,Zib codesystem,EHR,Snomed,COD016-VEKT code,ICD-10,ATC
0,participant_identification_number_required,Participant Identification Number Pin,,,,,,,,,
1,subjid,Participant Identification Number (PIN),text,Patient,PatientIdentificationNumber,,pseudonym,,,,
2,studyid,STUDY ID,text,,,,,,,,
3,participant_identification_number_pin_required...,Comple?,"0-Incomplete,1-Unverified,2-Complete",,,,,,,,
4,inclusion_criteria_required,Inclusion Criteria,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
145,carhist_tr_date,(just if the ﬁeld: [carhist_valve(8)] = '1') t...,Text (date),Problem,ProblemStartDate,,,,,,
146,carhist_valve_treat,(just if the ﬁeld: [carhist(4)] = '1') Valvula...,1- No intervention performed; 2-Intervention p...,Procedure,,,,,,,
147,carhist_other,(just if the ﬁeld: [carhist(99)] = '1') Specif...,Text,Problem,ProblemName,,,,,,
148,carhist_other_2,(just if the ﬁeld: [carhist(99)] = '1') Specif...,Text (date),Problem,ProblemStartDate,,,,,,


In [5]:
# Focus on cardiovascular medication
carmed_mapping = mapping_df[(mapping_df[field_name_col].str.startswith('carmed'))]
display(carmed_mapping)

Unnamed: 0,Variable/Field Name,Field Label,Fiel Attributes,Zib,Zib concept,Zib codesystem,EHR,Snomed,COD016-VEKT code,ICD-10,ATC
78,carmed_examples,Cardiovascular medication classiﬁcation system...,descriptive,,,,,,,,
79,carmed,Cardiovascular medications,"0-None; 1-Betablocker; 3-Antiarrhytmic drugs, ...",FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,373254001 | Substance with beta adrenergic rec...,,,C07A|Beta blocking agents|C01B|ANTIARRHYTHMICS...
80,carmed_bb,(just if the field:[carmed(1)] = '1')Betablockers,1- Atenolol; 2-Bisoprolol;3-Carvedilol;4-Labet...,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,387506000 | Atenolol (substance) |386868003 | ...,,,C07AB03|Atenolol|C07AB07|Bisoprolol|C07AG02|Ca...
81,carmed_bb_spec,(just if the ﬁeld[carmed_bb(99)] = '1') Specif...,text,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,,,,
82,carmed_antiarrh,(just if the ﬁeld [carmed(3)] = '1') Antiarrhy...,"1- Class I; 3-Class III; 4- Class IV, 99- other",FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,373260001 | Class I antiarrhythmic agent (subs...,,,"C01BA|Antiarrhythmics, class Ia|C01BB|Antiarrh..."
83,carmed_arrhyth_class1,(just if the ﬁeld: [carmed_antiarrh(1)] = '1')...,1- 1a-Quinidine; 2- 1a-Disopyramide; 3- 1a-Pro...,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,372697008 | Quinidine (substance) |2844004 | D...,,,C01BA01|Quinidine |C01BA03|Disopyramide|C01BA0...
84,carmed_arrhyth_class3,(just if the ﬁeld: [carmed_antiarrh(3)] = '1')...,1- Amiodarone; 2- Sotalol; 3- Ibutilide; 4- Do...,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,72821002 | Amiodarone (substance) |372911006 |...,,,C01BD01|amiodarone|C07AA07|sotalol|C01BD05|ibu...
85,carmed_arrhyth_class4,(just if the ﬁeld: [carmed_antiarrh(4)] = '1')...,1-Diltiazem;2-Verapamil,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,72793000 | Diltiazem (substance) |72754009 | V...,,,C08DB01|diltiazem|C08DA01|verapamil
86,carmed_antiarrh_spec,( just if the field [carmed_antiarrh(99)]=1) S...,text,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,,,,
87,carmed_diuretic,(just if the ﬁeld: [carmed(5)] = '1')Diuretics,1-Bumetanide(Burinex);2-Chlortalidon;3-Furosem...,FarmaceuticalProduct,AgreedMedicine::FarmaceuticalProduct,Snomed CT/ATC,,387498005 | Bumetanide (substance) |387324004 ...,,,C03CA02|bumetanide|C03BA04|chlortalidone|C03CA...


In [6]:
carmed_mapping[[field_name_col, 'ATC']]

Unnamed: 0,Variable/Field Name,ATC
78,carmed_examples,
79,carmed,C07A|Beta blocking agents|C01B|ANTIARRHYTHMICS...
80,carmed_bb,C07AB03|Atenolol|C07AB07|Bisoprolol|C07AG02|Ca...
81,carmed_bb_spec,
82,carmed_antiarrh,"C01BA|Antiarrhythmics, class Ia|C01BB|Antiarrh..."
83,carmed_arrhyth_class1,C01BA01|Quinidine |C01BA03|Disopyramide|C01BA0...
84,carmed_arrhyth_class3,C01BD01|amiodarone|C07AA07|sotalol|C01BD05|ibu...
85,carmed_arrhyth_class4,C08DB01|diltiazem|C08DA01|verapamil
86,carmed_antiarrh_spec,
87,carmed_diuretic,C03CA02|bumetanide|C03BA04|chlortalidone|C03CA...


There is a lot of information embedded int the ATC column. For the fields with multiple choice answers every ATC code is paired with its official name. We need to unpack this information and isolate the ATC codes.

In [7]:
atc_mappings = carmed_mapping['ATC'][carmed_mapping['ATC'].notnull()]

# Turn pipe-separated data into list
atc_mappings = atc_mappings.str.split('|')

# Remove whitespace
atc_mappings = atc_mappings.apply(lambda x: [y for y in x if y and not y.isspace()])

# Pair codes with their official name
atc_mappings = atc_mappings.apply(lambda x: [tuple(x[i:i+2]) for i in range(0, len(x), 2)])

display(atc_mappings)
atc_codes = atc_mappings.apply(lambda x: [t[0] for t in x])
display(atc_codes)

79     [(C07A, Beta blocking agents), (C01B, ANTIARRH...
80     [(C07AB03, Atenolol), (C07AB07, Bisoprolol), (...
82     [(C01BA, Antiarrhythmics, class Ia), (C01BB, A...
83     [(C01BA01, Quinidine ), (C01BA03, Disopyramide...
84     [(C01BD01, amiodarone), (C07AA07, sotalol), (C...
85          [(C08DB01, diltiazem), (C08DA01, verapamil)]
87     [(C03CA02, bumetanide), (C03BA04, chlortalidon...
89     [(C08CA01, amlodipine), (C08CA12, barnidipine)...
91     [(C08CA05, captopril), (C09AA02,  enalapril), ...
95     [(C09CA06, Candesartan), (C09CA04, irbesartan)...
99     [(C03DA01, spironolactone), (C03DA04, eplereno...
100    [(G04BE10, avanafil), (G04BE03, sildenafil), (...
101    [(B01AC06, acetylsalicylic acid), (B01AC08,  c...
103    [(B01AA07, acenocoumarol), (B01AA04, phenproco...
104    [(B01AF02, apixaban), (B01AE07, dabigatran ete...
106    [(C10AA05, Atorvastatin), (C10AB08, ciprofibra...
108    [(A10BB09, gliclazide), (A10BB12, glimepiride)...
Name: ATC, dtype: object

79     [C07A, C01B, C01AA05, C03, C08, C09A , C09C, C...
80     [C07AB03, C07AB07, C07AG02, C07AG01, C07AB02, ...
82                     [C01BA, C01BB, C01BC, C01BD, C08]
83     [C01BA01, C01BA03, C01BA02, C01BB01, N03AB02, ...
84                  [C01BD01, C07AA07, C01BD05, C01BD04]
85                                    [C08DB01, C08DA01]
87                  [C03CA02, C03BA04, C03CA01, C03AA03]
89                           [C08CA01, C08CA12, C08CA05]
91                  [C08CA05, C09AA02, C09AA03, C09AA04]
95                  [C09CA06, C09CA04, C09CA01, C09CA03]
99                                    [C03DA01, C03DA04]
100                 [G04BE10, G04BE03, G04BE08, G04BE09]
101    [B01AC06, B01AC08, B01AC04, B01AC07, B01AC22, ...
103                                   [B01AA07, B01AA04]
104                 [B01AF02, B01AE07, B01AF03, B01AF01]
106    [C10AA05, C10AB08, C10AX13, C10AX09, C10AB04, ...
108                   [A10BB09, A10BB12, A10BA02, ?????]
Name: ATC, dtype: object