Notebook to process the levodopa equivalent dose information from PPMI.

In [1]:
import numpy as np
import pandas as pd
from calc_ledd import calc_ledd

### Demographics
Use preprocessed demographics. Enrolled patients have an enroll date that is not null.

In [2]:
# Prefix prepended to every input file name; the empty string resolves
# the files relative to the current working directory.
path = ''
# Processed demographics created by 'Demographics_Data_Processing.ipynb'.
demographics_csv = path+'processed/demographics.csv'
demo = pd.read_csv(demographics_csv)
demo

Unnamed: 0,PATNO,ENROLL_DATE,ENROLL_AGE,ENROLL_STATUS,STATUS_DATE,COHORT,APPRDX,BIRTHDT,SEX,DOMSIDE
0,3000,02/2011,69.1,enrolled,05/2021,2.0,2.0,12/1941,0.0,
1,3001,03/2011,65.1,enrolled,09/2021,1.0,1.0,01/1946,1.0,2.0
2,3002,03/2011,67.6,enrolled,09/2021,1.0,1.0,08/1943,0.0,1.0
3,3003,04/2011,56.7,enrolled,01/2022,1.0,1.0,07/1954,0.0,2.0
4,3004,04/2011,59.4,enrolled,01/2022,2.0,2.0,11/1951,1.0,
...,...,...,...,...,...,...,...,...,...,...
2018,173487,10/2022,70.9,enrolled,10/2022,1.0,,12/1951,0.0,
2019,174364,10/2022,70.1,enrolled,10/2022,1.0,,09/1952,1.0,
2020,174811,10/2022,76.0,enrolled,10/2022,4.0,,10/1946,0.0,
2021,174855,10/2022,64.2,enrolled,10/2022,1.0,,08/1958,1.0,


### Concomitant Medications

In [3]:
# Load the LEDD concomitant-medication log and preview its first rows.
con_med_csv = path + 'LEDD_Concomitant_Medication_Log.csv'
con_med = pd.read_csv(con_med_csv)
con_med.head(4)

Unnamed: 0,REC_ID,PATNO,EVENT_ID,PAG_NAME,INFODT,SEQNO,LEDTRT,LEDDSTRMG,LEDDOSSTR,LEDDOSE,LEDDOSFRQ,STARTDT,STOPDT,LEDD,ORIG_ENTRY,LAST_UPDATE
0,501974201,3001,LOG,CMED,,18,SINEMET 25/100,,,,,12/2014,03/2015,300.0,02/2015,2020-06-25 16:04:31.0
1,413441801,3001,LOG,CMED,,14,ROTIGOTINE TRANSDERMAL SYSTEM,,,,,09/2013,10/2013,121.2,10/2013,2020-06-25 16:04:31.0
2,370113101,3001,LOG,CMED,,11,SELEGILINE,,,,,08/2012,06/2020,100.0,11/2012,2022-09-13 07:29:02.0
3,516506501,3001,LOG,CMED,,19,SINEMET 25/100,,,,,03/2015,03/2016,400.0,04/2015,2020-06-25 16:04:31.0


DISMED is a flag as to whether or not the medication is for PD. LEDD contains the levodopa equivalent daily dose (LEDD). 

The data we're interested in are PATNO, EVENT_ID, LEDD, STARTDT, STARTEST, STOPDT, STOPEST, CMTRT, CMDOSE, CMDOSU and ONGOING, restricted to PD medications and to patients who are enrolled.

In [4]:
# Keep only the medication records that carry an LEDD value.
# (An earlier version of this filter also referenced a DISMED column and an
# `enrol_pat` frame; it is no longer used.)
has_ledd = con_med.LEDD.notnull()
con_med = con_med[has_ledd]
con_med

Unnamed: 0,REC_ID,PATNO,EVENT_ID,PAG_NAME,INFODT,SEQNO,LEDTRT,LEDDSTRMG,LEDDOSSTR,LEDDOSE,LEDDOSFRQ,STARTDT,STOPDT,LEDD,ORIG_ENTRY,LAST_UPDATE
0,501974201,3001,LOG,CMED,,18,SINEMET 25/100,,,,,12/2014,03/2015,300.00,02/2015,2020-06-25 16:04:31.0
1,413441801,3001,LOG,CMED,,14,ROTIGOTINE TRANSDERMAL SYSTEM,,,,,09/2013,10/2013,121.20,10/2013,2020-06-25 16:04:31.0
2,370113101,3001,LOG,CMED,,11,SELEGILINE,,,,,08/2012,06/2020,100.00,11/2012,2022-09-13 07:29:02.0
3,516506501,3001,LOG,CMED,,19,SINEMET 25/100,,,,,03/2015,03/2016,400.00,04/2015,2020-06-25 16:04:31.0
4,733300401,3001,LOG,CMED,,31,AMANTADINE,,,,,07/2018,06/2020,200.00,03/2019,2022-09-13 07:29:02.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6654,dd2eb794-3024-4c68-9c05-aebe909ddcde,150110,SC,LEDDLOG,08/2022,1,Rytary (Carbidopa/Levodopa),95.000,23.75/95 mg PO,2.0,2.0,07/2022,,266.0,08/2022,2022-08-09 00:00:00.0
6655,e3c72762-90ec-48c4-9279-9f0787c9b621,158434,SC,LEDDLOG,06/2022,1,Azilect (Rasagiline),1.000,1 mg PO,1.0,1.0,03/2022,06/2022,100.0,07/2022,2022-07-14 00:00:00.0
6656,25745afa-1ee4-4c8a-91dd-794802b03eb4,158434,SC,LEDDLOG,06/2022,0,Pramipexole ER,0.750,0.75 mg PO,1.0,1.0,03/2022,06/2022,75.0,07/2022,2022-07-14 00:00:00.0
6657,d01cd45e-6dd8-4fcc-9da9-a395ba56cb28,158449,SC,LEDDLOG,06/2022,0,Sifrol (Pramipexole),0.125,0.125 mg PO,1.0,1.0,12/2021,01/2022,12.5,06/2022,2022-06-28 00:00:00.0


In [5]:
# Attach each patient's COHORT to the medication records. The merge uses the
# default inner join on PATNO, so records without a demographics row are dropped.
cohort_by_patient = demo[['COHORT','PATNO']]
cm = pd.merge(con_med, cohort_by_patient, on="PATNO")
len(cm)

6542

In [7]:
# Medication records with COHORT == 1 (presumably the PD cohort, going by the
# variable name), grouped per patient.
is_cohort_one = cm['COHORT'] == 1
PD_records = cm[is_cohort_one].groupby('PATNO')

In [8]:
# Number of records with a non-null STARTDT per patient, then how many
# patients share each of those record counts (left unsorted).
start_dates_per_patient = PD_records['STARTDT'].count()
counts_per_patient = start_dates_per_patient.value_counts(sort=False)
# Demographics rows restricted to COHORT == 1.
PD_pat = demo[demo['COHORT'] == 1.0]


Calculate the LEDD for each PATNO/date combination in the signature form.

In [11]:
# Load the processed signature table; its PATNO / EVENT_ID / INFODT columns
# identify the visits for which an LEDD is computed.
signature_csv = path + 'processed/pd_on.csv'
signature = pd.read_csv(signature_csv)
signature.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,PATNO,EVENT_ID,INFODT,NP1COG,NP1HALL,NP1DPRS,NP1ANXS,NP1APAT,NP1DDS,NP1RTOT,...,DATSCAN_DATE,DATSCAN_CAUDATE_R,DATSCAN_CAUDATE_L,DATSCAN_PUTAMEN_R,DATSCAN_PUTAMEN_L,DATSCAN_PUTAMEN_R_ANT,DATSCAN_PUTAMEN_L_ANT,DATSCAN_ANALYZED,DATSCAN_NOT_ANALYZED_REASON,DATSCAN_OTHER_SPECIFY
0,3001,BL,03/2011,0,0,0,0,0,0,0.0,...,,,,,,,,,,
1,3001,R17,11/2021,2,1,2,2,2,0,9.0,...,,,,,,,,,,
2,3001,R18,09/2022,2,1,1,2,3,1,10.0,...,,,,,,,,,,
3,3001,SC,02/2011,0,0,0,1,0,0,1.0,...,,,,,,,,,,
4,3001,V01,05/2011,0,0,0,0,0,0,0.0,...,,,,,,,,,,


In [12]:
# Empty result table with the output columns.
ledd_processed = pd.DataFrame(columns=['PATNO','EVENT_ID','INFODT','LEDD'])
# Parse the medication start/stop dates into datetimes.
for date_col in ('STARTDT', 'STOPDT'):
    cm[date_col] = pd.to_datetime(cm[date_col])

In [21]:
# Spot-check the medication records (with parsed dates) of a single patient.
cm.loc[cm['PATNO'] == 3001]

Unnamed: 0,REC_ID,PATNO,EVENT_ID,PAG_NAME,INFODT,SEQNO,LEDTRT,LEDDSTRMG,LEDDOSSTR,LEDDOSE,LEDDOSFRQ,STARTDT,STOPDT,LEDD,ORIG_ENTRY,LAST_UPDATE,COHORT
0,501974201,3001,LOG,CMED,,18,SINEMET 25/100,,,,,2014-12-01,2015-03-01,300.0,02/2015,2020-06-25 16:04:31.0,1.0
1,413441801,3001,LOG,CMED,,14,ROTIGOTINE TRANSDERMAL SYSTEM,,,,,2013-09-01,2013-10-01,121.2,10/2013,2020-06-25 16:04:31.0,1.0
2,370113101,3001,LOG,CMED,,11,SELEGILINE,,,,,2012-08-01,2020-06-01,100.0,11/2012,2022-09-13 07:29:02.0,1.0
3,516506501,3001,LOG,CMED,,19,SINEMET 25/100,,,,,2015-03-01,2016-03-01,400.0,04/2015,2020-06-25 16:04:31.0,1.0
4,733300401,3001,LOG,CMED,,31,AMANTADINE,,,,,2018-07-01,2020-06-01,200.0,03/2019,2022-09-13 07:29:02.0,1.0
5,468228001,3001,LOG,CMED,,15,ROTIGOTINE TRANSDERMAL SYSTEM,,,,,2013-10-01,2014-12-01,181.8,10/2014,2020-06-25 16:04:31.0,1.0
6,413441701,3001,LOG,CMED,,13,ROTIGOTINE TRANSDERMAL SYSTEM,,,,,2013-08-01,2013-09-01,60.6,10/2013,2020-06-25 16:04:31.0,1.0
7,370113001,3001,LOG,CMED,,10,SELEGILINE,,,,,2012-07-01,2012-08-01,50.0,11/2012,2020-06-25 16:04:31.0,1.0
8,559553601,3001,LOG,CMED,,20,SINEMET 25/100,,,,,2016-03-01,2020-06-01,600.0,03/2016,2022-09-13 07:29:02.0,1.0
9,6dda354e-db2b-4ecf-a057-a2044548395e,3001,V18,LEDDLOG,07/2022,1,Carbidopa/Levodopa CR,100.0,25/100 mg PO,1.0,7.0,2017-01-01,NaT,490.0,12/2021,2022-01-06 00:00:00.0,1.0


In [15]:
# Compute the LEDD for every row of the signature table.
# Records are collected in a plain list and converted to a DataFrame once at
# the end: DataFrame.append copied the whole frame on each call and was
# removed in pandas 2.0, so the row-by-row version no longer runs there.
ledd_rows = []
visit_keys = signature[['PATNO', 'EVENT_ID', 'INFODT']]
for pn, ei, idt in visit_keys.itertuples(index=False, name=None):
    # calc_ledd is given the visit date as a parsed timestamp; the output
    # keeps INFODT exactly as it appears in the signature table.
    ledd = calc_ledd(cm, pn, pd.to_datetime(idt))
    ledd_rows.append({'PATNO': pn, 'EVENT_ID': ei, 'INFODT': idt, 'LEDD': ledd})
# Passing `columns` keeps the expected layout even when `signature` is empty.
ledd_processed = pd.DataFrame(ledd_rows, columns=['PATNO', 'EVENT_ID', 'INFODT', 'LEDD'])

In [17]:
# Remove rows that are exact duplicates of an earlier row.
ledd_processed = ledd_processed.drop_duplicates()

In [None]:
# Write the per-visit LEDD table to the current working directory
# (the DataFrame index is written as the first, unnamed column).
ledd_processed.to_csv(path_or_buf='ledd.csv')