In [1]:
import pandas as pd
import requests
import json
from pandas import json_normalize

In [2]:
# Collecting subjectid of ae
ae = 'https://pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/ae/subject/list'
df_ae = requests.get(ae)
df_ae.json().keys()
df_ae=pd.DataFrame.from_dict(df_ae.json()['data'])
df_ae= df_ae.set_axis(["subid"], axis=1)

In [3]:
# Collecting subjectid of cm
cm = 'https://pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/cm/subject/list'
df_cm = requests.get(cm)
df_cm.json().keys()
df_cm=pd.DataFrame.from_dict(df_cm.json()['data'])
df_cm= df_cm.set_axis(["subid"], axis=1)

In [4]:
df_ae.shape

(728, 1)

In [5]:
df_cm.shape

(722, 1)

In [6]:
# Merging ae and cm subject id's to get the ae and cm dataset
df_list = pd.merge(df_ae, df_cm, on='subid', how='outer')

In [7]:
df_list.shape

(980, 1)

In [8]:
subid = df_list['subid'].tolist()

In [9]:
# collecting ae dataset with respect to the subjectid's
def ae_dataset():
    data_list_am = []
    for i in subid:
        ae_subid = 'https://pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/ae/subject/{}/list'.format(i)
        df_ae_subid = requests.get(ae_subid)
        df_ae_subid=json.loads(df_ae_subid.text)
        df_ae_subid=json_normalize(df_ae_subid,record_path = ['data'])
        data_list_am.append(df_ae_subid)
    data_am = pd.concat((data_list_am), ignore_index=True)
    return data_am
dfae = ae_dataset()

In [10]:
dfae.shape

(106, 88)

In [None]:
dfae.head(2)

In [12]:
# collecting cm dataset with respect to the subjectid's
def cm_subset():
    data_list_cm = []
    for i in subid:
        cm_subid = 'https://pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/cm/subject/{}/list'.format(i)
        df_cm_subid = requests.get(cm_subid)
        df_cm_subid=json.loads(df_cm_subid.text)
        df_cm_subid=json_normalize(df_cm_subid,record_path = ['data'])
        data_list_cm.append(df_cm_subid)
    data_cm = pd.concat((data_list_cm), ignore_index=True)
    return data_cm
dfcm = cm_subset()

In [13]:
# Fixing of invalid date with na
dfcm['cmstdat'] = pd.to_datetime(dfcm['cmstdat'], errors='coerce')
dfcm['cmendat'] = pd.to_datetime(dfcm['cmendat'], errors='coerce')
dfae['aestdat'] = pd.to_datetime(dfae['aestdat'], errors='coerce')
dfae['aeendat'] = pd.to_datetime(dfae['aeendat'], errors='coerce')

In [14]:
dfcm.shape

(249, 49)

In [None]:
dfcm.head(2)

## Type 1: Patients and rows for which Medication are given prior to the Adverse Events.

In [16]:
# cmstdat should start before aestdat. Comparing the aestdat and cmstdat with respect to subjectid and formidx on the domain ae and cm
df1_cm = []
df1_ae = []
for i, r1 in dfae.iterrows():
    for j, r2 in dfcm.iterrows():
        if (r1['subjectid'] == r2['subjectid']):
            if (r1['formidx'] == r2['formidx']):
                if (r1['aestdat'] > r2['cmstdat']):
                    df1_cm.append(dfcm.iloc[j])
                    df1_ae.append(dfae.iloc[i])
            else:
                continue
        else:
            continue

In [17]:
df1_cm = pd.DataFrame(df1_cm)
df1_ae = pd.DataFrame(df1_ae)
df1_ae.reset_index(inplace=True)
df1_cm.reset_index(inplace=True)

In [None]:
df1_cm.head(5)

In [19]:
df1_cm.shape

(44, 50)

## Type 2: Patients and rows for which days Medications are given and Adverse Event occur don't match.

In [20]:
# If aestdat occur before cmstdat, in that case the adverse event is mismatch 
df2_cm = []
df2_ae = []
for i, r1 in dfae.iterrows():
    for j, r2 in dfcm.iterrows():
        if (r1['subjectid'] == r2['subjectid']):
            if (r1['formidx'] == r2['formidx']):
                if (r1['aestdat'] < r2['cmstdat']):
                    df2_cm.append(dfcm.iloc[j])
                    df2_ae.append(dfae.iloc[i])
            else:
                continue
        else:
            continue

In [21]:
df2_cm = pd.DataFrame(df2_cm)
df2_ae = pd.DataFrame(df2_ae)
df2_ae.reset_index(inplace=True)
df2_cm.reset_index(inplace=True)

In [None]:
df2_ae.head(5)

In [23]:
df2_ae.shape

(22, 89)

## Type 3: Duplicate Adverse events are entered or Adverse Events overlap.

In [24]:
# Checking aestdat and aeendat on ae event to find the duplicates
df3 = dfae[dfae.duplicated(['aestdat', 'aeendat'])]

In [None]:
df3[['aestdat', 'aeendat']].head(4)

In [26]:
df3.shape

(12, 88)

## Type 4: Patients and rows which have overlapping of Concomitant medications.

In [27]:
# Checking aestdat and aeendat on ae event to find the duplicates
df4 = dfcm[dfcm.duplicated(['cmstdat', 'cmendat'])]

In [None]:
#df4 = df_cm_subid[df_cm_subid.duplicated()]
df4[['cmstdat', 'cmendat']].head(4)

In [29]:
df4.shape

(64, 49)

## Type 5: Patients for which the duration of Adverse Events is not adding up to corresponding concomitant medication. 

In [30]:
# If cmstdat starts first, but end earlier to aeendat, then the Adverse Event is not adding up
df6_cm = []
df6_ae = []
for i, r1 in dfae.iterrows():
    for j, r2 in dfcm.iterrows():
        if (r1['subjectid'] == r2['subjectid']):
            if (r1['formidx'] == r2['formidx']):
                if (r1['aestdat'] > r2['cmstdat']):
                    if (r1['aeendat'] < r2['cmendat']):
                        df6_cm.append(dfcm.iloc[j])
                        df6_ae.append(dfae.iloc[i])
                    else:
                        continue

In [31]:
df6_cm = pd.DataFrame(df6_cm)
df6_ae = pd.DataFrame(df6_ae)
df6_ae.reset_index(inplace=True)
df6_cm.reset_index(inplace=True)

In [32]:
df6_cm[['cmstdat', 'cmendat']].head(3)

Unnamed: 0,cmstdat,cmendat
0,2018-05-01,2018-05-26
1,2018-05-22,2018-09-03
2,2018-04-25,2018-05-28


In [33]:
df6_ae[['aestdat', 'aeendat']].head(3)

Unnamed: 0,aestdat,aeendat
0,2018-05-07,2018-05-19
1,2018-06-18,2018-06-24
2,2018-05-14,2018-05-21


In [34]:
df6_ae.shape

(4, 89)