Date Created - 5th September 2020

Change log - this version updates the code to:
1. Capture the event-count data when it is available
2. Drop the patients-at-risk data
3. Decode the arm names, for example EG000 to 'Dupilumab 300 mg qw'

In [1]:
import pandas as pd
import numpy as np
import requests
import datetime
import json
from pandas.io.json import json_normalize
import xlrd

In [2]:
# Raise pandas' display limits (rows, columns, line width) for the tables shown below.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [3]:
# Study identifier passed as the `expr` query value to the clinicaltrials.gov API by get_oae / get_sae.
nctid = 'NCT01859988'

In [4]:
def decode_event_group(coded_value, event_group):
    '''
    Decode an arm code into its arm name, example - EG000 to 'Dupilumab 300 mg qw'.

    Parameters
    ----------
    coded_value : str
        Event group id to look up, e.g. 'EG000'.
    event_group : pandas.DataFrame
        Lookup table with 'EventGroupId' and 'EventGroupTitle' columns.

    Returns
    -------
    The 'EventGroupTitle' of the first row whose 'EventGroupId' equals
    coded_value. coded_value itself is returned unchanged when no row
    matches, or when the matching title is missing (None/NaN) or empty.
    '''
    titles = event_group.loc[event_group['EventGroupId'] == coded_value, 'EventGroupTitle']
    # An unknown code must fall back to the code itself rather than raise
    # IndexError from indexing an empty selection.
    if titles.empty:
        return coded_value
    decoded_value = titles.iloc[0]
    # NaN is truthy, so test for a missing value explicitly before the
    # emptiness check.
    if pd.isna(decoded_value) or not decoded_value:
        return coded_value
    return decoded_value

def _get_adverse_events(nctid, prefix, timeout=30):
    '''
    Shared implementation behind get_oae and get_sae.

    Downloads the study record for `nctid` from clinicaltrials.gov and pivots
    the adverse-event statistics stored under the `<prefix>Event*` keys into a
    table with one row per event term and two column levels:
    level 0 is 'Subjects' (plus 'Events' when the response carries a
    `<prefix>EventStatsNumEvents` field) and level 1 is the decoded arm name.

    Parameters
    ----------
    nctid : str
        Study identifier sent as the `expr` query value.
    prefix : str
        'Other' or 'Serious'; selects which event list of the
        AdverseEventsModule is read.
    timeout : float
        Seconds to wait for the HTTP response before requests gives up.

    Raises
    ------
    requests.HTTPError
        When the server answers with an error status.
    KeyError, IndexError
        When the response lacks the expected sections (for example no
        ResultsSection) or contains no study.
    ValueError
        Raised by pandas' pivot when a term/arm combination occurs more than once.
    '''
    # NOTE(review): this is the "full_studies" query endpoint of the
    # ClinicalTrials.gov API - confirm it is still served before relying on it.
    response = requests.get(
        'https://clinicaltrials.gov/api/query/full_studies',
        params={'expr': nctid, 'max_rnk': 1, 'fmt': 'JSON'},
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()

    ae_module = payload['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']
    events = ae_module[f'{prefix}EventList'][f'{prefix}Event']
    # Lookup table used to turn arm codes (EG000, ...) into arm titles.
    event_groups = pd.json_normalize(ae_module['EventGroupList']['EventGroup'])

    term_col = f'{prefix}EventTerm'
    group_col = f'{prefix}EventStatsGroupId'
    affected_col = f'{prefix}EventStatsNumAffected'
    events_col = f'{prefix}EventStatsNumEvents'

    # Convert into tabular format: one row per (event term, arm) pair.
    long_table = pd.json_normalize(
        events,
        [f'{prefix}EventStatsList', f'{prefix}EventStats'],
        [term_col, f'{prefix}EventOrganSystem'],
        errors='ignore',
    )

    # The event count is optional in the response; include it only when the
    # column exists rather than relying on a KeyError from pivot.
    level0_names = {affected_col: 'Subjects'}
    if events_col in long_table.columns:
        level0_names[events_col] = 'Events'

    # Convert into multi-indexed columns: (statistic, arm).
    wide = long_table.pivot(index=term_col, columns=group_col, values=list(level0_names))
    return (
        wide
        .rename(columns=lambda code: decode_event_group(code, event_groups), level=1)
        .rename(columns=level0_names, level=0)
    )


def get_oae(nctid):
    '''
    Return the "other" (OtherEvent) adverse-event table for a study.

    Rows are indexed by 'OtherEventTerm'; columns are a two-level index of
    statistic ('Subjects', and 'Events' when available) by decoded arm name.
    See _get_adverse_events for the errors that can be raised.
    '''
    return _get_adverse_events(nctid, 'Other')


def get_sae(nctid):
    '''
    Return the serious (SeriousEvent) adverse-event table for a study.

    Rows are indexed by 'SeriousEventTerm'; columns are a two-level index of
    statistic ('Subjects', and 'Events' when available) by decoded arm name.
    See _get_adverse_events for the errors that can be raised.
    '''
    return _get_adverse_events(nctid, 'Serious')

In [5]:
#event_groups = pd.json_normalize(j['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']['EventGroupList']['EventGroup'])
#event_groups.iloc[:,:3]

In [6]:
#decode_event_group(coded_value = 'EG000', event_group=event_groups)

In [7]:
# Test 1: re-assigns the same identifier already set in cell In [3] (value unchanged).
nctid = 'NCT01859988'

In [8]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Subjects,Subjects,Subjects,Events,Events,Events,Events,Events,Events
SeriousEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Abortion induced,0,0,0,0,0,1,0,0,0,0,0,1
Anaphylactic shock,0,0,1,0,0,0,0,0,1,0,0,0
Asthma,0,0,0,0,1,0,0,0,0,0,1,0
Cellulitis,0,0,0,0,1,0,0,0,0,0,1,0
Dermatitis atopic,0,1,0,0,4,1,0,1,0,0,5,1
Dermatitis exfoliative,0,0,0,1,0,0,0,0,0,1,0,0
Hip dysplasia,0,0,0,0,0,1,0,0,0,0,0,1
Osteonecrosis,0,0,0,0,0,1,0,0,0,0,0,1
Peritonsillar abscess,0,0,0,1,0,0,0,0,0,1,0,0
Respiratory failure,0,0,1,0,0,0,0,0,1,0,0,0


In [9]:
tt = get_oae(nctid)
tt

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Subjects,Subjects,Subjects,Events,Events,Events,Events,Events,Events
OtherEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Abdominal pain upper,1,2,0,0,4,1,1,2,0,0,4,1
Arthralgia,1,4,4,1,1,0,1,4,5,1,1,0
Back pain,2,2,0,2,3,5,16,3,0,5,3,5
Blood triglycerides increased,0,1,0,4,0,0,0,1,0,5,0,0
Conjunctivitis,4,1,0,1,0,0,5,1,0,1,0,0
Conjunctivitis allergic,3,2,6,3,1,2,5,4,9,3,1,2
Cough,4,4,2,1,0,1,4,4,2,1,0,1
Dermatitis atopic,8,13,8,10,11,10,9,19,10,12,13,12
Fatigue,2,1,1,4,0,3,2,1,1,4,0,3
Headache,8,5,9,5,7,2,40,13,24,6,18,2


In [10]:
tt['Subjects']

OtherEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
OtherEventTerm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abdominal pain upper,1,2,0,0,4,1
Arthralgia,1,4,4,1,1,0
Back pain,2,2,0,2,3,5
Blood triglycerides increased,0,1,0,4,0,0
Conjunctivitis,4,1,0,1,0,0
Conjunctivitis allergic,3,2,6,3,1,2
Cough,4,4,2,1,0,1
Dermatitis atopic,8,13,8,10,11,10
Fatigue,2,1,1,4,0,3
Headache,8,5,9,5,7,2


### Test 2

In [11]:
# Single-arm study: the outputs below contain only a 'Subjects' level (no 'Events' columns).
nctid = 'NCT02369484'

In [12]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects
OtherEventStatsGroupId,Afatinib
OtherEventTerm,Unnamed: 1_level_2
Abdominal pain,3
Alopecia,1
Anemia,2
Arthalgia,1
Asparate aminotransferase increased,1
Back pain,1
Bladder infection,1
Bone pain,1
Constipation,1
Cough,2


In [13]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects
SeriousEventStatsGroupId,Afatinib
SeriousEventTerm,Unnamed: 1_level_2
Acute kidney injury,1
Dehydration,1
Diarrhea,1
Dyspnea,1
Epistaxis,1
Febrile neutropenia,1
Muscle weakness lower limb,1
Pericardial effusion,1
Pleural effusion,1


### Test 3

In [14]:
# Two-arm study (Gefitinib, Placebo in the outputs below); only a 'Subjects' level is returned.
nctid = 'NCT00770588'

In [15]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects,Subjects
OtherEventStatsGroupId,Gefitinib,Placebo
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2
Alanine Aminotransferase Increased,31,12
aspartate aminotransferase increased,21,6
cough,9,20
diarrhoea,37,13
dry skin,9,3
pruritus,10,7
rash,73,14
serious hepatic dysfunction,43,16
skin exfoliation,9,0
transaminases increased,8,2


In [16]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects
SeriousEventStatsGroupId,Gefitinib,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2
Accidental Death,0,1
Alanine Aminotransferase Increased,0,1
Arterial Thrombosis Limb,1,0
Aspartate Aminotransferase Increased,0,1
Circulatory Collapse,1,0
Completed Suicide,0,1
Death,1,0
Haemoptysis,1,0
Interstitial Lung Disease,2,0
Lung Infection,2,0


In [17]:
# NOTE(review): identical to the previous cell (In [16]) with the same nctid - this repeat looks redundant.
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects
SeriousEventStatsGroupId,Gefitinib,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2
Accidental Death,0,1
Alanine Aminotransferase Increased,0,1
Arterial Thrombosis Limb,1,0
Aspartate Aminotransferase Increased,0,1
Circulatory Collapse,1,0
Completed Suicide,0,1
Death,1,0
Haemoptysis,1,0
Interstitial Lung Disease,2,0
Lung Infection,2,0


### Test 4

In [18]:
# Three-arm study (three Cetuximab arms in the outputs below); only a 'Subjects' level is returned.
nctid = 'NCT00820755'

In [19]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects
OtherEventStatsGroupId,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Abdominal pain,21,0,0
Abdominal pain upper,31,0,0
Acne,31,0,0
Alanine aminotransferase increased,26,0,0
Alopecia,109,0,0
Anaemia,134,9,15
Arthralgia,33,5,11
Asthenia,124,6,16
Back pain,30,6,8
Bone pain,0,2,8


In [20]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects
SeriousEventStatsGroupId,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Abdominal distension,1,0,0
Abdominal pain,2,0,0
Acute myocardial infarction,1,0,0
Acute respiratory failure,2,0,0
Adrenal insufficiency,0,0,1
Alanine aminotransferase increased,1,0,1
Altered state of consciousness,0,1,0
Anaemia,6,1,1
Anaphylactic reaction,3,0,0
Anaphylactic shock,1,0,0
