Date Created - 13th September 2020

Date Updated - 4th October 2020

Change log - This update:
1. Captures the event data when available
2. Adds the patients-at-risk data
3. Validates for missing AEs and for studies with no results
4. Validates for duplicates in the pivot functions
5. Calculates sparsity
6. Was tested on 1000 studies (NCTIDs)

## Basics

In [1]:
import pandas as pd
import numpy as np
import requests
import datetime
import json
from pandas.io.json import json_normalize
import xlrd

In [2]:
import plotly.express as px 
import plotly.graph_objects as go

import dash  
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_table_experiments as dt

In [3]:
import dash_table

In [4]:
# Raise pandas' display limits so wide/long AE tables are shown without truncation
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [5]:
nctid = 'NCT01859988'

## AE Fetching Functions

In [6]:
def decode_event_group(coded_value, event_group):
    '''
    Decode arm names, example - EG000 to 'Dupilumab 300 mg qw'

    Parameters
    ----------
    coded_value : event group id to look up (e.g. 'EG000').
    event_group : DataFrame expected to hold 'EventGroupId' and
        'EventGroupTitle' columns.

    Returns
    -------
    The title of the first row whose 'EventGroupId' equals coded_value.
    coded_value itself is returned when there is no such row, when either
    column is absent, or when the title is empty or missing (NaN/None).
    '''
    columns = event_group.columns
    if 'EventGroupId' not in columns or 'EventGroupTitle' not in columns:
        return coded_value

    titles = event_group.loc[event_group['EventGroupId'] == coded_value, 'EventGroupTitle']
    # An unknown id used to raise IndexError on `.values[0]`; fall back instead
    if titles.empty:
        return coded_value

    decoded_value = titles.iloc[0]
    # pd.isna is checked first: NaN is truthy and would otherwise leak through
    if pd.isna(decoded_value) or not decoded_value:
        return coded_value
    return decoded_value

def get_oae(nctid):
    '''
    Fetch the "other" (non-serious) adverse events of a study as a pivoted table.

    Parameters
    ----------
    nctid : study identifier inserted into the CT.gov query URL.

    Returns
    -------
    DataFrame indexed by 'OtherEventTerm' with two-level columns:
    level 0 is 'Subjects', 'Events' (only when the study reports event
    counts) and 'Total_Subjects'; level 1 is the arm title decoded from the
    event group id.

    Raises
    ------
    KeyError when the response has no results/adverse-event section or a
    required stats column is missing; request errors (including a timeout)
    propagate from `requests`.
    '''
    # Get CT.gov data on the NCTID
    URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={nctid}&max_rnk=1&fmt=JSON'
    # A timeout keeps one stalled connection from hanging a whole batch run
    r = requests.get(URL, timeout=30)
    j = json.loads(r.content)
    ae_module = j['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']
    # Other AE data
    tt = ae_module['OtherEventList']['OtherEvent']
    event_groups = pd.json_normalize(ae_module['EventGroupList']['EventGroup'])
    # convert into tabular format: one row per (event term, arm)
    tt2 = pd.json_normalize(tt,
              ['OtherEventStatsList','OtherEventStats'],
              ['OtherEventTerm', 'OtherEventOrganSystem'],
              errors='ignore')
    # In rare cases there could be multiple entries for a single arm + Event Term e.g.NCT01425281
    # pivot() needs each (term, arm) pair to be unique, so de-duplicate on exactly
    # those two keys; the last entry wins, as before.
    tt2 = tt2.drop_duplicates(subset=['OtherEventStatsGroupId',
                                      'OtherEventTerm'], keep='last')
    # The event-count column is optional; include it only when the study reports it
    value_cols = ['OtherEventStatsNumAffected']
    if 'OtherEventStatsNumEvents' in tt2.columns:
        value_cols.append('OtherEventStatsNumEvents')
    value_cols.append('OtherEventStatsNumAtRisk')
    # convert into multi-indexed column
    tt3 = tt2.pivot(index='OtherEventTerm',
                    columns='OtherEventStatsGroupId',
                    values=value_cols)
    tt3.rename(columns=lambda x: decode_event_group(x,event_groups), inplace=True, level=1)
    tt3.rename(columns={'OtherEventStatsNumAffected':'Subjects',
                        'OtherEventStatsNumEvents':'Events',
                        'OtherEventStatsNumAtRisk':'Total_Subjects'}, inplace=True, level=0)
    return tt3

def get_sae(nctid):
    '''
    Fetch the serious adverse events of a study as a pivoted table.

    Parameters
    ----------
    nctid : study identifier inserted into the CT.gov query URL.

    Returns
    -------
    DataFrame indexed by 'SeriousEventTerm' with two-level columns:
    level 0 is 'Subjects', 'Events' (only when the study reports event
    counts) and 'Total_Subjects'; level 1 is the arm title decoded from the
    event group id.

    Raises
    ------
    KeyError when the response has no results/adverse-event section or a
    required stats column is missing; request errors (including a timeout)
    propagate from `requests`.
    '''
    # Get CT.gov data on the NCTID
    URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={nctid}&max_rnk=1&fmt=JSON'
    # A timeout keeps one stalled connection from hanging a whole batch run
    r = requests.get(URL, timeout=30)
    j = json.loads(r.content)
    ae_module = j['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']
    # Serious AE data
    tt = ae_module['SeriousEventList']['SeriousEvent']
    event_groups = pd.json_normalize(ae_module['EventGroupList']['EventGroup'])
    # convert into tabular format: one row per (event term, arm)
    tt2 = pd.json_normalize(tt,
              ['SeriousEventStatsList','SeriousEventStats'],
              ['SeriousEventTerm', 'SeriousEventOrganSystem'],
              errors='ignore')
    # In rare cases there could be multiple entries for a single arm + Event Term e.g NCT01425281
    # pivot() needs each (term, arm) pair to be unique, so de-duplicate on exactly
    # those two keys; the last entry wins, as before.
    tt2 = tt2.drop_duplicates(subset=['SeriousEventStatsGroupId',
                                      'SeriousEventTerm'], keep='last')
    # The event-count column is optional; include it only when the study reports it
    value_cols = ['SeriousEventStatsNumAffected']
    if 'SeriousEventStatsNumEvents' in tt2.columns:
        value_cols.append('SeriousEventStatsNumEvents')
    value_cols.append('SeriousEventStatsNumAtRisk')
    # convert into multi-indexed column
    tt3 = tt2.pivot(index='SeriousEventTerm',
                    columns='SeriousEventStatsGroupId',
                    values=value_cols)
    tt3.rename(columns=lambda x: decode_event_group(x,event_groups), inplace=True, level=1)
    tt3.rename(columns={'SeriousEventStatsNumAffected':'Subjects',
                        'SeriousEventStatsNumEvents':'Events',
                        'SeriousEventStatsNumAtRisk':'Total_Subjects'}, inplace=True, level=0)
    return tt3

def get_ae_summary(nctid):
    '''
    Sum the per-arm adverse-event totals of a study.

    Parameters
    ----------
    nctid : study identifier inserted into the CT.gov query URL.

    Returns
    -------
    Series holding the column-wise sum of the numeric event-group columns,
    including 'EventGroupSeriousNumAffected', 'EventGroupSeriousNumAtRisk'
    and 'EventGroupOtherNumAffected'.

    Raises
    ------
    KeyError when the response lacks the results/adverse-event section or
    one of the three columns above; request errors (including a timeout)
    propagate from `requests`.
    '''
    # Get CT.gov data on the NCTID
    URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={nctid}&max_rnk=1&fmt=JSON'
    # A timeout keeps one stalled connection from hanging a whole batch run
    r = requests.get(URL, timeout=30)
    j = json.loads(r.content)
    tt = pd.json_normalize(j['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']['EventGroupList']['EventGroup'])
    # Values that cannot be parsed as numbers are coerced to NaN and then
    # skipped by the sum below.
    # NOTE(review): 'EventGroupOtherNumAtRisk' is not converted here, so it is
    # presumably left out of the numeric-only sum - confirm that is intended.
    for col in ['EventGroupSeriousNumAffected', 'EventGroupSeriousNumAtRisk', 'EventGroupOtherNumAffected']:
        tt[col] = pd.to_numeric(tt[col], errors='coerce')
    return tt.sum(axis=0, skipna=True, numeric_only=True)

## Testing

In [9]:
# Manual check: fetch the raw CT.gov record for the current `nctid` and show
# the untouched 'OtherEventList' structure that get_oae() flattens.
URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={nctid}&max_rnk=1&fmt=JSON'
r = requests.get(URL)
j = json.loads(r.content)
# Other AE data (bare expression so the notebook displays it)
j['FullStudiesResponse']['FullStudies'][0]['Study']['ResultsSection']['AdverseEventsModule']['OtherEventList']

{'OtherEvent': [{'OtherEventTerm': 'Conjunctivitis',
   'OtherEventOrganSystem': 'Eye disorders',
   'OtherEventSourceVocabulary': 'meddra (16.0)',
   'OtherEventAssessmentType': 'Systematic Assessment',
   'OtherEventStatsList': {'OtherEventStats': [{'OtherEventStatsGroupId': 'EG000',
      'OtherEventStatsNumEvents': '5',
      'OtherEventStatsNumAffected': '4',
      'OtherEventStatsNumAtRisk': '63'},
     {'OtherEventStatsGroupId': 'EG001',
      'OtherEventStatsNumEvents': '1',
      'OtherEventStatsNumAffected': '1',
      'OtherEventStatsNumAtRisk': '64'},
     {'OtherEventStatsGroupId': 'EG002',
      'OtherEventStatsNumEvents': '0',
      'OtherEventStatsNumAffected': '0',
      'OtherEventStatsNumAtRisk': '61'},
     {'OtherEventStatsGroupId': 'EG003',
      'OtherEventStatsNumEvents': '1',
      'OtherEventStatsNumAffected': '1',
      'OtherEventStatsNumAtRisk': '65'},
     {'OtherEventStatsGroupId': 'EG004',
      'OtherEventStatsNumEvents': '0',
      'OtherEventStatsNumA

In [10]:
# Scratch cell: load seaborn's 'iris' example dataset as a toy DataFrame
import seaborn as sns

df = sns.load_dataset('iris')
# Preview the first rows
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [11]:
# Column-wise sum over the first four columns of the toy DataFrame
tt = df.iloc[:,:4].sum(axis = 0, skipna = True)
# The result is one-dimensional: one total per summed column
tt.shape

(4,)

### Test 1

In [12]:
nctid = 'NCT01859988'

In [13]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Subjects,Subjects,Subjects,Events,Events,Events,Events,Events,Events,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects
SeriousEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
Abortion induced,0,0,0,0,0,1,0,0,0,0,0,1,63,64,61,65,65,61
Anaphylactic shock,0,0,1,0,0,0,0,0,1,0,0,0,63,64,61,65,65,61
Asthma,0,0,0,0,1,0,0,0,0,0,1,0,63,64,61,65,65,61
Cellulitis,0,0,0,0,1,0,0,0,0,0,1,0,63,64,61,65,65,61
Dermatitis atopic,0,1,0,0,4,1,0,1,0,0,5,1,63,64,61,65,65,61
Dermatitis exfoliative,0,0,0,1,0,0,0,0,0,1,0,0,63,64,61,65,65,61
Hip dysplasia,0,0,0,0,0,1,0,0,0,0,0,1,63,64,61,65,65,61
Osteonecrosis,0,0,0,0,0,1,0,0,0,0,0,1,63,64,61,65,65,61
Peritonsillar abscess,0,0,0,1,0,0,0,0,0,1,0,0,63,64,61,65,65,61
Respiratory failure,0,0,1,0,0,0,0,0,1,0,0,0,63,64,61,65,65,61


In [14]:
tt = get_oae(nctid)
tt

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Subjects,Subjects,Subjects,Events,Events,Events,Events,Events,Events,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects,Total_Subjects
OtherEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
Abdominal pain upper,1,2,0,0,4,1,1,2,0,0,4,1,63,64,61,65,65,61
Arthralgia,1,4,4,1,1,0,1,4,5,1,1,0,63,64,61,65,65,61
Back pain,2,2,0,2,3,5,16,3,0,5,3,5,63,64,61,65,65,61
Blood triglycerides increased,0,1,0,4,0,0,0,1,0,5,0,0,63,64,61,65,65,61
Conjunctivitis,4,1,0,1,0,0,5,1,0,1,0,0,63,64,61,65,65,61
Conjunctivitis allergic,3,2,6,3,1,2,5,4,9,3,1,2,63,64,61,65,65,61
Cough,4,4,2,1,0,1,4,4,2,1,0,1,63,64,61,65,65,61
Dermatitis atopic,8,13,8,10,11,10,9,19,10,12,13,12,63,64,61,65,65,61
Fatigue,2,1,1,4,0,3,2,1,1,4,0,3,63,64,61,65,65,61
Headache,8,5,9,5,7,2,40,13,24,6,18,2,63,64,61,65,65,61


In [15]:
tt['Subjects']

OtherEventStatsGroupId,Dupilumab 300 mg qw,Dupilumab 300 mg q2w,Dupilumab 200 mg q2w,Dupilumab 300 mg q4w,Dupilumab 100 mg q4w,Placebo
OtherEventTerm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abdominal pain upper,1,2,0,0,4,1
Arthralgia,1,4,4,1,1,0
Back pain,2,2,0,2,3,5
Blood triglycerides increased,0,1,0,4,0,0
Conjunctivitis,4,1,0,1,0,0
Conjunctivitis allergic,3,2,6,3,1,2
Cough,4,4,2,1,0,1
Dermatitis atopic,8,13,8,10,11,10
Fatigue,2,1,1,4,0,3
Headache,8,5,9,5,7,2


### Test 2

In [16]:
nctid = 'NCT02369484'

In [17]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects,Total_Subjects
OtherEventStatsGroupId,Afatinib,Afatinib
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2
Abdominal pain,3,13
Alopecia,1,13
Anemia,2,13
Arthalgia,1,13
Asparate aminotransferase increased,1,13
Back pain,1,13
Bladder infection,1,13
Bone pain,1,13
Constipation,1,13
Cough,2,13


In [18]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Total_Subjects
SeriousEventStatsGroupId,Afatinib,Afatinib
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2
Acute kidney injury,1,13
Dehydration,1,13
Diarrhea,1,13
Dyspnea,1,13
Epistaxis,1,13
Febrile neutropenia,1,13
Muscle weakness lower limb,1,13
Pericardial effusion,1,13
Pleural effusion,1,13


### Test 3

In [19]:
nctid = 'NCT00770588'

In [20]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Total_Subjects,Total_Subjects
OtherEventStatsGroupId,Gefitinib,Placebo,Gefitinib,Placebo
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Alanine Aminotransferase Increased,31,12,147,148
aspartate aminotransferase increased,21,6,147,148
cough,9,20,147,148
diarrhoea,37,13,147,148
dry skin,9,3,147,148
pruritus,10,7,147,148
rash,73,14,147,148
serious hepatic dysfunction,43,16,147,148
skin exfoliation,9,0,147,148
transaminases increased,8,2,147,148


In [21]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Total_Subjects,Total_Subjects
SeriousEventStatsGroupId,Gefitinib,Placebo,Gefitinib,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Accidental Death,0,1,147,148
Alanine Aminotransferase Increased,0,1,147,148
Arterial Thrombosis Limb,1,0,147,148
Aspartate Aminotransferase Increased,0,1,147,148
Circulatory Collapse,1,0,147,148
Completed Suicide,0,1,147,148
Death,1,0,147,148
Haemoptysis,1,0,147,148
Interstitial Lung Disease,2,0,147,148
Lung Infection,2,0,147,148


In [22]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Total_Subjects,Total_Subjects
SeriousEventStatsGroupId,Gefitinib,Placebo,Gefitinib,Placebo
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Accidental Death,0,1,147,148
Alanine Aminotransferase Increased,0,1,147,148
Arterial Thrombosis Limb,1,0,147,148
Aspartate Aminotransferase Increased,0,1,147,148
Circulatory Collapse,1,0,147,148
Completed Suicide,0,1,147,148
Death,1,0,147,148
Haemoptysis,1,0,147,148
Interstitial Lung Disease,2,0,147,148
Lung Infection,2,0,147,148


### Test 4

In [23]:
nctid = 'NCT00820755'

In [24]:
get_oae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Total_Subjects,Total_Subjects,Total_Subjects
OtherEventStatsGroupId,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Abdominal pain,21,0,0,583,156,155
Abdominal pain upper,31,0,0,583,156,155
Acne,31,0,0,583,156,155
Alanine aminotransferase increased,26,0,0,583,156,155
Alopecia,109,0,0,583,156,155
Anaemia,134,9,15,583,156,155
Arthralgia,33,5,11,583,156,155
Asthenia,124,6,16,583,156,155
Back pain,30,6,8,583,156,155
Bone pain,0,2,8,583,156,155


In [25]:
get_sae(nctid)

Unnamed: 0_level_0,Subjects,Subjects,Subjects,Total_Subjects,Total_Subjects,Total_Subjects
SeriousEventStatsGroupId,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly,Cetuximab 250 mg/m^2 q1w + Platinum-based Doublet Chemotherapy,Cetuximab 500 mg/m^2 Every 2 Weeks,Cetuximab 250 mg/m^2 Weekly
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Abdominal distension,1,0,0,583,156,155
Abdominal pain,2,0,0,583,156,155
Acute myocardial infarction,1,0,0,583,156,155
Acute respiratory failure,2,0,0,583,156,155
Adrenal insufficiency,0,0,1,583,156,155
Alanine aminotransferase increased,1,0,1,583,156,155
Altered state of consciousness,0,1,0,583,156,155
Anaemia,6,1,1,583,156,155
Anaphylactic reaction,3,0,0,583,156,155
Anaphylactic shock,1,0,0,583,156,155


## Scalable Function

In [7]:
def _enrollment_or_na(trial):
    """Return the protocol enrollment count of *trial*, or 'NA' when it cannot be read."""
    try:
        URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={trial}&max_rnk=1&fmt=JSON'
        # A timeout keeps one stalled connection from hanging the whole batch
        r = requests.get(URL, timeout=30)
        j = json.loads(r.content)
        return int(j['FullStudiesResponse']['FullStudies'][0]['Study']['ProtocolSection']['DesignModule']['EnrollmentInfo']['EnrollmentCount'])
    # if NCTID is invalid (missing keys, empty study list, unparsable count)
    except (KeyError, IndexError, ValueError, RuntimeError, TypeError, NameError):
        return 'NA'


def _subject_table_stats(subjects):
    """Return (term_count, arm_count, zero_count) for a term x arm 'Subjects' table."""
    term_count = int(subjects.shape[0])
    arm_count = int(subjects.shape[1])
    # Cells may hold the string '0' or a numeric 0, so count both forms
    zero_count = (subjects == '0').sum(1).sum() + (subjects == 0).sum(1).sum()
    return term_count, arm_count, zero_count


## Function to fetch multi-study data
def fetch_ae_data(trials):
    """
    Build a one-row-per-study adverse-event summary.

    Parameters
    ----------
    trials : iterable of NCT identifiers.

    Returns
    -------
    DataFrame with columns 'NCTID', 'AE Count', 'Subjects with AE',
    'Subjects in study', '% subjects w AE', 'Subject per AE', 'Arm Count'
    and 'Sparsity'. When get_ae_summary() fails for a study, its row holds
    'NA' everywhere except 'Subjects in study', which carries the enrollment
    count (or 'NA' if that is unavailable too). A ratio whose denominator is
    zero is reported as 'NA'.

    Notes
    -----
    'Sparsity' is computed as the share of non-zero cells across the serious
    and other 'Subjects' tables. The number of rows collected so far is
    printed before each study as a progress indicator.
    """
    res_list = []

    # Get summary for every trial
    for trial in trials:
        try:
            print(len(res_list))
            ae_sum = get_ae_summary(trial)
        except (KeyError, RuntimeError, TypeError, NameError):
            # if results are not present just get the subject count
            res_list.append([trial,
                        'NA',
                        'NA',
                        _enrollment_or_na(trial),
                        'NA',
                        'NA',
                        'NA',
                        'NA'
                        ])
            continue

        sae_subs_uni = ae_sum['EventGroupSeriousNumAffected']
        oae_subs_uni = ae_sum['EventGroupOtherNumAffected']
        # NOTE(review): the at-risk total is read from the *serious* column even
        # though it is used as the overall denominator - confirm this is intended.
        oae_risk = ae_sum['EventGroupSeriousNumAtRisk']

        # The arm count comes from the other-AE table when present, else from
        # the serious-AE table, else it stays 0.
        study_arm_count = 0

        sae_term_count = sae_df_all = sae_df_zeros = 0
        if sae_subs_uni != 0:
            sae_term_count, study_arm_count, sae_df_zeros = _subject_table_stats(get_sae(trial)['Subjects'])
            sae_df_all = sae_term_count*study_arm_count

        oae_term_count = oae_df_all = oae_df_zeros = 0
        if oae_subs_uni != 0:
            oae_term_count, study_arm_count, oae_df_zeros = _subject_table_stats(get_oae(trial)['Subjects'])
            oae_df_all = oae_term_count*study_arm_count

        all_values = oae_df_all+sae_df_all
        no_zero_values = all_values - (oae_df_zeros+sae_df_zeros)
        sparsity = round((no_zero_values/all_values),4) if all_values else 'NA'

        ae_term_count = sae_term_count+oae_term_count
        subs_per_ae = round((oae_risk/ae_term_count),4) if ae_term_count else 'NA'

        subjects_with_ae = sae_subs_uni+oae_subs_uni
        # A zero at-risk total would otherwise yield inf/nan (numpy division
        # does not raise), so report 'NA' like the other undefined ratios.
        pct_with_ae = round(100*(subjects_with_ae/oae_risk),4) if oae_risk else 'NA'

        res_list.append([trial,
                        ae_term_count,
                        subjects_with_ae,
                        oae_risk,
                        pct_with_ae,
                        subs_per_ae,
                        study_arm_count,
                        sparsity
                        ])

    ae_summary_total = pd.DataFrame(res_list, columns=['NCTID',
                     'AE Count',
                     'Subjects with AE',
                     'Subjects in study',
                     '% subjects w AE',
                     'Subject per AE',
                     'Arm Count',
                     'Sparsity'])

    # The % subjects with AE is calculated by (# SAEs + # OAEs)/(Subjects at Risk).
    # The serious and other counts are simply added, so the percentage can
    # exceed 100; no cap is applied to it here.

    return ae_summary_total

In [8]:
test_data = ['NCT01425281','NCT01751906','NCT01844284','NCT01251614','NCT01071070']

In [9]:
temp = fetch_ae_data(test_data)

0
1
2
3
4


In [10]:
temp.head()

Unnamed: 0,NCTID,AE Count,Subjects with AE,Subjects in study,% subjects w AE,Subject per AE,Arm Count,Sparsity
0,NCT01425281,752,696,477,145.9119,0.6343,2,0.6217
1,NCT01751906,1476,2711,2008,135.01,1.3604,2,0.6809
2,NCT01844284,465,515,400,128.75,0.8602,2,0.6247
3,NCT01251614,59,193,306,63.0719,5.1864,10,0.3186
4,NCT01071070,48,51,216,23.6111,4.5,2,0.6042


In [11]:
temp.to_clipboard()