In [1]:
import pandas as pd
import numpy as np
import requests
import datetime
import json
from pandas.io.json import json_normalize
import xlrd

In [25]:
# Raise pandas' display limits so long / wide frames are not truncated when rendered.
for _option, _limit in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(_option, _limit)

In [3]:
# Study identifier interpolated into the CT.gov query URL below.
nctid = "NCT01979016"

In [4]:
# Get CT.gov data on the NCTID
# NOTE(review): this targets the legacy `full_studies` query endpoint; confirm it is
# still being served (ClinicalTrials.gov also publishes a newer v2 API) before
# relying on this cell.
URL = f'https://clinicaltrials.gov/api/query/full_studies?expr={nctid}&max_rnk=1&fmt=JSON'
r = requests.get(URL, timeout=30)  # bound the wait so a stalled connection cannot hang the kernel
r.raise_for_status()  # surface HTTP errors here instead of as a KeyError in a later cell
j = r.json()  # parsed JSON payload (a dict keyed by 'FullStudiesResponse')

## Part 1: Other Adverse Events

In [32]:
# Walk the response down to the list of "other" adverse-event records
# for the first (and, given max_rnk=1, only) returned study.
tt = (
    j['FullStudiesResponse']['FullStudies'][0]['Study']
    ['ResultsSection']['AdverseEventsModule']
    ['OtherEventList']['OtherEvent']
)
tt

[{'OtherEventTerm': 'Conjunctivitis allergic',
  'OtherEventOrganSystem': 'Eye disorders',
  'OtherEventSourceVocabulary': 'MedDRA (16.0)',
  'OtherEventAssessmentType': 'Systematic Assessment',
  'OtherEventStatsList': {'OtherEventStats': [{'OtherEventStatsGroupId': 'EG000',
     'OtherEventStatsNumAffected': '0',
     'OtherEventStatsNumAtRisk': '27'},
    {'OtherEventStatsGroupId': 'EG001',
     'OtherEventStatsNumAffected': '2',
     'OtherEventStatsNumAtRisk': '27'}]}},
 {'OtherEventTerm': 'Fatigue',
  'OtherEventOrganSystem': 'General disorders',
  'OtherEventSourceVocabulary': 'MedDRA (16.0)',
  'OtherEventAssessmentType': 'Systematic Assessment',
  'OtherEventStatsList': {'OtherEventStats': [{'OtherEventStatsGroupId': 'EG000',
     'OtherEventStatsNumAffected': '1',
     'OtherEventStatsNumAtRisk': '27'},
    {'OtherEventStatsGroupId': 'EG001',
     'OtherEventStatsNumAffected': '2',
     'OtherEventStatsNumAtRisk': '27'}]}},
 {'OtherEventTerm': 'Injection site erythema',
  'Ot

In [29]:
# Flatten the event records one level. The per-group stats stay nested as a
# list-valued column ('OtherEventStatsList.OtherEventStats').
# `pd.json_normalize` replaces the deprecated `pandas.io.json.json_normalize`
# and already returns a DataFrame, so no extra wrapper is needed.
tt2 = pd.json_normalize(tt)
tt2.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,OtherEventTerm,OtherEventOrganSystem,OtherEventSourceVocabulary,OtherEventAssessmentType,OtherEventStatsList.OtherEventStats
0,Conjunctivitis allergic,Eye disorders,MedDRA (16.0),Systematic Assessment,"[{'OtherEventStatsGroupId': 'EG000', 'OtherEve..."
1,Fatigue,General disorders,MedDRA (16.0),Systematic Assessment,"[{'OtherEventStatsGroupId': 'EG000', 'OtherEve..."
2,Injection site erythema,General disorders,MedDRA (16.0),Systematic Assessment,"[{'OtherEventStatsGroupId': 'EG000', 'OtherEve..."
3,Injection site reaction,General disorders,MedDRA (16.0),Systematic Assessment,"[{'OtherEventStatsGroupId': 'EG000', 'OtherEve..."
4,Folliculitis,Infections and infestations,MedDRA (16.0),Systematic Assessment,"[{'OtherEventStatsGroupId': 'EG000', 'OtherEve..."


In [160]:
# Long layout: one row per (event, group id). The nested stats list is expanded
# into rows, and each event's term and organ system are carried along as columns.
# `pd.json_normalize` replaces the deprecated `pandas.io.json.json_normalize`.
tt2 = pd.json_normalize(
    tt,
    record_path=['OtherEventStatsList', 'OtherEventStats'],
    meta=['OtherEventTerm', 'OtherEventOrganSystem'],
    errors='ignore',  # tolerate records that lack one of the meta keys
)
tt2.head()

  after removing the cwd from sys.path.


Unnamed: 0,OtherEventStatsGroupId,OtherEventStatsNumAffected,OtherEventStatsNumAtRisk,OtherEventTerm,OtherEventOrganSystem
0,EG000,0,27,Conjunctivitis allergic,Eye disorders
1,EG001,2,27,Conjunctivitis allergic,Eye disorders
2,EG000,1,27,Fatigue,General disorders
3,EG001,2,27,Fatigue,General disorders
4,EG000,1,27,Injection site erythema,General disorders


In [173]:
# Index the long frame by event term. Reassigning (rather than inplace=True)
# plus the column guard keeps this cell safe to re-run: once the term has
# become the index, a second set_index on the same column would raise KeyError.
if 'OtherEventTerm' in tt2.columns:
    tt2 = tt2.set_index('OtherEventTerm')
tt2.head()

Unnamed: 0_level_0,OtherEventStatsGroupId,OtherEventStatsNumAffected,OtherEventStatsNumAtRisk,OtherEventOrganSystem
OtherEventTerm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Conjunctivitis allergic,EG000,0,27,Eye disorders
Conjunctivitis allergic,EG001,2,27,Eye disorders
Fatigue,EG000,1,27,General disorders
Fatigue,EG001,2,27,General disorders
Injection site erythema,EG000,1,27,General disorders


In [174]:
# Wide layout: rows come from the current index (event term); columns become a
# two-level (statistic, group id) index.
# NOTE(review): counts are still the strings delivered by the API — cast before doing arithmetic.
tt3 = tt2.pivot(
    values=['OtherEventStatsNumAffected', 'OtherEventStatsNumAtRisk'],
    columns='OtherEventStatsGroupId',
)
tt3

Unnamed: 0_level_0,OtherEventStatsNumAffected,OtherEventStatsNumAffected,OtherEventStatsNumAtRisk,OtherEventStatsNumAtRisk
OtherEventStatsGroupId,EG000,EG001,EG000,EG001
OtherEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Back pain,2,1,27,27
Conjunctivitis allergic,0,2,27,27
Contusion,0,2,27,27
Dermatitis Infected,2,0,27,27
Dermatitis atopic,4,4,27,27
Fatigue,1,2,27,27
Folliculitis,0,2,27,27
Gastroenteritis,0,2,27,27
Headache,1,2,27,27
Injection site erythema,1,2,27,27


## Part 2: Serious Adverse Events

In [175]:
# Flattening multi-indexed columns
# NOTE(review): `tt4` is not defined in any cell shown here; presumably this was
# meant for a pivoted frame such as `tt3` — confirm the target before re-enabling.
# tt4.columns = [' '.join(col).strip() for col in tt4.columns.values]

In [179]:
# Walk the response down to the list of serious adverse-event records
# for the first returned study.
tt = (
    j['FullStudiesResponse']['FullStudies'][0]['Study']
    ['ResultsSection']['AdverseEventsModule']
    ['SeriousEventList']['SeriousEvent']
)
tt

[{'SeriousEventTerm': 'Coronary artery stenosis',
  'SeriousEventOrganSystem': 'Cardiac disorders',
  'SeriousEventSourceVocabulary': 'MedDRA (16.0)',
  'SeriousEventAssessmentType': 'Systematic Assessment',
  'SeriousEventStatsList': {'SeriousEventStats': [{'SeriousEventStatsGroupId': 'EG000',
     'SeriousEventStatsNumAffected': '1',
     'SeriousEventStatsNumAtRisk': '27'},
    {'SeriousEventStatsGroupId': 'EG001',
     'SeriousEventStatsNumAffected': '0',
     'SeriousEventStatsNumAtRisk': '27'}]}},
 {'SeriousEventTerm': 'Cholecystitis',
  'SeriousEventOrganSystem': 'Hepatobiliary disorders',
  'SeriousEventSourceVocabulary': 'MedDRA (16.0)',
  'SeriousEventAssessmentType': 'Systematic Assessment',
  'SeriousEventStatsList': {'SeriousEventStats': [{'SeriousEventStatsGroupId': 'EG000',
     'SeriousEventStatsNumAffected': '1',
     'SeriousEventStatsNumAtRisk': '27'},
    {'SeriousEventStatsGroupId': 'EG001',
     'SeriousEventStatsNumAffected': '0',
     'SeriousEventStatsNumAtRisk

In [180]:
# Flatten the serious-event records one level. The per-group stats stay nested
# as a list-valued column ('SeriousEventStatsList.SeriousEventStats').
# `pd.json_normalize` replaces the deprecated `pandas.io.json.json_normalize`
# and already returns a DataFrame, so no extra wrapper is needed.
tt2 = pd.json_normalize(tt)
tt2.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,SeriousEventTerm,SeriousEventOrganSystem,SeriousEventSourceVocabulary,SeriousEventAssessmentType,SeriousEventStatsList.SeriousEventStats
0,Coronary artery stenosis,Cardiac disorders,MedDRA (16.0),Systematic Assessment,"[{'SeriousEventStatsGroupId': 'EG000', 'Seriou..."
1,Cholecystitis,Hepatobiliary disorders,MedDRA (16.0),Systematic Assessment,"[{'SeriousEventStatsGroupId': 'EG000', 'Seriou..."
2,Musculoskeletal chest pain,Musculoskeletal and connective tissue disorders,MedDRA (16.0),Systematic Assessment,"[{'SeriousEventStatsGroupId': 'EG000', 'Seriou..."


In [181]:
# Long layout: one row per (serious event, group id). The nested stats list is
# expanded into rows, and each event's term and organ system are carried along.
# `pd.json_normalize` replaces the deprecated `pandas.io.json.json_normalize`.
tt2 = pd.json_normalize(
    tt,
    record_path=['SeriousEventStatsList', 'SeriousEventStats'],
    meta=['SeriousEventTerm', 'SeriousEventOrganSystem'],
    errors='ignore',  # tolerate records that lack one of the meta keys
)
tt2.head()

  after removing the cwd from sys.path.


Unnamed: 0,SeriousEventStatsGroupId,SeriousEventStatsNumAffected,SeriousEventStatsNumAtRisk,SeriousEventTerm,SeriousEventOrganSystem
0,EG000,1,27,Coronary artery stenosis,Cardiac disorders
1,EG001,0,27,Coronary artery stenosis,Cardiac disorders
2,EG000,1,27,Cholecystitis,Hepatobiliary disorders
3,EG001,0,27,Cholecystitis,Hepatobiliary disorders
4,EG000,1,27,Musculoskeletal chest pain,Musculoskeletal and connective tissue disorders


In [182]:
# Wide layout: one row per serious-event term; columns become a two-level
# (statistic, group id) index.
# NOTE(review): counts are still the strings delivered by the API — cast before doing arithmetic.
tt3 = tt2.pivot(
    index='SeriousEventTerm',
    columns='SeriousEventStatsGroupId',
    values=['SeriousEventStatsNumAffected', 'SeriousEventStatsNumAtRisk'],
)
tt3

Unnamed: 0_level_0,SeriousEventStatsNumAffected,SeriousEventStatsNumAffected,SeriousEventStatsNumAtRisk,SeriousEventStatsNumAtRisk
SeriousEventStatsGroupId,EG000,EG001,EG000,EG001
SeriousEventTerm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Cholecystitis,1,0,27,27
Coronary artery stenosis,1,0,27,27
Musculoskeletal chest pain,1,0,27,27
