In [1]:
import os
import re

import pandas as pd
import psycopg2
import requests
from sqlalchemy import create_engine
#from pipelineFunctions import json_flatten, json_structure
#silence urllib3 warnings for the whole kernel; presumably added because the requests
#in this notebook are sent with verify=False, which urllib3 warns about
requests.packages.urllib3.disable_warnings()

In [2]:
def json_flatten(json_dict):
    """Collapse a nested JSON-like structure into a single-level dict.

    Rules, applied recursively:
      * dict  -> every entry is flattened and merged; only the innermost key is
                 used as the column name (no parent prefix), so equally named
                 leaves in different branches overwrite one another, last wins.
      * list  -> every element is flattened, then each resulting key gets a
                 1-based position suffix (``key_1``, ``key_2``, ...).
      * other -> stored under the key it was reached through ('' at top level).

    Returns a new dict; the input is not modified.
    """
    def inner_flatten(node, label=''):
        if isinstance(node, dict):
            merged = {}
            for child_key, child_value in node.items():
                merged.update(inner_flatten(child_value, child_key))
            return merged

        if isinstance(node, list):
            merged = {}
            for position, element in enumerate(node, start=1):
                #the recursive call always yields a dict, so it can be suffixed directly
                for leaf_key, leaf_value in inner_flatten(element, label).items():
                    merged[f'{leaf_key}_{position}'] = leaf_value
            return merged

        #scalar leaf
        return {label: node}

    return inner_flatten(json_dict)


In [3]:
#endpoint queried once for the list of active protocols (its JSON body is read for a 'protocols' list)
active_protocol_url = 'https://clinweb.cc.nih.gov/pqs/api/protocoldata/activeprotocols'
#prefix for the per-protocol detail lookup; the protocol number is appended to it
protocol_detail_url = 'https://clinweb.cc.nih.gov/pqs/api/protocoldata/bynum/'

In [4]:
#one session for the protocol list, a second one reused for every per-protocol detail call
session = requests.Session()
protocol_detail_session = requests.Session()

#NOTE(security): verify=False turns off TLS certificate checking for this call.
#It is kept so the request behaves as before; prefer pointing `verify` at the proper CA bundle.
#The timeout (seconds) stops a stalled server from hanging the notebook indefinitely.
response = session.get(active_protocol_url, verify=False, timeout=60)
#fail here with a clear HTTP error instead of a JSON/Key error further down
response.raise_for_status()


In [5]:
#columns kept from the flattened protocol detail records, in output order
data_fields = [
    #identification
    'protocol_number',
    'simpleProtocolNumber',
    'protocol_title',
    #accrual / site
    'accrual_inst',
    'accrual_status',
    'coord_site',
    'protrak_accrual_status',
    #study classification
    'research_type',
    'research_phase',
    'study_type',
    #dates
    'start_date_of_study',
    'date_first_part_enrolled',
    #registry identifiers
    'irb_name',
    'z_number',
    'nct_number',
    #enrollment totals
    'currentEnrollment',
    'plannedEnrollment',
]

In [6]:
#entries under 'protocols' in the active-protocol response; the fetch loop reads each entry's 'protocolNumber'
active_study_json = response.json()['protocols']

In [7]:
#timeout (seconds) per detail request so a single stalled call cannot hang the whole run
DETAIL_REQUEST_TIMEOUT = 60

#counters added up for the planned (target) enrollment total
TARGET_ENROLLMENT_FIELDS = ('total_american_indian', 'total_asian', 'total_black', 'total_hawaiian',
                            'total_more_than_one_race', 'total_white')
#the current enrollment total additionally counts 'total_unknowns'
CURRENT_ENROLLMENT_FIELDS = TARGET_ENROLLMENT_FIELDS + ('total_unknowns',)


def _sum_enrollment(records, fields):
    """Add up `fields` of the first entry in `records`.

    Only the first entry is used. Returns '' when `records` is missing or empty,
    which is the placeholder the output table uses for "no enrollment data".
    Assumes the counters are numeric -- TODO confirm against the API.
    """
    if not records:
        return ''
    first_record = records[0]
    return sum(first_record[field] for field in fields)


detail_data = []
pi_data = []

for protocol in active_study_json:
    protocol_number = protocol['protocolNumber']
    #digits of the protocol number only, concatenated
    simplified_protocol_number = ''.join(re.findall('[0-9]+', protocol_number))

    #calling api for specific protocol details
    #NOTE(security): verify=False disables TLS certificate checking; kept as in the original call
    protocol_detail_response = protocol_detail_session.get(
        protocol_detail_url + protocol_number, verify=False, timeout=DETAIL_REQUEST_TIMEOUT)
    #parse the body once and reuse it for both the status check and the payload
    detail_payload = protocol_detail_response.json()
    #the status is read from the JSON body, not from the HTTP status line; skip failed lookups
    if detail_payload.get('responseCode') != 200:
        continue

    detail_json = detail_payload['returnedProtocol']

    flattened_detail_json = json_flatten(detail_json)
    flattened_detail_json['simpleProtocolNumber'] = simplified_protocol_number

    #getting pi information from API call; a protocol without an investigator list yields no PI rows
    for investigator in detail_json.get('investigators') or []:
        if investigator.get('r') != 'PI':
            continue
        name = investigator.get('n') or {}
        first_name, middle_name, last_name = name.get('fn'), name.get('mn'), name.get('ln')
        pi_data.append({
            'protocolNumber': protocol_number,
            'simplifiedProtocolNumber': simplified_protocol_number,
            'firstName': first_name,
            'lastName': last_name,
            'middleName': middle_name,
            #skip absent parts so a missing middle name does not leave 'None' or a double space
            'piName': ' '.join(part for part in (first_name, middle_name, last_name) if part),
        })

    #summarizing enrollment data
    flattened_detail_json['currentEnrollment'] = _sum_enrollment(
        detail_json.get('enrollment_forms'), CURRENT_ENROLLMENT_FIELDS)
    flattened_detail_json['plannedEnrollment'] = _sum_enrollment(
        detail_json.get('target_enrollments'), TARGET_ENROLLMENT_FIELDS)

    #flattening coord_site data
    flattened_detail_json['coord_site'] = detail_json.get('coord_site_name', '')

    detail_data.append(flattened_detail_json)

#build both tables once from the collected records
pi_table = pd.json_normalize(pi_data)
detail_table = pd.json_normalize(detail_data)

In [8]:
#Attach PI rows to their protocol by key.
#DataFrame.join aligns on the row index, so the previous call paired the n-th protocol with the
#n-th PI row regardless of which protocol that PI belongs to (lsuffix/rsuffix only rename
#clashing column names and there are none here, so they had no effect).
#Both key columns are filled from the same simplified protocol number in the fetch loop.
#A protocol with several PIs produces one output row per PI; one with no PI keeps empty PI columns.
if pi_table.empty:
    #no PI rows collected: nothing to attach
    final_table = detail_table[data_fields].copy()
else:
    final_table = detail_table[data_fields].merge(
        pi_table,
        how='left',
        left_on='simpleProtocolNumber',
        right_on='simplifiedProtocolNumber',
    )

In [9]:
#one timestamp for the whole run, truncated to whole seconds
#NOTE(review): whether 'now' is parsed as UTC or local time depends on the pandas version -- confirm
run_timestamp = pd.to_datetime('now').replace(microsecond=0)

#DataFrame.insert raises when the column already exists, which made this cell fail on a re-run;
#refresh the value in that case instead
if 'TimeStamp' in final_table.columns:
    final_table['TimeStamp'] = run_timestamp
else:
    final_table.insert(0, 'TimeStamp', run_timestamp)

In [None]:
#Connection URL: taken from the DASHBOARD_DB_URL environment variable when set, otherwise the
#original literal so existing setups keep working.
#FIXME(security): the fallback embeds a username and password in the notebook; move it to the
#environment (or a secrets store), rotate the password and delete the literal.
db_url = os.environ.get('DASHBOARD_DB_URL', 'postgresql://dashboardServer:dashboard@10.157.90.23:5432/Dashboard')
db = create_engine(db_url)

#the context manager closes the connection even when a write fails
with db.connect() as conn:
    #index=False must be the boolean: the string 'false' is truthy, so the row index was being
    #written to the database as an extra column
    final_table.to_sql('Protrak_Data_Table', con=conn, if_exists='replace', index=False, method='multi')
    pi_table.to_sql('Protrak_PI_Table', con=conn, if_exists='replace', index=False, method='multi')