In [1]:
import os
import re

import pandas as pd
import psycopg2
import requests
from sqlalchemy import create_engine

from pipelineFunctions import json_flatten, json_structure
# The API calls in this notebook pass verify=False, which makes urllib3 emit an
# InsecureRequestWarning per request. Silence only that category so any other
# urllib3 warning still surfaces.
requests.packages.urllib3.disable_warnings(
    requests.packages.urllib3.exceptions.InsecureRequestWarning
)

In [2]:
# Both endpoints live under the same protocol-data API root.
_protocol_api_root = 'https://clinweb.cc.nih.gov/pqs/api/protocoldata'

# Listing of all active protocols.
active_protocol_url = _protocol_api_root + '/activeprotocols'
# Prefix for a single protocol's details; the protocol number is appended to it.
protocol_detail_url = _protocol_api_root + '/bynum/'

In [3]:
# One session for the protocol listing, and a second one that the per-protocol
# detail calls reuse.
session = requests.Session()
protocol_detail_session = requests.Session()

# NOTE(review): verify=False turns off TLS certificate verification. If the host
# uses an internal CA, prefer verify='<path to CA bundle>' once one is available.
# The timeout keeps a stalled connection from hanging the notebook forever.
response = session.get(active_protocol_url, verify=False, timeout=60)
# Fail here with the HTTP status rather than later with a confusing JSON/key error.
response.raise_for_status()


In [4]:
# Columns kept from the flattened protocol detail records, in output order.
data_fields = [
    # identification
    'protocol_number',
    'simpleProtocolNumber',
    'protocol_title',
    # accrual / site
    'accrual_inst',
    'accrual_status',
    'coord_site',
    'protrak_accrual_status',
    # classification
    'research_type',
    'research_phase',
    'study_type',
    # dates
    'start_date_of_study',
    'date_first_part_enrolled',
    # registry / review identifiers
    'irb_name',
    'z_number',
    'nct_number',
    # enrollment totals computed in the detail loop
    'currentEnrollment',
    'plannedEnrollment',
]

In [5]:
# Decode the listing response and keep only its 'protocols' entry.
active_listing_payload = response.json()
active_study_json = active_listing_payload['protocols']

In [6]:
# Count fields that are added up to get an enrollment total.
CURRENT_ENROLLMENT_FIELDS = (
    'total_american_indian',
    'total_asian',
    'total_black',
    'total_hawaiian',
    'total_more_than_one_race',
    'total_unknowns',
    'total_white',
)
# The planned total uses the same fields without 'total_unknowns', exactly as the
# original expression did. NOTE(review): confirm that omission is intentional.
TARGET_ENROLLMENT_FIELDS = tuple(
    field for field in CURRENT_ENROLLMENT_FIELDS if field != 'total_unknowns'
)


def _first_record_total(records, fields):
    """Sum `fields` of the first entry in `records`.

    Returns '' when `records` is missing or empty, which is the placeholder the
    table has always stored for protocols without enrollment data.
    """
    if not records:
        return ''
    first_record = records[0]
    return sum(first_record[field] for field in fields)


def _full_name(name):
    """Join first, middle and last name, leaving out any part that is empty.

    Avoids values such as 'Jane None Doe' or 'Jane  Doe' when no middle name
    is recorded.
    """
    return ' '.join(part for part in (name['fn'], name['mn'], name['ln']) if part)


detail_data = []
pi_data = []
skipped_protocols = []  # protocol numbers whose detail call did not report 200

for protocol in active_study_json:
    protocol_number = protocol['protocolNumber']
    # Digits of the protocol number only, e.g. '77-DK-0002' -> '770002'.
    simplified_protocol_number = ''.join(re.findall('[0-9]+', protocol_number))

    # Call the API for this protocol's details; decode the body once and reuse it.
    protocol_detail_response = protocol_detail_session.get(
        protocol_detail_url + protocol_number, verify=False, timeout=60
    )
    detail_payload = protocol_detail_response.json()
    if detail_payload['responseCode'] != 200:
        # Best-effort: keep going, but remember what was dropped so it can be reported.
        skipped_protocols.append(protocol_number)
        continue

    detail_json = detail_payload['returnedProtocol']

    flattened_detail_json = json_flatten(detail_json)
    flattened_detail_json['simpleProtocolNumber'] = simplified_protocol_number

    # Collect one record per investigator whose role code 'r' is 'PI'.
    # A protocol without an 'investigators' entry simply contributes no PI rows.
    for investigator in detail_json.get('investigators') or []:
        if investigator['r'] != 'PI':
            continue
        name = investigator['n']
        pi_data.append({
            'protocolNumber': protocol_number,
            'simplifiedProtocolNumber': simplified_protocol_number,
            'firstName': name['fn'],
            'lastName': name['ln'],
            'middleName': name['mn'],
            'piName': _full_name(name),
        })

    # Enrollment totals come from the first entry of each list, when present.
    flattened_detail_json['currentEnrollment'] = _first_record_total(
        detail_json.get('enrollment_forms', ''), CURRENT_ENROLLMENT_FIELDS
    )
    flattened_detail_json['plannedEnrollment'] = _first_record_total(
        detail_json.get('target_enrollments', ''), TARGET_ENROLLMENT_FIELDS
    )

    detail_data.append(flattened_detail_json)

if skipped_protocols:
    print(f'Skipped {len(skipped_protocols)} protocol(s) whose detail call did not return responseCode 200')

# Build both frames once from the accumulated records.
pi_table = pd.json_normalize(pi_data)
detail_table = pd.json_normalize(detail_data)

In [7]:
# Attach PI details to each protocol by protocol number.
# DataFrame.join aligns on the row index and lsuffix/rsuffix only rename clashing
# column names, so the previous join paired rows by position; that is wrong as
# soon as any protocol has zero or several PI records. A keyed left merge keeps
# every protocol and matches the PI rows that actually belong to it.
# A protocol with several PI records yields one output row per PI.
protocol_columns = detail_table[data_fields]
if 'protocolNumber' in pi_table.columns:
    final_table = protocol_columns.merge(
        pi_table,
        how='left',
        left_on='protocol_number',
        right_on='protocolNumber',
    )
else:
    # No PI records were collected at all, so there is nothing to attach.
    final_table = protocol_columns.copy()

In [8]:
# Stamp every row with the time of this pull (microseconds zeroed).
snapshot_time = pd.to_datetime('now').replace(microsecond=0)
if 'TimeStamp' in final_table.columns:
    # Cell re-run: refresh the value instead of letting insert() raise
    # ValueError because the column already exists.
    final_table['TimeStamp'] = snapshot_time
else:
    final_table.insert(0, 'TimeStamp', snapshot_time)

In [None]:
# The connection string is read from the environment so that no credentials are
# stored in the notebook. Expected form:
#   postgresql://<user>:<password>@<host>:<port>/Dashboard
# NOTE(review): the password that used to be hard-coded here should be rotated,
# since it is present in the notebook's history.
db_url = os.environ.get('DASHBOARD_DB_URL')
if not db_url:
    raise RuntimeError(
        'Set the DASHBOARD_DB_URL environment variable to the PostgreSQL '
        'connection string before running this cell.'
    )

db = create_engine(db_url)
# engine.begin() commits on success, rolls back on error, and always releases
# the connection when the block exits.
with db.begin() as conn:
    # index must be the boolean False: the string 'false' is truthy, so the
    # DataFrame index was being written as an extra column.
    final_table.to_sql('Protrak_Data_Table', con=conn, if_exists='replace', index=False)
    #pi_table.to_sql('Protrak_PI_Table', con=conn, if_exists='replace', index=False)

In [20]:
# Show the assembled table as the cell output.
# NOTE(review): the saved output below has no TimeStamp, simpleProtocolNumber or
# PI columns, so it appears to come from an earlier version of the notebook;
# re-run the notebook top to bottom to refresh it.
final_table

Unnamed: 0,protocol_number,protocol_title,accrual_inst,accrual_status,coord_site,protrak_accrual_status,research_type,research_phase,study_type,start_date_of_study,date_first_part_enrolled,irb_name,z_number,nct_number,currentEnrollment,plannedEnrollment
0,002360-I,Single Use Expanded Access for 10E8.4/iMab and...,NIAID,Special Exemption,,Special Exemption,,,Expanded Access,12/03/2024,12/03/2024,Panel 1,,,,
1,002357-AG,Deprescribing of Antipsychotic Medication amon...,NIA,"No Longer Recruiting, subject follow-up only",,No longer recruiting/follow-up only,,,Observational,12/06/2024,12/06/2024,Panel 1,,,,
2,002355-I,Olorofim Multiple Patient Access Program Singl...,NIAID,Special Exemption,,Special Exemption,,,Expanded Access,12/02/2024,12/02/2024,Panel 1,,,,
3,002350-I,Single Use Compassionate Use for 10E8.4/iMab i...,NIAID,Special Exemption,,Special Exemption,,,Expanded Access,12/03/2024,12/03/2024,Panel 1,,,,
4,002333-C,Expanded Use for Retreatment for a Single Pati...,NCI,Special Exemption,,Special Exemption,,,Expanded Access,10/30/2024,10/30/2024,Panel 1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1744,77-DK-0002,Natural History of Thyroid Function Disorders,NIDDK,Recruiting,,Participants currently recruited/enrolled,R:NH,,Observational,02/01/1977,02/01/1977,Panel 1,ZIADK047053,NCT00001159,950,
1745,76-HG-0238,Diagnosis and Treatment of Patients with Inbor...,NHGRI,Recruiting,,Participants currently recruited/enrolled,R:NH,,Observational,09/12/1978,09/12/1978,Panel 1,ZIAHG000215,NCT00369421,3616,
1746,76-H-0051,Lipoprotein Metabolism in Normal Volunteers an...,NHLBI,Open for Data Analysis,,Completed Study; data analyses ongoing,R:NH,,Observational,09/03/1976,09/03/1976,Panel 1,ZIAHL006199,NCT00001154,759,
1747,OH76-DK-0256,Prospective Studies of Diabetes Mellitus and i...,NIDDK,Open for Data Analysis,,Completed Study; data analyses ongoing,R:NH,,Observational,09/03/1976,09/03/1976,Panel 1,ZIADK069028,NCT00339482,12647,
