In [1]:
import pandas as pd
import numpy as np
import warnings
import requests
import json
import sys
import os

import psycopg2
from sqlalchemy import create_engine

# Silence every Python warning for the rest of the kernel session
# (no category or module filter is given, so nothing is exempt).
warnings.filterwarnings("ignore") 
# Also switch off urllib3's own warnings through the copy exposed under requests.packages.
# NOTE(review): presumably needed because the HTTP session disables certificate
# verification, which would otherwise warn on every request — confirm.
requests.packages.urllib3.disable_warnings()

In [None]:
# Authenticated HTTP session reused by every API call in this notebook.
session = requests.Session()
# Credentials come from the environment so they are never stored in the notebook
# (the previous inline tuple was also not valid Python after redaction).
# A missing variable raises KeyError naming the variable that must be exported.
session.auth = (os.environ['PROTECT_API_USER'], os.environ['PROTECT_API_PASSWORD'])
# NOTE(review): TLS certificate verification is switched off for every request made
# through this session. Prefer pointing `session.verify` at the CA bundle instead.
session.verify = False

In [None]:
# URL of the multi-result listing that the first request in this notebook fetches.
# Adjacent string literals are joined by the parser, so the value is a single URL.
main_api = (
    'https://protect-training.cc.nih.gov'
    '/TRAINING-IRB/sd/PublicCustomLayouts/PSLib/WebApi'
    '/multiResult?interfaceID=MultiIRBInformation'
)

In [None]:
# Fetch the full listing from the multi-result endpoint.
response = session.get(main_api)
# Fail fast on a 4xx/5xx answer instead of letting an error body reach `.json()`
# further down when the notebook is run top to bottom.
response.raise_for_status()
# Last expression: shows the response (and its status code) as the cell output.
response

In [None]:
# Decode the listing once, then derive both collections from it.
listing_records = response.json()

# Submission IDs, in listing order, duplicates kept.
submission_list = [record['ID'] for record in listing_records]
# Distinct parent-study IDs (a set, despite the `_list` suffix).
study_id_list = {record['parentStudyID'] for record in listing_records}

In [None]:
# Cell output: (number of distinct parent studies, number of submissions).
tuple(len(ids) for ids in (study_id_list, submission_list))

In [None]:
# Common prefix of the Web API endpoints used below.
WEB_API_ROOT = "https://protect-training.cc.nih.gov/TRAINING-IRB/sd/PublicCustomLayouts/PSLib/WebApi"

# Single-record endpoint; the caller appends the resource ID.
study_summary_api = WEB_API_ROOT + "/singleResult?interfaceID=BasicIRBInformation&resourceID="
# Multi-record endpoint; the caller appends the quoted ID that completes the `$filter` clause.
# NOTE(review): the filter field is `parentProjectID`, while the IDs used with it are
# collected from `parentStudyID` — confirm these refer to the same value.
multi_record_api = WEB_API_ROOT + "/multiResult?interfaceID=MultiIRBInformation&$filter=parentProjectID eq "
# Same URL as `study_summary_api`; kept as a separate name for submission look-ups.
add_submission_api = study_summary_api

In [None]:
def json_flatten(json_dict):
    """Collapse an arbitrarily nested JSON value into a single-level dict.

    Rules, applied recursively:

    * dict  -> every value is flattened under its *own* key only; parent keys
      are not carried along, so a repeated leaf key overwrites the earlier one.
    * list  -> every element is flattened under the list's key, then each
      resulting key gets a ``_<position>`` suffix (positions start at 1).
    * other -> stored as ``{key: value}``; a bare top-level scalar ends up
      under the empty-string key.

    Empty dicts and empty lists contribute no entries.

    Parameters
    ----------
    json_dict : Any
        Decoded JSON (dict, list or scalar).

    Returns
    -------
    dict
        Flat mapping of leaf key -> scalar value, in traversal order.
    """

    def inner_flatten(node, name=''):
        # Mapping: recurse per entry, labelling each child with its own key.
        if isinstance(node, dict):
            merged = {}
            for child_key, child_value in node.items():
                merged.update(inner_flatten(child_value, child_key))
            return merged

        # Sequence: flatten each element under the inherited name, then tag
        # every produced key with the element's 1-based position.
        if isinstance(node, list):
            numbered = {}
            for position, element in enumerate(node, start=1):
                for leaf_key, leaf_value in inner_flatten(element, name).items():
                    numbered[f'{leaf_key}_{position}'] = leaf_value
            return numbered

        # Leaf value.
        return {name: node}

    return inner_flatten(json_dict)

In [None]:
def json_structure(f):
    """Turn a decoded API response into a DataFrame of flattened records.

    Parameters
    ----------
    f : list | dict | Any
        * list: every element is flattened with ``json_flatten`` and becomes
          its own frame; the frames are stacked in order.
        * dict: only the value stored under the first key is flattened.
        * anything else (or an empty list/dict): nothing to convert.

    Returns
    -------
    pandas.DataFrame
        The stacked frames, keeping each frame's own index (no re-indexing);
        an empty DataFrame when there is nothing to convert.
    """
    if isinstance(f, list):
        payloads = f
    elif isinstance(f, dict) and f:
        # Only the first key's value is used; an empty dict used to raise IndexError.
        payloads = [next(iter(f.values()))]
    else:
        payloads = []

    frames = [pd.json_normalize(json_flatten(payload)) for payload in payloads]
    if not frames:
        # pd.concat refuses an empty list, so return the empty frame explicitly.
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop (quadratic copying).
    return pd.concat(frames)

In [None]:
# For a sample of studies, fetch (a) the submissions filtered by the study ID and
# (b) the study's own summary record, and collect each as flattened frames.
study_submission_frames = []
study_detail_frames = []

# `study_id_list` is a set, and set order for strings changes between kernel
# sessions, so sort before slicing to get the same 10 studies on every run.
# Empty/None IDs are skipped: they cannot be appended to a URL below.
sampled_study_ids = sorted(study_id for study_id in study_id_list if study_id)[:10]

for i in sampled_study_ids:
    all_submission_api = multi_record_api + f"'{i}'"
    study_det_api = study_summary_api + i

    response1 = session.get(all_submission_api)
    response1.raise_for_status()  # stop on HTTP errors rather than loading an error body
    study_submission_frames.append(json_structure(response1.json()))

    response2 = session.get(study_det_api)
    response2.raise_for_status()
    study_detail_frames.append(json_structure(response2.json()))

# Concatenate once per table; pd.concat rejects an empty list, hence the fallback.
all_submission_df = pd.concat(study_submission_frames) if study_submission_frames else pd.DataFrame()
study_details_df = pd.concat(study_detail_frames) if study_detail_frames else pd.DataFrame()

In [None]:
# Fetch the single-record details for a sample of submissions (first 2 in listing order).
submission_detail_frames = []

for i in submission_list[:2]:
    submission_det_api = add_submission_api + i
    response3 = session.get(submission_det_api)
    response3.raise_for_status()  # stop on HTTP errors rather than loading an error body
    submission_detail_frames.append(json_structure(response3.json()))

# Concatenate once; pd.concat rejects an empty list, hence the fallback.
submission_details_df = (
    pd.concat(submission_detail_frames) if submission_detail_frames else pd.DataFrame()
)

In [None]:
# One-column frame holding only the parent study identifier of each study record.
study_key = study_details_df.loc[:, ['parentStudyID']]

In [None]:
# Columns exported to the study-details table, grouped by field prefix.
# Selecting with a list raises KeyError if any of them is absent from the frame.
# NOTE(review): the `pi` group has 'piEmployeeId' where the other two groups have
# '...EmployerID' — confirm against the API payload that this is intentional.
study_detail_columns = [
    'parentStudyID',
    # pi* fields
    'piUserId', 'piFirstName', 'piMiddleName', 'piLastName', 'piEarnedDegrees',
    'piNIHADName', 'piSubOrgStatus', 'piOrgStatus', 'piEmployeeId', 'piCompanyID',
    'piEmployerName', 'piEmployerAbbreviation', 'piAccountDisabled', 'piIsActive',
    'piDepartmentID', 'piDepartmentName', 'piDepartmentAbbreviation', 'piEmail', 'piPhone',
    # leadPi* fields
    'leadPiUserId', 'leadPiFirstName', 'leadPiMiddleName', 'leadPiLastName',
    'leadPiEarnedDegrees', 'leadPiNIHADName', 'leadPiSubOrgStatus', 'leadPiOrgStatus',
    'leadPiEmployerID', 'leadPiCompanyID', 'leadPiEmployerName', 'leadPiEmployerAbbreviation',
    'leadPiAccountDisabled', 'leadPiIsActive', 'leadPiDepartmentID', 'leadPiDepartmentName',
    'leadPiDepartmentAbbreviation', 'leadPiEmail', 'leadPiPhone',
    # pc* fields
    'pcUserId', 'pcFirstName', 'pcMiddleName', 'pcLastName', 'pcEarnedDegrees',
    'pcNIHADName', 'pcSubOrgStatus', 'pcOrgStatus', 'pcEmployerID', 'pcCompanyID',
    'pcEmployerName', 'pcEmployerAbbreviation', 'pcAccountDisabled', 'pcIsActive',
    'pcDepartmentID', 'pcDepartmentName', 'pcDepartmentAbbreviation', 'pcEmail', 'pcPhone',
    # study-level fields
    'reviewLevel', 'submittedDate', 'studyType',
]

protect_irb_study_details = study_details_df[study_detail_columns]

In [None]:
# Load the study-details frame into PostgreSQL, replacing the table if it exists.
# NOTE(review): <ID>/<PWD> are placeholders — supply the real credentials from the
# environment or a secrets store rather than typing them into the notebook.
db = create_engine('postgresql://<ID>:<PWD>@10.157.90.23:5432/PROTECT')
# Engine.begin() commits on success, rolls back on error and always releases the
# connection; the previous bare connect() left it open.
with db.begin() as conn:
    # index=False must be the boolean: the string 'false' is not False, so the
    # DataFrame index was still being written as an extra column.
    protect_irb_study_details.to_sql(
        'PROTECT_IRB_STUDY_DETAILS', con=conn, if_exists='replace', index=False
    )

In [None]:
# Columns exported to the submission-details table, grouped by field prefix.
# Selecting with a list raises KeyError if any of them is absent from the frame.
# NOTE(review): the `pi` group has 'piEmployeeId' where the `leadPi` group has
# 'leadPiEmployerID' — confirm against the API payload that this is intentional.
submission_detail_columns = [
    'parentStudyID',
    # pi* fields
    'piUserId', 'piFirstName', 'piMiddleName', 'piLastName', 'piEarnedDegrees',
    'piNIHADName', 'piSubOrgStatus', 'piOrgStatus', 'piEmployeeId', 'piCompanyID',
    'piEmployerName', 'piEmployerAbbreviation', 'piAccountDisabled', 'piIsActive',
    'piDepartmentID', 'piDepartmentName', 'piDepartmentAbbreviation', 'piEmail', 'piPhone',
    # leadPi* fields
    'leadPiUserId', 'leadPiFirstName', 'leadPiMiddleName', 'leadPiLastName',
    'leadPiEarnedDegrees', 'leadPiNIHADName', 'leadPiSubOrgStatus', 'leadPiOrgStatus',
    'leadPiEmployerID', 'leadPiCompanyID', 'leadPiEmployerName', 'leadPiEmployerAbbreviation',
    'leadPiAccountDisabled', 'leadPiIsActive', 'leadPiDepartmentID', 'leadPiDepartmentName',
    'leadPiDepartmentAbbreviation', 'leadPiEmail', 'leadPiPhone',
]

protect_irb_study_submission_details = submission_details_df[submission_detail_columns]

In [None]:
# Load the submission-details frame into PostgreSQL, replacing the table if it exists.
# NOTE(review): <ID>/<PWD> are placeholders — supply the real credentials from the
# environment or a secrets store rather than typing them into the notebook.
db = create_engine('postgresql://<ID>:<PWD>@10.157.90.23:5432/PROTECT')
# Engine.begin() commits on success, rolls back on error and always releases the
# connection; the previous bare connect() left it open.
with db.begin() as conn:
    # index=False must be the boolean: the string 'false' is not False, so the
    # DataFrame index was still being written as an extra column.
    protect_irb_study_submission_details.to_sql(
        'PROTECT_IRB_STUDY_SUBMISSION_DETAILS', con=conn, if_exists='replace', index=False
    )

In [None]:
# Load the per-study submission listing into PostgreSQL, replacing the table if it exists.
# NOTE(review): <ID>/<PWD> are placeholders — supply the real credentials from the
# environment or a secrets store rather than typing them into the notebook.
db = create_engine('postgresql://<ID>:<PWD>@10.157.90.23:5432/PROTECT')
# Engine.begin() commits on success, rolls back on error and always releases the
# connection; the previous bare connect() left it open.
with db.begin() as conn:
    # index=False must be the boolean: the string 'false' is not False, so the
    # DataFrame index was still being written as an extra column.
    all_submission_df.to_sql(
        'PROTECT_IRB_STUDY_SUBMISSION_LIST', con=conn, if_exists='replace', index=False
    )