# In [None]:
import re
import pandas as pd
import requests
from sqlalchemy import create_engine
import psycopg2
from pipelineFunctions import *
from config import *
from db_conn import *
from crinicial_trails_pipeline import *
# Silence urllib3 warnings for the whole process. The session.get calls in this
# file pass verify=False, and urllib3 normally emits an InsecureRequestWarning
# for each unverified HTTPS request.
requests.packages.urllib3.disable_warnings()


def get_pqs_protocol_list_response(session, conf_file):
    """Request the active-protocol list from the PQS API.

    The URL is read from ``conf_file['PQS_API']['pqs_active_protocol_url']``
    and fetched through ``session.get`` with certificate verification
    turned off (``verify=False``).

    Args:
        session: Object exposing ``get(url, verify=...)``; the caller in this
            file passes a ``requests.Session``.
        conf_file: Nested mapping that contains the ``PQS_API`` settings.

    Returns:
        Whatever ``session.get`` returns, unmodified.
    """
    pqs_settings = conf_file['PQS_API']
    list_url = pqs_settings['pqs_active_protocol_url']
    return session.get(list_url, verify=False)

def get_pqs_protocol_detail_response(session, conf_file, protocol_number):
    """Request the detail record of a single protocol from the PQS API.

    The request URL is the configured
    ``conf_file['PQS_API']['pqs_protocol_detail_url']`` prefix with
    ``protocol_number`` appended verbatim (no separator or escaping is added).
    The request is made with certificate verification turned off.

    Args:
        session: Object exposing ``get(url, verify=...)``; the caller in this
            file passes a ``requests.Session``.
        conf_file: Nested mapping that contains the ``PQS_API`` settings.
        protocol_number: String appended to the detail URL prefix.

    Returns:
        Whatever ``session.get`` returns, unmodified.
    """
    pqs_settings = conf_file['PQS_API']
    detail_url = pqs_settings['pqs_protocol_detail_url'] + protocol_number
    return session.get(detail_url, verify=False)

# Field keys handed to generate_ct_tables for every protocol that carries a
# usable NCT number.
ct_data_fields_key = [
    'nctId',
    'orgStudyIdInfo',
    'title',
    'clinicalTrialLink',
    'firstSubmitDate',
    'primaryCompletionDate',
    'lastUpdateSubmitDate',
]

# Per-race count keys summed to get the planned (target) enrollment.
_TARGET_ENROLLMENT_KEYS = (
    'total_american_indian',
    'total_asian',
    'total_black',
    'total_hawaiian',
    'total_more_than_one_race',
    'total_white',
)

# The current enrollment additionally counts participants of unknown race.
_CURRENT_ENROLLMENT_KEYS = _TARGET_ENROLLMENT_KEYS + ('total_unknowns',)


def _sum_enrollment_counts(record, count_keys):
    """Return the sum of ``record[key]`` over ``count_keys``.

    Assumes the counts are numeric -- TODO confirm against the API payload.
    """
    return sum(record[key] for key in count_keys)


def _write_snapshot_table(frame, table_name, conn, **to_sql_kwargs):
    """Stamp ``frame`` with the current time and write it as ``table_name``.

    ``frame`` is modified in place: a ``TimeStamp`` column (truncated to whole
    seconds) is inserted first and the index is shifted to start at 1 so it
    can serve as the ``id`` column. Any existing table is replaced.
    """
    frame.insert(0, 'TimeStamp', pd.to_datetime('now').replace(microsecond=0))
    frame.index += 1
    frame.to_sql(table_name, con=conn, if_exists='replace', index=True,
                 index_label='id', **to_sql_kwargs)


def generate_pqs_tables(conf_file, conn):
    """Rebuild the Protrak and clinical-trial tables from the PQS API.

    Fetches the active protocol list, then the detail record of every
    protocol. Protocols whose detail response does not report
    ``responseCode == 200`` are skipped. Three tables are written through
    ``conn``, each replacing any existing table of the same name:

    * ``Protrak_Data_Table`` -- one row per (protocol, PI) pair.
    * ``Protrak_PI_Table``   -- one row per principal investigator entry.
    * ``CT_Data_Table``      -- rows accumulated by ``generate_ct_tables`` for
      protocols that have an NCT number other than ``"N/A"``.

    Args:
        conf_file: Nested mapping with the ``PQS_API`` URL settings; it is
            also passed through to ``generate_ct_tables``.
        conn: Database connectable handed to ``DataFrame.to_sql``.

    Returns:
        None.

    Raises:
        KeyError: If an API payload lacks one of the keys indexed directly
            below, or if no detail rows / no PI rows were collected (the
            column selection and merge then have no matching columns).
    """
    data_fields = [
        'protocol_number', 'simpleProtocolNumber', 'protocol_title',
        'accrual_inst', 'accrual_status', 'coord_site',
        'protrak_accrual_status', 'research_type', 'research_phase',
        'study_type', 'start_date_of_study', 'primary_completion_date',
        'date_first_part_enrolled', 'irb_name', 'z_number', 'nct_number',
        'currentEnrollment', 'plannedEnrollment',
    ]

    # Sessions are used as context managers so their connection pools are
    # released even when a request or payload lookup fails.
    with requests.Session() as protocol_list_session:
        protocol_list_response = get_pqs_protocol_list_response(protocol_list_session, conf_file)
        active_study_json = protocol_list_response.json()['protocols']

    detail_data = []
    pi_data = []
    ct_data_list = []
    print("active protocol list size: " + str(len(active_study_json)))

    with requests.Session() as protocol_detail_session:
        for protocol in active_study_json:
            protocol_number = protocol['protocolNumber']
            # Keep only the digits of the protocol number.
            simplified_protocol_number = ''.join(re.findall(r'[0-9]+', protocol_number))

            # Parse the detail payload once and reuse it for both the status
            # check and the record itself.
            detail_payload = get_pqs_protocol_detail_response(
                protocol_detail_session, conf_file, protocol_number).json()
            if detail_payload['responseCode'] != 200:
                continue
            detail_json = detail_payload['returnedProtocol']

            # NOTE(review): only the fields assigned below are populated; the
            # remaining data_fields columns stay None for every row.
            detail_row = dict.fromkeys(data_fields, None)
            detail_row['protocol_number'] = protocol_number
            detail_row['simpleProtocolNumber'] = simplified_protocol_number

            # Collect every investigator whose role code is 'PI'.
            for investigator in detail_json['investigators']:
                if investigator['r'] != 'PI':
                    continue
                name = investigator['n']
                pi_data.append({
                    'protocol_number': protocol_number,
                    'simplifiedProtocolNumber': simplified_protocol_number,
                    'firstName': name['fn'],
                    'lastName': name['ln'],
                    'middleName': name['mn'],
                    'piName': '{0} {1} {2}'.format(name['fn'], name['mn'], name['ln']),
                })

            # Enrollment totals come from the first entry of each list; an
            # absent or empty list yields an empty string.
            enrollment_forms = detail_json.get('enrollment_forms', '')
            if enrollment_forms:
                current_enrollment = _sum_enrollment_counts(
                    enrollment_forms[0], _CURRENT_ENROLLMENT_KEYS)
            else:
                current_enrollment = ''

            target_enrollments = detail_json.get('target_enrollments', '')
            if target_enrollments:
                target_enrollment = _sum_enrollment_counts(
                    target_enrollments[0], _TARGET_ENROLLMENT_KEYS)
            else:
                target_enrollment = ''

            detail_row['currentEnrollment'] = current_enrollment
            detail_row['plannedEnrollment'] = target_enrollment

            detail_row['coord_site'] = detail_json.get('coord_site_name', '')
            # Stored under the data_fields column name; the previous
            # 'primaryCompletionDate' key was discarded by the column
            # selection below, leaving the real column always empty.
            detail_row['primary_completion_date'] = detail_json.get('primary_completion_date')

            nct_number = detail_json.get('nct_number', '')
            # Persist the NCT number too: it is an output column but was
            # previously only used for the clinical-trial lookup.
            detail_row['nct_number'] = detail_json.get('nct_number')
            if nct_number and nct_number != "N/A":
                ct_data_list = generate_ct_tables(
                    conf_file, nct_number, ct_data_list, ct_data_fields_key)

            detail_data.append(detail_row)

    pi_table = pd.json_normalize(pi_data)
    detail_table = pd.json_normalize(detail_data)

    # merge defaults to an inner join, so protocols without a PI entry do not
    # appear in the final table and protocols with several PIs appear once
    # per PI.
    final_table = detail_table[data_fields].merge(pi_table, on='protocol_number')
    _write_snapshot_table(final_table, 'Protrak_Data_Table', conn, method='multi')

    _write_snapshot_table(pi_table, 'Protrak_PI_Table', conn, method='multi')

    ct_protocol_table = pd.json_normalize(ct_data_list)
    # This table is written with to_sql's default insert method, as before.
    _write_snapshot_table(ct_protocol_table, 'CT_Data_Table', conn)


if __name__ == "__main__":
    # Script entry point: load the configuration, obtain the database
    # connection from get_db_connection, then rebuild the PQS tables.
    conf_file = load_config_file()
    conn = get_db_connection(conf_file)
    generate_pqs_tables(conf_file, conn)