In [1]:
#Import Libraries
import os
from datetime import datetime, date

import pandas as pd
import psycopg2
import pyodbc
from tqdm.notebook import tqdm

import useful_functions as use

In [2]:
#Connect to OneView
# The password is read from the ONEVIEW_PASSWORD environment variable so that no
# secret is stored in the notebook; a missing variable raises KeyError before
# any connection is attempted. Host, database and user can be overridden the
# same way and otherwise keep their previous values.
# NOTE(review): the password that used to be committed in this cell should be
# rotated.
conn = psycopg2.connect(
    host=os.environ.get(
        'ONEVIEW_HOST',
        'oneview-prd-content-aurora-cluster.cluster-cxgp9osuwqi3.us-east-1.rds.amazonaws.com'),
    database=os.environ.get('ONEVIEW_DATABASE', 'oneview_content'),
    user=os.environ.get('ONEVIEW_USER', 'oneviewadmin'),
    password=os.environ['ONEVIEW_PASSWORD'])

In [7]:
#Connect to EDW
# The EDW credentials are read from the EDW_USERNAME / EDW_PASSWORD environment
# variables. No cell visible in this notebook defines names with those
# spellings, so reading them as bare variables fails on a fresh kernel.
# A missing variable raises KeyError before the ODBC connection is attempted.
username = os.environ['EDW_USERNAME']
password_edw = os.environ['EDW_PASSWORD']
# ODBC connection string for the PRDDW data source.
w = "DSN=PRDDW; UID={}; PWD={}".format(username, password_edw)
AMAEDW = pyodbc.connect(w)

In [8]:
#Get MEs from OneView
# Reads the ME number and `type` column of every row in ONEVIEW.physician
# through the OneView connection.
ov_me_sql = """
    SELECT medical_education_number,type FROM ONEVIEW.physician
    """
ov_me = pd.read_sql_query(sql=ov_me_sql, con=conn)

In [9]:
#Get Party IDs from EDW
# Maps each active key value to its PARTY_ID. KEY_VAL is aliased to ME;
# KEY_TYPE_ID = 18 is presumably the ME-number key type -- confirm.
# The query has no placeholders, so it is a plain (non-f) string.
ME_QUERY = """
    SELECT
    P.PARTY_ID,
    P.KEY_VAL AS ME
    FROM
    AMAEDW.PARTY_KEY P
    WHERE
    P.KEY_TYPE_ID = 18
    AND
    P.ACTIVE_IND='Y'
    """
party_ids = pd.read_sql(con=AMAEDW, sql=ME_QUERY)

In [10]:
#Get TOP and PE from EDW
# One row per MED_PROF record whose THRU_DT is null (presumably the current
# record for the party -- confirm), carrying TOP_ID and EMPLOYER_ID.
MED_QUERY = """
    SELECT
    TOP_ID,
    EMPLOYER_ID,
    PARTY_ID
    FROM
    AMAEDW.MED_PROF
    WHERE
    THRU_DT IS NULL
    """
MED_PROF = pd.read_sql(MED_QUERY, AMAEDW)

In [11]:
#Get MPA codes from EDW
# Active MPA rows, keyed by the (TOP_ID, EMPLOYER_ID) pair they belong to.
# NOTE(review): DESC is a keyword in many SQL dialects; it is selected unquoted
# here exactly as before -- confirm the EDW accepts it as a column name.
MPA_QUERY = """
    SELECT
    MPA_CD,
    TOP_ID,
    EMPLOYER_ID,
    DESC
    FROM
    AMAEDW.MPA
    WHERE
    ACTIVE_IND='Y'
    """
MPA = pd.read_sql(MPA_QUERY, AMAEDW)

In [12]:
#Get specialties from EDW
# Every MED_SPEC row with its PARTY_ID, SPEC_ID and PREFE_LVL.
# NOTE(review): unlike the MED_PROF and MPA queries, this one has no
# THRU_DT / ACTIVE_IND filter -- confirm that is intended.
SPEC_QUERY = """
    SELECT
    M.PARTY_ID,
    M.SPEC_ID,
    M.PREFE_LVL
    FROM
    AMAEDW.MED_SPEC M
    """
SPEC = pd.read_sql(SPEC_QUERY, AMAEDW)

In [13]:
#Get PRA award info from EDW
# PARTY_ID and expiration date of every PRA certificate whose status is
# 'Approved'.
AWARD_QUERY = """
    SELECT
    PARTY_ID,
    EXPIRATION_DT
    FROM
    AMAEDW.PRA_CERTIFICATE
    WHERE
    PRA_STATUS_DESC='Approved'
    """
AWARD = pd.read_sql(AWARD_QUERY, AMAEDW)

In [14]:
#Convert expiration date to datetime
# Overwrites the column in place so it can be compared with datetime values.
AWARD['EXPIRATION_DT'] = pd.to_datetime(AWARD['EXPIRATION_DT'])

In [15]:
#Merge EDW data to OneView to limit universe
# Inner join (the pandas default): only ME numbers present in both the EDW key
# table and OneView survive.
ov_universe = party_ids.merge(
    ov_me,
    left_on='ME',
    right_on='medical_education_number',
)

In [16]:
#Get only active awards
# Keep certificates expiring after the current moment, then keep one row per
# PARTY_ID: the last one after sorting by expiration date (the latest).
wards = (
    AWARD
    .loc[AWARD['EXPIRATION_DT'] > datetime.today()]
    .sort_values('EXPIRATION_DT')
    .drop_duplicates('PARTY_ID', keep='last')
)

In [17]:
#Merge all data
# Left-join every EDW attribute onto the OneView universe so that parties with
# no matching record keep a row with nulls. The PREFE_LVL 1 and 2 specialty rows
# are joined separately; their overlapping columns get the _PRIM / _SEC
# suffixes.
# NOTE(review): a left join repeats the left row once per match, so any party
# with several matching rows in a right-hand table appears more than once in
# ALL -- confirm the right-hand keys are unique.
ALL = (
    ov_universe
    .merge(MED_PROF, on='PARTY_ID', how='left')
    .merge(SPEC[SPEC.PREFE_LVL==1], on='PARTY_ID', how='left')
    .merge(SPEC[SPEC.PREFE_LVL==2], on='PARTY_ID', suffixes = ['_PRIM','_SEC'], how='left')
    .merge(MPA, on=['TOP_ID','EMPLOYER_ID'], how='left')
    .merge(wards, on='PARTY_ID', how='left')
)

In [19]:
#Iterate through the dataframe calculating completeness for each datatype
# For every universe type and every data element, one record is produced with:
#   Universe Total      - number of rows in ALL of that type
#   Complete            - rows of that type where the element is not null
#   Complete and Known  - rows of that type where the element is not null and
#                         differs from the element's 'null' sentinel value
# Counts are taken from boolean masks (sum of True values) rather than by
# building a filtered copy of ALL for each number; the type mask is computed
# once per type instead of three times per element.
# NOTE(review): these are row counts of ALL; if the upstream left merges yield
# more than one row per PARTY_ID the totals are inflated -- confirm uniqueness.
# NOTE(review): EXPIRATION_DT is converted to datetime upstream, so its ''
# sentinel presumably never matches and both counts are equal -- confirm.
today = date.today()
types = ['Physician','Student','Resident']
elements = [
    {'data_element': 'TOP',
     'element': 'TOP_ID',
     'null': 115.0},
    {'data_element': 'PE',
     'element': 'EMPLOYER_ID',
     'null': 391.0},
    {'data_element': 'MPA',
     'element': 'MPA_CD',
     'null': 'NCL'},
    {'data_element': 'Primary Specialty',
     'element': 'SPEC_ID_PRIM',
     'null': 1883},
    {'data_element': 'Secondary Specialty',
     'element': 'SPEC_ID_SEC',
     'null': 1883},
    {'data_element': 'PRA Award Flag',
     'element': 'EXPIRATION_DT',
     'null': ''}
]
dict_list = []
for typo in types:
    # Row mask for this universe, reused for every element below.
    in_universe = ALL['type'] == typo
    total = int(in_universe.sum())
    for element in elements:
        values = ALL[element['element']]
        populated = in_universe & values.notna()
        complete = int(populated.sum())
        # NaN != sentinel is True, so the not-null mask must stay in the test.
        true_complete = int((populated & (values != element['null'])).sum())
        new_dict = {
            'Universe': typo,
            'Data Element': element['data_element'],
            'Complete': complete,
            'Complete and Known': true_complete,
            'Universe Total': total,
            'Date': today,
            'Measure':'Completeness'
        }
        dict_list.append(new_dict)

In [21]:
#Save
# Writes the completeness records to a dated CSV without the index column.
# The file name reuses `today`, the value stored in each record's 'Date' field,
# so the name and the contents cannot disagree if the run crosses midnight.
pd.DataFrame(dict_list).to_csv(f'../../Data/Measurement/Practice_Completeness_{today}.csv', index=False)