In [2]:
import pandas as pd
import pyodbc
import os

#connect to db
def datamart_connect():
    """Open a pyodbc connection to the ``PRDDM`` DSN.

    Credentials are taken from the ``DATAMART_USERNAME`` and
    ``DATAMART_PASSWORD`` environment variables so that no secret is stored
    in the notebook source.
    NOTE(review): the password that used to be hard-coded here should be
    treated as exposed and rotated.

    Returns:
        The connection object returned by ``pyodbc.connect``.

    Raises:
        RuntimeError: if either environment variable is unset or empty.
    """
    username = os.environ.get('DATAMART_USERNAME')
    password = os.environ.get('DATAMART_PASSWORD')
    credentials = (('DATAMART_USERNAME', username), ('DATAMART_PASSWORD', password))
    missing = [name for name, value in credentials if not value]
    if missing:
        # Fail before touching the driver so the message names the real problem.
        raise RuntimeError(
            'Missing datamart credentials; set environment variable(s): '
            + ', '.join(missing)
        )
    s = "DSN=PRDDM; UID={}; PWD={}".format(username, password)
    return pyodbc.connect(s)

#define order query
def get_order_query(years):
    """Build the order query restricted to the given years.

    The query selects distinct order rows from ``AMADM.FACT_EPROFILE_ORDERS``
    joined to ``AMADM.DIM_DATE`` (on the order date key) and
    ``AMADM.DIM_PHYSICIAN_HIST`` (on the physician history key), keeping rows
    whose ``D.YR`` is one of ``years``.

    Args:
        years: a single year or an iterable of years (tuple, list, ...).
            Every value is coerced with ``int`` before being placed in the
            SQL text, so a one-element tuple or a list renders as a valid
            ``IN (...)`` list and arbitrary text cannot be injected.

    Returns:
        The SQL text as a string.

    Raises:
        ValueError: if ``years`` is empty or a value is not an integer.
    """
    if isinstance(years, (int, str)):
        # Accept a bare year as a convenience.
        years = [years]
    year_list = ', '.join(str(int(year)) for year in years)
    if not year_list:
        raise ValueError('years must contain at least one year')
    query= \
    f""" 
    SELECT DISTINCT 
    D.FULL_DT,
    H.MED_EDU_NBR AS ME,
    H.PARTY_ID,
    O.ORDER_NBR,
    O.ORDER_PRODUCT_ID, 
    O.ORDER_PHYSICIAN_HIST_KEY,
    O.CUSTOMER_KEY
    FROM 
    AMADM.DIM_DATE D, AMADM.FACT_EPROFILE_ORDERS O, AMADM.DIM_PHYSICIAN_HIST H
    WHERE  
    D.DATE_KEY = O.ORDER_DT_KEY
    AND 
    H.PHYSICIAN_HIST_KEY = O.ORDER_PHYSICIAN_HIST_KEY
    AND
    D.YR in ({year_list})
    """
    return query

#define customer query
def get_customer_query():
    """Return the SQL text selecting the distinct customer attributes
    (key, number, iSell login, name, type and category descriptions)
    from ``AMADM.dim_customer``."""
    return """
    SELECT DISTINCT
    C.CUSTOMER_KEY, 
    C.CUSTOMER_NBR,
    C.CUSTOMER_ISELL_LOGIN,
    C.CUSTOMER_NAME,
    C.CUSTOMER_TYPE_DESC,
    C.CUSTOMER_TYPE,
    C.CUSTOMER_CATEGORY_DESC
    FROM 
    AMADM.dim_customer c  
    """

def newest_delta(path, text):
    """Return the full path of the newest entry in ``path`` whose name contains ``text``.

    "Newest" is decided by ``os.path.getctime``. ``max`` raises ``ValueError``
    when no entry name contains ``text``.
    """
    candidates = []
    for entry in os.listdir(path):
        if text in entry:
            candidates.append(os.path.join(path, entry))
    return max(candidates, key=os.path.getctime)

def fix_me(me_list, width=11):
    """Normalise ME numbers to zero-padded strings.

    Each value is converted with ``str``; a trailing ``'.0'`` (left behind
    when the value was read as a float) is removed, and all-digit values are
    left-padded with zeros to ``width`` characters. Values that are not purely
    digits (for example ``'nan'``) are returned unchanged.

    Args:
        me_list: iterable of ME numbers (ints, floats or strings).
        width: target length of a padded ME number; defaults to 11.

    Returns:
        A list of strings, one per input value, in the same order.
    """
    nums = []
    for num in me_list:
        num = str(num)
        # Strip only a float suffix; '.0' inside the value is left alone.
        if num.endswith('.0'):
            num = num[:-2]
        if num.isdigit():
            num = num.zfill(width)
        nums.append(num)
    return nums

def get_datamart_results(years, order_type='all', customer_type='all'):
    """Query orders and customers from the datamart and join them.

    Prints four row counts along the way: raw orders, raw customers,
    filtered orders and filtered customers.

    Args:
        years: passed to ``get_order_query`` to restrict the order years.
        order_type: ``'reapp'`` keeps product id 4915514, ``'app'`` keeps
            4915513, any other value keeps 4915513 and 4915502.
            NOTE(review): the default branch does not include 4915514 --
            confirm that "all" is meant to exclude it.
        customer_type: ``'all'`` for no category filter, otherwise a single
            ``CUSTOMER_CATEGORY_DESC`` value or a list-like of them.

    Returns:
        DataFrame of the filtered orders inner-joined to the filtered
        customers on ``CUSTOMER_KEY``, with duplicate rows dropped.
    """
    connection = datamart_connect()
    try:
        orders = pd.read_sql(con=connection, sql=get_order_query(years))
        print(len(orders))
        customers = pd.read_sql(con=connection, sql=get_customer_query())
        print(len(customers))
    finally:
        # Release the database handle even when a query fails.
        connection.close()

    # Customers: string key, and drop rows that have no customer name.
    customers.CUSTOMER_KEY = customers.CUSTOMER_KEY.astype(str)
    customers = customers.fillna('None')
    has_name = customers.CUSTOMER_NAME != 'None'
    if isinstance(customer_type, str) and customer_type == 'all':
        class_cust = customers[has_name]
    else:
        # isin() needs a list-like, so wrap a single category name.
        categories = [customer_type] if isinstance(customer_type, str) else customer_type
        class_cust = customers[customers.CUSTOMER_CATEGORY_DESC.isin(categories) & has_name]

    # Orders: keep only the product ids that belong to the requested order type.
    if order_type == 'reapp':
        product_ids = [4915514]
    elif order_type == 'app':
        product_ids = [4915513]
    else:
        product_ids = [4915513, 4915502]
    orders = orders[orders.ORDER_PRODUCT_ID.isin(product_ids)]

    print(len(orders))
    print(len(class_cust))
    results = pd.merge(orders, class_cust, on='CUSTOMER_KEY').drop_duplicates()
    return results

In [3]:
# Orders for 2019-2020 with the default order/customer filters.
# NOTE(review): `all_results` is not referenced again in the cells shown here.
all_results = get_datamart_results(years=(2019, 2020))

1078264
144264
630775
143700


In [4]:
# Same years, restricted to the 'app' order type; this frame feeds the rest of the notebook.
results = get_datamart_results(years=(2019, 2020), order_type='app')

1078264
144264
629975
143700


In [5]:
# Row count of the merged order/customer frame.
results.shape[0]

559716

In [5]:
# Load the resident ME list.
residents_csv = '../../Data/Credentialling/RES_ME_LAST_GME_2019.csv'
residents = pd.read_csv(residents_csv)

In [6]:
# Load the organisation address sheet (joined to orders later via "Customer Number").
addresses_xlsx = '../../Data/Credentialling/Org_Addresses.xlsx'
addresses = pd.read_excel(addresses_xlsx)

In [37]:
# Parse FULL_DT into datetimes so the date-window filter compares dates.
results = results.assign(FULL_DT=pd.to_datetime(results['FULL_DT']))

In [48]:
# Residents whose ME number appears among the orders.
residents.loc[residents['ME'].isin(results['ME'])]

Unnamed: 0,ME
2,00102081382
4,00102090993
7,00102100191
8,00102100719
9,00102101316
...,...
28133,95702110042
28136,95703080023
28138,95707060027
28141,95708910041


In [44]:
# Keep orders dated strictly after 31 May 2019 and strictly before 1 Jan 2020.
after_start = results['FULL_DT'] > "05-31-2019"
before_end = results['FULL_DT'] < "01-01-2020"
results = results[after_start & before_end]

In [16]:
# Inspect the column dtypes of the address sheet. The recorded output shows
# "Customer Number" as int64, which is the type the CUSTOMER_NBR conversion
# in the merge cell has to match.
addresses.dtypes

Customer Number      int64
Company Name        object
Attention First     object
Attention Last      object
Street 1ST          object
Street 2ND          object
Street 3RD         float64
City                object
State               object
Zip Code            object
Phone Number        object
Email Address       object
dtype: object

In [24]:
# CUSTOMER_NBR is converted via float to int and stored as "Customer Number",
# the join key shared with the address sheet.
customer_numbers = results['CUSTOMER_NBR'].astype(float).astype(int)
results['Customer Number'] = list(customer_numbers)
with_adds = results.merge(addresses, on='Customer Number')

In [9]:
# Normalise resident ME numbers to zero-padded strings so they match the order data.
residents = residents.assign(ME=fix_me(residents['ME']))

In [49]:
# Orders placed for physicians whose ME number is in the resident list.
is_resident = results['ME'].isin(residents['ME'])
res_prof = results.loc[is_resident]

In [50]:
# Bare expression: displays the resident order rows selected from `results`.
res_prof

Unnamed: 0,FULL_DT,ME,PARTY_ID,ORDER_NBR,ORDER_PRODUCT_ID,ORDER_PHYSICIAN_HIST_KEY,CUSTOMER_KEY,CUSTOMER_NBR,CUSTOMER_ISELL_LOGIN,CUSTOMER_NAME,CUSTOMER_TYPE_DESC,CUSTOMER_TYPE,CUSTOMER_CATEGORY_DESC,Customer Number
10,2019-07-30,01002131893,5601364,41937867,4915513,5842639,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
12,2019-07-30,03843161382,6444640,41937867,4915513,6525440,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
79,2019-06-03,01002111272,4898163,41397849,4915513,6074292,52350,000002080063,47HP07285,Baptist Memorial Health Care Corporation,Eprofile Webstore Customer,Eprofile,Hospital,2080063
93,2019-08-07,05501150110,6167917,41974732,4915513,6535248,52350,000002080063,47HP07285,Baptist Memorial Health Care Corporation,Eprofile Webstore Customer,Eprofile,Hospital,2080063
103,2019-06-21,67204070048,4946730,41788740,4915513,6971535,52350,000002080063,47HP07285,Baptist Memorial Health Care Corporation,Eprofile Webstore Customer,Eprofile,Hospital,2080063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615174,2019-11-01,91515080045,4505147,42471902,4915513,7382304,55864,000002086212,41HR60061,Primary Health Network,Eprofile Webstore Customer,Eprofile,Group,2086212
615178,2019-11-14,03843150658,6101063,42502144,4915513,7385128,55124,000002194823,,Stafford Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2194823
615182,2019-11-27,05404151080,6167792,42557247,4915513,7419097,53148,000002099049,05AM50421,Truckee Surgery Center,Eprofile Webstore Customer,Eprofile,Ambulatory Care,2099049
615190,2019-11-05,01003110407,4898252,42478670,4915513,7462471,57521,000002099989,33AM64476,Northern Monmouth Regional Surgery Center,Eprofile Webstore Customer,Eprofile,Ambulatory Care,2099989


In [26]:
# Narrow the order/address join to the order date, ME, customer and address columns.
address_columns = [
    'FULL_DT', 'ME', 'CUSTOMER_CATEGORY_DESC', 'CUSTOMER_NAME', 'CUSTOMER_NBR',
    'Company Name', 'Street 1ST', 'Street 2ND', 'Street 3RD',
    'City', 'State', 'Zip Code',
]
with_adds = with_adds.loc[:, address_columns]

In [29]:
# One row per (Company Name, Zip Code) among resident orders, exported for
# hospital / ambulatory-care customers only.
# `res_prof` is built from `results`, which has no address columns, so the
# de-duplication has to start from the address-enriched `with_adds` frame;
# otherwise drop_duplicates raises KeyError on a top-to-bottom run.
resident_adds = with_adds[with_adds['ME'].isin(residents['ME'])]
cust = resident_adds.drop_duplicates(['Company Name', 'Zip Code'])
cust[cust['CUSTOMER_CATEGORY_DESC'].isin(['Ambulatory Care','Hospital'])].to_csv('../../Data/Credentialling/Resident_Cust.csv', index=False)

In [14]:
# Load the PPD extract. ME is read as text so leading zeros are not lost to
# numeric parsing, and low_memory=False makes pandas infer each column's type
# from the whole file instead of chunk by chunk (the recorded output of this
# cell carries a warning fragment, presumably pandas' mixed-dtype warning).
ppd = pd.read_csv(
    '../../Data/PPD/ppd_data_20200718.csv',
    dtype={'ME': str},
    low_memory=False,
)
ppd['ME']=fix_me(ppd['ME'])
ppd.columns

  interactivity=interactivity, compiler=compiler, result=result)


Index(['ME', 'RECORD_ID', 'UPDATE_TYPE', 'ADDRESS_TYPE', 'MAILING_NAME',
       'LAST_NAME', 'FIRST_NAME', 'MIDDLE_NAME', 'SUFFIX', 'MAILING_LINE_1',
       'MAILING_LINE_2', 'CITY', 'STATE', 'ZIP', 'SECTOR', 'CARRIER_ROUTE',
       'ADDRESS_UNDELIVERABLE_FLAG', 'FIPS_COUNTY', 'FIPS_STATE',
       'PRINTER_CONTROL_CODE', 'PC_ZIP', 'PC_SECTOR', 'DELIVERY_POINT_CODE',
       'CHECK_DIGIT', 'PRINTER_CONTROL_CODE_2', 'REGION', 'DIVISION', 'GROUP',
       'TRACT', 'SUFFIX_CENSUS', 'BLOCK_GROUP', 'MSA_POPULATION_SIZE',
       'MICRO_METRO_IND', 'CBSA', 'CBSA_DIV_IND', 'MD_DO_CODE', 'BIRTH_YEAR',
       'BIRTH_CITY', 'BIRTH_STATE', 'BIRTH_COUNTRY', 'GENDER',
       'TELEPHONE_NUMBER', 'PRESUMED_DEAD_FLAG', 'FAX_NUMBER', 'TOP_CD',
       'PE_CD', 'PRIM_SPEC_CD', 'SEC_SPEC_CD', 'MPA_CD', 'PRA_RECIPIENT',
       'PRA_EXP_DT', 'GME_CONF_FLG', 'FROM_DT', 'TO_DT', 'YEAR_IN_PROGRAM',
       'POST_GRADUATE_YEAR', 'GME_SPEC_1', 'GME_SPEC_2', 'TRAINING_TYPE',
       'GME_INST_STATE', 'GME_INST_ID', 'ME

In [52]:
# Only the ME key and the name fields are needed from the PPD extract.
name_columns = ['ME', 'LAST_NAME', 'FIRST_NAME', 'MIDDLE_NAME']
PPD = ppd.loc[:, name_columns]

In [53]:
# Attach physician names to every resident order; the left join keeps orders
# whose ME has no PPD match.
resident_profiles = res_prof.merge(PPD, how='left', on='ME')

In [39]:
# Bare expression: displays `res_prof`.
# NOTE(review): the recorded output lists address columns (Company Name,
# Street 1ST, ..., Zip Code) that a `res_prof` built from `results` does not
# have, which suggests this cell last ran against an address-joined version
# of the frame -- re-run top to bottom to confirm.
res_prof

Unnamed: 0,FULL_DT,ME,CUSTOMER_CATEGORY_DESC,Company Name,Street 1ST,Street 2ND,Street 3RD,City,State,Zip Code
10,2019-07-30,01002131893,Hospital,Kaiser Foundation Hospital,2350 Geary Blvd 3rd Fl,,,San Francisco,CA,941153305
12,2019-07-30,03843161382,Hospital,Kaiser Foundation Hospital,2350 Geary Blvd 3rd Fl,,,San Francisco,CA,941153305
67,2019-03-08,02701131676,Hospital,Baptist Memorial Health Care Corporation,350 N Humphreys Blvd,,,Memphis,TN,381202177
78,2019-06-03,01002111272,Hospital,Baptist Memorial Health Care Corporation,350 N Humphreys Blvd,,,Memphis,TN,381202177
81,2019-05-09,49626110036,Hospital,Baptist Memorial Health Care Corporation,350 N Humphreys Blvd,,,Memphis,TN,381202177
...,...,...,...,...,...,...,...,...,...,...
558686,2020-07-01,13002130844,Health Related,Innovative Health Services,PO Box 778,,,Easton,MD,216018914
558775,2020-07-10,03548100541,Health Related,Clearwater Cardiovascular Consultants,455 Pinellas St Ste 400,,,Clearwater,FL,337563356
558789,2020-06-23,04814151013,Ambulatory Care,Johnson City Eye Surgery Center,110 Med Tech Pkwy Ste 2,,,Johnson City,TN,376044004
558798,2020-07-02,04706151571,Health Related,Mann Eye Institute,18850 S Memorial Dr,,,Humble,TX,773384288


In [61]:
# Number of resident orders (non-null FULL_DT) per customer category.
by_category = resident_profiles.groupby('CUSTOMER_CATEGORY_DESC')
by_category[['FULL_DT']].count()

Unnamed: 0_level_0,FULL_DT
CUSTOMER_CATEGORY_DESC,Unnamed: 1_level_1
Ambulatory Care,2134
Consulting,67
Credit Verification Org,4982
Government,101
Group,2223
Health Related,1941
Hospital,30123
Law,5
Long Term Care,74
Managed Care,9903


In [63]:
# Export the resident orders with physician names.
credentialed_xlsx = '../../Data/Credentialling/Residents_Credentialed_2019-2020.xlsx'
resident_profiles.to_excel(credentialed_xlsx, index=False)

In [62]:
# Row count of the exported resident profile frame.
resident_profiles.shape[0]

61229

In [66]:
# Resident orders placed by hospital or ambulatory-care customers.
facility_categories = ['Hospital', 'Ambulatory Care']
is_facility = resident_profiles['CUSTOMER_CATEGORY_DESC'].isin(facility_categories)
tada = resident_profiles.loc[is_facility]

In [46]:
# Load the resident places file; it is joined to the orders on CUSTOMER_NBR.
places_csv = '../../Data/Credentialling/Resident_places.csv'
places = pd.read_csv(places_csv)

In [47]:
# Bare expression: displays the current `results` frame (after the date-window
# filter, judging by the dates in the recorded output).
results

Unnamed: 0,FULL_DT,ME,PARTY_ID,ORDER_NBR,ORDER_PRODUCT_ID,ORDER_PHYSICIAN_HIST_KEY,CUSTOMER_KEY,CUSTOMER_NBR,CUSTOMER_ISELL_LOGIN,CUSTOMER_NAME,CUSTOMER_TYPE_DESC,CUSTOMER_TYPE,CUSTOMER_CATEGORY_DESC,Customer Number
4,2019-08-01,01643960270,1952382,41941203,4915513,5228627,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
6,2019-06-21,04114780044,2416302,41790193,4915513,5573640,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
10,2019-07-30,01002131893,5601364,41937867,4915513,5842639,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
12,2019-07-30,03843161382,6444640,41937867,4915513,6525440,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
14,2019-07-30,03843160939,6378130,41937867,4915513,6996349,61794,000002100814,05HP07749,Kaiser Foundation Hospital,Eprofile Webstore Customer,Eprofile,Hospital,2100814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624248,2019-12-30,04114991177,2568702,42691704,4915513,8090969,131125,000003397316,,Physicians Of Monmouth Llc.,Eprofile Webstore Customer,Eprofile,Ambulatory Care,3397316
624249,2019-12-30,03306030743,2946731,42691704,4915513,8402147,131125,000003397316,,Physicians Of Monmouth Llc.,Eprofile Webstore Customer,Eprofile,Ambulatory Care,3397316
624250,2019-12-30,03508040783,2972182,42691704,4915513,8422507,131125,000003397316,,Physicians Of Monmouth Llc.,Eprofile Webstore Customer,Eprofile,Ambulatory Care,3397316
624513,2019-12-30,03845940061,2231053,42692104,4915513,8004883,52093,000002105987,11HP51826,Palm Shores Behavioral Health Center,Eprofile Webstore Customer,Eprofile,Hospital,2105987


In [68]:
# Join facility orders to the places file on CUSTOMER_NBR.
# The order frames carry CUSTOMER_NBR as zero-padded text while `places` holds
# int64, so the key is converted here (same float -> int conversion used
# elsewhere in this notebook). This keeps the merge independent of whether the
# separate conversion cell has already run; on an integer column it is a no-op.
tada_keyed = tada.assign(CUSTOMER_NBR=tada['CUSTOMER_NBR'].astype(float).astype(int))
RESIDENTS = pd.merge(tada_keyed, places, on= 'CUSTOMER_NBR')

In [62]:
# Inspect the join key of the places file (recorded output: int64).
places.loc[:, 'CUSTOMER_NBR']

0       2100814
1       2080785
2       2085978
3       2080849
4       2080722
         ...   
1924    2085852
1925    2101048
1926    2084600
1927    2686870
1928    2102226
Name: CUSTOMER_NBR, Length: 1929, dtype: int64

In [64]:
# Convert CUSTOMER_NBR to integers so it can be matched against places['CUSTOMER_NBR'].
# NOTE(review): by execution count this cell (In [64]) ran before the merge
# cell (In [68]) even though it appears after it in the notebook.
customer_nbr_as_int = resident_profiles['CUSTOMER_NBR'].astype(float).astype(int)
resident_profiles['CUSTOMER_NBR'] = list(customer_nbr_as_int)

In [72]:
# Export the resident sample: order fields, physician names and the location
# fields that came from the places file.
sample_columns = [
    'FULL_DT', 'ME', 'PARTY_ID', 'ORDER_PRODUCT_ID',
    'CUSTOMER_CATEGORY_DESC_x',
    'LAST_NAME', 'FIRST_NAME', 'MIDDLE_NAME', 'NAME',
    'OTHER_NAME', 'NPI', 'LOCATION_ADDRESS', 'LOCATION_ZIP',
    'LOCATION_CITY', 'LOCATION_STATE',
]
sample_csv = '../../Data/Credentialling/Resident_Sample_Jun2019-Dec2019.csv'
RESIDENTS[sample_columns].to_csv(sample_csv, index=False)