In [None]:
import json
from dataclasses import dataclass, asdict
from typing import Union

import matplotlib.pyplot as plt
import pandas as pd
import requests
import xmltodict
from fuzzywuzzy import fuzz

In [None]:
@dataclass
class NumberSearch:
    """Query parameters for looking up a registry record by its NPI number.

    ``get_results`` converts an instance with ``asdict`` and sends every field
    as a query parameter of the same name, so field names and defaults are
    part of the request.
    """
    # NPI number to look up; the '' default sends the parameter empty.
    number: Union[int, str] = ''
    # Sent as the ``version`` query parameter.
    version: str = '2.1'
    # Sent as the ``enumeration_type`` query parameter.
    enumeration_type: str = 'NPI-2'

In [None]:
@dataclass
class Search:
    """Query parameters for an organisation search against the NPI registry.

    ``get_results`` converts an instance with ``asdict`` and sends every field
    as a query parameter of the same name. Fields left at '' are sent empty.
    Positional order matters: callers build searches as
    ``Search(name, state, city, postal_code, taxonomy_desc)``.
    """
    organization_name: str = ''
    state: str = ''
    city: str = ''
    # Kept as text: callers pass zip strings such as '08103' or '140943201'.
    postal_code: str = ''
    taxonomy_desc: str = ''
    version: str = '2.1'
    enumeration_type: str = 'NPI-2'

In [None]:
def get_results(search, timeout=30):
    """Send one search to the NPI registry API and return the decoded JSON.

    Args:
        search: A dataclass instance (``Search`` or ``NumberSearch``); its
            fields become the query parameters via ``asdict``.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            ``timeout`` expires.
        ValueError: if the body cannot be decoded as JSON.
    """
    url = 'https://npiregistry.cms.hhs.gov/api/'
    parameters = asdict(search)
    # The version used to be baked into the URL *and* sent as a parameter,
    # which put ``version`` in the query string twice. Send it once, and only
    # fall back to 2.1 when the search object carries no version of its own.
    parameters.setdefault('version', '2.1')
    # Without a timeout a stalled connection blocks the calling loop forever.
    response = requests.get(url, params=parameters, timeout=timeout)
    return response.json()

In [None]:
def parse_results(results, customer_key):
    """Flatten an NPI registry search response into one row dict per result.

    Args:
        results: Decoded JSON response, e.g. the return value of
            ``get_results``. Read keys: 'result_count' and 'results'.
        customer_key: Value stored under 'CUSTOMER_KEY' on every row.

    Returns:
        A list with one dict per entry of ``results['results']``. The list is
        empty when 'result_count' is missing or not greater than zero.
        Optional parts a record does not carry (other names, taxonomies, a
        MAILING or a LOCATION address) are filled with the string "None",
        the same placeholder already used for a missing other name.
    """
    dict_list = []
    try:
        count = results['result_count']
    except KeyError:
        # No 'result_count' in the response: report "nothing found".
        return dict_list
    if not count > 0:
        return dict_list

    address_fields = ('address_1', 'postal_code', 'city', 'state')
    for result in results['results']:
        basic = result['basic']
        other_names = result.get('other_names') or []
        taxonomies = result.get('taxonomies') or []

        # Fresh placeholders for every result. Previously these values lived
        # in variables shared across iterations, so a record lacking one
        # address purpose silently inherited the previous record's address
        # (or raised UnboundLocalError when it was the first record).
        mailing = dict.fromkeys(address_fields, "None")
        location = dict.fromkeys(address_fields, "None")
        for address in result.get('addresses') or []:
            purpose = address['address_purpose']
            # When a purpose appears more than once the last entry wins.
            if purpose == 'MAILING':
                mailing = address
            if purpose == 'LOCATION':
                location = address

        dict_list.append({
            'CUSTOMER_KEY': customer_key,
            'NAME': basic['organization_name'],
            'OTHER_NAME': other_names[0]['organization_name'] if other_names else "None",
            'NPI': result['number'],
            'TAXONOMY': taxonomies[0]['desc'] if taxonomies else "None",
            'SUBPART': basic['organizational_subpart'],
            'MAILING_ADDRESS': mailing['address_1'],
            'MAILING_ZIP': mailing['postal_code'],
            'MAILING_CITY': mailing['city'],
            'MAILING_STATE': mailing['state'],
            'LOCATION_ADDRESS': location['address_1'],
            'LOCATION_ZIP': location['postal_code'],
            'LOCATION_CITY': location['city'],
            'LOCATION_STATE': location['state'],
            'UPDATED': basic['last_updated'],
        })
    return dict_list

In [None]:
def get_npi_info(results, customer_number):
    """Flatten the first record of an NPI registry response into a row dict.

    Args:
        results: Decoded JSON response, e.g. the return value of
            ``get_results``. Only the first entry of 'results' is read.
        customer_number: Value stored under 'CUSTOMER_NUMBER' on the row.

    Returns:
        A one-element list holding the row dict, or an empty list when the
        response has no 'results' entries. A missing other name or a missing
        MAILING / LOCATION address is filled with the string "None".
    """
    records = results.get('results') or []
    if not records:
        # Previously an empty response raised IndexError / KeyError here.
        return []
    result = records[0]
    other_names = result.get('other_names') or []

    # Placeholders so a record with only one address purpose no longer raises
    # UnboundLocalError when the row is assembled.
    address_fields = ('address_1', 'postal_code', 'city', 'state')
    mailing = dict.fromkeys(address_fields, "None")
    location = dict.fromkeys(address_fields, "None")
    for address in result.get('addresses') or []:
        purpose = address['address_purpose']
        # When a purpose appears more than once the last entry wins.
        if purpose == 'MAILING':
            mailing = address
        if purpose == 'LOCATION':
            location = address

    new_dict = {
        'CUSTOMER_NUMBER': customer_number,
        'NAME': result['basic']['organization_name'],
        'OTHER_NAME': other_names[0]['organization_name'] if other_names else "None",
        'NPI': result['number'],
        'MAILING_ADDRESS': mailing['address_1'],
        'MAILING_ZIP': mailing['postal_code'],
        'MAILING_CITY': mailing['city'],
        'MAILING_STATE': mailing['state'],
        'LOCATION_ADDRESS': location['address_1'],
        'LOCATION_ZIP': location['postal_code'],
        'LOCATION_CITY': location['city'],
        'LOCATION_STATE': location['state'],
    }
    return [new_dict]

In [None]:
# Smoke test: look up a single NPI number and flatten the first record.
# 55 is the value stamped on the returned row as 'CUSTOMER_NUMBER'.
x = NumberSearch(1720182835)
get_npi_info(get_results(x), 55)

In [None]:
import datamart

In [None]:
customers = datamart.get_datamart_results((6),(2020))

In [None]:
customer_list = customers.drop_duplicates('CUSTOMER_NAME')

In [None]:
# Search the registry by name alone for every distinct customer and pool
# whatever rows come back in dict_list.
dict_list = []
for row in customer_list.itertuples():
    # Strip '+' characters and append '*' before searching.
    cus_name = f"{row.CUSTOMER_NAME.replace('+', '')}*"
    new_results = get_results(Search(cus_name))
    parsed = parse_results(new_results, row.CUSTOMER_KEY)
    print(parsed)
    if not parsed:
        print('No results found')
    else:
        dict_list.extend(parsed)
    print('---')

In [None]:
index = 0
new_list = []

In [None]:
orgs

In [None]:
orgs = orgs.fillna('None')

In [None]:
# Look up every organisation from `index` onward, loosening the search until
# something is found:
#   1. cleaned name + state + city + 5-character zip
#   2. name with '+' removed and a trailing '*' + state + city
#   3. state + city + zip with no name at all
# `index` advances once per finished row, so re-running this cell after an
# interruption continues with the row that did not finish (as long as
# `index` and `new_list` have not been reset in between).
for row in orgs[index:].itertuples():
    print('')
    # str() first: slicing fails when the zip column is not text. This is the
    # same form the resident-customer version of this loop uses further down.
    zipcode = str(row.Zipcode)[0:5]
    name = row.Company_Name.replace('.','')
    new_results = get_results(Search(name, row.State, row.City, zipcode))
    parsed = parse_results(new_results, row.Customer_Number)
    if not parsed:
        print('No results found')
        cus_name = name.replace('+','')+'*'
        new_results = get_results(Search(cus_name, row.State, row.City))
        parsed = parse_results(new_results, row.Customer_Number)
        if not parsed:
            print('ahh')
            # NOTE(review): this last attempt sends the untruncated
            # row.Zipcode rather than the 5-character `zipcode` - confirm
            # that is intended.
            new_results = get_results(Search("",row.State, row.City, row.Zipcode))
            parsed = parse_results(new_results, row.Customer_Number)
    # One entry per organisation, possibly an empty list.
    new_list.append(parsed)
    if parsed:
        print(F'{row.Company_Name} matched to {parsed[0]["NAME"]}')
    else:
        print(F'{row.Company_Name} needs help')
    index += 1

In [None]:
index

In [None]:
orgs = orgs.fillna('None')

In [None]:
# Load the unmatched-organisation extract and rename the headers containing
# spaces so the columns can be read as attributes on itertuples() rows.
# NOTE(review): cells above this one already read `orgs`; on a fresh
# top-to-bottom run this assignment comes too late for them.
orgs = pd.read_csv("../../Data/Credentialling/Unmatched.csv")
orgs = orgs.rename(columns={'Customer Number': 'Customer_Number','Company Name': 'Company_Name','Zip Code': 'Zipcode'})

In [None]:
orgs.head()

In [None]:
get_results(Search('EASTERN NIAGARA HOSPITAL*', 'NY','LOCKPORT','140943201'))

In [None]:
zz

In [None]:
# Flatten the per-organisation result lists into one list of rows,
# skipping organisations that produced nothing.
better_list = [
    other_thing
    for thing in new_list
    if thing
    for other_thing in thing
]

In [None]:
matched = pd.DataFrame(better_list)

In [None]:
len(orgs)

In [None]:
matched_customers = list(matched['CUSTOMER_KEY'])

In [None]:
orgs[orgs['Customer_Number'].isin(matched_customers)==False]

In [None]:
orgs[:9456]

In [None]:
matched.to_csv('../../Data/Credentialling/NPI_Customer_Address_Match.csv', index=False)

In [None]:
len(new_list)

In [None]:
new_list

In [None]:
# One flat list holding every row from every entry of new_list.
other_list = [thing_2 for thing in new_list for thing_2 in thing]

In [None]:
new_matched = pd.DataFrame(other_list)

In [None]:
one_more_time = orgs[orgs.Customer_Number.isin(new_matched['CUSTOMER_KEY'])==False]

In [None]:
one_more_time

In [None]:
listy =[]

In [None]:
# Second pass over the organisations that are still unmatched: drop the final
# word of the name, add a trailing '*', and search again with state, city and
# 5-character zip. Rows that come back are pooled in `listy`.
for row in one_more_time.itertuples():
    print('')
    # str() first: slicing fails when the zip column is not text.
    zipcode = str(row.Zipcode)[0:5]
    name = row.Company_Name.replace('.','')
    # Remove only the trailing word. The earlier str.replace form removed
    # every occurrence of that word, and when the name ended in a space it
    # replaced ' ' itself, deleting all spaces from the name.
    # A single-word name is kept whole.
    name = name.strip().rsplit(' ', 1)[0].rstrip() + '*'
    new_results = get_results(Search(name, row.State, row.City, zipcode))
    parsed = parse_results(new_results, row.Customer_Number)
    if not parsed:
        print('No results found')
    listy+=parsed
    if parsed:
        print(F'{row.Company_Name} matched to {parsed[0]["NAME"]}')
    else:
        print(F'{row.Company_Name} needs help')

In [None]:
philly = pd.read_csv('../../Data/Credentialling/Philly_Place.csv')

In [None]:
# Search the registry for each row of `philly` using name + '*', state, city,
# short zip and the taxonomy description filter 'GENERAL ACUTE*'.
# x_list keeps every Search object in loop order so a single one can be
# inspected or adjusted afterwards; bopp pools the parsed rows.
# NOTE(review): philly[12:] skips the first 12 rows; the reason is not
# visible in this notebook.
x_list = []
bopp = []
for row in philly[12:].itertuples():
    print(' ')
    print(' ')
    print(row.CUSTOMER_NAME)
    x = Search(row.CUSTOMER_NAME+'*', row.State,row.City,row.Short_Zip, 'GENERAL ACUTE*')
    x_list.append(x)
    # parse_results stores its second argument under 'CUSTOMER_KEY', so for
    # these rows that column holds the customer *name*.
    bopp+=parse_results(get_results(x), row.CUSTOMER_NAME)

In [None]:
number = 89
x_list[number]

In [None]:
# Manually adjust one stored search and run it again.
# These assignments change the Search object held in x_list in place, so any
# later read of x_list[number] sees the edited values.
# x_list[number].organization_name='best impression*'
x_list[number].postal_code='08103'
# Clearing taxonomy_desc sends that filter empty.
x_list[number].taxonomy_desc=''
# x_list[number].city='Philadelphia'
this = x_list[number]
get_results(this)

In [None]:
len(listy)

In [None]:
other_list+=listy

In [None]:
new_matched.drop_duplicates('CUSTOMER_KEY')

In [None]:
orgs_2 = pd.read_csv("../../Data/Credentialling/NPI_Customer_Address_Match.csv")

In [None]:
all_things = pd.concat([orgs_2, new_matched])

In [None]:
# keep=False discards every member of a duplicated group, so `solos` holds
# the customers that have exactly one row; `dupos` is everything else.
solos = all_things.drop_duplicates(subset='CUSTOMER_KEY', keep=False)
dupos = all_things[~all_things.CUSTOMER_KEY.isin(solos.CUSTOMER_KEY)]

In [None]:
dupos[dupos.CUSTOMER_KEY==2106006]

In [None]:
org_addresses = pd.read_excel('../../Data/Credentialling/Org_Addresses.xlsx')

In [None]:
customers = pd.read_csv('../../Data/Credentialling/Customers.csv')

In [None]:
cust_list

In [None]:
# Side-by-side view of one customer (1051645): the customer's own address
# columns next to the registry columns of its candidate matches.
# NOTE(review): ATTEMPTS is only assigned in a later cell of this notebook,
# so this cell fails on a fresh top-to-bottom run.
try_1 = ATTEMPTS[ATTEMPTS.CUSTOMER_NBR==1051645][['CUSTOMER_NAME',
       'CUSTOMER_CATEGORY_DESC',
       'Street 1ST', 'City', 'State', 'Zip Code',
       'NAME',
       'OTHER_NAME', 'NPI', 'TAXONOMY', 'SUBPART', 'MAILING_ADDRESS',
       'MAILING_ZIP', 'MAILING_CITY', 'MAILING_STATE', 'LOCATION_ADDRESS',
       'LOCATION_ZIP', 'LOCATION_CITY', 'LOCATION_STATE', 'UPDATED']]

In [None]:
CUSTOMER = pd.merge(customers, org_addresses, left_on='CUSTOMER_NBR',right_on='Customer Number')

In [None]:
ATTEMPTS = pd.merge(CUSTOMER, dupos, left_on='CUSTOMER_NBR', right_on='CUSTOMER_KEY', suffixes = ['','_NBR'])

In [None]:
cust_list = list(ATTEMPTS.CUSTOMER_NBR.unique())

In [None]:
ATTEMPTS = ATTEMPTS.sort_values('UPDATED', ascending=False)

In [None]:
# Choose one NPI for every customer that has several candidate rows.
# ATTEMPTS is expected to be sorted already (an earlier cell sorts it by
# 'UPDATED', newest first), so "first row" below means the most recently
# updated candidate that survives the filters.
listie = []
for number in cust_list:
    print('')
    new_df = ATTEMPTS[ATTEMPTS.CUSTOMER_NBR==number]
    new_df = new_df.drop_duplicates(subset = ['NPI'])
    print(len(new_df))
    if new_df['CUSTOMER_CATEGORY_DESC'].iloc[0] == 'Hospital':
        # TAXONOMY holds the description text stored by parse_results. The
        # former literal 'GENERAL_ACUTE_CARE_HOSPITAL' never equals that
        # text, which emptied new_df for every hospital. Use the same
        # spelling the duplicate-resolution cell further down filters on.
        new_df = new_df[new_df.TAXONOMY == 'General Acute Care Hospital']
    print(len(new_df))
    if len(new_df) > 1:
        # Still ambiguous: keep only rows whose SUBPART is 'NO'.
        new_df = new_df[new_df.SUBPART=='NO']
    if len(new_df) > 0:
        new_dict = {
            'CUSTOMER_NBR': number,
            'NPI': new_df['NPI'].tolist()[0]
        }
        listie.append(new_dict)

In [None]:
len(listie)

In [None]:
zee = pd.DataFrame(listie)

In [None]:
EZZ = pd.merge(zee, dupos, left_on=['CUSTOMER_NBR','NPI'], right_on=['CUSTOMER_KEY','NPI'])

In [None]:
hope = pd.concat([solos, EZZ])

In [None]:
LETSGO = pd.merge(CUSTOMER, hope, left_on='CUSTOMER_NBR', right_on='CUSTOMER_KEY', suffixes = ['','_NBR'])

In [None]:
LETSGO.to_csv('../../Data/Credentialling/Matched_Customers.csv',index=False)

In [None]:
pd.DataFrame(bopp)[['CUSTOMER_KEY', 'NAME', 'OTHER_NAME', 'NPI', 'TAXONOMY', 'SUBPART',
       'MAILING_ADDRESS',  'LOCATION_ADDRESS', 'MAILING_CITY', 'MAILING_STATE',
         'LOCATION_CITY', 'LOCATION_STATE']][134:]

In [None]:
len(pd.DataFrame(bopp))

In [None]:
customers = pd.read_csv("../../Data/Credentialling/Resident_cust.csv")

In [None]:
orgs = customers.rename(columns={'Customer Number': 'Customer_Number','Company Name': 'Company_Name','Zip Code': 'Zipcode'})

In [None]:
len(orgs)

In [None]:
index = 0

In [None]:
new_list =[]

In [None]:
orgs = orgs.fillna('None')

In [None]:
# Look up every organisation from `index` onward, loosening the search until
# something is found:
#   1. cleaned name + state + city + 5-character zip
#   2. name with '+' removed and a trailing '*' + state + city
#   3. state + city + the untruncated row.Zipcode, with no name at all
# `index` advances once per finished row, so re-running this cell after an
# interruption continues with the row that did not finish (as long as
# `index` and `new_list` have not been reset in between).
for row in orgs[index:].itertuples():
    print('')
    zipcode = str(row.Zipcode)[0:5]
    name = row.Company_Name.replace('.','')
    new_results = get_results(Search(name, row.State, row.City, zipcode))
    parsed = parse_results(new_results, row.CUSTOMER_NBR)
    if not parsed:
        print('No results found')
        cus_name = name.replace('+','')+'*'
        new_results = get_results(Search(cus_name, row.State, row.City))
        parsed = parse_results(new_results, row.CUSTOMER_NBR)
        if not parsed:
            print('ahh')
            new_results = get_results(Search("",row.State, row.City, row.Zipcode))
            parsed = parse_results(new_results, row.CUSTOMER_NBR)
    # One entry per organisation, possibly an empty list.
    new_list.append(parsed)
    if parsed:
        print(F'{row.Company_Name} matched to {parsed[0]["NAME"]}')
    else:
        print(F'{row.Company_Name} needs help')
    index += 1

In [None]:
len(new_list)

In [None]:
# One flat list holding every row from every entry of new_list.
listy = [match for thing in new_list for match in thing]

In [None]:
XYZ = pd.merge(pd.DataFrame(listy), orgs, left_on='CUSTOMER_KEY', right_on='CUSTOMER_NBR')

In [None]:
XYZ[['MAILING_ADDRESS', 'LOCATION_ADDRESS','Street 1ST', 'Zipcode','LOCATION_ZIP']][0:10]

In [None]:
def check_match(thing_1, thing_2):
    '''Return 1 when two text values look like the same thing, otherwise 0.

    Two strings match when they are equal, when one contains the other, or
    when ``fuzz.ratio`` scores them above 70.

    Blank strings never match: ``'' in 'ABC'`` is True in Python, so an empty
    value used to match everything. Values that are not strings (None, or NaN
    left behind by a pandas merge) are compared by equality only instead of
    raising TypeError on the containment test.
    '''
    if not (isinstance(thing_1, str) and isinstance(thing_2, str)):
        return 1 if thing_1 == thing_2 else 0
    if not thing_1.strip() or not thing_2.strip():
        return 0
    if thing_1 == thing_2:
        return 1
    if thing_1 in thing_2 or thing_2 in thing_1:
        return 1
    if fuzz.ratio(thing_1, thing_2) > 70:
        return 1
    return 0

In [None]:
Z='VICTO'
Z[0:4]

In [None]:
# Score every candidate row: does the registry record agree with the customer
# on name, street address and zip? Each flag is 1 or 0.
dict_list = []
for row in XYZ.itertuples():
    # Name: the registry name first, its other name only if that fails.
    name_match = (
        check_match(row.NAME, row.CUSTOMER_NAME.upper())
        or check_match(row.OTHER_NAME, row.CUSTOMER_NAME.upper())
    )
    # Address: the location address first, then the mailing address.
    add_match = (
        check_match(row.LOCATION_ADDRESS, row.street_1.upper())
        or check_match(row.MAILING_ADDRESS, row.street_1.upper())
    )
    # Zip: identical values, or identical first five characters as text.
    same_zip = (
        row.Zipcode == row.LOCATION_ZIP
        or str(row.Zipcode)[0:5] == str(row.LOCATION_ZIP)[0:5]
    )
    zip_match = 1 if same_zip else 0
    new_dict = dict(
        NPI=row.NPI,
        CUSTOMER_NBR=row.CUSTOMER_NBR,
        NAME_MATCH=name_match,
        ADD_MATCH=add_match,
        ZIP_MATCH=zip_match,
    )
    dict_list.append(new_dict)

In [None]:
matches = pd.DataFrame(dict_list)

In [None]:
tutu = pd.merge(XYZ, matches, on=['CUSTOMER_NBR','NPI'])

In [None]:
# Accept a row when the name matched and at least one of zip / street
# address matched as well.
YES = tutu[(tutu.NAME_MATCH == 1) & ((tutu.ZIP_MATCH == 1) | (tutu.ADD_MATCH == 1))]

In [None]:
yess = YES.drop_duplicates('CUSTOMER_NBR', keep=False)

In [None]:
duplicado = YES[YES.CUSTOMER_NBR.isin(yess.CUSTOMER_NBR)==False]

In [None]:
doubles = duplicado.drop_duplicates('CUSTOMER_NBR')['CUSTOMER_NBR']

In [None]:
doubles

In [None]:
# Resolve customers that still have several accepted candidates.
# For each customer the candidates are narrowed step by step:
#   - prefer taxonomy 'General Acute Care Hospital'; if none, rows where both
#     address and zip matched; if none, rows with SUBPART 'NO'; if none, skip.
#   - if more than one row is left, require address+zip match, then
#     SUBPART 'NO'.
# Exactly one survivor -> recorded in NEW_LIST.
# Still more than one  -> rows are collected into `df` for a later tie-break.
NEW_LIST =[]
df = pd.DataFrame()
# Frames are gathered in a list and concatenated once after the loop instead
# of re-concatenating onto a growing (initially empty) DataFrame every time.
unresolved_frames = []
for double in doubles:
    print('')
    print(double)
    new_df = duplicado[duplicado.CUSTOMER_NBR == double]
    print(len(new_df))
    new_df_2 = new_df[new_df.TAXONOMY == 'General Acute Care Hospital']
    if len(new_df_2)== 0:
        new_df_2 = new_df[(new_df.ADD_MATCH == 1)& (new_df.ZIP_MATCH==1)]
        if len(new_df_2)== 0:
            new_df_2 = new_df[new_df.SUBPART == 'NO']
            if len(new_df_2)==0:
                print('wtf')
                continue
    if len(new_df_2) >1:
        # NOTE(review): when this filter leaves zero rows the customer is
        # dropped without any message - confirm that is intended.
        new_df_2 = new_df_2[(new_df_2.ADD_MATCH == 1)& (new_df_2.ZIP_MATCH==1)]
        if len(new_df_2)> 1:
            new_df_2 = new_df_2[new_df_2.SUBPART == 'NO']
            if len(new_df_2)>1:
                print('wtf 2')
                print(len(new_df_2))
                unresolved_frames.append(new_df_2)
    if len(new_df_2) == 1:
        print('MATCH!')
        npi = new_df_2.iloc[0]['NPI']
        NEW_DICT = {
            'NPI': npi,
            'CUSTOMER_NBR': double
        }
        NEW_LIST.append(NEW_DICT)
# pd.concat raises on an empty list, so `df` stays an empty frame when
# nothing was left unresolved.
if unresolved_frames:
    df = pd.concat(unresolved_frames)

In [None]:
len(NEW_LIST)

In [None]:
len(doubles)

In [None]:
duplicado[['NAME', 'OTHER_NAME', 'NPI', 'TAXONOMY', 'SUBPART',
       'MAILING_ADDRESS', 'MAILING_ZIP', 
       'LOCATION_ADDRESS', 'LOCATION_ZIP', 
       'UPDATED', 'CUSTOMER_NAME',
       'CUSTOMER_NBR', 'Zipcode', 'street_1', 'NAME_MATCH',
       'ADD_MATCH', 'ZIP_MATCH']]

In [None]:
df

In [None]:
df_1 = df.sort_values('UPDATED').drop_duplicates('CUSTOMER_NBR', keep='last')

In [None]:
pd.DataFrame(NEW_LIST)['NPI']

In [None]:
yess['NPI']

In [None]:
NEEPS= list(df_1['NPI']) + list(yess['NPI']) + list(pd.DataFrame(NEW_LIST)['NPI'])

In [None]:
matches

In [None]:
# Write the final selection to Resident_places.csv.
# NOTE(review): rows are selected by NPI alone (tutu.NPI.isin(NEEPS)), not by
# the (customer, NPI) pair that was chosen, so every tutu row carrying one of
# these NPIs is written - confirm that is intended.
tutu[tutu.NPI.isin(NEEPS)][['CUSTOMER_KEY', 'NAME', 'OTHER_NAME', 'NPI', 'TAXONOMY',
       'LOCATION_ADDRESS', 'LOCATION_ZIP', 'LOCATION_CITY', 'LOCATION_STATE',
       'CUSTOMER_CATEGORY_DESC', 'CUSTOMER_NAME',
       'CUSTOMER_NBR']].to_csv('../../Data/Credentialling/Resident_places.csv', index=False)

In [None]:
orgs