# Parse data from Google Sheet for tab "normalized-cms"

## Doing
 - [ ] Merge Provider info with IL Facility CSV 
## To Do's
 - [ ] Check Normalized names in spreadsheet, seems not all names Normailized ex: "ACCOLADE HEALTHCARE" and "Accolade Healthcare"
 - [ ] Print list of names that would match if Normailized - Try for each matched Fed ID - check Normalized Names, Facility Name and do lookup to see if matching Facility Names with different Facility Normailized
 - [ ] Lookup from CMS data feed as opposed to the spreadsheet VLOOKUPs
 - [ ] Rename columns to prefix with data source: CTP_LTC_ and CMS_
 - [ ] define and add data quality checks on feeds (ex dup checks if not allowed, etc)
 - [ ] Compare CMS stats just b4 Covid and now
 
## Done
 - [x] Get Counts, Sums and Means by Ownership type and State to see if any State way better/worse based on Ownership Type
 - [x] Get Counts, Sums and Means by Ownership type
 - [x] Join CMS data to State LTC data into DF for Provider Info
 - [x] Create Facility Name to Provider Number for State to CMS mapping.
 - [x] Add "Facility Normalize Name" - which is the name used to match in CMS feed to get Federal Provider Number
 - [x] Check for more than one "Normalized Name" for the same "Facility Name"

In [1]:
from gsheets import Sheets

import json
import pandas as pd
pd.options.display.max_columns = None

# Define Functions

In [2]:
def parse_noarmalized_cms_sheet (df):
    """ # Create DFs for:
        #   1 - no_matches: Lists of "Factility Names" and "Facility Names Normalized" that do not have a matching CMS entry in the normalized-cms sheet
        #   2 - dupNames: Dictionary of "Federal Provider Numbers" that map to list where there are more than one "Facility Name"
        #   3 - CMSID2FacName: Dictionary mapping "Facility Names" to "Federal Provider Numbers"
        #   4 - FacName2CMSId: Dictionary mapping "Federal Provider Numbers" to "Facility Names"
    """
    FacName2CMSId = {}
    CMSID2FacName = {} # Initialize No Match to empty list so can always use extend for this key
    no_matches = {'Facility Names': [],
                  'Facility Names Normalized': []} # List of "Facility Names" without matches in CMS data feed
    dupNames = {} # Dictionary of "Federal Provider Numbers" that have more than one "Facility Name" 
                  #  (ex: 146182': ['Alden Courts of Waterford', 'Alden of Waterford'],
                  #      '146186': ['Alden Estates Courts of Huntley',
                  #                 'Alden Estates-Courts of Huntley'],
                  #       '145872': ['Alden Long Grove Rehab & Health',
                  #                  'Alden of Long Grove Rehab and HCC'], )

    # For Each Federal Provder Number (CMS term) in Provide ID - CMS (CTP term)
    for Fed_Prov_Num in df['Provider ID - CMS'].unique():
        # Since the gsheets api appends comments to the cell value (which for the record can be a PAIN and POTENTIAL PROBLEM)
        #    We change the value to a more understandable "No Match"
        if str(Fed_Prov_Num)[0:4]== "#N/A":
            no_matches['Facility Names'].extend(list(df[df['Provider ID - CMS'] == Fed_Prov_Num]['Facility Name'].unique()))
            no_matches['Facility Names Normalized'].extend(list(df[df['Provider ID - CMS'] == Fed_Prov_Num]['Facility Name Normalized'].unique()))
        else:
            CMSID2FacName[Fed_Prov_Num] = list(df[df['Provider ID - CMS'] == Fed_Prov_Num]['county-facName'].dropna().unique())
            if len(CMSID2FacName[Fed_Prov_Num]) > 1:
                dupNames[Fed_Prov_Num] = CMSID2FacName[Fed_Prov_Num]

            for facilityName in CMSID2FacName[Fed_Prov_Num]:
                FacName2CMSId[facilityName] = Fed_Prov_Num

    no_matches['Facility Names'] = list(dict.fromkeys(no_matches['Facility Names']))
    no_matches['Facility Names Normalized'] = list(dict.fromkeys(no_matches['Facility Names Normalized']))
    return FacName2CMSId, CMSID2FacName, no_matches, dupNames

# Get Info From SpreadSheet

In [3]:
sheets = Sheets.from_files('credentials.json', '~/storage.json')

In [4]:
url = 'https://docs.google.com/spreadsheets/d/14V12Hmx5B3BXsEBklYzDVC3mbnRDPSqJdsXHdzmO6yI'
s = sheets.get(url)

In [17]:
ns_sheet = s.find('normalized-state')
norm_state_df = ns_sheet.to_frame()
norm_state_df = norm_state_df.assign(FacilityNameNormalizedUCase = lambda x: (x['Facility Name'].str.upper()))
norm_state_df['county-facName']= norm_state_df['County'].str.upper() + '-' +norm_state_df['Facility Name'].str.upper()
[FacName2CMSId, CMSID2FacName, no_matches, dupNames] = parse_noarmalized_cms_sheet(norm_state_df)

with open("IL_FacilityName_to_CMS_ID.json", "w") as outfile:  
    json.dump(FacName2CMSId, outfile) 

In [18]:
# Get Quick Stats on number of names
print("Unique Facility Name count: " + str(len(norm_state_df['Facility Name'].unique())))
print("Unique Facility Name UPPER count: " + str(len(norm_state_df['Facility Name'].str.upper().unique())))
print("Unique Facility Normalized count: " + str(len(norm_state_df['Facility Name Normalized'].unique())))
print("Unique Facility Normalized UPPER count: " + str(len(norm_state_df['Facility Name Normalized'].str.upper().unique())))
print("Facilites with no match in CMS: ", len(no_matches['Facility Names']))
print("Number of CMS entries matched to Federal Provider Number: ", len(CMSID2FacName))
print('"Federal Provider Numbers" that have more than one "Facility Name": ', len(dupNames))

Unique Facility Name count: 1214
Unique Facility Name UPPER count: 1201
Unique Facility Normalized count: 2077
Unique Facility Normalized UPPER count: 1282
Facilites with no match in CMS:  726
Number of CMS entries matched to Federal Provider Number:  541
"Federal Provider Numbers" that have more than one "Facility Name":  26


# Load CMS data

In [19]:
# added engine='python' because c engine does not support skipfooter
#cms_df = pd.read_csv('../CMS NursingHome Data/2013-07-01/ProviderInfo.csv', skipfooter=1, engine='python')
cms_df = pd.read_csv('../CMS NursingHome Data/NH_ProviderInfo_2020-10-30.csv', skipfooter=1, engine='python')
cms_df

Unnamed: 0,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Provider Phone Number,Provider SSA County Code,Provider County Name,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Average Number of Residents per Day Footnote,Provider Type,Provider Resides in Hospital,Legal Business Name,Date First Approved to Provide Medicare and Medicaid Services,Continuing Care Retirement Community,Special Focus Status,Abuse Icon,Most Recent Health Inspection More Than 2 Years Ago,Provider Changed Ownership in Last 12 Months,With a Resident and Family Council,Automatic Sprinkler Systems in All Required Areas,Overall Rating,Overall Rating Footnote,Health Inspection Rating,Health Inspection Rating Footnote,QM Rating,QM Rating Footnote,Long-Stay QM Rating,Long-Stay QM Rating Footnote,Short-Stay QM Rating,Short-Stay QM Rating Footnote,Staffing Rating,Staffing Rating Footnote,RN Staffing Rating,RN Staffing Rating Footnote,Reported Staffing Footnote,Physical Therapist Staffing Footnote,Reported Nurse Aide Staffing Hours per Resident per Day,Reported LPN Staffing Hours per Resident per Day,Reported RN Staffing Hours per Resident per Day,Reported Licensed Staffing Hours per Resident per Day,Reported Total Nurse Staffing Hours per Resident per Day,Reported Physical Therapist Staffing Hours per Resident Per Day,Case-Mix Nurse Aide Staffing Hours per Resident per Day,Case-Mix LPN Staffing Hours per Resident per Day,Case-Mix RN Staffing Hours per Resident per Day,Case-Mix Total Nurse Staffing Hours per Resident per Day,Adjusted Nurse Aide Staffing Hours per Resident per Day,Adjusted LPN Staffing Hours per Resident per Day,Adjusted RN Staffing Hours per Resident per Day,Adjusted Total Nurse Staffing Hours per Resident per Day,Rating Cycle 1 Standard Survey Health Date,Rating Cycle 1 Total Number of Health Deficiencies,Rating Cycle 1 Number of Standard Health Deficiencies,Rating Cycle 1 Number of Complaint Health Deficiencies,Rating Cycle 1 Health Deficiency Score,Rating Cycle 1 Number of Health Revisits,Rating Cycle 1 Health Revisit Score,Rating Cycle 1 Total Health Score,Rating Cycle 2 Standard Health Survey Date,Rating Cycle 2 Total Number of Health Deficiencies,Rating Cycle 2 Number of Standard Health Deficiencies,Rating Cycle 2 Number of Complaint Health Deficiencies,Rating Cycle 2 Health Deficiency Score,Rating Cycle 2 Number of Health Revisits,Rating Cycle 2 Health Revisit Score,Rating Cycle 2 Total Health Score,Rating Cycle 3 Standard Health Survey Date,Rating Cycle 3 Total Number of Health Deficiencies,Rating Cycle 3 Number of Standard Health Deficiencies,Rating Cycle 3 Number of Complaint Health Deficiencies,Rating Cycle 3 Health Deficiency Score,Rating Cycle 3 Number of Health Revisits,Rating Cycle 3 Health Revisit Score,Rating Cycle 3 Total Health Score,Total Weighted Health Survey Score,Number of Facility Reported Incidents,Number of Substantiated Complaints,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Processing Date
0,015009,"BURNS NURSING HOME, INC.",701 MONROE STREET NW,RUSSELLVILLE,AL,35653,2563324110,290,Franklin,For profit - Corporation,57,44.9,,Medicare and Medicaid,N,"BURNS NURSING HOME, INC.",1969-09-01,N,,N,N,N,Both,Yes,5.0,,5.0,,5.0,,5.0,,5.0,,5.0,,5.0,,,,2.73869,0.96561,0.91573,1.88134,4.62003,0.00000,2.16170,0.70657,0.32721,3.19548,2.61545,1.00799,1.08075,4.60948,2019-08-21,2,2,0,8,1,0,8,2018-08-01,1,1,0,4,1,0,4,2017-06-22,0,0,0,0,0,0,0,5.333,0,0,0,0,0,0,"701 MONROE STREET NW,RUSSELLVILLE,AL,35653",2020-10-01
1,015010,COOSA VALLEY HEALTHCARE CENTER,260 WEST WALNUT STREET,SYLACAUGA,AL,35150,2562495604,600,Talladega,For profit - Corporation,85,76.1,,Medicare and Medicaid,N,COOSA VALLEY HEALTHCARE CENTER LLC,1967-01-01,N,,N,N,N,Both,Yes,4.0,,3.0,,4.0,,2.0,,5.0,,5.0,,5.0,,,,3.04618,0.96901,0.89255,1.86156,4.90774,0.01443,1.99014,0.69526,0.27881,2.96421,3.15990,1.02799,1.23628,5.27858,2019-06-13,1,1,0,4,1,0,4,2018-06-07,4,4,0,32,1,0,32,2017-04-06,7,7,0,36,1,0,36,18.667,0,0,0,0,0,0,"260 WEST WALNUT STREET,SYLACAUGA,AL,35150",2020-10-01
2,015012,HIGHLANDS HEALTH AND REHAB,380 WOODS COVE ROAD,SCOTTSBORO,AL,35768,2562183708,350,Jackson,Government - County,50,41.0,,Medicare and Medicaid,Y,JACKSON COUNTY HEALTH CARE AUTHORITY,1967-01-01,N,,N,N,N,Resident,Yes,3.0,,2.0,,2.0,,1.0,,3.0,,5.0,,5.0,,,,3.37264,0.63314,1.15569,1.78883,5.16147,0.05686,2.15006,0.72880,0.34471,3.22356,3.23832,0.64077,1.29471,5.10482,2019-06-06,2,2,0,20,1,0,20,2018-05-03,4,4,0,40,1,0,40,2017-03-16,5,5,0,44,1,0,44,30.667,0,0,0,0,0,0,"380 WOODS COVE ROAD,SCOTTSBORO,AL,35768",2020-10-01
3,015014,EASTVIEW REHABILITATION & HEALTHCARE CENTER,7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206,2058330146,360,Jefferson,For profit - Individual,92,79.6,,Medicare and Medicaid,N,BALL HEALTHCARE EASTVIEW INC,1967-01-01,N,,N,N,N,Both,Yes,3.0,,4.0,,1.0,,3.0,,1.0,,4.0,,4.0,,,,2.32558,0.83059,0.59595,1.42654,3.75213,0.00667,1.94321,0.65408,0.28313,2.88042,2.47065,0.93663,0.81286,4.15304,2020-02-20,1,1,0,4,1,0,4,2019-01-24,2,2,0,20,1,0,20,2018-01-25,6,6,0,24,1,0,24,12.667,0,0,0,0,0,0,"7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206",2020-10-01
4,015015,PLANTATION MANOR NURSING HOME,6450 OLD TUSCALOOSA HIGHWAY P O BOX 97,MC CALLA,AL,35111,2054776161,360,Jefferson,For profit - Individual,103,84.0,,Medicare and Medicaid,N,"C & G HEALTHCARE SERVICES, INC.",1971-07-01,N,,N,N,N,Resident,Yes,4.0,,4.0,,2.0,,5.0,,1.0,,2.0,,2.0,,,,2.16341,0.89929,0.34000,1.23928,3.40269,0.00330,1.86332,0.63314,0.26047,2.75693,2.39690,1.04763,0.50409,3.93496,2019-05-02,1,1,0,4,1,0,4,2018-04-19,7,7,0,28,1,0,28,2017-03-09,2,2,0,16,1,0,16,14.000,0,0,2,29611,0,2,"6450 OLD TUSCALOOSA HIGHWAY P O BOX 97,MC CA...",2020-10-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15345,676482,CYPRESS POINTE HEALTH & WELLNESS,8561 EASTON COMMONS DR.,HOUSTON,TX,77095,6512467534,610,Harris,For profit - Corporation,124,15.0,,Medicare and Medicaid,N,ML - EASTON COMMONS LLC,2020-04-02,N,,N,,N,,Yes,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,,2.96445,2.83215,0.82432,3.65647,6.62092,0.02771,2.01513,0.84378,0.46780,3.32671,,,,,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,0,0,0,0,0,0,"8561 EASTON COMMONS DR.,HOUSTON,TX,77095",2020-10-01
15346,676483,VENTANA BY BUCKNER,8301 N. CENTRAL EXPRESSWAY,DALLAS,TX,75201,2147588031,390,Dallas,Non profit - Corporation,72,,10.0,Medicare and Medicaid,N,"BUCKNER SENIOR LIVING, INC.",2020-03-17,N,,N,,N,,Yes,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,6.0,6.0,,,,,,,,,,,,,,,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,0,0,0,0,0,0,"8301 N. CENTRAL EXPRESSWAY,DALLAS,TX,75201",2020-10-01
15347,676485,PRINCETON MEDICAL LODGE,1401 W. PRINCETON DR.,PRINCETON,TX,75407,9727342100,310,Collin,For profit - Corporation,138,,10.0,Medicare and Medicaid,N,FOURSQUARE TEXAS 16 LLC,2020-06-11,N,,N,,N,,Yes,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,6.0,6.0,,,,,,,,,,,,,,,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,0,0,0,0,0,0,"1401 W. PRINCETON DR.,PRINCETON,TX,75407",2020-10-01
15348,686123,KENDALL LAKES HEALTH AND REHABILITATION CENTER,5280 SW 157 AVENUE,MIAMI,FL,33185,7864337400,120,Miami-Dade,For profit - Corporation,150,131.5,,Medicare and Medicaid,N,LA MER NH LLC,2019-05-29,N,,N,N,N,Resident,Yes,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,1.0,,,2.64694,0.50230,1.23487,1.73717,4.38411,0.04293,2.21322,0.75769,0.35521,3.32611,,,,,2019-05-29,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,.,.,.,.,.,.,.,,0,0,0,0,0,0,"5280 SW 157 AVENUE,MIAMI,FL,33185",2020-10-01


In [20]:
merged_df = pd.merge(norm_state_df, cms_df, left_on='Provider ID - CMS',  right_on='Federal Provider Number', how='left')
merged_df

Unnamed: 0,Facility Name,Facility Name Normalized,VLOOKUP,Provider ID - CMS,Address - CMS,City - CMS,Overall Star Ratings CMS,Health Inspection Rating_x,QM Rating_x,Staffing Rating_x,Owner type,Owner name,Organization Type,Provider Type_x,Legal Business Name_x,Changes,Date Collected,State,County,City,State Facility Type,CTP Facility Categorization,State/Fed Regulated,State Facility ID,CMS Facility ID,Date outbreak opened,Date outreak closed,Outbreak Status,Resident Census,Resident Positives,Resident Probable Positives,Resident Deaths,Resident Probable Deaths,Staff Positive,Staff Probable Positives,Staff Deaths,Staff Probable Deaths,Resident/Staff Positives,Resident/Staff Probable Positives,Resident/Staff Deaths,Resident/Staff Probable Deaths,Resident Positives.1,Resident Probable Positives.1,Resident Deaths.1,Resident Probable Deaths.1,Staff Positive.1,Staff Probable Positives.1,Staff Deaths.1,Staff Probable Deaths.1,Resident/Staff Positives.1,Resident/Staff Probable Positives.1,Resident/Staff Deaths.1,Resident/Staff Probable Deaths.1,Residents Tested,Staff Tested,Personal Protective Equipment,FacilityNameNormalizedUCase,county-facName,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Provider Phone Number,Provider SSA County Code,Provider County Name,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Average Number of Residents per Day Footnote,Provider Type_y,Provider Resides in Hospital,Legal Business Name_y,Date First Approved to Provide Medicare and Medicaid Services,Continuing Care Retirement Community,Special Focus Status,Abuse Icon,Most Recent Health Inspection More Than 2 Years Ago,Provider Changed Ownership in Last 12 Months,With a Resident and Family Council,Automatic Sprinkler Systems in All Required Areas,Overall Rating,Overall Rating Footnote,Health Inspection Rating_y,Health Inspection Rating Footnote,QM Rating_y,QM Rating Footnote,Long-Stay QM Rating,Long-Stay QM Rating Footnote,Short-Stay QM Rating,Short-Stay QM Rating Footnote,Staffing Rating_y,Staffing Rating Footnote,RN Staffing Rating,RN Staffing Rating Footnote,Reported Staffing Footnote,Physical Therapist Staffing Footnote,Reported Nurse Aide Staffing Hours per Resident per Day,Reported LPN Staffing Hours per Resident per Day,Reported RN Staffing Hours per Resident per Day,Reported Licensed Staffing Hours per Resident per Day,Reported Total Nurse Staffing Hours per Resident per Day,Reported Physical Therapist Staffing Hours per Resident Per Day,Case-Mix Nurse Aide Staffing Hours per Resident per Day,Case-Mix LPN Staffing Hours per Resident per Day,Case-Mix RN Staffing Hours per Resident per Day,Case-Mix Total Nurse Staffing Hours per Resident per Day,Adjusted Nurse Aide Staffing Hours per Resident per Day,Adjusted LPN Staffing Hours per Resident per Day,Adjusted RN Staffing Hours per Resident per Day,Adjusted Total Nurse Staffing Hours per Resident per Day,Rating Cycle 1 Standard Survey Health Date,Rating Cycle 1 Total Number of Health Deficiencies,Rating Cycle 1 Number of Standard Health Deficiencies,Rating Cycle 1 Number of Complaint Health Deficiencies,Rating Cycle 1 Health Deficiency Score,Rating Cycle 1 Number of Health Revisits,Rating Cycle 1 Health Revisit Score,Rating Cycle 1 Total Health Score,Rating Cycle 2 Standard Health Survey Date,Rating Cycle 2 Total Number of Health Deficiencies,Rating Cycle 2 Number of Standard Health Deficiencies,Rating Cycle 2 Number of Complaint Health Deficiencies,Rating Cycle 2 Health Deficiency Score,Rating Cycle 2 Number of Health Revisits,Rating Cycle 2 Health Revisit Score,Rating Cycle 2 Total Health Score,Rating Cycle 3 Standard Health Survey Date,Rating Cycle 3 Total Number of Health Deficiencies,Rating Cycle 3 Number of Standard Health Deficiencies,Rating Cycle 3 Number of Complaint Health Deficiencies,Rating Cycle 3 Health Deficiency Score,Rating Cycle 3 Number of Health Revisits,Rating Cycle 3 Health Revisit Score,Rating Cycle 3 Total Health Score,Total Weighted Health Survey Score,Number of Facility Reported Incidents,Number of Substantiated Complaints,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Processing Date
0,"Villas of Holly Brook Marshall, Clark","Villas of Holly Brook Marshall, Clark",#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,#N/A (Did not find value 'Villas of Holly Broo...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"VILLAS OF HOLLY BROOK MARSHALL, CLARK",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Abbington Rehab & Nursing Center,ABBINGTON REHABILITATION AND NURSING CENTER,ABBINGTON REHABILITATION AND NURSING CENTER,146065,31 WEST CENTRAL,ROSELLE,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,,20201105.0,IL,DUPAGE,,,Uncategorized LTC,,,,,,Open,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ABBINGTON REHAB & NURSING CENTER,DUPAGE-ABBINGTON REHAB & NURSING CENTER,146065,ABBINGTON REHAB & NURSING CTR,31 WEST CENTRAL,ROSELLE,IL,60172.0,6.308945e+09,250.0,Du Page,For profit - Partnership,82.0,59.1,,Medicare and Medicaid,N,ABBINGTON REHAB & NURSING CENTER LTD,2004-08-01,N,,N,N,N,Resident,Yes,4.0,,4.0,,3.0,,3.0,,,2.0,2.0,,3.0,,,,1.29819,0.49191,0.62228,1.11419,2.41237,0.03040,1.98358,0.74007,0.37275,3.09640,1.35110,0.49025,0.64469,2.48388,2019-10-10,5,5,0,20,1,0,20,2018-11-01,10,8,2,36,1,0,36,2017-09-27,6,6,0,48,1,0,48,30.000,0.0,2.0,0.0,0.0,0.0,0.0,"31 WEST CENTRAL,ROSELLE,IL,60172",2020-10-01
2,Abbington Rehab & Nursing Center,ABBINGTON REHABILITATION AND NURSING CENTER,ABBINGTON REHABILITATION AND NURSING CENTER,146065,31 WEST CENTRAL,ROSELLE,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,#N/A (Did not find value '146065' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ABBINGTON REHAB & NURSING CENTER,,146065,ABBINGTON REHAB & NURSING CTR,31 WEST CENTRAL,ROSELLE,IL,60172.0,6.308945e+09,250.0,Du Page,For profit - Partnership,82.0,59.1,,Medicare and Medicaid,N,ABBINGTON REHAB & NURSING CENTER LTD,2004-08-01,N,,N,N,N,Resident,Yes,4.0,,4.0,,3.0,,3.0,,,2.0,2.0,,3.0,,,,1.29819,0.49191,0.62228,1.11419,2.41237,0.03040,1.98358,0.74007,0.37275,3.09640,1.35110,0.49025,0.64469,2.48388,2019-10-10,5,5,0,20,1,0,20,2018-11-01,10,8,2,36,1,0,36,2017-09-27,6,6,0,48,1,0,48,30.000,0.0,2.0,0.0,0.0,0.0,0.0,"31 WEST CENTRAL,ROSELLE,IL,60172",2020-10-01
3,Abington of Glenview,ABINGTON OF GLENVIEW,ABINGTON OF GLENVIEW,145683,3901 GLENVIEW ROAD,GLENVIEW,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,,20201105.0,IL,COOK,,,Uncategorized LTC,,,,,,Closed,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ABINGTON OF GLENVIEW,COOK-ABINGTON OF GLENVIEW,145683,ABINGTON OF GLENVIEW NURSING,3901 GLENVIEW ROAD,GLENVIEW,IL,60025.0,8.477290e+09,141.0,Cook,For profit - Individual,192.0,91.7,,Medicare and Medicaid,N,ABINGTON OF GLENVIEW NURSING & REHAB CENTER LLC,1990-08-17,N,,N,N,N,Resident,Yes,4.0,,3.0,,4.0,,4.0,,4.0,,4.0,,5.0,,,,2.02934,0.46837,1.17823,1.64660,3.67594,0.19962,2.14062,0.82805,0.38931,3.35798,1.95711,0.41720,1.16876,3.49007,2019-02-21,3,3,0,12,1,0,12,2018-01-31,13,11,2,96,1,0,96,2016-12-22,4,4,0,24,1,0,24,42.000,0.0,3.0,0.0,0.0,0.0,0.0,"3901 GLENVIEW ROAD,GLENVIEW,IL,60025",2020-10-01
4,Abington of Glenview,Abington of Glenview,ABINGTON OF GLENVIEW,145683,3901 GLENVIEW ROAD,GLENVIEW,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,#N/A (Did not find value '145683' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ABINGTON OF GLENVIEW,,145683,ABINGTON OF GLENVIEW NURSING,3901 GLENVIEW ROAD,GLENVIEW,IL,60025.0,8.477290e+09,141.0,Cook,For profit - Individual,192.0,91.7,,Medicare and Medicaid,N,ABINGTON OF GLENVIEW NURSING & REHAB CENTER LLC,1990-08-17,N,,N,N,N,Resident,Yes,4.0,,3.0,,4.0,,4.0,,4.0,,4.0,,5.0,,,,2.02934,0.46837,1.17823,1.64660,3.67594,0.19962,2.14062,0.82805,0.38931,3.35798,1.95711,0.41720,1.16876,3.49007,2019-02-21,3,3,0,12,1,0,12,2018-01-31,13,11,2,96,1,0,96,2016-12-22,4,4,0,24,1,0,24,42.000,0.0,3.0,0.0,0.0,0.0,0.0,"3901 GLENVIEW ROAD,GLENVIEW,IL,60025",2020-10-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Carriage Rehab and Healthcare (2),CARRIAGE REHABILITATION AND HEALTHCARE,CARRIAGE REHABILITATION AND HEALTHCARE,145891,1660 SOUTH MULFORD,ROCKFORD,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,#N/A (Did not find value '145891' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CARRIAGE REHAB AND HEALTHCARE (2),,145891,CARRIAGE REHAB & HEALTHCARE,1660 SOUTH MULFORD,ROCKFORD,IL,61108.0,8.153979e+09,991.0,Winnebago,Government - Federal,112.0,82.3,,Medicare and Medicaid,N,ROCKFORD NH LLC,1996-05-29,N,,N,N,Y,Resident,Yes,2.0,,2.0,,2.0,,3.0,,1.0,,3.0,,4.0,,,,1.45526,0.42115,0.80092,1.22207,2.67732,0.06574,2.04304,0.75382,0.35884,3.15570,1.47049,0.41207,0.86195,2.70489,2020-01-23,11,7,4,68,1,0,68,2019-03-26,18,8,10,84,1,0,84,2018-02-06,22,15,7,207,1,0,207,96.500,3.0,11.0,1.0,68381.0,0.0,1.0,"1660 SOUTH MULFORD,ROCKFORD,IL,61108",2020-10-01
2406,Cumberland Rehab and HCC,CUMBERLAND REHABILITATION AND HEALTH CC,CUMBERLAND REHABILITATION AND HEALTH CC,146113,300 NORTH MARIETTA STREET,GREENUP,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,#N/A (Did not find value '146113' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CUMBERLAND REHAB AND HCC,,146113,CUMBERLAND REHAB & HEALTH CC,300 NORTH MARIETTA STREET,GREENUP,IL,62428.0,2.179233e+09,160.0,Cumberland,For profit - Individual,54.0,35.6,,Medicare and Medicaid,N,CUMBERLAND HCO LLC,2007-06-25,N,,N,N,Y,Resident,Yes,4.0,,4.0,,4.0,,3.0,,4.0,,3.0,,4.0,,,,1.79192,0.76221,0.62051,1.38271,3.17463,0.02295,1.97107,0.71042,0.32464,3.00613,1.87679,0.79134,0.73813,3.36689,2019-12-18,6,6,0,44,1,0,44,2019-01-10,5,3,2,36,1,0,36,2017-11-08,2,2,0,12,1,0,12,36.000,1.0,0.0,0.0,0.0,0.0,0.0,"300 NORTH MARIETTA STREET,GREENUP,IL,62428",2020-10-01
2407,Flora Rehabilitation,FLORA REHABILITATION AND HEALTH CARE CENTER,FLORA REHABILITATION AND HEALTH CARE CENTER,145692,232 GIVEN STREET,FLORA,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,#N/A (Did not find value '145692' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,FLORA REHABILITATION,,145692,FLORA REHAB & HEALTH CARE CTR,232 GIVEN STREET,FLORA,IL,62839.0,6.186628e+09,120.0,Clay,For profit - Corporation,99.0,43.0,,Medicare and Medicaid,N,"PETERSEN MANAGEMENT COMPANY, LLC",1991-02-14,N,,N,N,N,Resident,Yes,3.0,,3.0,,3.0,,3.0,,3.0,,2.0,,2.0,,,,1.54307,0.77260,0.37971,1.15231,2.69538,0.01631,2.06085,0.69353,0.30166,3.05604,1.54574,0.82167,0.48610,2.81193,2019-08-30,14,13,1,84,1,0,84,2018-10-18,4,3,1,16,1,0,16,2017-09-14,11,10,1,76,1,0,76,60.000,0.0,2.0,0.0,0.0,0.0,0.0,"232 GIVEN STREET,FLORA,IL,62839",2020-10-01
2408,Grove of St. Charles,THE GROVE ST. CHARLES,THE GROVE ST. CHARLES,145433,611 ALLEN LANE,ST CHARLES,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,#N/A (Did not find value '145433' in VLOOKUP e...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,GROVE OF ST. CHARLES,,145433,GROVE OF ST CHARLES,611 ALLEN LANE,ST CHARLES,IL,60174.0,6.303772e+09,530.0,Kane,For profit - Partnership,120.0,96.7,,Medicare and Medicaid,N,ST CHARLES SKILLED NURSING FACILITY LLC,1981-08-31,N,,N,N,N,Both,Yes,1.0,,1.0,,2.0,,3.0,,2.0,,3.0,,4.0,,,,1.70063,0.53412,0.85962,1.39374,3.09437,0.05918,2.08744,0.74438,0.38565,3.21748,1.68187,0.52924,0.86080,3.06620,2019-06-12,31,13,19,272,1,0,272,2018-08-23,18,10,8,96,1,0,96,2017-09-21,13,9,6,64,1,0,64,178.667,1.0,29.0,1.0,7069.0,0.0,1.0,"611 ALLEN LANE,ST CHARLES,IL,60174",2020-10-01


In [21]:
cols = ['PROVNUM', 'PROVNAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'PHONE',
        'COUNTY_SSA', 'County_name', 'OWNERSHIP', 'BEDCERT', 'RESTOT', 'CERTIFICATION',
        'LBN',  'Overall_Rating', 'overall_rating_fn', 'INHOSP', 'INCIDENT_CNT', 'CMPLNT_CNT', 'FINE_CNT',
        'FINE_TOT', 'PAYDEN_CNT', 'TOT_PENLTY_CNT', 'FILEDATE']
        
#        'restot_fn', 'INHOSP', 'PARTICIPATION_DATE',
#        'CCRC_FACIL', 'SFFStatus', 'ABUSE_ICON', 'OldSurvey', 'CHOW_LAST_12MOS',
#        'RESFAMCOUNCIL', 'SPRINKLER_STATUS', 'Overall_Rating',
#        'overall_rating_fn', 'SURVEY_RATING', 'survey_rating_fn',
#        'Quality_Rating', 'quality_rating_fn', 'LS_Quality_Rating',
#        'LS_quality_rating_fn', 'SS_Quality_Rating', 'SS_quality_rating_fn',
#        'Staffing_Rating', 'staffing_rating_fn', 'RN_staffing_rating',
#        'RN_staffing_rating_fn', 'STAFFING_FLAG', 'PT_STAFFING_FLAG', 'AIDHRD',
#        'VOCHRD', 'RNHRD', 'TOTLICHRD', 'TOTHRD', 'PTHRD', 'CM_AIDE', 'CM_LPN',
#        'CM_RN', 'CM_TOTAL', 'ADJ_AIDE', 'ADJ_LPN', 'ADJ_RN', 'ADJ_TOTAL',
#        'WEIGHTED_ALL_CYCLES_SCORE', 'INCIDENT_CNT', 'CMPLNT_CNT', 'FINE_CNT',
#        'FINE_TOT', 'PAYDEN_CNT', 'TOT_PENLTY_CNT', 'FILEDATE']
cms_df.columns
#cms_df[cols].head()

Index(['Federal Provider Number', 'Provider Name', 'Provider Address',
       'Provider City', 'Provider State', 'Provider Zip Code',
       'Provider Phone Number', 'Provider SSA County Code',
       'Provider County Name', 'Ownership Type', 'Number of Certified Beds',
       'Average Number of Residents per Day',
       'Average Number of Residents per Day Footnote', 'Provider Type',
       'Provider Resides in Hospital', 'Legal Business Name',
       'Date First Approved to Provide Medicare and Medicaid Services',
       'Continuing Care Retirement Community', 'Special Focus Status',
       'Abuse Icon', 'Most Recent Health Inspection More Than 2 Years Ago',
       'Provider Changed Ownership in Last 12 Months',
       'With a Resident and Family Council',
       'Automatic Sprinkler Systems in All Required Areas', 'Overall Rating',
       'Overall Rating Footnote', 'Health Inspection Rating',
       'Health Inspection Rating Footnote', 'QM Rating', 'QM Rating Footnote',
       'Lon

In [22]:
# Analysis by Ownership
cols = ['PROVNUM',  
        'OWNERSHIP', 'BEDCERT', 'RESTOT',   'Overall_Rating','INCIDENT_CNT', 'CMPLNT_CNT', 'FINE_CNT',
        'FINE_TOT', 'PAYDEN_CNT', 'TOT_PENLTY_CNT']
        
print("=== MEAN ====")
display(cms_df.groupby(['OWNERSHIP'])[cols].mean())
print("=== SUM ====")
display(cms_df.groupby(['OWNERSHIP'])[cols].sum())
print("=== Count ====")
display(cms_df.groupby(['OWNERSHIP'])[cols].count())

=== MEAN ====


KeyError: 'OWNERSHIP'

In [None]:
print(len(cms_df))
len(cms_df.PROVNUM.unique())


# 2013-07-01 Data

In [None]:
cms_df_2013 = pd.read_csv('../CMS NursingHome Data/2013-07-01/ProviderInfo.csv', skipfooter=1, engine='python')
# Analysis by Ownership
# cols = ['PROVNUM',  
#         'OWNERSHIP', 'BEDCERT', 'RESTOT',   'Overall_Rating','INCIDENT_CNT', 'CMPLNT_CNT', 'FINE_CNT',
#         'FINE_TOT', 'PAYDEN_CNT', 'TOT_PENLTY_CNT']
cols=['Federal Provider Number', 
#        'Provider Name', 'Provider Address',
#        'Provider City', 'Provider State', 'Provider Zip Code',
#        'Provider SSA County Code', 'Provider County Name', 
      'Ownership Type',
       'Number of Certified Beds', 'Number of Residents in Certified Beds',
#        'Provider Type', 'Provider Resides in Hospital', 'Legal Business Name',
#        'Date First Approved to Provide Medicare and Medicaid services',
#        'Continuing Care Retirement Community', 'Special Focus Facility',
#        'Provider Changed Ownership in Last 12 Months',
#        'With a Resident and Family Council',
#        'Automatic Sprinkler Systems in All Required Areas', 'Overall Rating',
#        'Overall Rating Footnote', 'Health Inspection Rating',
#        'Health Inspection Rating Footnote', 'QM Rating', 'QM Rating Footnote',
#        'Staffing Rating', 'Staffing Rating Footnote', 'RN Staffing Rating',
#        'RN Staffing Rating Footnote', 'Reported Staffing Footnote',
#        'Physical Therapist Staffing Footnote',
#        'Reported CNA Staffing Hours per Resident per Day',
#        'Reported LPN Staffing Hours per Resident per Day',
#        'Reported RN Staffing Hours per Resident per Day',
#        'Reported Licensed Staffing Hours per Resident per Day',
#        'Reported Total Nurse Staffing Hours per Resident per Day',
#        'Reported Physical Therapist Staffing Hours per Resident Per Day',
#        'Expected CNA Staffing Hours per Resident per Day',
#        'Expected LPN Staffing Hours per Resident per Day',
#        'Expected RN Staffing Hours per Resident per Day',
#        'Expected Total Nurse Staffing Hours per Resident per Day',
#        'Adjusted CNA Staffing Hours per Resident per Day',
#        'Adjusted LPN Staffing Hours per Resident per Day',
#        'Adjusted RN Staffing Hours per Resident per Day',
#        'Adjusted Total Nurse Staffing Hours per Resident per Day',
#        'Cycle 1 Total Number of Health Deficiencies',
#        'Cycle 1 Number of Standard Health Deficiencies',
#        'Cycle 1 Number of Complaint Health Deficiencies',
#        'Cycle 1 Health Deficiency Score',
#        'Cycle 1 Standard Survey Health Date',
#        'Cycle 1 Number of Health Revisits', 'Cycle 1 Health Revisit Score',
#        'Cycle 1 Total Health Score',
#        'Cycle 2 Total Number of Health Deficiencies',
#        'Cycle 2 Number of Standard Health Deficiencies',
#        'Cycle 2 Number of Complaint Health Deficiencies',
#        'Cycle 2 Health Deficiency Score',
#        'Cycle 2 Standard Health Survey Date',
#        'Cycle 2 Number of Health Revisits', 'Cycle 2 Health Revisit Score',
#        'Cycle 2 Total Health Score',
#        'Cycle 3 Total Number of Health Deficiencies',
#        'Cycle 3 Number of Standard Health Deficiencies',
#        'Cycle 3 Number of Complaint Health Deficiencies',
#        'Cycle 3 Health Deficiency Score',
#        'Cycle 3 Standard Health Survey Date',
#        'Cycle 3 Number of Health Revisits', 'Cycle 3 Health Revisit Score',
#        'Cycle 3 Total Health Score', 'Total Weighted Health Survey Score',
       'Number of Facility Reported Incidents',
       'Number of Substantiated Complaints', 'Number of Fines',
       'Total Amount of Fines in Dollars', 'Number of Payment Denials',
       'Total Number of Penalties', 'Location', 'Processing Date']
print("=== MEAN ====")
display(cms_df_2013.groupby(['Ownership Type'])[cols].mean())
print("=== SUM ====")
display(cms_df_2013.groupby(['Ownership Type'])[cols].sum())
print("=== Count ====")
display(cms_df_2013.groupby(['Ownership Type'])[cols].count())

In [None]:
cms_df_2013.columns

# Get Counts, Sums and Means by Ownership type and State
 - to see if any State way better/worse based on Ownership Type

In [None]:
# Analysis by Ownership
cols = ['PROVNUM',  
        'OWNERSHIP', 'BEDCERT', 'RESTOT', 'INCIDENT_CNT', 'CMPLNT_CNT', 'FINE_CNT',
        'FINE_TOT', 'PAYDEN_CNT', 'TOT_PENLTY_CNT']
        
print("=== MEAN ====")
cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].mean().to_csv('Reporting_data/CMS_Stats_2020-11-27_means.csv')
display(cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].mean())
print("=== SUM ====")
cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].sum().to_csv('Reporting_data/CMS_Stats_2020-11-27_sums.csv')
display(cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].sum())
print("=== Count ====")
cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].count().to_csv('Reporting_data/CMS_Stats_2020-11-27_counts.csv')
display(cms_df.groupby(['STATE', 'OWNERSHIP'])[cols].count())

In [None]:
cms_df.columns

# Play Area

In [None]:
# ToDO - Check Normalized names in spreadsheet, seems not all names Normailized ex: "ACCOLADE HEALTHCARE" and "Accolade Healthcare"
display(no_matches['Facility Names'][0:10])
display(no_matches['Facility Names Normalized'][0:10])

In [None]:
norm_state_df.head()

In [None]:
# Create DFs for:
#   1 - no_matches: Lists of "Factility Names" and "Facility Names Normalized" that do not have a matching CMS entry in the normalized-cms sheet
#   2 - dupNames: Dictionary of "Federal Provider Numbers" that map to list where there are more than one "Facility Name"
#   3 - CMSID2FacName: Dictionary mapping "Facility Names" to "Federal Provider Numbers"
#   4 - FacName2CMSId: Dictionary mapping "Federal Provider Numbers" to "Facility Names"

FacName2CMSId = {}
CMSID2FacName = {} # Initialize No Match to empty list so can always use extend for this key
no_matches = {'Facility Names': [],
              'Facility Names Normalized': []} # List of "Facility Names" without matches in CMS data feed
dupNames = {} # Dictionary of "Federal Provider Numbers" that have more than one "Facility Name" 
              #  (ex: 146182': ['Alden Courts of Waterford', 'Alden of Waterford'],
              #      '146186': ['Alden Estates Courts of Huntley',
              #                 'Alden Estates-Courts of Huntley'],
              #       '145872': ['Alden Long Grove Rehab & Health',
              #                  'Alden of Long Grove Rehab and HCC'], )

# For Each Federal Provder Number (CMS term) in Provide ID - CMS (CTP term)
for Fed_Prov_Num in norm_state_df['Provider ID - CMS'].unique():
    # Since the gsheets api appends comments to the cell value (which for the record can be a PAIN and POTENTIAL PROBLEM)
    #    We change the value to a more understandable "No Match"
    if Fed_Prov_Num[0:4] == "#N/A":
        no_matches['Facility Names'].extend(list(norm_state_df[norm_state_df['Provider ID - CMS'] == Fed_Prov_Num]['Facility Name'].unique()))
        no_matches['Facility Names Normalized'].extend(list(norm_state_df[norm_state_df['Provider ID - CMS'] == Fed_Prov_Num]['Facility Name Normalized'].unique()))
    else:
        CMSID2FacName[Fed_Prov_Num] = list(norm_state_df[norm_state_df['Provider ID - CMS'] == Fed_Prov_Num]['Facility Name'].unique())
        if len(CMSID2FacName[Fed_Prov_Num]) > 1:
            dupNames[Fed_Prov_Num] = CMSID2FacName[Fed_Prov_Num]
            
        for facilityName in CMSID2FacName[Fed_Prov_Num]:
            FacName2CMSId[facilityName] = Fed_Prov_Num
                              
no_matches['Facility Names'] = list(dict.fromkeys(no_matches['Facility Names']))
no_matches['Facility Names Normalized'] = list(dict.fromkeys(no_matches['Facility Names Normalized']))

In [None]:
norm_state_df[['County', 'Facility Name', 'Provider ID - CMS']].dropna()

In [None]:
CMSID2FacName