In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#### This notebook provides initial exploratory analysis of the HPSA designations
Info about HPSAs:
    -https://bhw.hrsa.gov/workforce-shortage-areas/shortage-designation
    -https://bhw.hrsa.gov/workforce-shortage-areas/shortage-designation/scoring
    -https://bhw.hrsa.gov/workforce-shortage-areas/shortage-designation/modernization-project

In [2]:
tn_hpsa = pd.read_excel('../capstone_data/HRSA_HPSA_details.xlsx')
tn_hpsa

Unnamed: 0,HPSA Name,HPSA ID,Designation Type,HPSA Discipline Class,HPSA Score,State,HPSA Status,HPSA Designation Date,HPSA Designation Last Update,Withdrawn Date,...,% of Population Below 100% Poverty,HPSA Formal Ratio,Address,City,ZIP,HHS Region,Rural Status,Longitude,Latitude,County
0,Overton,147133,Geographic HPSA,Primary Care,0,TN,Withdrawn,12/17/1980,01/21/1994,01/21/1994,...,17.9,3172:1,,,,Region 4 ...,Rural,,,"Overton County, TN"
1,"WTPC, LLC",1471038172,Rural Health Clinic,Primary Care,17,TN,Designated,09/30/2021,09/30/2021,,...,,,557 W Park Pl,Henderson,38340-2027,Region 4 ...,Rural,-88.661904,35.442291,"Chester County, TN"
2,Decatur,147039,Geographic HPSA,Primary Care,0,TN,Withdrawn,07/19/1978,09/13/2010,09/13/2010,...,,3651:1,,,,Region 4 ...,Rural,,,"Decatur County, TN"
3,"Health, Tennessee Dept Of",747999477U,Federally Qualified Health Center,Mental Health,19,TN,Designated,08/01/2015,09/11/2021,,...,,,710 James Robertson Pkwy 64 Andrew Johnson Tower,Nashville,37243-0001,Region 4 ...,Non-Rural,-86.786927,36.167605,"Davidson County, TN"
4,TENNOVA URGENT CARE - SHELBYVILLE,747999477F,Rural Health Clinic,Mental Health,15,TN,Withdrawn,10/25/2010,02/06/2020,02/06/2020,...,,,1612 N Main St STE A,Shelbyville,37160-2392,Region 4 ...,Rural,-86.456820,35.506176,"Bedford County, TN"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,LI-Loudon County,1473386147,HPSA Population,Primary Care,12,TN,Designated,03/18/2022,03/18/2022,,...,11.2,4163:1,,,,Region 4 ...,Non-Rural,,,"Loudon County, TN"
607,LI-Putnam/White Counties,1472793573,HPSA Population,Primary Care,16,TN,Proposed For Withdrawal,03/22/2012,09/10/2021,,...,24.3,5114:1,,,,Region 4 ...,Rural,,,"Putnam County, TN | White County, TN"
608,LI-McNairy County,1472279043,HPSA Population,Primary Care,17,TN,Designated,04/05/2022,04/05/2022,,...,18.8,17452:1,,,,Region 4 ...,Rural,,,"McNairy County, TN"
609,LI-Sevier County,1471717306,HPSA Population,Primary Care,14,TN,Designated,02/28/2014,09/10/2021,,...,13.6,6807:1,,,,Region 4 ...,Rural,,,"Sevier County, TN"


In [3]:
#creating dataframe of all active HPSA designations in TN
tn_hpsa_active = tn_hpsa.loc[(tn_hpsa['HPSA Status'] != 'Withdrawn')]
tn_hpsa_active.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 359 entries, 1 to 610
Data columns (total 26 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   HPSA Name                           359 non-null    object 
 1   HPSA ID                             359 non-null    object 
 2   Designation Type                    359 non-null    object 
 3   HPSA Discipline Class               359 non-null    object 
 4   HPSA Score                          359 non-null    int64  
 5   State                               359 non-null    object 
 6   HPSA Status                         359 non-null    object 
 7   HPSA Designation Date               359 non-null    object 
 8   HPSA Designation Last Update        359 non-null    object 
 9   Withdrawn Date                      0 non-null      object 
 10  Metropolitan Indicator              359 non-null    object 
 11  HPSA Population Type                219 non-n

In [4]:
#creating a column to identify those rows that contain HPSA designations that cover multiple counties
tn_hpsa_active.loc[tn_hpsa_active['County'].str.contains(pat='\|'), ['Multi-County']] = 'Yes'
tn_hpsa_active.loc[~tn_hpsa_active['County'].str.contains(pat='\|'), ['Multi-County']] = 'No'

#Splitting out the multi-county designations so that each county is represented singley in its own row 
tn_hpsa_active['County'] = tn_hpsa_active['County'].str.split('|')
tn_hpsa_active = tn_hpsa_active.apply(pd.Series.explode)
tn_hpsa_active = tn_hpsa_active.reset_index(drop=True)
tn_hpsa_active['County'] = tn_hpsa_active['County'].str.extract('(.+), TN')
tn_hpsa_active['County'] = tn_hpsa_active['County'].str.strip()

#dropping columns that aren't needed for this analysis
tn_hpsa_active = tn_hpsa_active.drop(columns = ['HPSA Degree of Shortage', 'HHS Region'])

#reorganizing columns to allow most pertinent to be readily visible in jupyter notebook
tn_hpsa_active = tn_hpsa_active[['HPSA Name', 'HPSA ID', 'Designation Type', 'HPSA Discipline Class', 'HPSA Population Type',
                   'HPSA Score', 'HPSA Status', 'HPSA Designation Date', 'HPSA Designation Last Update', 'Withdrawn Date', 
                   'Longitude', 'Latitude','Address', 'ZIP', 'City', 'State', 'County', 
                   'HPSA FTE', '# of FTE Short', 'HPSA Formal Ratio', 'HPSA Designation Population', 
                   '% of Population Below 100% Poverty', 'Metropolitan Indicator', 'Rural Status']]

tn_hpsa_active

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tn_hpsa_active.loc[tn_hpsa_active['County'].str.contains(pat='\|'), ['Multi-County']] = 'Yes'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tn_hpsa_active['County'] = tn_hpsa_active['County'].str.split('|')


Unnamed: 0,HPSA Name,HPSA ID,Designation Type,HPSA Discipline Class,HPSA Population Type,HPSA Score,HPSA Status,HPSA Designation Date,HPSA Designation Last Update,Withdrawn Date,...,City,State,County,HPSA FTE,# of FTE Short,HPSA Formal Ratio,HPSA Designation Population,% of Population Below 100% Poverty,Metropolitan Indicator,Rural Status
0,"WTPC, LLC",1471038172,Rural Health Clinic,Primary Care,,17,Designated,09/30/2021,09/30/2021,,...,Henderson,TN,Chester County,,,,59090.0,,Unknown,Rural
1,"Health, Tennessee Dept Of",747999477U,Federally Qualified Health Center,Mental Health,,19,Designated,08/01/2015,09/11/2021,,...,Nashville,TN,Davidson County,,,,17229.0,,Unknown,Non-Rural
2,Chota Community Health Services,7479994766,Federally Qualified Health Center,Mental Health,,20,Designated,09/30/2005,09/11/2021,,...,Vonore,TN,Monroe County,,,,18944.0,,Non-Metropolitan,Rural
3,"Dayspring Health, Inc.",7479994750,Federally Qualified Health Center,Mental Health,,21,Designated,11/26/2003,09/11/2021,,...,Jellico,TN,Campbell County,,,,24967.0,,Non-Metropolitan,Rural
4,East Tennessee State University,7479994742,Federally Qualified Health Center,Mental Health,,19,Designated,11/26/2003,09/11/2021,,...,Johnson City,TN,Washington County,,,,61116.0,,Non-Metropolitan,Non-Rural
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,LI-Putnam/White Counties,1472793573,HPSA Population,Primary Care,Low Income Population HPSA,16,Proposed For Withdrawal,03/22/2012,09/10/2021,,...,,TN,White County,9.0721,6.3879,5114:1,46395.0,24.3,Unknown,Rural
454,LI-McNairy County,1472279043,HPSA Population,Primary Care,Low Income Population HPSA,17,Designated,04/05/2022,04/05/2022,,...,,TN,McNairy County,0.6700,3.2300,17452:1,11693.0,18.8,Unknown,Rural
455,LI-Sevier County,1471717306,HPSA Population,Primary Care,Low Income Population HPSA,14,Designated,02/28/2014,09/10/2021,,...,,TN,Sevier County,5.4700,6.9400,6807:1,37235.0,13.6,Unknown,Rural
456,LI-Macon &amp; Trousdale Counties,1471425309,HPSA Population,Primary Care,Low Income Population HPSA,16,Designated,04/05/2022,04/05/2022,,...,,TN,Macon County,1.3900,2.6700,8755:1,12169.0,15.2,Unknown,Rural


In [5]:
tn_hpsa_active['Rural Status'].value_counts()

Rural              274
Non-Rural          107
Partially Rural     77
Name: Rural Status, dtype: int64

In [6]:
#In the course of exploding the multi-county entries, some rural statuses are incorrect
#Bringing in a file and creating a dictionary to correct this
tn_rural_counties =  pd.read_csv('../capstone_data/tn_county_pop_rural.csv')
county_rural_dict = tn_rural_counties.set_index('County')['HRSA Rural Status'].to_dict()
county_rural_dict

{'Anderson County': 'Non-Rural',
 'Bedford County': 'Rural',
 'Benton County': 'Rural',
 'Bledsoe County': 'Rural',
 'Blount County': 'Partially Rural',
 'Bradley County': 'Non-Rural',
 'Campbell County': 'Rural',
 'Cannon County': 'Rural',
 'Carroll County': 'Rural',
 'Carter County': 'Non-Rural',
 'Cheatham County': 'Rural',
 'Chester County': 'Rural',
 'Claiborne County': 'Rural',
 'Clay County': 'Rural',
 'Cocke County': 'Rural',
 'Coffee County': 'Rural',
 'Crockett County': 'Rural',
 'Cumberland County': 'Rural',
 'Davidson County': 'Non-Rural',
 'Decatur County': 'Rural',
 'DeKalb County': 'Rural',
 'Dickson County': 'Rural',
 'Dyer County': 'Rural',
 'Fayette County': 'Non-Rural',
 'Fentress County': 'Rural',
 'Franklin County': 'Rural',
 'Gibson County': 'Rural',
 'Giles County': 'Rural',
 'Grainger County': 'Rural',
 'Greene County': 'Rural',
 'Grundy County': 'Rural',
 'Hamblen County': 'Non-Rural',
 'Hamilton County': 'Non-Rural',
 'Hancock County': 'Rural',
 'Hardeman Coun

In [12]:
tn_hpsa_active.loc[tn_hpsa_active['County'].isna()]

Unnamed: 0,HPSA Name,HPSA ID,Designation Type,HPSA Discipline Class,HPSA Population Type,HPSA Score,HPSA Status,HPSA Designation Date,HPSA Designation Last Update,Withdrawn Date,...,City,State,County,HPSA FTE,# of FTE Short,HPSA Formal Ratio,HPSA Designation Population,% of Population Below 100% Poverty,Metropolitan Indicator,Rural Status


In [7]:
tn_hpsa_active['Rural Status'] = tn_hpsa_active['County'].map(county_rural_dict)
tn_hpsa_active['Rural Status'].value_counts()

Rural              305
Non-Rural          128
Partially Rural     25
Name: Rural Status, dtype: int64

In [9]:
#the duplicated statement is accounting for those multi-county HPSAs that I exploded out into multiple rows
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['Rural Status'].value_counts()

Rural              230
Non-Rural          115
Partially Rural     14
Name: Rural Status, dtype: int64

In [10]:
#tn_hpsa_active.to_csv('../capstone_data/tn_hpsa.csv')

In [None]:
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['Rural Status'].value_counts(normalize=True)

In [None]:
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts(normalize=True)

In [None]:
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['Designation Type'].value_counts()

In [None]:
tn_hpsa_active.loc[~tn_hpsa_active.duplicated(['HPSA ID'])]['Designation Type'].value_counts(normalize=True)

### The next section hones in on those HPSA designations that are currently active and apply to rural and partially-rural areas

In [None]:
tn_rur_hpsa = tn_hpsa.loc[(tn_hpsa['HPSA Status'] != 'Withdrawn')
                         & (tn_hpsa['Rural Status'] != 'Non-Rural')].reset_index(drop=True)
tn_rur_hpsa.info()

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['Rural Status'].value_counts()

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['Rural Status'].value_counts(normalize=True)

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['Designation Type'].value_counts()

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['Designation Type'].value_counts(normalize=True)

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['HPSA Status'].value_counts()

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa.loc[~tn_rur_hpsa.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts(normalize=True)

In [None]:
#splitting out the different types of HPSA designations (facility, geographic, population)
tn_rur_hpsa_fac = tn_rur_hpsa.loc[tn_rur_hpsa['Designation Type'].isin(['Federally Qualified Health Center', 
                                                                       'Rural Health Clinic', 
                                                                        'Federally Qualified Health Center Look A Like'])]
tn_rur_hpsa_geo = tn_rur_hpsa.loc[tn_rur_hpsa['Designation Type'].isin(['Geographic HPSA', 
                                                                       'High Needs Geographic HPSA'])]
tn_rur_hpsa_pop = tn_rur_hpsa.loc[tn_rur_hpsa['Designation Type'] =='HPSA Population']

#### Facility-based HPSA info

In [None]:
tn_rur_hpsa_fac.loc[~tn_rur_hpsa_fac.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa_fac.loc[~tn_rur_hpsa_fac.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts(normalize=True)

In [None]:
tn_rur_hpsa_fac[['County', 'HPSA Discipline Class']].value_counts()

In [None]:
tn_rur_hpsa_fac[['HPSA Score', 'County']].value_counts().sort_index(ascending=False)

In [None]:
tn_rur_hpsa_fac.loc[~tn_rur_hpsa_fac.duplicated(['HPSA ID'])][['HPSA Discipline Class', 
                                                               'HPSA Score']].value_counts().sort_index(ascending=False)

#### Geographically-based HPSA info

In [None]:
tn_rur_hpsa_geo.loc[~tn_rur_hpsa_geo.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa_geo[['County']].value_counts()

In [None]:
tn_rur_hpsa_geo.loc[~tn_rur_hpsa_geo.duplicated(['HPSA ID'])][['HPSA Score']].value_counts().sort_index(ascending=False)

In [None]:
tn_rur_hpsa_geo.loc[~tn_rur_hpsa_geo.duplicated(['HPSA ID'])][['HPSA Discipline Class', 
                                                               'HPSA Score']].value_counts().sort_index(ascending=False)

#### Population-based HPSA info

In [None]:
tn_rur_hpsa_pop.loc[~tn_rur_hpsa_pop.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa_pop[['County', 'HPSA Discipline Class']].value_counts()

In [None]:
tn_rur_hpsa_pop.loc[~tn_rur_hpsa_pop.duplicated(['HPSA ID'])]['HPSA Population Type'].value_counts()

In [None]:
tn_rur_hpsa_pop.loc[~tn_rur_hpsa_pop.duplicated(['HPSA ID'])][['HPSA Score']].value_counts().sort_index(ascending=False)

In [None]:
tn_rur_hpsa_pop.loc[~tn_rur_hpsa_pop.duplicated(['HPSA ID'])][['HPSA Discipline Class', 
                                                               'HPSA Score']].value_counts().sort_index(ascending=False)

#### Exploring HPSA designations that have been proposed for withdrawal

In [None]:
tn_hpsa_prop_wi = tn_hpsa.loc[tn_hpsa['HPSA Status'] == 'Proposed For Withdrawal']

In [None]:
tn_hpsa_prop_wi.loc[~tn_hpsa_prop_wi.duplicated(['HPSA ID'])]['Designation Type'].value_counts()

In [None]:
tn_hpsa_prop_wi.loc[~tn_hpsa_prop_wi.duplicated(['HPSA ID'])]['Designation Type'].value_counts(normalize=True)

In [None]:
tn_hpsa_prop_wi.loc[~tn_hpsa_prop_wi.duplicated(['HPSA ID'])]['Rural Status'].value_counts()

In [None]:
tn_hpsa_prop_wi.loc[~tn_hpsa_prop_wi.duplicated(['HPSA ID'])]['Rural Status'].value_counts(normalize=True)

In [None]:
tn_rur_hpsa_prop_wi_fac = tn_hpsa_prop_wi.loc[(tn_hpsa_prop_wi['Designation Type'].isin(['State Mental Hospital']))
                                     & (tn_hpsa_prop_wi['Rural Status'] != 'Non-Rural')]

tn_rur_hpsa_prop_wi_geo = tn_hpsa_prop_wi.loc[(tn_hpsa_prop_wi['Designation Type'].isin(['Geographic HPSA', 
                                                                       'High Needs Geographic HPSA']))
                                     & (tn_hpsa_prop_wi['Rural Status'] != 'Non-Rural')]

tn_rur_hpsa_prop_wi_pop = tn_hpsa_prop_wi.loc[tn_hpsa_prop_wi['Designation Type'].isin(['HPSA Population'])
                                 & (tn_hpsa_prop_wi['Rural Status'] != 'Non-Rural')]

In [None]:
tn_rur_hpsa_prop_wi_fac

In [None]:
tn_rur_hpsa_prop_wi_geo[~tn_rur_hpsa_prop_wi_geo.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa_prop_wi_pop[~tn_rur_hpsa_prop_wi_pop.duplicated(['HPSA ID'])]['HPSA Discipline Class'].value_counts()

In [None]:
tn_rur_hpsa_prop_wi_pop['County'].value_counts()

In [None]:
tn_rur_hpsa_prop_wi_pop[~tn_rur_hpsa_prop_wi_pop.duplicated(['HPSA ID'])]['HPSA Population Type'].value_counts()

In [None]:
tn_rur_hpsa['County'] = tn_rur_hpsa['County'].str.replace(', TN', '')

In [None]:
tn_rur_hpsa.loc[tn_rur_hpsa['County'] == 'Trousdale County']

In [None]:
#tn_rur_hpsa.to_csv('../capstone_data/tn_rur_hpsa.csv')

In [None]:
tn_hpsa.sort_values('County')

In [None]:
tn_rur_hpsa.sort_values('County')