In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#### Bringing in data from the Census Bureau's Small Area Health Insurance Information Estimates

https://www.census.gov/programs-surveys/sahie.html

###### Notes on values in table:  

    Variable      Format      Description     
       year            4      Year of Estimate
       version         8      Release Version 
                                 Blank   : YEAR other than 2013
                                 Original: 2013 only
                                 Updated : 2013 only
       statefips       2      Unique FIPS code for each state                   
       countyfips      3      Unique FIPS code for each county within a state   
       geocat          2      Geography category             
                                40 - State geographic identifier 
                                50 - County geographic identifier
       agecat          1      Age category        
                                0 - Under 65 years
                                1 - 18 to 64 years
                                2 - 40 to 64 years
                                3 - 50 to 64 years
                                4 - Under 19 years
                                5 - 21 to 64 years
       racecat         1      Race category  
                                0 - All races
                                Only state estimates have racecat=1, 2, and 3 values
                                1 - White alone (not Hispanic)
                                2 - Black alone (not Hispanic)
                                3 - Hispanic (any race)      
       sexcat          1      Sex category    
                                0 - Both sexes
                                1 - Male      
                                2 - Female    
       iprcat          1      Income category 
                                0 - All income levels          
                                1 - At or below 200% of poverty
                                2 - At or below 250% of poverty
                                3 - At or below 138% of poverty
                                4 - At or below 400% of poverty
                                5 - Between 138% - 400%  of poverty
      NIPR             8      Number in demographic group for <income category>
         nipr_moe      8           MOE  for NIPR
      NUI              8      Number uninsured  
         nui_moe       8           MOE  for NUI 
      NIC              8      Number insured    
         nic_moe       8           MOE  for NIC 
      PCTUI            5.1    Percent uninsured in demographic group for <income category>
         pctui_moe     5.1         MOE  for PCTUI                                 
      PCTIC            5.1    Percent insured in demographic group for <income category>  
         pctic_moe     5.1         MOE  for PCTIC                                 
      PCTELIG          5.1    Percent uninsured in demographic group for all income levels
         pctelig_moe   5.1         MOE  for PCTELIG                                
      PCTLIIC          5.1    Percent insured in demographic group for all income levels  
         pctliic_moe   5.1         MOE  for PCTLIIC                                
      state_name       70     State Name
      county_name      45     County Name

In [2]:
sahie = pd.read_csv('../capstone_data/sahie_2019.csv', header=79)

sahie_tn = sahie.loc[sahie['statefips'] == 47]

In [3]:
sahie_tn = sahie_tn.drop(columns = ['year', 'version'])

In [22]:
sahie_tn = sahie_tn.reset_index(drop=True)
sahie_tn.tail(2)

Unnamed: 0,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,...,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name
9484,47,189,50,5,0,2,4,21026.0,841.0,3607.0,...,17.2,2.9,82.8,2.9,8.5,1.5,41.2,2.2,Tennessee ...,Wilson County
9485,47,189,50,5,0,2,5,15917.0,752.0,2130.0,...,13.4,2.2,86.6,2.2,5.0,0.9,32.6,1.8,Tennessee ...,Wilson County


In [6]:
#sahie_tn.to_csv('../capstone_data/sahie_tn.csv')

In [38]:
hrsa_poverty = pd.read_excel('../capstone_data/HRSA_poverty_details.xlsx')

hrsa_poverty = hrsa_poverty.drop(columns = ['State County FIPS Code', 'Geography Type'])

hrsa_poverty.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 8 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   County                                                   95 non-null     object 
 1   State                                                    95 non-null     object 
 2   # of Families with Income Below 1.00 Times the U.S. FPL  95 non-null     int64  
 3   # of Families with Income Below 1.50 Times the U.S. FPL  95 non-null     int64  
 4   # of Families with Income Below 2.00 Times the U.S. FPL  95 non-null     int64  
 5   % of Families with Income Below 1.00 Times the U.S. FPL  95 non-null     float64
 6   % of Families with Income Below 1.50 Times the U.S. FPL  95 non-null     float64
 7   % of Families with Income Below 2.00 Times the U.S. FPL  95 non-null     float64
dtypes: float64(3), int64(3), object(2

In [40]:
#hrsa_poverty.to_csv('../capstone_data/tn_poverty.csv')

In [39]:
unemployment = pd.read_excel('../capstone_data/unemployment_by_county.xlsx', header=4)

unemployment_tn = unemployment.loc[unemployment['State FIPSCode'] == 47]
unemployment_tn['County Name/State Abbreviation'] = unemployment_tn['County Name/State Abbreviation'].str.replace(', TN', '')
unemployment_tn['State'] = 'TN'
unemployment_tn = unemployment_tn.rename(columns = {'County Name/State Abbreviation' : 'County'})

unemployment_tn = unemployment_tn.drop(columns = ['LAUS Code', 'State FIPSCode', 'County FIPS Code', 'Period'])

unemployment_tn.tail(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unemployment_tn['County Name/State Abbreviation'] = unemployment_tn['County Name/State Abbreviation'].str.replace(', TN', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unemployment_tn['State'] = 'TN'


Unnamed: 0,County,Labor Force,Employed,Unemployed,Unemployment Rate (%),State
44382,Williamson County,140568.0,136579.0,3989.0,2.8,TN
44383,Wilson County,83532.0,80836.0,2696.0,3.2,TN


In [36]:
unemployment_tn.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1330 entries, 2429 to 44383
Data columns (total 9 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       1330 non-null   object 
 1   State FIPSCode                  1330 non-null   float64
 2   County FIPS Code                1330 non-null   float64
 3   County Name/State Abbreviation  1330 non-null   object 
 4   Period                          1330 non-null   object 
 5   Labor Force                     1330 non-null   float64
 6   Employed                        1330 non-null   float64
 7   Unemployed                      1330 non-null   float64
 8   Unemployment Rate (%)           1330 non-null   object 
dtypes: float64(5), object(4)
memory usage: 103.9+ KB
