###### Imports and Settings

In [1]:
import os

import numpy as np
import pandas as pd
import requests

# Veteran Status by Age for the GNRC Senior Centers

This data is not available on MySidewalk, where these senior center profiles are hosted. This notebook contains the API pulls and the data cleaning process necessary to include this as outside data in these profiles.

This data will be collected at the county level for the following counties in the State of TN:  
+ Montgomery (Ajax Turner Senior Citizens Center)  
+ Cheatham (Ashland City Senior Citizens Center)  
+ Robertson (Byrum-Porter Senior Center)  
+ Dickson (Dickson Senior Citizens Center)  
+ Williamson (FiftyForward College Grove Senior Citizens Center)  
+ Davidson (FiftyForward Donelson Senior Citizens Center)  
+ Davidson (FiftyForward Knowles Senior Citizens Center)  
+ Sumner (Gallatin Senior Center)  
+ Sumner (Senior Citizens of Hendersonville)  
+ Houston (J.D. Lewis Senior Center)  
+ Rutherford (La Vergne Senior Center)  
+ Wilson (Mt. Juliet Senior Center)  
+ Robertson (Robertson County Senior Center)  
+ Rutherford (St. Clair Street Senior Center)  
+ Stewart (Stewart County Senior Citizens Center)  
+ Trousdale (Trousdale County/Hartsville Senior Center)  
+ Humphreys (Torrey Johnson Senior Center)  

In [2]:
#three-digit county FIPS codes for the GNRC counties, kept as strings so leading zeros survive
_GNRC_COUNTY_FIPS = {
    'Stewart': '161',
    'Montgomery': '125',
    'Houston': '083',
    'Humphreys': '085',
    'Dickson': '043',
    'Cheatham': '021',
    'Robertson': '147',
    'Sumner': '165',
    'Davidson': '037',
    'Wilson': '189',
    'Trousdale': '169',
    'Williamson': '187',
    'Rutherford': '149',
}
#list of codes in the same order as the mapping above
GNRC = list(_GNRC_COUNTY_FIPS.values())

In [3]:
#read the Census API key from the environment so the secret is never stored in the notebook
#(set CENSUS_API_KEY before launching Jupyter)
#NOTE(review): the key that was previously committed here should be treated as exposed and rotated
api_key = os.environ.get('CENSUS_API_KEY', '').strip()
if not api_key:
    print('CENSUS_API_KEY is not set; API requests will be sent without a valid key.')

In [4]:
#veteran status 60+ over 18 civilian population
#pulls one estimate from the 2020 ACS 5-year subject tables for each GNRC county, one request per county
#NOTE(review): the original note on this variable read "the only variable is total population" -
#confirm against the S0102 variable list that S0102_C02_041E is the row intended for 'vet60+'
url_str = 'https://api.census.gov/data/2020/acs/acs5/subject'
get_vars = ["NAME", 'S0102_C02_041E']
col_names = ['County', 'vet60+', 'StateFIPS', 'CountyFIPS'] #get_vars order, then the state/county columns returned with each row

data_appended = []
for i in GNRC:
    predicates = {}
    predicates["get"] = ",".join(get_vars)
    predicates["for"] = "county:{}".format(i)
    predicates["in"] = "state:47"
    if api_key:
        predicates["key"] = api_key #sent as a query parameter instead of being baked into url_str
    response = requests.get(url_str, params=predicates, timeout=30)
    #report the status code ourselves: the stock requests HTTP error message echoes the full URL, key included
    if not response.ok:
        raise RuntimeError('Census API request for county {} failed with HTTP status {}'.format(i, response.status_code))
    try:
        rows = response.json()
    except ValueError as err:
        raise RuntimeError('Census API returned a non-JSON response for county {}: {}'.format(i, response.text[:200])) from err
    county = pd.DataFrame(columns=col_names, data=rows[1:]) #rows[0] is skipped; col_names replaces it
    county['Region'] = 'GNRC'
    data_appended.append(county)

data_appended = pd.concat(data_appended)
data = data_appended.reset_index(drop = True)
print('Your API call is complete.')

Your API call is complete.


In [5]:
#one row per entry in GNRC (13 counties)
data.head(n = 13)

Unnamed: 0,County,vet60+,StateFIPS,CountyFIPS,Region
0,"Stewart County, Tennessee",,47,161,GNRC
1,"Montgomery County, Tennessee",28651.0,47,125,GNRC
2,"Houston County, Tennessee",,47,83,GNRC
3,"Humphreys County, Tennessee",,47,85,GNRC
4,"Dickson County, Tennessee",12414.0,47,43,GNRC
5,"Cheatham County, Tennessee",8788.0,47,21,GNRC
6,"Robertson County, Tennessee",14888.0,47,147,GNRC
7,"Sumner County, Tennessee",41846.0,47,165,GNRC
8,"Davidson County, Tennessee",120691.0,47,37,GNRC
9,"Wilson County, Tennessee",31003.0,47,189,GNRC


We can see that the subject table returns no value for Stewart, Houston, Humphreys, or Trousdale Counties. We'll have to build the more granular age breakdowns from the ACS detailed tables instead. I'd like to get the number of veterans aged 65+, the number of all veterans, the percent of veterans who are seniors, and the percent of the senior population who are veterans. For this I'll need the total civilian veteran population, the total senior population from this data series, and the number of veterans aged 65+ in whatever age categories this series provides.

In [6]:
#pull the B21001 estimates (65-74 and 75+ totals and veterans by sex, plus total veterans)
#from the 2020 ACS 5-year tables for each GNRC county, one request per county
url_str = 'https://api.census.gov/data/2020/acs/acs5'
get_vars = ["NAME", "GEO_ID",'B21001_016E','B21001_017E','B21001_019E','B21001_020E','B21001_034E','B21001_035E','B21001_037E','B21001_038E','B21001_002E']
#friendly names in get_vars order, followed by the state/county columns returned with each row
col_names = ['County', 'GEOID','m65_74','mv65_74','m75+','mv75+','f65_74','fv65_74','f75+','fv75+','totalv','StateFIPS','CountyFIPS']

data_appended = []
for i in GNRC:
    predicates = {}
    predicates["get"] = ",".join(get_vars)
    predicates["for"] = "county:{}".format(i)
    predicates["in"] = "state:47"
    if api_key:
        predicates["key"] = api_key #sent as a query parameter instead of being baked into url_str
    response = requests.get(url_str, params=predicates, timeout=30)
    #report the status code ourselves: the stock requests HTTP error message echoes the full URL, key included
    if not response.ok:
        raise RuntimeError('Census API request for county {} failed with HTTP status {}'.format(i, response.status_code))
    try:
        rows = response.json()
    except ValueError as err:
        raise RuntimeError('Census API returned a non-JSON response for county {}: {}'.format(i, response.text[:200])) from err
    #everything is loaded as str here; the count columns are cast to int in a later cell
    county = pd.DataFrame(columns=col_names, data=rows[1:], dtype = str)
    county['Region'] = 'GNRC'
    data_appended.append(county)

data_appended = pd.concat(data_appended)
data = data_appended.reset_index(drop = True)
print('Your API call is complete')

Your API call is complete


In [7]:
#peek at the first five counties to check the column mapping
data.head(n = 5)

Unnamed: 0,County,GEOID,m65_74,mv65_74,m75+,mv75+,f65_74,fv65_74,f75+,fv75+,totalv,StateFIPS,CountyFIPS,Region
0,"Stewart County, Tennessee",0500000US47161,772,325,433,219,814,15,659,51,1529,47,161,GNRC
1,"Montgomery County, Tennessee",0500000US47125,5526,3232,2914,1986,6397,341,4228,57,27489,47,125,GNRC
2,"Houston County, Tennessee",0500000US47083,479,162,285,200,505,8,423,7,804,47,83,GNRC
3,"Humphreys County, Tennessee",0500000US47085,984,275,635,379,1086,4,888,3,1445,47,85,GNRC
4,"Dickson County, Tennessee",0500000US47043,2439,617,1376,716,2771,24,1927,0,3012,47,43,GNRC


In [8]:
#the API values were loaded as strings; convert every count column to int before doing arithmetic
cols = ['m65_74','mv65_74','m75+','mv75+','f65_74','fv65_74','f75+','fv75+','totalv']
data = data.astype(dict.fromkeys(cols, int))

In [9]:
#senior veterans = male + female veterans across the 65-74 and 75+ age bands
data['seniorvet'] = sum(data[part] for part in ('mv65_74', 'mv75+', 'fv65_74', 'fv75+'))

In [10]:
#the sex/age veteran pieces are no longer needed once seniorvet exists
data = data.drop(['mv65_74', 'mv75+', 'fv65_74', 'fv75+'], axis = 'columns')

In [11]:
#senior population in this series = male + female totals across the 65-74 and 75+ age bands
data['senior'] = sum(data[part] for part in ('m65_74', 'm75+', 'f65_74', 'f75+'))

In [12]:
#the sex/age population pieces are no longer needed once senior exists
data = data.drop(['m65_74', 'm75+', 'f65_74', 'f75+'], axis = 'columns')

In [13]:
#county-level shares, as percentages rounded to two decimals:
#share of seniors who are veterans, and share of veterans who are seniors
data['percentseniors_veteran'] = data['seniorvet'].div(data['senior']).mul(100).round(2)
data['percentveterans_senior'] = data['seniorvet'].div(data['totalv']).mul(100).round(2)

In [14]:
#add up the county counts into one row per Region
region = data.groupby('Region')[['seniorvet', 'senior', 'totalv']].sum()

In [15]:
#display the regional totals
region

Unnamed: 0_level_0,seniorvet,senior,totalv
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
GNRC,46804,254676,119929


In [16]:
#region-level shares, computed from the summed counts rather than by averaging the county percentages
region['percentseniors_veteran'] = region['seniorvet'].div(region['senior']).mul(100).round(2)
region['percentveterans_senior'] = region['seniorvet'].div(region['totalv']).mul(100).round(2)

In [17]:
#move Region out of the index and back into an ordinary column
region = region.reset_index(drop = False)

In [18]:
#add columns to region to make it match data so we can concatenate and export - in order to import to MySidewalk
region['County'] = 'All'
region['GEOID'] = 'x'
region['StateFIPS'] = '47'
region['CountyFIPS'] = 'x'

In [19]:
#stack the county rows on top of the regional row and renumber the index from zero
frames = [data, region]
df = pd.concat(frames, ignore_index = True)

In [20]:
#13 county rows plus the regional row
df.head(n = 14)

Unnamed: 0,County,GEOID,totalv,StateFIPS,CountyFIPS,Region,seniorvet,senior,percentseniors_veteran,percentveterans_senior
0,"Stewart County, Tennessee",0500000US47161,1529,47,161,GNRC,610,2678,22.78,39.9
1,"Montgomery County, Tennessee",0500000US47125,27489,47,125,GNRC,5616,19065,29.46,20.43
2,"Houston County, Tennessee",0500000US47083,804,47,083,GNRC,377,1692,22.28,46.89
3,"Humphreys County, Tennessee",0500000US47085,1445,47,085,GNRC,661,3593,18.4,45.74
4,"Dickson County, Tennessee",0500000US47043,3012,47,043,GNRC,1357,8513,15.94,45.05
5,"Cheatham County, Tennessee",0500000US47021,2620,47,021,GNRC,1109,6063,18.29,42.33
6,"Robertson County, Tennessee",0500000US47147,3924,47,147,GNRC,1657,10431,15.89,42.23
7,"Sumner County, Tennessee",0500000US47165,11680,47,165,GNRC,5605,29889,18.75,47.99
8,"Davidson County, Tennessee",0500000US47037,29594,47,037,GNRC,13803,84748,16.29,46.64
9,"Wilson County, Tennessee",0500000US47189,8996,47,189,GNRC,4081,21910,18.63,45.36


In [22]:
#write the county rows plus the regional row to disk, leaving out the index column
df.to_csv(path_or_buf = '../data/seniorveteranstatus.csv', index = False)