# Household size by town (Connecticut)
## Census 2000 API

* Census 2000 variables: https://api.census.gov/data/2000/sf1/variables.html
* Census 2000 geographies: https://api.census.gov/data/2000/sf1/examples.html

In [1]:
import pandas as pd

In [30]:
def get_hh_size(county):
    """Fetch Census 2000 SF1 variable H012001 for the county subdivisions of one county.

    Parameters
    ----------
    county : str
        County code appended verbatim to the request URL (the URL is fixed
        to state 09), e.g. '001'.

    Returns
    -------
    pandas.DataFrame
        The response rows under positional integer column labels, with the
        header row and rows whose subdivision code (column 3) is '00000'
        removed. Values are left exactly as the API sent them.
    """
    url = 'https://api.census.gov/data/2000/sf1?get=H012001&for=county%20subdivision:*&in=state:09%20county:'

    # dtype=False switches off pandas' type inference, so zero-padded codes
    # stay as strings by design rather than only because the header row
    # happens to block numeric conversion.
    df = pd.read_json(url + county, dtype=False)

    # The API sends its header as the first data row; drop it together with
    # undefined geographies (subdivision code '00000').
    return df[(df[3] != '00000') & (df[3] != 'county subdivision')]

In [33]:
# County codes to request: the odd numbers 1-15 zero-padded to three digits
# ('001', '003', ..., '015'), i.e. eight counties.
county_codes = ['{:03d}'.format(x) for x in range(1, 16, 2)]

# One dataframe per county, in county-code order
dfs = [get_hh_size(code) for code in county_codes]

In [41]:
# Stack the per-county frames. ignore_index=True gives the combined frame a
# fresh 0..n-1 index instead of repeating each county's own row labels.
hh_size = pd.concat(dfs, ignore_index=True)
hh_size.columns = ['household_size', 'state', 'county', 'subdivision']

# Join key: the state, county and subdivision code columns concatenated
hh_size['fips'] = hh_size['state'] + hh_size['county'] + hh_size['subdivision']

In [45]:
# Town name -> FIPS lookup table. Only the first and third CSV columns are
# loaded, and every value is read as a string.
town_fips_url = 'https://raw.githubusercontent.com/CT-Data-Collaborative/ct-town-county-fips-list/master/ct-town-county-fips-list.csv'
fips2town = pd.read_csv(town_fips_url, dtype=str, usecols=[0, 2])

fips2town.head()

Unnamed: 0,Town,FIPS
0,Andover,901301080
1,Ansonia,900901220
2,Ashford,901501430
3,Avon,900302060
4,Barkhamsted,900502760


In [49]:
# Attach town names by matching the constructed 'fips' key against 'FIPS'
# (default inner join: rows without a match on either side are dropped).
# NOTE(review): the fips2town preview earlier in this notebook shows 9-digit
# FIPS values, while 'fips' is built from state + county + subdivision codes
# (likely 2 + 3 + 5 characters) - confirm both keys use the same zero-padding,
# otherwise this join matches nothing.
data = pd.merge(hh_size, fips2town, left_on='fips', right_on='FIPS')

# Export only the town name and household size columns
town_hh_size = data.filter(items=['Town', 'household_size'])
town_hh_size.to_csv('./census-2000-hh-size.csv', index=False)