In [None]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import us

In [74]:
# Base URL of the Census SAIPE (poverty / income estimates) time-series endpoint.
base_url = "https://api.census.gov/data/timeseries/poverty/saipe"
# The Census API key is a credential: read it from the CENSUS_API_KEY
# environment variable instead of committing it to the notebook.
# API_KEY is None when the variable is not set.
API_KEY = os.environ.get("CENSUS_API_KEY")

# Function to fetch data for a specific year, state, and county

def fetch_data(year, state_code, county_code, get_metric='SAEMHI_PT', timeout=30):
    """Fetch one SAIPE value for a single county and year from the Census API.

    Parameters
    ----------
    year : int or str
        Sent as the API's ``time`` parameter (converted with ``str``).
    state_code : str
        State FIPS code, sent as ``in=state:<state_code>``.
    county_code : str
        County FIPS code, sent as ``for=county:<county_code>``.
    get_metric : str, optional
        SAIPE variable to request; see
        https://api.census.gov/data/timeseries/poverty/saipe/variables.html
    timeout : float, optional
        Seconds to wait on the server before abandoning the request, so a
        single stalled connection cannot hang a long crawl.

    Returns
    -------
    The first field of the second row of the JSON payload, or ``None`` when
    the request fails, the status code is not 200, the body is not valid
    JSON, or the payload does not contain that field. Every failure is
    reported with ``print`` rather than raised.
    """
    params = {
        'get': get_metric,
        'for': f'county:{county_code}',
        'in': f'state:{state_code}',
        'time': str(year),
        'key': API_KEY
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failure (timeout, DNS, connection reset, ...).
        print(f"Request failed for year {year}, {state_code}{county_code}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error for year {year}, {state_code}{county_code}: {response.text}")
        return None

    try:
        # presumably row 0 is the header row and row 1 the single data row,
        # with the requested metric as its first field -- TODO confirm
        return response.json()[1][0]
    except ValueError:
        # Every JSON decode error raised by requests derives from ValueError,
        # so no extra import is needed to catch it.
        print(f"JSON decode error for year {year}, {state_code}{county_code}. Response content: {response.text}")
        return None
    except (IndexError, KeyError, TypeError):
        # Valid JSON, but not shaped like [[header...], [value, ...]].
        print(f"Unexpected response shape for year {year}, {state_code}{county_code}. Response content: {response.text}")
        return None



# State FIPS codes taken from the `us` package.
# Note: This list contains all 50 US state codes. Adjust if necessary.
state_fips = [st.fips for st in us.STATES]

# National county reference file from the Census Bureau. It has no header
# row, and every field is read as a string (dtype=str).
url_FIPS = "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt"
county_fips_columns = ["State", "State FIPS", "County FIPS", "County Name", "Class Code"]
df_county_fips = pd.read_csv(url_FIPS, dtype=str, header=None)
df_county_fips.columns = county_fips_columns
df_county_fips

# Fetch the data
years = range(2001, 2023)  # 2001 to 2022 inclusive

# SAIPE variable to download; see
# https://api.census.gov/data/timeseries/poverty/saipe/variables.html
#   SAEMHI_PT        median household income
#   SAEPOVRT0_17_PT  ages 0-17 in poverty, rate estimate
#   SAEPOVRTALL_PT   all ages in poverty, rate estimate
get_metric = 'SAEMHI_PT'

# Result table: one row per county (index GeoFIPS = state FIPS + county FIPS),
# one column per year.
df = pd.DataFrame(columns=['GeoFIPS'] + [str(year) for year in years]).set_index('GeoFIPS')

# NOTE(review): this issues one HTTP request per county per year. The Census
# API appears to accept wildcards such as `county:*`, which would reduce this
# to one request per year -- confirm before restructuring.
# `df` is filled in place as results arrive, so whatever has been fetched so
# far is still available if the loop is interrupted.
for year in years:
    print(f"Fetching data for year {year}")
    for state_code, county_code in zip(df_county_fips["State FIPS"], df_county_fips["County FIPS"]):
        # Pass the metric explicitly so changing `get_metric` above actually
        # changes what is downloaded.
        data = fetch_data(year, state_code, county_code, get_metric=get_metric)
        if data:
            GeoFIPS = state_code + county_code
            # First value for this county: create its row, empty for all years.
            if GeoFIPS not in df.index:
                df.loc[GeoFIPS] = [None] * len(df.columns)

            df.loc[GeoFIPS, str(year)] = data


print(df)

Fetching data for year 2001
Error for year 2001, 02105: 
Error for year 2001, 02195: 
Error for year 2001, 02198: 
Error for year 2001, 02230: 
Error for year 2001, 02275: 


In [71]:
# Write the county-by-year table to the raw-data folder, then display it.
# NOTE(review): the file name says "personal income" while the fetch cell
# requests SAEMHI_PT (median household income) -- confirm the intended name.
raw_csv_path = "../../data/raw/median_personal_income.csv"
df.to_csv(raw_csv_path)
df

Unnamed: 0_level_0,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
GeoFIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01001,42183,,,,,,,,,,...,,,,,,,,,,
01003,40406,,,,,,,,,,...,,,,,,,,,,
01005,25480,,,,,,,,,,...,,,,,,,,,,
01007,31074,,,,,,,,,,...,,,,,,,,,,
01009,36175,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
05025,33511,,,,,,,,,,...,,,,,,,,,,
05027,28385,,,,,,,,,,...,,,,,,,,,,
05029,31166,,,,,,,,,,...,,,,,,,,,,
05031,33348,,,,,,,,,,...,,,,,,,,,,
