In [1]:
# Imports
import requests
import json
from pprint import pprint
from config import api_key

In [2]:
# Imports for calculations
import pandas as pd
import numpy as np
import os

In [3]:
# Request URL for the Census SAIPE time-series endpoint: state-level figures for 2022.
# The three requested *_PT fields are the ones this notebook later labels
# median_h_income, poverty_rate and poverty_count; api_key is imported from config.
url = f'https://api.census.gov/data/timeseries/poverty/saipe?get=NAME,SAEMHI_PT,SAEPOVRTALL_PT,SAEPOVALL_PT&YEAR&for=state:*&time=2022&key={api_key}'

In [4]:
# Fetch the SAIPE table described by `url` from the Census API.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error response into an immediate, descriptive
# exception instead of a confusing JSON decode failure on the next line.
poverty_request = requests.get(url, timeout=30)
poverty_request.raise_for_status()

# Parsed payload: a list of rows whose first row holds the column names.
poverty_request_json = poverty_request.json()

In [5]:
# Pretty-print the parsed response to inspect its layout
# (a header row of field names followed by one row per state).
pprint(poverty_request_json)

[['NAME',
  'SAEMHI_PT',
  'SAEPOVRTALL_PT',
  'SAEPOVALL_PT',
  'YEAR',
  'time',
  'state'],
 ['Alabama', '59703', '16.2', '798469', '2022', '2022', '01'],
 ['Alaska', '88072', '10.8', '77305', '2022', '2022', '02'],
 ['Arizona', '74355', '12.5', '904567', '2022', '2022', '04'],
 ['Arkansas', '55505', '16.3', '481650', '2022', '2022', '05'],
 ['California', '91517', '12.2', '4673718', '2022', '2022', '06'],
 ['Colorado', '89096', '9.5', '542333', '2022', '2022', '08'],
 ['Connecticut', '88182', '9.8', '345710', '2022', '2022', '09'],
 ['Delaware', '81933', '10.0', '99843', '2022', '2022', '10'],
 ['District of Columbia', '99897', '14.2', '91474', '2022', '2022', '11'],
 ['Florida', '69287', '12.7', '2770789', '2022', '2022', '12'],
 ['Georgia', '72742', '12.9', '1372488', '2022', '2022', '13'],
 ['Hawaii', '93683', '10.0', '140871', '2022', '2022', '15'],
 ['Idaho', '72634', '10.5', '198979', '2022', '2022', '16'],
 ['Illinois', '76744', '11.9', '1465724', '2022', '2022', '17'],
 ['I

In [5]:
# One cleaned frame per year is collected here before stacking.
dfs = []

# Walk the years 2010 through 2022, leaving 2020 out.
for year in (yr for yr in range(2010, 2023) if yr != 2020):
    # State-level SAIPE request for this particular year
    url = f'https://api.census.gov/data/timeseries/poverty/saipe?get=NAME,SAEMHI_PT,SAEPOVRTALL_PT,SAEPOVALL_PT&YEAR={year}&for=state:*&time={year}&key={api_key}'

    # Load the response, discard row 0 (it only repeats the field names),
    # and replace the positional column labels with readable ones.
    df = (
        pd.read_json(url)
        .iloc[1:]
        .rename(columns={
            0: 'state',
            1: 'median_h_income',
            2: 'poverty_rate',
            3: 'poverty_count',
            4: 'year',
            5: 'also_year',
            6: 'state_code',
        })
    )

    dfs.append(df)

# Stack the yearly frames into one table with a fresh running index.
result_df = pd.concat(dfs, ignore_index=True)

# Preview the first rows of the combined table.
result_df.head()

Unnamed: 0,state,median_h_income,poverty_rate,poverty_count,year,also_year,state_code
0,Alabama,40538,18.9,883078,2010,2010,1
1,Alaska,63456,11.0,76850,2010,2010,2
2,Arizona,46787,17.6,1105075,2010,2010,4
3,Arkansas,38413,18.7,529710,2010,2010,5
4,California,57664,15.8,5785036,2010,2010,6


In [6]:
# Show the first 20 rows of the combined table.
result_df.head(n=20)

Unnamed: 0,state,median_h_income,poverty_rate,poverty_count,year,also_year,state_code
0,Alabama,40538,18.9,883078,2010,2010,1
1,Alaska,63456,11.0,76850,2010,2010,2
2,Arizona,46787,17.6,1105075,2010,2010,4
3,Arkansas,38413,18.7,529710,2010,2010,5
4,California,57664,15.8,5785036,2010,2010,6
5,Colorado,54411,13.2,651744,2010,2010,8
6,Connecticut,64321,10.1,348881,2010,2010,9
7,Delaware,56172,11.9,104456,2010,2010,10
8,District of Columbia,60729,18.8,107279,2010,2010,11
9,Florida,44390,16.5,3048621,2010,2010,12


In [7]:
# Remove the duplicated year column and the state code, leaving state,
# median_h_income, poverty_rate, poverty_count and year.
# errors='ignore' keeps this cell safe to re-run: without it, a second execution
# raises KeyError because the columns have already been dropped.
result_df = result_df.drop(columns=['also_year', 'state_code'], errors='ignore')

In [8]:
# Preview the first five rows of the current table.
result_df.head(5)

Unnamed: 0,state,median_h_income,poverty_rate,poverty_count,year
0,Alabama,40538,18.9,883078,2010
1,Alaska,63456,11.0,76850,2010
2,Arizona,46787,17.6,1105075,2010
3,Arkansas,38413,18.7,529710,2010
4,California,57664,15.8,5785036,2010


In [None]:
# Write the table to API_Poverty.csv, leaving the row index out of the file.
result_df.to_csv(path_or_buf='API_Poverty.csv', index=False)

In [9]:
#Test URL
#test_url = f'https://api.census.gov/data/timeseries/poverty/saipe?get=NAME,SAEPOVALL_PT,SAEPOVALL_MOE,SAEPOVRTALL_MOE,SAEPOVRTALL_PT&for=state:01&YEAR=2022&key={api_key}'

#test_url = "api.census.gov/data/timeseries/poverty/saipe?get=NAME,SAEPOVALL_PT,SAEPOVALL_MOE,SAEPOVRTALL_MOE,SAEPOVRTALL_PT&for=state:01&YEAR=2022"

In [10]:
#test_request = requests.get(test_url)
#test_request_json = test_request.json()

In [11]:
#pprint(test_request_json)

[['NAME',
  'SAEPOVALL_PT',
  'SAEPOVALL_MOE',
  'SAEPOVRTALL_MOE',
  'SAEPOVRTALL_PT',
  'YEAR',
  'state'],
 ['Alabama', '798469', '17536', '0.40', '16.2', '2022', '01']]


In [52]:
# This is the URL for the spreadsheet; the output below is the result of the pull.
# url2 = f'https://api.census.gov/data/2022/acs/acs1/spp'
# pop_request = requests.get(url2)
# pop_request_json = pop_request.json()
# pprint(pop_request_json)

{'@context': 'https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld',
 '@id': 'http://api.census.gov/data/2022/acs/acs1/spp.json',
 '@type': 'dcat:Catalog',
 'conformsTo': 'https://project-open-data.cio.gov/v1.1/schema',
 'dataset': [{'@type': 'dcat:Dataset',
              'accessLevel': 'public',
              'bureauCode': ['006:07'],
              'c_dataset': ['acs', 'acs1', 'spp'],
              'c_documentationLink': 'https://www.census.gov/developer/',
              'c_examplesLink': 'http://api.census.gov/data/2022/acs/acs1/spp/examples.json',
              'c_geographyLink': 'http://api.census.gov/data/2022/acs/acs1/spp/geography.json',
              'c_groupsLink': 'http://api.census.gov/data/2022/acs/acs1/spp/groups.json',
              'c_isAggregate': True,
              'c_isAvailable': True,
              'c_isCube': True,
              'c_sorts_url': 'http://api.census.gov/data/2022/acs/acs1/spp/sorts.json',
              'c_tagsLink': 'http://api.census.gov/data