In [98]:
import json
import os

import pandas as pd
import requests

# Create a table containing economic and technology data for each state
For each state, use the 2023 American Community Survey (ACS) 1-year estimates to determine the percent of:
- employed people (16 years and over) who work in the Agriculture, Forestry, Fishing and Hunting sector [pct_employed_AFFH]
- people (1 year and over) in poverty [pct_poverty]
- households with computers [pct_computers]
- households with internet [pct_internet]
- people (1 year and over) living in renter-occupied housing [pct_renters]
- people (1 year and over) living in owner-occupied housing [pct_homeowners]


In [88]:
#use the Census API to fetch the 2023 ACS 1-year results for the variables we care about
#(one row per state; the meaning of each variable code is listed in var_codes)
variables = 'B24050_001E,B24050_003E,B07012_001E,B07012_002E,B28002_001E,B28002_002E,B28003_001E,B28003_002E,B07013_001E,B07013_002E,B07013_003E'
response = requests.get(
    f'https://api.census.gov/data/2023/acs/acs1?get=NAME,{variables}&for=state:*',
    #without a timeout a stalled connection would hang the notebook indefinitely
    timeout=60,
)
#fail here with a clear HTTP error instead of a confusing JSON decode error below
response.raise_for_status()
#the body is a JSON array of rows; the first row holds the column names
results = response.json()

In [89]:
#lookup table: ACS variable code -> readable column name
#the P_/I_/C_/H_ prefixes keep the totals of the different tables apart
var_codes = {
    #employment
    'B24050_001E': 'Employed_total',
    'B24050_003E': 'Employed_AFFH',
    #poverty
    'B07012_001E': 'P_Individuals_total',
    'B07012_002E': 'P_Individuals_poverty',
    #internet
    'B28002_001E': 'I_Households_total',
    'B28002_002E': 'I_Households_internet',
    #computers
    'B28003_001E': 'C_Households_total',
    'B28003_002E': 'C_Households_computer',
    #housing (owners / renters)
    'B07013_001E': 'H_Individuals_total',
    'B07013_002E': 'H_Individuals_owner',
    'B07013_003E': 'H_Individuals_renter',
}

In [90]:
#create a df from the ACS results we fetched: the first row returned by the API
#holds the column names and every following row holds the values for one state.
#Built as one non-mutating chain so re-running the cell always starts from
#`results` and no inplace operation is applied to a slice of another frame.
df = (
    pd.DataFrame(results[1:], columns=results[0])
    #make the state name the index
    .set_index('NAME')
    #rename the columns to better represent what they contain
    .rename(columns=var_codes)
    #remove puerto rico (missing some values that will interfere with calculations)
    .drop(index='Puerto Rico')
)


In [94]:
#the API returns every value as text, so cast all columns to integer first
df = df.astype('int')

#each output column is (matching count / table total) * 100
#output column -> (count column, total column)
pct_sources = {
    'pct_employed_AFFH': ('Employed_AFFH', 'Employed_total'),
    'pct_poverty': ('P_Individuals_poverty', 'P_Individuals_total'),
    'pct_computers': ('C_Households_computer', 'C_Households_total'),
    'pct_internet': ('I_Households_internet', 'I_Households_total'),
    'pct_renters': ('H_Individuals_renter', 'H_Individuals_total'),
    'pct_homeowners': ('H_Individuals_owner', 'H_Individuals_total'),
}

#store the calculated percentages in a new dataframe that shares df's index
calc_df = pd.DataFrame(index=df.index)
for pct_name, (count_col, total_col) in pct_sources.items():
    calc_df[pct_name] = df[count_col].div(df[total_col]).mul(100)

In [96]:
#export the data as a csv in a folder containing census data
folder = 'census_data'
#create the folder if it is missing; to_csv does not create directories itself
os.makedirs(folder, exist_ok=True)
calc_df.to_csv(f'{folder}/2023_ACS_calculated_data.csv')