In [2]:
import numpy as np
from cities.utils.data_grabber import find_repo_root, DataGrabber
import pandas as pd
import numpy as np
import requests

# Load the wide-format GDP table through the project's DataGrabber. Its
# GeoFIPS / GeoName columns are used later to filter and label the counties.
GDP_FEATURE = "gdp"
data = DataGrabber()
data.get_features_wide([GDP_FEATURE])
gdp = data.wide[GDP_FEATURE]

# Repository root, used to build the output CSV path at the end.
root = find_repo_root()

In [8]:
# Query the Census API (ACS 5-year subject tables) for ten S1901 variables,
# for every county in every state. The variables are renamed to income
# brackets in a later cell.
area = "county:*&in=state:*"

variables = "S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E"


# interval = list(range(2010, 2023)) # it is possible to create this variable as a time series (only for ACS 5 year estimates)

year = 2021

url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get={variables}&for={area}"

# Without a timeout, requests waits forever on a stalled connection.
response = requests.get(url, timeout=60)

# Fail with the status code and the start of the body, so a bad request is
# diagnosable from the notebook output (a bare assert shows neither).
if response.status_code != 200:
    raise RuntimeError(
        f"Census API request failed with HTTP {response.status_code}: "
        f"{response.text[:200]}"
    )

# The payload is a list of rows whose first row holds the column names.
# A dedicated name is used so the DataGrabber instance bound to `data`
# in the first cell is not overwritten.
census_rows = response.json()

df = pd.DataFrame(census_rows[1:], columns=census_rows[0])
df.columns

Index(['S1901_C01_002E', 'S1901_C01_003E', 'S1901_C01_004E', 'S1901_C01_005E',
       'S1901_C01_006E', 'S1901_C01_007E', 'S1901_C01_008E', 'S1901_C01_009E',
       'S1901_C01_010E', 'S1901_C01_011E', 'state', 'county'],
      dtype='object')

In [18]:
# Spot-check slice: rows whose state FIPS code is "09". For inspection only;
# the table below is built from the full `df`.
# (The bare `df_subset` expression that used to follow was removed: only the
# last expression of a cell is displayed, so it had no effect.)
df_subset = df[df["state"] == "09"]

# Work on a copy so the raw API response in `df` stays untouched.
income_composition = df.copy()

In [19]:
# Readable labels for Census variables S1901_C01_002E .. S1901_C01_011E,
# listed in variable-number order (002 first, 011 last).
income_brackets = [
    "less_10k",
    "10k_15k",
    "15k_25k",
    "25k_35k",
    "35k_50k",
    "50k_75k",
    "75k_100k",
    "100k_150k",
    "150k_200k",
    "200k_more",
]

# Pair each label with its variable code; numbering starts at 002.
column_mapping = {
    f"S1901_C01_{number:03d}E": label
    for number, label in enumerate(income_brackets, start=2)
}

income_composition = income_composition.rename(columns=column_mapping)

income_composition.head()

In [21]:
# Concatenate the state and county code strings into one identifier, cast
# it to int64 (which drops any leading zero), and discard the two source
# columns. GeoFIPS ends up as the last column.
income_composition = income_composition.assign(
    GeoFIPS=lambda frame: (frame["state"] + frame["county"]).astype(np.int64)
).drop(columns=["state", "county"])

In [23]:
# Keep only the counties present in both this table and the GDP table.
common_fips = np.intersect1d(
    income_composition["GeoFIPS"].unique(), gdp["GeoFIPS"].unique()
)
income_composition = income_composition[income_composition["GeoFIPS"].isin(common_fips)]

# Attach the county name from the GDP table.
income_composition = income_composition.merge(
    gdp[["GeoFIPS", "GeoName"]], on="GeoFIPS", how="left"
)

# Put the identifier columns first, followed by the remaining columns in
# their existing order.
id_columns = ["GeoFIPS", "GeoName"]
value_columns = [
    column for column in income_composition.columns if column not in id_columns
]
income_composition = income_composition[id_columns + value_columns]

# The API returns every value as a string. Cast with a rebinding `astype`:
# assigning through `iloc[:, 2:] = ...` writes into the existing object-dtype
# columns on pandas >= 2.0, so the columns would stay `object`.
income_composition = income_composition.astype(
    {column: float for column in value_columns}
)
print(income_composition.head())
print(income_composition.shape[0])

   GeoFIPS      GeoName less_10k 10k_15k 15k_25k 25k_35k 35k_50k 50k_75k  \
0     1001  Autauga, AL      5.5     4.8    10.9     8.1    12.1    17.8   
1     1003  Baldwin, AL      4.9     4.2     7.0     9.5    13.4    17.0   
2     1005  Barbour, AL     12.3     7.1    18.4    10.7    13.7    16.2   
3     1007     Bibb, AL      7.5     7.8    11.6    10.2    10.7    17.5   
4     1009   Blount, AL      8.4     4.2     9.5    11.0    13.0    18.5   

  75k_100k 100k_150k 150k_200k 200k_more  
0     13.9      16.2       6.0       4.7  
1     14.6      15.3       7.0       7.1  
2      7.0       9.3       1.9       3.3  
3     16.8      10.9       3.7       3.3  
4     11.7      14.1       5.8       3.7  
3071


In [26]:
# Write the final table under the repository's raw data folder, without the
# DataFrame index.
output_path = f"{root}/data/raw/income_distribution.csv"
income_composition.to_csv(output_path, index=False)