In [1]:
import os
import sys
import pandas as pd, geopandas as gp, numpy as np
import getpass
from arcgis import GIS

user = getpass.getuser()

# Resolve the local dvutils clone relative to the current user's home directory instead of
# assuming the macOS "/Users/<name>" layout, so the notebook also runs on Linux and Windows.
DVUTILS_LOCAL_CLONE_PATH = os.path.join(os.path.expanduser("~"), "Documents", "GitHub", "dvutils")
# Guard the insert so re-running this cell does not stack duplicate entries on sys.path.
if DVUTILS_LOCAL_CLONE_PATH not in sys.path:
    sys.path.insert(0, DVUTILS_LOCAL_CLONE_PATH)
from utils_io import *

In [2]:
# Credentials come from the environment; each value is None when its variable is unset.
api_key = os.getenv("CENSUS_API_KEY")
agol_password = os.getenv("AGOL_CONTENT_PASSWORD")

In [3]:
# authenticate to agol
# Signs in as the "content_MTC" account with the password read from the AGOL_CONTENT_PASSWORD
# environment variable; `gis` is the client handed to the publish step at the end of the notebook.
gis = GIS(url="https://mtc.maps.arcgis.com/home", username="content_MTC", password=agol_password)

### Read census api key from file

In [4]:
def get_file_contents(filename):
    """Return the stripped text content of ``filename``.

    The file is expected to hold a single line (an API key). When the file
    does not exist, a message is printed and ``None`` is returned.
    """
    try:
        handle = open(filename, "r")
    except FileNotFoundError:
        print("'%s' file not found" % filename)
        return None
    with handle:
        # Surrounding whitespace / trailing newline is not part of the key.
        return handle.read().strip()

In [5]:
def flag_condition_calc(row, df_share_column, standard_deviation):
    cond = (
        df_share_column.mean().round(decimals=2)
        + (standard_deviation * df_share_column.std().round(decimals=2))
    ).round(decimals=2)
    if row > cond:
        return 1
    else:
        return 0

In [6]:
def flag_mult_columns(dataframe, dictionary, standard_deviation):
    for key, value in dictionary.items():
        dataframe[value] = dataframe[key].apply(
            lambda row: flag_condition_calc(row, dataframe[key], standard_deviation)
        )

In [7]:
def set_epc_class(df):
    """Return the EPC class label for one row.

    The flags are checked from the strictest tier down and the first one equal
    to 1 wins: ``epc50p_1ha`` -> "Highest", ``epc50p_1`` -> "Higher",
    ``epc50p_1_2`` -> "High". Rows with no flag set get "NA".
    """
    tiers = (
        ("epc50p_1ha", "Highest"),
        ("epc50p_1", "Higher"),
        ("epc50p_1_2", "High"),
    )
    for flag_col, label in tiers:
        if df[flag_col] == 1:
            return label
    return "NA"

In [8]:
def pull_acs_5_year_est_data(
    census_api_key,
    acs_year=2019,
    tbl_prof_type="Detailed",
    table_id=None,
    select_table_vars=None,
    drop_anno_cols=True,
    drop_margin_cols=True,
):
    """
    Pull American Community Survey (ACS) 5 year estimate data for every census tract in the
    counties hard-coded below (state FIPS 06). Data can be pulled for an entire table or for
    select table variables.

    !Must include a table_id or a list for the select_table_vars parameter!

    Parameters
    -------------------
    census_api_key (String):
    Your secret census api key.

    acs_year (Integer):
    Year for acs estimates, default is 2019.

    tbl_prof_type (String):
    Table or profile type. These include the following types: Detailed, Subject, Data, or Comparison.

    table_id (String):
    ACS table id. Example 'B01001'. When provided, the whole table group is requested and
    select_table_vars is ignored.

    select_table_vars (List):
    provide a list of ACS table variables as strings. Example: ['B01001_001E','B01001_002E']

    drop_anno_cols (Boolean):
    Drops annotation of margin of error and annotation of estimate columns
    (column names ending in "MA" or "EA").

    drop_margin_cols (Boolean):
    Drops margin of error columns (column names ending in "M").

    Returns
    -------------------
    pandas DataFrame with one row per tract: the requested variables cast to numeric, the
    county code in "fipco", and "tract_geoid" (state + county + tract codes concatenated).
    If tbl_prof_type is not one of the four supported values, an explanatory message string is
    returned instead (kept for backward compatibility).

    Raises
    -------------------
    ValueError if neither table_id nor select_table_vars is provided.
    requests.HTTPError if the Census API responds with an error status.

    Author: Joshua Croff
    Variable Reference: https://www.census.gov/data/developers/data-sets/acs-5year.html
    """
    # Fail early with a clear message instead of the TypeError that ",".join(None) would raise.
    if not table_id and not select_table_vars:
        raise ValueError("Provide either table_id or a non-empty select_table_vars list")

    # Endpoint path for each supported table/profile type.
    endpoints = {
        "Detailed": "acs/acs5",
        "Subject": "acs/acs5/subject",
        "Data": "acs/acs5/profile",
        "Comparison": "acs/acs5/cprofile",
    }
    if tbl_prof_type not in endpoints:
        return "Please provide the following table types: Detailed, Subject, Data, or Comparison"

    # Imported after argument validation so invalid calls are reported without needing
    # the network stack.
    import requests
    import pandas as pd

    var = f"group({table_id})" if table_id else ",".join(select_table_vars)

    counties = "001,013,041,055,075,081,085,095,097"
    state = "06"
    # set base url
    base_url = f"https://api.census.gov/data/{acs_year}/{endpoints[tbl_prof_type]}?"

    # set query params
    query_params = {
        "get": var,
        "for": "tract:*",
        "in": [
            f"county:{counties}",
            f"state:{state}",
        ],
        "key": census_api_key,
    }
    rq = requests.get(base_url, params=query_params)
    # Surface HTTP errors directly rather than as a confusing JSON decode failure.
    rq.raise_for_status()
    data = rq.json()
    # First row of the payload is the header; the remaining rows are tract records.
    acs_df = pd.DataFrame(data[1:], columns=data[0])

    # Cast numeric columns to numeric types; identifier columns stay as strings so that
    # leading zeros in the FIPS codes are preserved.
    id_cols = {"GEO_ID", "NAME", "state", "county", "tract"}
    num_cols = [col for col in acs_df.columns if col not in id_cols]
    acs_df[num_cols] = acs_df[num_cols].apply(pd.to_numeric)

    # Drop annotation columns
    if drop_anno_cols:
        acs_df = acs_df.loc[
            :, ~((acs_df.columns.str.endswith("EA")) | (acs_df.columns.str.endswith("MA")))
        ].copy()

    # Drop margin of error columns; copy the filtered frame (not the mask) so the column
    # assignment below works on an independent DataFrame.
    if drop_margin_cols:
        acs_df = acs_df.loc[:, ~acs_df.columns.str.endswith("M")].copy()

    # add tract id column
    acs_df["tract_geoid"] = acs_df["state"] + acs_df["county"] + acs_df["tract"]

    # rename columns
    acs_df = acs_df.rename(columns={"county": "fipco"})

    # drop redundant columns
    if table_id:
        acs_df = acs_df.drop(columns=["GEO_ID", "NAME", "state", "tract"])
    else:
        acs_df = acs_df.drop(columns=["state", "tract"])

    return acs_df

In [9]:
def pull_census_tracts_geodata(year=2020, cartographic=False):
    """
    Pulls Census Tracts from TIGERweb REST API and returns Geopandas GeoDataframe.
    Only tracts in the state and counties hard-coded below are requested. Default year is 2020.

    How to choose vintage: https://www2.census.gov/geo/pdfs/maps-data/data/tiger/How_do_I_choose_TIGER_vintage.pdf

    Parameters
    -------------------
    year (int):
    the TIGER vintage.
    list of valid years: [2012,2015,2016,2017,2018,2019,2020,2021,2022]

    cartographic (bool):
    If the cartographic parameter is set to true, a generalized version of tracts is returned
    with water areas clipped.

    Returns
    -------------------
    GeoDataFrame in EPSG:4326 with the tract id in "tract_geoid" plus geometry. If the year is
    not a valid vintage, a message is printed and None is returned.

    Raises
    -------------------
    requests.HTTPError if TIGERweb responds with an error status.

    Author: Joshua Croff
    Source: https://tigerweb.geo.census.gov/tigerwebmain/TIGERweb_restmapservice.html
    """
    valid_years = [2012, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
    if year not in valid_years:
        print("Error- vintage not available. Please see docstring for valid years")
        return

    # Imported after validation so an invalid vintage is reported without the geo stack.
    import geopandas as gpd
    import requests

    if cartographic:
        # Generalized services: 2020 uses the "TAB" prefix, every other vintage uses "ACS";
        # the tract layer id is "3" in all of them.
        prefix = "Generalized_TAB" if year == 2020 else "Generalized_ACS"
        service_path = [f"{prefix}{year}", "Tracts_Blocks", "MapServer", "3"]
    else:
        # Full-resolution services: the service name and tract layer id vary by vintage.
        if year == 2020:
            map_service, layer_id = f"tigerWMS_Census{year}", "6"
        elif year < 2020:
            map_service, layer_id = f"tigerWMS_ACS{year}", "8"
        else:
            map_service, layer_id = f"tigerWMS_ACS{year}", "6"
        service_path = ["TIGERweb", map_service, "MapServer", layer_id]

    state = "06"
    counties = "('001','013','041','055','075','081','085','095','097')"
    where_str = f"where=STATE='{state}'+AND+COUNTY+IN{counties}"
    query_args = [where_str, "outFields=GEOID&f=geojson"]

    url = "/".join(
        [
            "https://tigerweb.geo.census.gov",
            "arcgis",
            "rest",
            "services",
            *service_path,
            "query?{}".format("&".join(query_args)),
        ]
    )
    r = requests.get(url)
    # Surface HTTP errors directly rather than as a confusing JSON decode failure.
    r.raise_for_status()
    geog_json = r.json()
    geog_gdf = gpd.GeoDataFrame.from_features(geog_json["features"], crs="EPSG:4326")

    # rename GEOID column to tract_geoid
    geog_gdf = geog_gdf.rename(columns={"GEOID": "tract_geoid"})
    return geog_gdf

In [10]:
# create a function to overwrite a feature layer
def overwrite_published_feature_layer(f_layer_id, geojson_path, client):
    """Overwrite a published feature layer

    Parameters:
    -----------
    f_layer_id : str
        id of the feature layer to overwrite
    geojson_path : str
        path to the geojson file
    client : authenticated arcgis client
        authentication example below:
        from arcgis.gis import GIS
        password = os.environ.get("AGOL_CONTENT_PASSWORD")
        gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)

    Raises:
    -------
    ValueError
        if f_layer_id is empty or no item with that id can be retrieved
    RuntimeError
        if the overwrite call explicitly reports failure
    """
    if not f_layer_id:
        raise ValueError("f_layer_id is required to overwrite a hosted feature layer")

    from arcgis.features import FeatureLayerCollection

    # get the feature layer
    host_flayer = client.content.get(f_layer_id)
    if host_flayer is None:
        raise ValueError(f"No ArcGIS Online item found with id: {f_layer_id}")

    # create feature layer collection object
    f_layer = FeatureLayerCollection.fromitem(host_flayer)
    # overwrite the feature layer
    result = f_layer.manager.overwrite(geojson_path)
    # Only an explicit failure report is treated as an error; any other return value is
    # accepted so the success message is never printed after a reported failure.
    if isinstance(result, dict) and result.get("success") is False:
        raise RuntimeError(f"Overwrite of feature layer {f_layer_id} failed: {result}")

    print(f"Overwrote hosted feature layer with id: {f_layer_id}")

In [11]:
# create a function that publishes a geojson to agol
def publish_geojson_to_agol(
    geojson_path,
    layer_name,
    layer_snippet,
    tags,
    client,
    folder=None,
    overwrite=False,
    f_layer_id=None,
):
    """Publish a geojson to ArcGIS Online

    Parameters:
    -----------
    geojson_path : str
        path to the geojson file
    layer_name : str
        name of the layer (not used when overwrite is True)
    layer_snippet : str
        layer snippet (not used when overwrite is True)
    tags : str
        tags as a comma separated string (e.g. "tag1, tag2, tag3");
        not used when overwrite is True
    client : authenticated arcgis client
        authentication example below:
        from arcgis.gis import GIS
        password = os.environ.get("AGOL_CONTENT_PASSWORD")
        gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)
    folder : str
        name of the folder to publish to (optional; not used when overwrite is True)
    overwrite : bool
        if True, overwrite existing layer
    f_layer_id : str
        if overwrite is True, provide the id of the feature layer to overwrite

    Raises:
    -------
    ValueError
        if overwrite is True but f_layer_id is not provided
    """
    if overwrite:
        # Fail before touching ArcGIS Online when the target layer is not identified.
        if not f_layer_id:
            raise ValueError("f_layer_id must be provided when overwrite is True")
        overwrite_published_feature_layer(f_layer_id, geojson_path, client)
    else:
        # publish the geojson
        item_prop = {
            "type": "GeoJson",
            "title": layer_name,
            "tags": tags,
            "snippet": layer_snippet,
            "overwrite": True,
        }
        item = client.content.add(item_properties=item_prop, data=geojson_path, folder=folder)

        # publish the item
        published_item = item.publish(file_type="geojson")

        print(f"Published {layer_name} to ArcGIS Online as {published_item.id}")

### Read selected ACS variables from CSV

In [12]:
# Lookup table of the ACS variables used for the EPC factors; its "ACS_Table_Variable"
# column supplies the variable ids requested from the Census API below.
acs_epc_selected_vars = pd.read_csv("Data/acs_table_variables_epc_factors.csv")

In [13]:
# Plain Python list of variable ids, passed as select_table_vars to the ACS pull below.
acs_vars_lst = acs_epc_selected_vars["ACS_Table_Variable"].tolist()

### Query ACS API
#### [Census American Community Survey 5-Year Data API Documentation](https://www.census.gov/data/developers/data-sets/acs-5year.html)

In [14]:
# pull american community survey tabular data
# Requests only the selected variables (no table_id) from the "Detailed" tables endpoint
# for acs_year 2022, using the key read from the CENSUS_API_KEY environment variable.
acs_df = pull_acs_5_year_est_data(
    census_api_key=api_key, acs_year=2022, tbl_prof_type="Detailed", select_table_vars=acs_vars_lst
)

In [15]:
# pull american community survey geographic data
# cartographic=True requests the generalized tract boundaries; the result carries
# "tract_geoid", the key used later to join the geometry to acs_df.
acs_gdf = pull_census_tracts_geodata(year=2022, cartographic=True)

### Rename columns for consistency with prior EPCs

In [16]:
# API field name -> short column name. The "tot_*" columns are the denominators of the
# share calculations further down; "pop_hus_re" and "pop_zvhhs" are used directly as numerators.
cols = dict(
    [
        ("fipco", "county_fip"),
        ("B03002_001E", "tot_pop_poc"),
        ("B01001_001E", "tot_pop_se"),
        ("C17002_001E", "tot_pop_po"),
        ("C18108_001E", "tot_pop_ci"),
        ("B08201_001E", "tot_hh"),
        ("B11004_001E", "tot_fam"),
        ("B16005_001E", "tot_pop_ov"),
        ("B25070_010E", "pop_hus_re"),
        ("B08201_002E", "pop_zvhhs"),
    ]
)
acs_df = acs_df.rename(columns=cols)

### Calculate epc and populations

In [17]:
# Derive the numerator population for each EPC factor from the raw ACS estimate columns.
# The sums use "+", so a missing value in any component column makes that tract's result NaN.

# calculate poc population (total population - not hispanic or latino white alone)
acs_df["pop_poc"] = acs_df["tot_pop_poc"] - acs_df["B03002_003E"]

# calculate senior population
acs_df["pop_over75"] = (
    acs_df["B01001_023E"]
    + acs_df["B01001_024E"]
    + acs_df["B01001_025E"]
    + acs_df["B01001_047E"]
    + acs_df["B01001_048E"]
    + acs_df["B01001_049E"]
)

# calculate single parent family population (male householder, no spouse present + female householder, no spouse present)
acs_df["pop_spfam"] = acs_df["B11004_010E"] + acs_df["B11004_016E"]

# calculate limited english proficiency population (primarily speaks a language other than English at home and speaks English less than "very well" or "not at all")
acs_df["pop_lep"] = (
    acs_df["B16005_007E"]
    + acs_df["B16005_008E"]
    + acs_df["B16005_012E"]
    + acs_df["B16005_013E"]
    + acs_df["B16005_017E"]
    + acs_df["B16005_018E"]
    + acs_df["B16005_022E"]
    + acs_df["B16005_023E"]
    + acs_df["B16005_029E"]
    + acs_df["B16005_030E"]
    + acs_df["B16005_034E"]
    + acs_df["B16005_035E"]
    + acs_df["B16005_039E"]
    + acs_df["B16005_040E"]
    + acs_df["B16005_044E"]
    + acs_df["B16005_045E"]
)

# calculate population below 200% of poverty (total population - population above 200% of poverty)
acs_df["pop_below2"] = acs_df["tot_pop_po"] - acs_df["C17002_008E"]

# calculate population with a disability (total civilian non-institutionalized population - population with no disability)
acs_df["pop_disabi"] = acs_df["tot_pop_ci"] - (
    acs_df["C18108_005E"] + acs_df["C18108_009E"] + acs_df["C18108_013E"]
)

### Calculate epc shares

In [18]:
# (share column, numerator column, denominator column), in the order the columns are created.
share_specs = [
    ("pct_poc", "pop_poc", "tot_pop_poc"),
    ("pct_over75", "pop_over75", "tot_pop_se"),
    ("pct_spfam", "pop_spfam", "tot_fam"),
    ("pct_lep", "pop_lep", "tot_pop_ov"),
    ("pct_below2", "pop_below2", "tot_pop_po"),
    ("pct_disab", "pop_disabi", "tot_pop_ci"),
    ("pct_zvhhs", "pop_zvhhs", "tot_hh"),
    ("pct_hus_re", "pop_hus_re", "tot_hh"),
]
for share_col, numerator_col, denominator_col in share_specs:
    # A tract with a zero denominator gets a share of 0 rather than the division result.
    acs_df[share_col] = np.where(
        acs_df[denominator_col] == 0, 0, (acs_df[numerator_col] / acs_df[denominator_col])
    )

### Flag epcs and epc levels (high, higher, highest) 

### Flag halfsd columns and count factors

In [19]:
# Share column -> name of its 0/1 flag at mean + 0.5 standard deviations.
cols_dict_halfsd = dict(
    [
        ("pct_over75", "over75_1_2"),
        ("pct_poc", "poc_1_2"),
        ("pct_spfam", "spfam_1_2"),
        ("pct_disab", "disab_1_2"),
        ("pct_lep", "lep_1_2"),
        ("pct_below2", "below2_1_2"),
        ("pct_zvhhs", "zvhh_1_2"),
        ("pct_hus_re", "hus_re_1_2"),
    ]
)

flag_mult_columns(acs_df, dictionary=cols_dict_halfsd, standard_deviation=0.5)

In [20]:
# Number of factors flagged at the half-standard-deviation threshold, per tract.
halfsd_cols_list = [
    "below2_1_2", "poc_1_2", "spfam_1_2", "disab_1_2",
    "lep_1_2", "over75_1_2", "zvhh_1_2", "hus_re_1_2",
]
acs_df["count_1_2"] = acs_df.loc[:, halfsd_cols_list].sum(axis="columns")

### Flag halfsd epc

In [21]:
halfsd_remain = ["spfam_1_2", "disab_1_2", "lep_1_2", "over75_1_2", "zvhh_1_2", "hus_re_1_2"]
# A tract qualifies when it is flagged low-income AND is either flagged people-of-color
# or has at least three of the remaining factors flagged.
halfsd_cond = (acs_df["below2_1_2"] == 1) & (
    (acs_df["poc_1_2"] == 1) | (acs_df[halfsd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1_2"] = np.where(halfsd_cond, 1, 0)

### Flag onesd columns

In [22]:
# Share column -> name of its 0/1 flag at mean + 1 standard deviation.
cols_dict_onesd = dict(
    [
        ("pct_over75", "over75_1"),
        ("pct_poc", "poc_1"),
        ("pct_spfam", "spfam_1"),
        ("pct_disab", "disab_1"),
        ("pct_lep", "lep_1"),
        ("pct_below2", "below2_1"),
        ("pct_zvhhs", "zvhh_1"),
        ("pct_hus_re", "hus_re_1"),
    ]
)

flag_mult_columns(acs_df, dictionary=cols_dict_onesd, standard_deviation=1)

In [23]:
# Number of factors flagged at the one-standard-deviation threshold, per tract.
onesd_cols_list = [
    "below2_1", "poc_1", "spfam_1", "disab_1",
    "lep_1", "over75_1", "zvhh_1", "hus_re_1",
]
acs_df["count_1"] = acs_df.loc[:, onesd_cols_list].sum(axis="columns")

### Flag onesd epc

In [24]:
onesd_remain = ["spfam_1", "disab_1", "lep_1", "over75_1", "zvhh_1", "hus_re_1"]
# A tract qualifies when it is flagged low-income AND is either flagged people-of-color
# or has at least three of the remaining factors flagged.
onesd_cond = (acs_df["below2_1"] == 1) & (
    (acs_df["poc_1"] == 1) | (acs_df[onesd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1"] = np.where(onesd_cond, 1, 0)

### Flag onehalfsd columns

In [25]:
# Share column -> name of its 0/1 flag at mean + 1.5 standard deviations.
cols_dict_onehalfsd = dict(
    [
        ("pct_over75", "over75_1ha"),
        ("pct_poc", "poc_1ha"),
        ("pct_spfam", "spfam_1ha"),
        ("pct_disab", "disab_1ha"),
        ("pct_lep", "lep_1ha"),
        ("pct_below2", "below2_1ha"),
        ("pct_zvhhs", "zvhh_1ha"),
        ("pct_hus_re", "hus_re_1ha"),
    ]
)

flag_mult_columns(acs_df, dictionary=cols_dict_onehalfsd, standard_deviation=1.5)

In [26]:
# Number of factors flagged at the 1.5-standard-deviation threshold, per tract.
onehalfsd_cols_list = [
    "below2_1ha", "poc_1ha", "spfam_1ha", "disab_1ha",
    "lep_1ha", "over75_1ha", "zvhh_1ha", "hus_re_1ha",
]
acs_df["count_1ha"] = acs_df.loc[:, onehalfsd_cols_list].sum(axis="columns")

### Flag onehalfsd epc

In [27]:
onehalfsd_remain = ["spfam_1ha", "disab_1ha", "lep_1ha", "over75_1ha", "zvhh_1ha", "hus_re_1ha"]
# A tract qualifies when it is flagged low-income AND is either flagged people-of-color
# or has at least three of the remaining factors flagged.
onehalfsd_cond = (acs_df["below2_1ha"] == 1) & (
    (acs_df["poc_1ha"] == 1) | (acs_df[onehalfsd_remain].sum(axis=1) >= 3)
)
acs_df["epc50p_1ha"] = np.where(onehalfsd_cond, 1, 0)

### Flag 2050 epcs

In [28]:
# A tract is an EPC when it qualifies at any of the three thresholds. Every operand is
# compared to 1 so the mask is purely boolean; the third operand was previously the raw
# integer column, which mixed a bool mask with an int column in the "|" chain.
acs_df["epc_2050p"] = np.where(
    (acs_df["epc50p_1ha"] == 1) | (acs_df["epc50p_1"] == 1) | (acs_df["epc50p_1_2"] == 1), 1, 0
)

### Create epc classes

In [29]:
# Row-wise classification: set_epc_class checks epc50p_1ha, then epc50p_1, then epc50p_1_2,
# so each tract gets the label of the strictest threshold it meets ("NA" when none).
acs_df["epc_class"] = acs_df.apply(set_epc_class, axis=1)

In [30]:
# Count of EPC tracts per class. Uses the groupby .sum() method rather than passing the
# builtin sum to .agg(), which pandas flags with a FutureWarning.
acs_df.groupby("epc_class")["epc_2050p"].sum()

  acs_df.groupby("epc_class")["epc_2050p"].agg(sum)


epc_class
High       174
Higher     131
Highest     48
NA           0
Name: epc_2050p, dtype: int64

## Compare previous EPCs

In [31]:
# census_vintage_crosswalk = pd.read_csv(
#     "https://www2.census.gov/geo/docs/maps-data/data/rel2020/tract/tab20_tract20_tract10_natl.txt",
#     sep="|",
#     dtype=str,
# )

In [32]:
# census_vintage_crosswalk.rename(
#     columns={"GEOID_TRACT_20": "tract_geoid20", "GEOID_TRACT_10": "tract_geoid10"}, inplace=True
# )

In [33]:
# pba50_epc_df = pull_geotable_agol(
#     base_url="https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/communities_of_concern_2020_acs2018/FeatureServer/0",
#     client=gis,
#     reproject_to_analysis_crs=False,
# )

In [34]:
# pba50_epc_df.rename(columns={"geoid": "tract_geoid10"}, inplace=True)

In [35]:
# pba50_epc_df["epc_2050"].sum()

In [36]:
# pba50_epc_cross = pd.merge(
#     pba50_epc_df[["tract_geoid10", "epc_2050", "geometry"]],
#     census_vintage_crosswalk[["tract_geoid20", "tract_geoid10"]],
#     on="tract_geoid10",
#     how="left",
# )

In [37]:
# acs_df.rename(columns={"tract_geoid": "tract_geoid20"}, inplace=True)

In [38]:
# # update acs_df with epc_2050 values
# acs_df["epc_2050"] = acs_df["tract_geoid20"].map(
#     pba50_epc_cross.sort_values(by="epc_2050", ascending=False)
#     .groupby("tract_geoid20")["epc_2050"]
#     .first()
# )

### Sum PBA 2050 and PBA 2050+ EPCs for comparison

In [39]:
# acs_df[["epc_2050", "epc_2050p"]].sum()

### Calculate EPC gains and losses by tract between PBA 2050 and PBA 2050+

In [40]:
# acs_df.loc[acs_df["epc_2050"].isnull(), "epc_2050"] = 0

In [41]:
# acs_df["c2050_2050p"] = acs_df["epc_2050p"] - acs_df["epc_2050"]

## Calculate regional statistics 

In [57]:
# Regional mean and standard deviation of every factor share: one row per factor after the
# transpose, with the factor's column name in "index".
region_share_cols = [
    "pct_over75",
    "pct_poc",
    "pct_lep",
    "pct_spfam",
    "pct_below2",
    "pct_disab",
    "pct_zvhhs",
    "pct_hus_re",
]
epc_region_stats = acs_df[region_share_cols].agg(["mean", "std"]).T.reset_index()

In [58]:
# reset_index() named the former index column "index"; give it a descriptive name.
epc_region_stats = epc_region_stats.rename(columns={"index": "factors"})

In [59]:
# Human-readable label for each share column.
epc_factors = {
    "pct_over75": "Seniors 75 Years and Over",
    "pct_poc": "People of Color",
    "pct_lep": "Limited English Proficiency",
    "pct_spfam": "Single Parent Families",
    "pct_below2": "Low-Income (<200% Federal Poverty Level-FPL)",
    "pct_disab": "People with Disability",
    "pct_zvhhs": "Zero-Vehicle Household",
    "pct_hus_re": "Rent-Burdened",
}
# Assign the result back to the column: an inplace replace on the selected column is a
# chained assignment, which does not update the parent frame under pandas Copy-on-Write.
epc_region_stats["factors"] = epc_region_stats["factors"].replace(epc_factors)

In [60]:
# Round both statistics to two decimals before the thresholds are derived from them.
epc_region_stats[["mean", "std"]] = epc_region_stats[["mean", "std"]].round(decimals=2)

### Create fields for .5, 1, and 1.5 sd from mean

In [61]:
def flag_condition_calc(row, df_share_column, standard_deviation):
    cond = (
        df_share_column.mean().round(decimals=2)
        + (standard_deviation * df_share_column.std().round(decimals=2))
    ).round(decimals=2)
    if row > cond:
        return 1
    else:
        return 0

In [62]:
# Threshold columns: rounded mean plus 0.5, 1 and 1.5 rounded standard deviations,
# each rounded again to two decimals.
for threshold_col, sd_multiple in (
    ("plus_half_sd", 0.5),
    ("plus_one_sd", 1),
    ("plus_one_half_sd", 1.5),
):
    epc_region_stats[threshold_col] = (
        epc_region_stats["mean"] + (sd_multiple * epc_region_stats["std"])
    ).round(decimals=2)

In [73]:
# Scratch check: unrounded mean + 0.5 * std for a factor with mean 0.18 and std 0.13.
(0.18 + (0.5 * 0.13))

0.245

In [75]:
# Scratch check: the same value rounded to two decimals, as shown in the output below.
round((0.18 + (0.5 * 0.13)),2)

0.24

In [63]:
# Display the regional mean/std table together with the three threshold columns.
epc_region_stats

Unnamed: 0,factors,mean,std,plus_half_sd,plus_one_sd,plus_one_half_sd
0,Seniors 75 Years and Over,0.07,0.06,0.1,0.13,0.16
1,People of Color,0.61,0.23,0.72,0.84,0.96
2,Limited English Proficiency,0.07,0.08,0.11,0.15,0.19
3,Single Parent Families,0.12,0.09,0.16,0.21,0.26
4,Low-Income (<200% Federal Poverty Level-FPL),0.18,0.13,0.24,0.31,0.38
5,People with Disability,0.1,0.05,0.12,0.15,0.18
6,Zero-Vehicle Household,0.1,0.13,0.16,0.23,0.3
7,Rent-Burdened,0.1,0.08,0.14,0.18,0.22


In [48]:
# Write the regional statistics table to disk without the positional index column.
epc_region_stats.to_csv("Data/epc_regional_stats_ACS2022.csv", index=False)

## Join census tracts geo to epc df

In [49]:
# Restores the "tract_geoid" name used as the join key below. The rename to "tract_geoid20"
# lives in a commented-out comparison cell, so while that cell is disabled this is a no-op.
acs_df.rename(columns={"tract_geoid20": "tract_geoid"}, inplace=True)

In [50]:
# Inner join keeps only tracts present in both the geometry and the tabular data.
epc_gdf = acs_gdf.merge(acs_df, how="inner", on="tract_geoid")

In [51]:
# Columns written to the output files: totals, factor populations, factor shares, the
# half-standard-deviation factor flags, the overall EPC flag and class, and the geometry.
# The one-SD and 1.5-SD factor flags and the count_* columns are not exported.
final_cols = [
    "tract_geoid",
    "county_fip",
    "tot_pop_poc",
    "tot_pop_se",
    "tot_pop_po",
    "tot_pop_ci",
    "tot_pop_ov",
    "tot_hh",
    "tot_fam",
    "pop_poc",
    "pop_over75",
    "pop_spfam",
    "pop_lep",
    "pop_below2",
    "pop_disabi",
    "pop_hus_re",
    "pop_zvhhs",
    "pct_poc",
    "pct_over75",
    "pct_spfam",
    "pct_lep",
    "pct_below2",
    "pct_disab",
    "pct_hus_re",
    "pct_zvhhs",
    "poc_1_2",
    "over75_1_2",
    "spfam_1_2",
    "lep_1_2",
    "disab_1_2",
    "below2_1_2",
    "hus_re_1_2",
    "zvhh_1_2",
    "epc_2050p",
    "epc_class",
    # "epc_2050",
    # "c2050_2050p",
    "geometry",
]
# Path of the GeoJSON export; the same path is handed to the publish step.
epc_path = "Data/epc_acs2022.geojson"
epc_gdf[final_cols].to_file(epc_path, driver="GeoJSON")

In [52]:
# output tabular data
# Build a separate column list instead of calling final_cols.remove("geometry"): the
# in-place remove raised ValueError when this cell was re-run and altered final_cols.
tabular_cols = [col for col in final_cols if col != "geometry"]
acs_df[tabular_cols].to_csv("Data/epc_acs2022.csv", index=False)

## Publish to arcgis online

In [53]:
# With overwrite=True the function only replaces the data of the existing hosted layer
# identified by f_layer_id; layer_name, layer_snippet, tags and folder are not applied.
publish_geojson_to_agol(
    geojson_path=epc_path,
    layer_name="DRAFT Equity Priority Communities - Plan Bay Area 2050 Plus (ACS 2022)",
    layer_snippet="""This dataset represents tract information related to Equity Priority Communities 
    for Plan Bay Area 2050 Plus. The dataset was developed using American Community Survey 2018-2022 data for eight variables considered.""",
    tags="bay area, equity, policy, planning, environmental justice, acs, american community survey, epc, community of concern",
    client=gis,
    folder="plan_policy",
    overwrite=True,
    f_layer_id="15a30787f659423ea3d40c07c5b2a31a",
)

Overwrote hosted feature layer with id: 15a30787f659423ea3d40c07c5b2a31a


### Create Field Map Dictionary to Rename Feature Class Alias

In [54]:
# field_metadata = pd.read_csv("Data/EPC_Schema_pba2050p.csv")

In [55]:
# field_metadata.head(5)

In [56]:
# dict(zip(field_metadata["Field Name"], field_metadata["Alias"]))