In [1]:
import os
import sys
import pandas as pd, geopandas as gp, numpy as np
import getpass
from arcgis import GIS

# Name of the OS user running the notebook.
user = getpass.getuser()

# Local clone of the dvutils repository. The path is resolved from the home
# directory instead of a hard-coded "/Users/<name>" prefix, so it yields the
# same location on macOS and also works where home directories live elsewhere.
DVUTILS_LOCAL_CLONE_PATH = os.path.join(
    os.path.expanduser("~"), "Documents", "GitHub", "dvutils"
)
# Put the clone first on the module search path so utils_io resolves from it.
sys.path.insert(0, DVUTILS_LOCAL_CLONE_PATH)
from utils_io import *

In [2]:
# Read credentials from the environment; each is None when its variable is unset.
api_key = os.getenv("CENSUS_API_KEY")  # census api key
agol_password = os.getenv("AGOL_CONTENT_PASSWORD")  # password used for the agol login

In [3]:
# Open an authenticated agol session with the content account; the password
# comes from the environment rather than being written in the notebook.
gis = GIS(
    url="https://mtc.maps.arcgis.com/home",
    username="content_MTC",
    password=agol_password,
)

In [43]:
# Load the 2050 equity priority communities table (ACS 2018 file) from a local CSV.
# `geoid` is read as text so pandas does not convert it to a number.
# Commented-out feature service URL kept for reference:
# epc_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/communities_of_concern_2020_acs2018/FeatureServer/0"
epc_2050 = pd.read_csv(
    "Data/epc_acs2018.csv",
    dtype={"geoid": str},
)

In [44]:
# Load the draft ACS 2021 equity priority communities table from a local CSV.
# `tract_geoid` is read as text so pandas does not convert it to a number.
# Commented-out feature service URL kept for reference:
# draft_epc_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/DRAFT_Equity_Priority_Communities_Plan_Bay_Area_2050_/FeatureServer/0"
epc_2050p_2021 = pd.read_csv(
    "Data/epc_acs2021.csv",
    dtype={"tract_geoid": str},
)

In [45]:
# Load the draft ACS 2022 equity priority communities table from a local CSV.
# `tract_geoid` is read as text so pandas does not convert it to a number.
# Commented-out feature service URL kept for reference:
# draft_epc_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/DRAFT_Equity_Priority_Communities_Plan_Bay_Area_2050_Plus_ACS_2022_/FeatureServer/0"
epc_2050p_2022 = pd.read_csv(
    "Data/epc_acs2022.csv",
    dtype={"tract_geoid": str},
)

## Concat 2018, 2021, and 2022 data

In [46]:
# County FIPS code -> county name lookup used to label every table.
county_fips_dict = {
    1: "Alameda",
    13: "Contra Costa",
    41: "Marin",
    55: "Napa",
    75: "San Francisco",
    81: "San Mateo",
    85: "Santa Clara",
    95: "Solano",
    97: "Sonoma",
}

# Add a `county` name column to each table, in place, from its `county_fip` code.
for epc_frame in (epc_2050, epc_2050p_2021, epc_2050p_2022):
    epc_frame["county"] = epc_frame["county_fip"].map(county_fips_dict)

# Per-vintage renames of `epc_2050p` are left disabled:
# epc_2050p_2021.rename(columns={"epc_2050p":"epc_2021"}, inplace=True)
# epc_2050p_2022.rename(columns={"epc_2050p":"epc_2022"}, inplace=True)

In [47]:
# create a function to flag share columns with values that are 0 or 1
# these columns may highlight areas where the data are not reliable

def flag_share_cols(df, share_cols):
    """
    Add a 0/1 indicator column for each share column, set to 1 where the share is exactly 0 or 1.
    Shares at those extremes may highlight areas where the data are not reliable.

    Parameters
    -------------------
    df (dataframe):
    DataFrame (or GeoDataFrame) holding the share columns. It is modified in place.

    share_cols (list):
    Names of the share columns to check.

    Returns
    -------------------
    The same object passed as ``df``, with one ``<col>_flag`` column added per entry in ``share_cols``.
    """
    for share_col in share_cols:
        shares = df[share_col]
        # Exact comparison: only values equal to 0 or 1 are flagged (NaN is not).
        at_extreme = (shares == 0) | (shares == 1)
        df[f"{share_col}_flag"] = np.where(at_extreme, 1, 0)
    return df

In [48]:
# Share columns to screen for values of exactly 0 or 1.
cols = [
    f"pct_{factor}"
    for factor in (
        "poc",
        "over75",
        "spfam",
        "lep",
        "below2",
        "disab",
        "zvhhs",
        "hus_re",
    )
]

# Add the *_flag columns to all three tables. flag_share_cols returns the frame
# it was given, so each name stays bound to the same object.
epc_2050, epc_2050p_2021, epc_2050p_2022 = (
    flag_share_cols(epc_frame, cols)
    for epc_frame in (epc_2050, epc_2050p_2021, epc_2050p_2022)
)

In [49]:
# Bring the 2018 table in line with the 2021/2022 tables: key column named
# `tract_geoid`, IDs left-padded with zeros, and the `tract` column dropped.
#
# The cell is safe to re-run: zfill only pads IDs shorter than 11 characters
# (a blind "0" + id adds another zero on every run), rename ignores a `geoid`
# column that is already renamed, and the drop tolerates a missing `tract`.
# Assumes tract GEOIDs are 11 digits (2 state + 3 county + 6 tract).
epc_2050 = (
    epc_2050.rename(columns={"geoid": "tract_geoid"})
    .assign(tract_geoid=lambda frame: frame["tract_geoid"].str.zfill(11))
    .drop(columns=["tract"], errors="ignore")
)

In [50]:
# Tag each table, in place, with the year of its data vintage.
for vintage_year, epc_frame in (
    (2018, epc_2050),
    (2021, epc_2050p_2021),
    (2022, epc_2050p_2022),
):
    epc_frame["vintage"] = vintage_year

# Stack the three tables into one long table; rows keep their original index labels.
epc_concat = pd.concat([epc_2050, epc_2050p_2021, epc_2050p_2022])

In [51]:
# Write the stacked table to disk without the index column.
epc_concat.to_csv(
    "Data/epc_comparisons_2018_2021_2022.csv",
    index=False,
)

In [52]:
# Mean of pct_below2 across the 2018-vintage table.
epc_2050["pct_below2"].mean()

0.21426180780037782

In [53]:
# Mean of pct_below2 across the 2021-vintage table.
epc_2050p_2021["pct_below2"].mean()

0.18514875331275824

In [56]:
# Mean of pct_below2 across the 2022-vintage table, rounded to two decimals.
epc_2050p_2022["pct_below2"].mean().round(2)

0.18

In [58]:
# Half of the 2022 pct_below2 standard deviation; the standard deviation is
# rounded to two decimals before halving.
0.5 * epc_2050p_2022["pct_below2"].std().round(2)

0.065

## Merge 2022 and 2021 data

In [None]:
# Join the 2021 and 2022 tables on tract GEOID (merge's default inner join).
# Any other column name present in both tables gets a _2021 / _2022 suffix.
epc_merge = epc_2050p_2021.merge(
    epc_2050p_2022,
    on="tract_geoid",
    suffixes=("_2021", "_2022"),
)

In [None]:
# Change in the epc_2050p value from 2021 to 2022 (2022 minus 2021).
epc_merge["epc_change"] = epc_merge["epc_2050p_2022"].sub(
    epc_merge["epc_2050p_2021"]
)

In [None]:
# Tally how many merged rows fall at each epc_change value.
epc_merge["epc_change"].value_counts()

In [None]:
# Label each row by the sign of epc_change: zero -> "no_change", positive ->
# "gain", everything else -> "loss".
change_values = epc_merge["epc_change"]
is_unchanged = change_values == 0
is_gain = change_values > 0
gain_or_loss = np.where(is_gain, "gain", "loss")
epc_merge["epc_change_class"] = np.where(is_unchanged, "no_change", gain_or_loss)

In [None]:
# Show the rows with epc_2050p equal to 1 in 2021 or 2022 where at least one
# factor share is exactly 0 or 1 in either year. Shares at those extremes may
# highlight areas where the data are not reliable.
review_cols = [
    "pct_poc_2021",
    "pct_over75_2021",
    "pct_spfam_2021",
    "pct_lep_2021",
    "pct_below2_2021",
    "pct_disab_2021",
    "pct_zvhhs_2021",
    "pct_hus_re_2021",
    "pct_poc_2022",
    "pct_over75_2022",
    "pct_spfam_2022",
    "pct_lep_2022",
    "pct_below2_2022",
    "pct_disab_2022",
    "pct_hus_re_2022",
    "pct_zvhhs_2022",
]
# Filter first, then build the 0/1 mask from the filtered rows. A mask built on
# all of epc_merge has a different index than the filtered frame, which makes
# pandas reindex it and warn "Boolean Series key will be reindexed to match
# DataFrame index".
epc_review = epc_merge.query("epc_2050p_2021 == 1 or epc_2050p_2022 == 1")[review_cols]
epc_review[epc_review.isin([0, 1]).any(axis=1)]

In [None]:
# Column subset of epc_merge: the tract key, the 2021 county FIPS code, the
# `tot_*` and `pop_*` columns for 2021 and 2022, and pct_poc_2022.
# NOTE(review): this list is not referenced in the cells visible here, and the
# 2021 and 2022 names are listed in different orders — confirm that is intended.
analysis_cols = [
    "tract_geoid",
    "county_fip_2021",
    "tot_pop_poc_2021",
    "tot_pop_se_2021",
    "tot_pop_po_2021",
    "tot_pop_ci_2021",
    "tot_hh_2021",
    "pop_zvhhs_2021",
    "tot_fam_2021",
    "tot_pop_ov_2021",
    "pop_hus_re_2021",
    "pop_poc_2021",
    "pop_over75_2021",
    "pop_spfam_2021",
    "pop_lep_2021",
    "pop_below2_2021",
    "pop_disabi_2021",
    "tot_pop_poc_2022",
    "tot_pop_se_2022",
    "tot_pop_po_2022",
    "tot_pop_ci_2022",
    "tot_pop_ov_2022",
    "tot_hh_2022",
    "tot_fam_2022",
    "pop_poc_2022",
    "pop_over75_2022",
    "pop_spfam_2022",
    "pop_lep_2022",
    "pop_below2_2022",
    "pop_disabi_2022",
    "pop_hus_re_2022",
    "pop_zvhhs_2022",
    "pct_poc_2022",
]

In [None]:
# Columns shown when reviewing the below2 factor: the tract key, the
# tot_pop_po, pop_below2 and pct_below2 values for 2021 and 2022, and the
# gain/loss/no_change class.
rev_cols = [
    "tract_geoid",
    "tot_pop_po_2021",
    "tot_pop_po_2022",
    "pop_below2_2021",
    "pop_below2_2022",
    "pct_below2_2021",
    "pct_below2_2022",
    "epc_change_class",
]
# Disabled variant of the review query that averages the columns per change class:
# epc_merge.query("county_fip_2021 == 41 and (below2_1_2_2021 == 1 or below2_1_2_2022 == 1)").groupby(["epc_change_class"])[rev_cols].mean()

In [None]:
# Rows with county FIPS 41 (Marin) in 2021 where below2_1_2 equals 1 in 2021
# or 2022, limited to the review columns.
in_marin = epc_merge["county_fip_2021"] == 41
below2_flagged = (epc_merge["below2_1_2_2021"] == 1) | (
    epc_merge["below2_1_2_2022"] == 1
)
epc_merge.loc[in_marin & below2_flagged, rev_cols]

In [None]:
# Start the export column list from every column of the merged table.
out_list = list(epc_merge.columns)

In [None]:
# remove geometry columns from list
# list.remove raises ValueError for a missing item, so absent names are
# skipped. This lets the cell be re-run, and lets it work when the merged
# table has no geometry columns.
for item in ["geometry_2021", "geometry_2022"]:
    if item in out_list:
        out_list.remove(item)

In [None]:
# Write the wide 2021/2022 comparison table. index=False matches the long-table
# export and keeps the row index produced by the merge, which carries no
# information, from being written as an unnamed first column.
epc_merge[out_list].to_csv("Data/epc_2050p_2021_2022_wide.csv", index=False)