In [1]:
import os
import sys
import uuid
import getpass
import numpy as np
import pandas as pd
import geopandas as gpd

# current OS login name; used to build the per-user paths under /Users below
user = getpass.getuser()

# put the local clone of dvutils on sys.path so that the utils_io import below resolves
DVUTILS_LOCAL_CLONE_PATH = f"/Users/{user}/Documents/GitHub/dvutils"
sys.path.insert(0, DVUTILS_LOCAL_CLONE_PATH)
# NOTE(review): the star import hides which names come from utils_io;
# pull_geotable_agol (called when reading the EPC and PDA layers) is presumably one of them — confirm
from utils_io import *

from arcgis import GIS

## Pre-processing

In [2]:
# set working directory
# NOTE: this name shadows the built-in dir(); later cells read it when building
# the paths to the master list workbook and the "Spatial Data" folder
dir = os.path.join("/Users", user, "Library", "CloudStorage", "Box-Box", "_GIS Analyses")

In [5]:
# authenticate to ArcGIS Online as content_MTC; the password is taken from the
# AGOL_CONTENT_PASSWORD environment variable (os.environ.get yields None when it is not set)
password = os.environ.get("AGOL_CONTENT_PASSWORD")
gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)

In [6]:
# create a function to remove z values from a geometry
def remove_z(geom):
    """
    Return a 2D version of a geometry with its z values removed

    The geometry is round-tripped through WKB with ``output_dimension=2``,
    which drops the third coordinate.

    Source: https://gist.github.com/rmania/8c88377a5c902dfbc134795a7af538d8?permalink_comment_id=2893099#gistcomment-2893099

    Parameters:
    -----------
    geom : shapely geometry or None
        geometry to flatten

    Returns:
    --------
    shapely geometry or None
        the 2D geometry; a missing geometry (None) is passed through unchanged
    """
    # a missing geometry has nothing to strip and cannot be serialized to WKB,
    # so pass it through instead of failing in the middle of a column-wide apply
    if geom is None:
        return None

    # import the submodule explicitly: a bare `import shapely` does not
    # guarantee that `shapely.wkb` has been loaded
    import shapely.wkb

    return shapely.wkb.loads(shapely.wkb.dumps(geom, output_dimension=2))

In [7]:
# create a function to read kml files by geometry type and return a geodataframe
def read_kml_by_geom_type(directory, geom_type):
    """Read the kml files of one geometry type and return a single geodataframe

    Files are selected by a marker in their file name: ``_point`` for 'Point',
    ``_segment`` for 'LineString' and ``_polygon`` for anything else. Every
    matching ``*.kml`` file in ``directory`` is read, tagged with its file name
    in a ``source_file`` column, stripped of z values and concatenated.

    Parameters:
    -----------
    directory : str
        path to the kml directory
    geom_type : str
        one of ['Point', 'LineString', 'Polygon']; any other value is treated
        like 'Polygon'

    Returns:
    --------
    geodataframe
        the rows of all matching files, re-indexed from 0

    Raises:
    -------
    ValueError
        if no kml file in ``directory`` matches the requested geometry type
    """
    import glob

    # file-name marker for each geometry type; unknown types fall back to polygons
    markers = {"Point": "_point", "LineString": "_segment"}
    type_string = markers.get(geom_type, "_polygon")

    # sort the listing so the row order of the result is reproducible, and match
    # on the base name only so that directory names can never trigger a match
    # (os.path.basename also copes with non-"/" path separators)
    kml_paths = sorted(glob.glob(os.path.join(directory, "*.kml")))
    matching_paths = [p for p in kml_paths if type_string in os.path.basename(p)]

    if not matching_paths:
        # fail with an actionable message instead of pandas' "No objects to concatenate"
        raise ValueError(
            f"No kml files containing '{type_string}' in their name were found in: {directory}"
        )

    # the KML driver is not enabled in fiona by default
    import fiona
    fiona.supported_drivers['KML'] = 'rw'

    gdfs = []
    for kml_path in matching_paths:
        # read the file
        gdf = gpd.read_file(kml_path, driver="KML")
        # add the source file name
        gdf["source_file"] = os.path.basename(kml_path)
        # remove z values
        gdf["geometry"] = gdf["geometry"].apply(remove_z)
        gdfs.append(gdf)

    return pd.concat(gdfs, ignore_index=True)

In [21]:
# create a function to overwrite a feature layer
def overwrite_published_feature_layer(f_layer_id, geojson_path, client):
    """Overwrite a published feature layer

    Parameters:
    -----------
    f_layer_id : str
        id of the feature layer to overwrite
    geojson_path : str
        path to the geojson file
    client : authenticated arcgis client
        authentication example below:
        from arcgis.gis import GIS
        password = os.environ.get("AGOL_CONTENT_PASSWORD")
        gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)

    Raises:
    -------
    ValueError
        if f_layer_id is empty or no item with that id can be retrieved
    RuntimeError
        if the overwrite call reports that it did not succeed
    """
    if not f_layer_id:
        raise ValueError("f_layer_id is required to overwrite a hosted feature layer")

    # get the feature layer; the lookup yields None when the id is unknown or
    # not visible to this account, which would otherwise fail obscurely below
    host_flayer = client.content.get(f_layer_id)
    if host_flayer is None:
        raise ValueError(f"No ArcGIS Online item found with id: {f_layer_id}")

    from arcgis.features import FeatureLayerCollection

    # create feature layer collection object
    f_layer = FeatureLayerCollection.fromitem(host_flayer)
    # overwrite the feature layer
    result = f_layer.manager.overwrite(geojson_path)

    # do not announce success when the service explicitly reports a failure
    if isinstance(result, dict) and result.get("success") is False:
        raise RuntimeError(
            f"Overwrite of hosted feature layer {f_layer_id} failed: {result}"
        )

    print(f"Overwrote hosted feature layer with id: {f_layer_id}")

In [22]:
# create a function that publishes a geojson to agol
def publish_geojson_to_agol(
    geojson_path,
    layer_name,
    layer_snippet,
    tags,
    client,
    folder=None,
    overwrite=False,
    f_layer_id=None,
):
    """Publish a geojson to ArcGIS Online

    With ``overwrite=False`` the geojson is uploaded as a new item and published
    as a hosted feature layer. With ``overwrite=True`` the data of the existing
    layer ``f_layer_id`` is replaced instead; in that case ``layer_name``,
    ``layer_snippet``, ``tags`` and ``folder`` are not used.

    Parameters:
    -----------
    geojson_path : str
        path to the geojson file
    layer_name : str
        name of the layer
    layer_snippet : str
        layer snippet
    tags : str
        tags as a comma separated string (e.g. "tag1, tag2, tag3")
    client : authenticated arcgis client
        authentication example below:
        from arcgis.gis import GIS
        password = os.environ.get("AGOL_CONTENT_PASSWORD")
        gis = GIS(url="https://mtc.maps.arcgis.com/home/", username="content_MTC", password=password)
    folder : str
        name of the folder to publish to (optional)
    overwrite : bool
        if True, overwrite existing layer
    f_layer_id : str
        if overwrite is True, provide the id of the feature layer to overwrite

    Raises:
    -------
    ValueError
        if overwrite is True but no f_layer_id is given
    """
    if overwrite:
        # f_layer_id defaults to None, so catch the missing id here with a clear
        # message rather than letting the lookup fail further down
        if not f_layer_id:
            raise ValueError("f_layer_id must be provided when overwrite is True")
        overwrite_published_feature_layer(f_layer_id, geojson_path, client)
        return

    # upload the geojson file as a new content item
    item_prop = {
        "type": "GeoJson",
        "title": layer_name,
        "tags": tags,
        "snippet": layer_snippet,
        "overwrite": True,
    }
    item = client.content.add(item_properties=item_prop, data=geojson_path, folder=folder)

    # publish the item
    published_item = item.publish(file_type="geojson")

    print(f"Published {layer_name} to ArcGIS Online as {published_item.id}")

In [8]:
def create_flag_column(flag_gdf, original_gdf, original_id_col, out_column):
    """Copy a flag column from an overlay result back onto the original table

    The values of ``out_column`` in ``flag_gdf`` are looked up by
    ``original_id_col`` and written to ``original_gdf[out_column]``. When an id
    occurs more than once in ``flag_gdf``, the first non-null value for that id
    is used. Ids of ``original_gdf`` that are absent from ``flag_gdf`` get NaN.

    Parameters:
    -----------
    flag_gdf : dataframe
        table that holds ``original_id_col`` and ``out_column``
    original_gdf : dataframe
        table that receives the new column (modified in place)
    original_id_col : str
        name of the id column present in both tables
    out_column : str
        name of the flag column to copy

    Returns:
    --------
    None
    """
    # decide on id uniqueness itself rather than on matching row counts: two
    # tables can have the same number of rows while flag_gdf still repeats ids,
    # and mapping through a lookup with a duplicated index raises
    if flag_gdf[original_id_col].is_unique:
        lookup = flag_gdf.set_index(original_id_col)[out_column]
    else:
        lookup = flag_gdf.groupby(original_id_col)[out_column].first()

    original_gdf[out_column] = original_gdf[original_id_col].map(lookup)

In [9]:
# read excel master list
# (sheet "hotspot_data_revised" of the BusAID workbook stored in the working directory)
hs_ms_df = pd.read_excel(os.path.join(dir, "BusAID Hotspot Master List_112823.xlsx"), sheet_name="hotspot_data_revised")

In [10]:
# keep only the rows that carry an id, then store that id as an integer
hs_ms_df = hs_ms_df.dropna(subset=["id"]).astype({"id": int})

In [11]:
# load the hotspot kml layers: one geodataframe for points, one for line segments
kml_dir = os.path.join(dir, "Spatial Data")
point_gdf = read_kml_by_geom_type(kml_dir, "Point")
line_gdf = read_kml_by_geom_type(kml_dir, "LineString")

In [12]:
# the kml "Name" starts with the hotspot id in parentheses, e.g. "(12) ...";
# pull that number out into an integer "id" column on both layers
ID_PREFIX_PATTERN = r"^\((\d+)\)"
for kml_gdf in (line_gdf, point_gdf):
    kml_gdf["id"] = kml_gdf["Name"].str.extract(ID_PREFIX_PATTERN, expand=False).astype(int)

In [13]:
# merge master list with point and line gdfs
# inner join on "id": only hotspots that have both a kml geometry and a master-list
# row are kept; of the kml columns, only id and geometry are carried into the join
hs_point_gdf = pd.merge(point_gdf[["id", "geometry"]], hs_ms_df, on="id", how="inner")
hs_line_gdf = pd.merge(line_gdf[["id", "geometry"]], hs_ms_df, on="id", how="inner")

In [14]:
# read epcs (feature service "communities_of_concern_2020_acs2018", layer 0)
# NOTE(review): pull_geotable_agol is not defined in this notebook; presumably it is
# provided by the utils_io star import — confirm. The layer is requested without
# reprojection to the analysis CRS (reproject_to_analysis_crs=False).
epc_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/communities_of_concern_2020_acs2018/FeatureServer/0"
epc_gdf = pull_geotable_agol(base_url=epc_url, client=gis, reproject_to_analysis_crs=False)

Breaking feature service layer IDs into 8 chunks


In [15]:
# read pdas (feature service "priority_development_areas_pba2050", layer 0)
# NOTE(review): pull_geotable_agol is not defined in this notebook; presumably it is
# provided by the utils_io star import — confirm. The layer is requested without
# reprojection to the analysis CRS (reproject_to_analysis_crs=False).
pda_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/priority_development_areas_pba2050/FeatureServer/0"
pda_gdf = pull_geotable_agol(base_url=pda_url, client=gis, reproject_to_analysis_crs=False)

Breaking feature service layer IDs into 1 chunks


## Publish datasets to ArcGIS Online

In [16]:
# create the local "Data" directory that receives the exported geojson files;
# exist_ok=True makes the cell safe to re-run and avoids a separate
# check-then-create step (os is already imported in the first cell)
os.makedirs("Data", exist_ok=True)

In [17]:
# write the merged hotspot layers to local geojson files; the resulting paths
# are the inputs of the publish cells
point_path, line_path = (
    os.path.join("Data", file_name)
    for file_name in ("hs_point_gdf.geojson", "hs_line_gdf.geojson")
)
hs_point_gdf.to_file(point_path, driver="GeoJSON")
hs_line_gdf.to_file(line_path, driver="GeoJSON")

In [26]:
# publish point features to agol
# overwrite=True replaces the data of the existing hosted layer f_layer_id;
# on that path publish_geojson_to_agol only uses geojson_path, client and f_layer_id
# (layer_name, layer_snippet, tags and folder apply to a first-time publish only)
publish_geojson_to_agol(
    geojson_path=point_path,
    layer_name="BusAID Hotspots - Point (December 2023)",
    layer_snippet="BusAID Hotspot Point Dataset",
    tags="mtc, bay area, busaid, bus, transit, hotspots",
    client=gis,
    folder="bus_aid",
    overwrite=True,
    f_layer_id="b03b3b392d324f2d828eaad56932e93b",
)

Overwrote hosted feature layer with id: b03b3b392d324f2d828eaad56932e93b


In [27]:
# publish line features to agol
# overwrite=True replaces the data of the existing hosted layer f_layer_id;
# on that path publish_geojson_to_agol only uses geojson_path, client and f_layer_id
# NOTE(review): the saved output of this cell reads "Published ...", a message the
# overwrite path does not print, so the output predates the current function
# definition — re-run the cell to refresh it
publish_geojson_to_agol(
    geojson_path=line_path,
    layer_name="BusAID Hotspots - Line (December 2023)",
    layer_snippet="BusAID Hotspot Line Dataset",
    tags="mtc, bay area, busaid, bus, transit, hotspots",
    client=gis,
    folder="bus_aid",
    overwrite=True,
    f_layer_id="c31d2e1c793f43c68ad79cc544453fe9",
)

Published BusAID Hotspots - Line (December 2023) to ArcGIS Online as c31d2e1c793f43c68ad79cc544453fe9


## Spatial overlays

In [15]:
# step 1: attach the EPC attributes to every hotspot point
# (left join, so points outside any EPC polygon are kept)
epc_columns = ["geoid", "epc_2050", "epc_class", "geometry"]
point_epc_gdf = gpd.sjoin(
    hs_point_gdf, epc_gdf[epc_columns], how="left", predicate="intersects"
).drop(columns=["index_right"])  # discard the right-hand index column added by sjoin

# step 2: attach the PDA name to the EPC-enriched points
point_epc_pda_gdf = gpd.sjoin(
    point_epc_gdf, pda_gdf[["pda_name", "geometry"]], how="left", predicate="intersects"
).drop(columns=["index_right"])  # discard the right-hand index column added by sjoin

# step 3: pda_2050 is 1 where a PDA name was matched and 0 where it is null
point_epc_pda_gdf["pda_2050"] = np.where(point_epc_pda_gdf["pda_name"].notnull(), 1, 0)

In [16]:
# intersect hotspot lines with epcs (left join: lines that touch no EPC polygon are kept;
# a line that intersects several EPC polygons yields one row per match)
line_epc_gdf = gpd.sjoin(
    hs_line_gdf,
    epc_gdf[["geoid", "epc_2050", "epc_class", "geometry"]],
    how="left",
    predicate="intersects",
)
# TODO: the remaining line steps (index_right drop, PDA join, pda_2050 flag) are
# disabled below, so line_epc_gdf still contains the index_right column and no
# line_epc_pda_gdf is created — re-enable or delete once the EPC join is settled
# # drop the extra index column
# line_epc_gdf.drop(columns=["index_right"], inplace=True)
# # intersect hotspots lines with pdas
# line_epc_pda_gdf = gpd.sjoin(
#     line_epc_gdf, pda_gdf[["pda_name", "geometry"]], how="left", predicate="intersects"
# )
# # drop the extra index column
# line_epc_pda_gdf.drop(columns=["index_right"], inplace=True)

# # create pda flag column based on whether pda_name is null
# line_epc_pda_gdf["pda_2050"] = np.where(line_epc_pda_gdf["pda_name"].isnull(), 0, 1)

In [38]:
# (rows, columns) of the line hotspot table that is fed into the EPC spatial join
hs_line_gdf.shape

(88, 23)

In [18]:
# number of joined rows for every (hotspot id, EPC flag, EPC class) combination
epc_group_keys = ["id", "epc_2050", "epc_class"]
line_summary_epc = line_epc_gdf.groupby(epc_group_keys).size().reset_index(name="count")

In [19]:
# summary rows whose hotspot id already appeared in an earlier row, i.e. hotspots
# that fall into more than one (epc_2050, epc_class) group
line_summary_epc.query("id.duplicated()")

Unnamed: 0,id,epc_2050,epc_class,count
3,4,1,Higher,2
6,6,1,High,2
8,7,1,Higher,4
9,7,1,Highest,6
11,8,1,High,1
12,8,1,Higher,2
13,8,1,Highest,11
18,12,1,High,2
21,14,1,High,2
22,14,1,Higher,2


In [20]:
# all EPC groups of hotspot 4, one of the ids listed by the duplicate check
line_summary_epc.query("id == 4")

Unnamed: 0,id,epc_2050,epc_class,count
2,4,0,,3
3,4,1,Higher,2


In [21]:
# all EPC groups of hotspot 6, one of the ids listed by the duplicate check
line_summary_epc.query("id == 6")

Unnamed: 0,id,epc_2050,epc_class,count
5,6,0,,5
6,6,1,High,2


In [22]:
# all EPC groups of hotspot 7, one of the ids listed by the duplicate check
line_summary_epc.query("id == 7")

Unnamed: 0,id,epc_2050,epc_class,count
7,7,0,,1
8,7,1,Higher,4
9,7,1,Highest,6
