In [None]:
# Import modules
import pandas as pd
from arcgis.features import GeoAccessor, GeoSeriesAccessor

# Note: geopandas (import geopandas as gpd) is not used here because the module could not be installed in a cloned environment; the arcgis spatial accessor imported above is used instead.
# https://developers.arcgis.com/python/guide/part3-introduction-to-pandas/

# Prepare table for non-spatial data using Pandas

In [None]:
# Read the city table from the CSV file.
# The [zips] column is forced to text: ZIP codes are identifiers, not numbers,
# and letting pandas infer the dtype risks dropping leading zeros. The column is
# split with .str and used as a merge key further down, so it must stay a string.
df_csv = pd.read_csv(
    r"D:\Data\TIGER_Line_Census\simplemaps_uscities_basicv1.76\uscities.csv",
    dtype={"zips": str},
)
df_csv.head()

In [None]:
# Build a new dataframe holding only the [city] and [zips] columns.
# .copy() makes df_new an independent frame instead of a selection of df_csv,
# so assigning to df_new["zips"] afterwards does not trigger SettingWithCopyWarning.
df_new = df_csv[["city", "zips"]].copy()
df_new.head()

# https://sparkbyexamples.com/pandas/pandas-create-new-dataframe-by-selecting-specific-columns/

In [None]:
# Turn the whitespace-separated [zips] string into a list of zip codes per city.
# assign() returns a new frame rather than writing into a selection of df_csv,
# which avoids SettingWithCopyWarning.
# split() without a separator splits on any run of whitespace, so repeated or
# leading/trailing spaces do not produce empty-string entries in the lists.
df_new = df_new.assign(zips=df_new["zips"].str.split())
df_new.head()

# https://thats-it-code.com/pandas/how-to-convert-multivalue-column-to-multiple-rows/

In [None]:
# Expand every list in [zips] so that each (city, zip) pair gets its own row
df_new = df_new.explode(column="zips")
# Show ten rows (positions 500-509) from the exploded table
df_new.iloc[500:510]

# Create spatial dataframe from TIGER_Line_Census .shp

In [None]:
# Load the zcta520 shapefile into a spatially enabled DataFrame.
# from_featureclass is a static method of the accessor registered as `.spatial`.
geo_df = GeoAccessor.from_featureclass(
    r"D:\Data\TIGER_Line_Census\tl_2022_us_zcta520\tl_2022_us_zcta520.shp"
)
geo_df.head()

# https://developers.arcgis.com/python/api-reference/arcgis.features.toc.html?arcgis.features.GeoAccessor.from_featureclass#arcgis.features.GeoAccessor.from_featureclass:~:text=static%20from_featureclass(,a%20Features%20class.

# Merge spatial and non-spatial data

In [None]:
# Join the polygons (geo_df, key column [GEOID20]) to the city/zip table
# (df_new, key column [zips]) on zip code.
# how="right" keeps every df_new row, including rows with no matching polygon.
df_merge = geo_df.merge(
    df_new,
    how="right",
    left_on="GEOID20",
    right_on="zips",
)

df_merge.head()

# Clean data - Drop unnecessary columns, drop null values

In [None]:
# The [zips] join key is not needed in the output, so remove that column
df_merge = df_merge.drop(columns=["zips"])
df_merge.head()

In [None]:
# Check for null values: info() prints the non-null count of every column, so a
# column whose count is lower than the total number of entries contains nulls
df_merge.info()

In [None]:
# Remove every row that has a null in any column. After the right join this
# includes the city/zip rows that found no matching polygon.
df_merge = df_merge.dropna(axis="index", how="any")

# https://www.geeksforgeeks.org/working-with-missing-data-in-pandas/

In [None]:
# Re-check for null values: after the drop, every column's non-null count
# should equal the total number of entries
df_merge.info()

# Export result to new polygon shapefile

In [None]:
# Write the merged, cleaned spatial dataframe out as a new shapefile
df_merge.spatial.to_featureclass(
    location=r"D:\Data\TIGER_Line_Census\tl_2022_us_zcta520_cities.shp",
)

# https://developers.arcgis.com/python/api-reference/arcgis.features.toc.html?arcgis.features.GeoAccessor.from_featureclass#arcgis.features.GeoAccessor.from_featureclass:~:text=to_featureclass(location,a%20feature%20class.

# Create City Polygons

In [None]:
# Use the ArcGIS Dissolve geoprocessing tool to group the polygons by city.
# arcpy is imported explicitly so this cell also runs on a fresh kernel outside
# ArcGIS Pro; inside ArcGIS Pro, where arcpy is already available, the import is harmless.
import arcpy

# NOTE(review): only [city] and [zips] are carried over from the CSV, so dissolving
# on [city] alone merges all polygons that share a city name into one multipart
# feature, even if they belong to different places. Consider carrying a state
# column through the pipeline and adding it to dissolve_field - TODO confirm.
arcpy.management.Dissolve(
    in_features=r"D:\Data\TIGER_Line_Census\tl_2022_us_zcta520_cities.shp",
    out_feature_class=r"D:\GIS_projects\city_state_county\city_state_county\US_City_Boundaries.shp",
    dissolve_field="city",
    statistics_fields=None,
    multi_part="MULTI_PART",
    unsplit_lines="DISSOLVE_LINES",
    concatenation_separator=""
)

# Note: This notebook was originally run within ArcGIS Pro, where arcpy is available without an import.
# The explicit import above is what makes the cell work as a standalone python script/jupyter notebook.
# https://pro.arcgis.com/en/pro-app/latest/tool-reference/data-management/dissolve.htm