# In [3]:
import geopandas as gpd
import pandas as pd
import numpy as np

# Step 1: Load Data
# Purpose: Load initial points, candidate sites, and polygons for further processing.
# Each layer is read from its own file. A .gpkg is a single file, so the
# candidate shapefile cannot live "inside" the initial-points GeoPackage path.
# NOTE(review): the candidate shapefile is assumed to sit next to the other
# inputs in /save/path/ -- confirm the actual location.
initial_points_gdf = gpd.read_file("/save/path/osm_poi_filtered_initial.gpkg")
candidate_sites_gdf = gpd.read_file("/save/path/osm_poi_filtered_candidate.shp")
polygons_gdf = gpd.read_file("/save/path/final_ver7_suburban.shp")

# Step 2: Rename and Keep Necessary Columns
# Purpose: Keep only the columns used later ('NAMELSAD20', 'EV_port' and the
# geometry) and expose 'EV_port' under the name 'total_supply'.
required_columns = ['NAMELSAD20', 'EV_port', 'geometry']
polygons_gdf = polygons_gdf[required_columns]
polygons_gdf = polygons_gdf.rename(columns={'EV_port': 'total_supply'})

# Step 3: Align Coordinate Systems
# Purpose: Reproject the three layers to one common CRS (EPSG:3857) so the
# spatial joins below compare geometries in the same coordinate space.
target_epsg = 3857
initial_points_gdf, candidate_sites_gdf, polygons_gdf = (
    layer.to_crs(epsg=target_epsg)
    for layer in (initial_points_gdf, candidate_sites_gdf, polygons_gdf)
)

# Step 4: Calculate Initial Points Count per Polygon
# Purpose: Perform a spatial join to count how many initial points fall within each polygon.
# A left join keeps a polygon that contains no point as ONE row whose
# right-hand columns are NaN. Counting rows with size() would therefore report
# 1 for an empty polygon. 'index_right' (the column sjoin adds for the right
# frame's unnamed index) is NaN exactly for those unmatched rows, so count()
# on it yields the true number of matches, including 0.
initial_overlay = gpd.sjoin(polygons_gdf, initial_points_gdf, how='left', predicate='contains')
initial_count = initial_overlay.groupby(level=0)['index_right'].count()

# Step 5: Calculate Candidate Points Count per Polygon
# Purpose: Perform a spatial join to count how many candidate sites fall within each polygon.
# Same counting rule as above: a polygon without candidate sites gets 0, not 1.
# NOTE(review): with a true 0 here, a polygon that has initial points but no
# candidate site ends up with p == 0 after the later capping step and is
# written to neither output -- confirm this is the intended handling.
candidate_overlay = gpd.sjoin(polygons_gdf, candidate_sites_gdf, how='left', predicate='contains')
total_count = candidate_overlay.groupby(level=0)['index_right'].count()

# Step 6: Generate osm_id_list for Each Polygon
# Purpose: For each polygon, gather the osm_id values of the initial points within it.
# Polygons without any initial point keep a one-element list holding NaN
# (from the unmatched left-join row); the validity filter in Step 8 removes them.
osm_id_list = initial_overlay.groupby(level=0)['osm_id'].apply(list)

# Step 7: Add Computed Values to Polygons
# Purpose: Add the calculated initial_count, total_count, and osm_id_list to polygons_gdf.
# All three Series are indexed by the polygon index, so assignment aligns row by row.
polygons_gdf['initial_count'] = initial_count
polygons_gdf['total_count'] = total_count
polygons_gdf['osm_id_list'] = osm_id_list

# Step 8: Filter Polygons with Valid osm_id_list
# Purpose: Remove polygons that have an empty or NaN-filled osm_id_list.
def is_valid_osm_id_list(osm_id_list):
    """Return True when *osm_id_list* holds at least one non-missing osm_id.

    Args:
        osm_id_list: The cell value of the 'osm_id_list' column, normally a
            list of osm_id values. A non-iterable value (e.g. None or a
            scalar NaN left by index alignment) is treated as "no ids".

    Returns:
        bool: False for a non-iterable value, an empty collection, or a
        collection whose entries are all missing (NaN/None); True otherwise.
    """
    try:
        ids = list(osm_id_list)
    except TypeError:
        # Scalar cell (None, NaN, ...) instead of a list: nothing to keep.
        return False
    # any() over an empty list is False, which covers the "empty list" case.
    return any(not pd.isna(osm_id) for osm_id in ids)

# Keep only the polygons whose osm_id_list passes is_valid_osm_id_list.
has_valid_ids = polygons_gdf['osm_id_list'].apply(is_valid_osm_id_list)
polygons_gdf = polygons_gdf[has_valid_ids]

# Step 9: Calculate p Value for Each Polygon
# Purpose: Calculate the value of 'p' based on 'total_supply' and a regional threshold.
reg_p_downtown = 3  # Threshold for EV port supply in downtown
reg_p_atlanta = 4   # Threshold for EV port supply in Atlanta

# urban area(=Atlanta) in the same way, but apply 'reg_p_atlanta' and save it as 'urban_greedy.gpkg'
# (e.g. polygons_gdf.apply(calculate_p, axis=1, threshold=reg_p_atlanta))

def calculate_p(row, threshold=None):
    """Return the number of sites 'p' to place in one polygon.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            'total_count' and 'total_supply'.
        threshold: Supply threshold to divide by. When omitted, the
            module-level ``reg_p_downtown`` is used, so existing calls that
            pass only ``row`` behave as before. Must be non-zero.

    Returns:
        1 when the polygon has exactly one candidate ('total_count' == 1) or
        its supply does not exceed the threshold; otherwise the floor of
        'total_supply' / threshold, but never less than 2.
    """
    if threshold is None:
        # Resolved at call time so a changed module-level value is honoured.
        threshold = reg_p_downtown
    if row['total_count'] == 1 or row['total_supply'] <= threshold:
        return 1
    # Supply above the threshold always yields at least two sites.
    return max(row['total_supply'] // threshold, 2)

# Evaluate calculate_p for every polygon row and store the result as integers.
polygons_gdf['p'] = polygons_gdf.apply(calculate_p, axis=1).astype(int)

# Step 10: Adjust p Values Based on total_count
# Purpose: If 'total_count' is less than 'p', set 'p' to the total count.
# clip(upper=...) with a Series bound caps each 'p' at the 'total_count' of the
# same row in one vectorized call instead of a Python-level min() per row.
polygons_gdf['p'] = polygons_gdf['p'].clip(upper=polygons_gdf['total_count'])

# Step 11: Filter Data Based on p Values
# Purpose: Split the polygons into the p == 1 subset (for MCLP) and the
# p > 1 subset (for Greedy). Rows with any other p value land in neither.
p_values = polygons_gdf['p']
mclp_data = polygons_gdf[p_values.eq(1)]
greedy_data = polygons_gdf[p_values.gt(1)]

# Step 12: Use Initial Points for p=1 Polygons
# Purpose: For polygons where p=1, the initial points are used directly.
# Only 'total_supply' and the geometry of those polygons take part in the
# join, so each matched point picks up the supply of its enclosing polygon.
mclp_polygons = mclp_data[['total_supply', 'geometry']]

# Inner join with 'within' keeps just the initial points lying inside a p=1
# polygon; the result is then trimmed to the columns that get saved.
points_within_polygons = gpd.sjoin(
    initial_points_gdf, mclp_polygons, how='inner', predicate='within'
)[['osm_id', 'geometry', 'total_supply']]

# Step 13: Save Final Data
# Purpose: Write the two results of this cell: the selected points for the
# p=1 polygons (MCLP case) and the p>1 polygons (Greedy case).
mclp_output_file = '/save/path/suburban_mclp_selected.gpkg' # If only one EVCS needs to be located, the initial point is selected directly
greedy_output_file = '/save/path/suburban_greedy.gpkg'  # urban area in the same way, but apply 'reg_p_atlanta' and save it as 'urban_greedy.gpkg'

# Both layers are written as GeoPackage files, points first and polygons second.
# NOTE(review): greedy_data still carries the list-valued 'osm_id_list'
# column; confirm the GPKG writer in use accepts it.
for layer, destination in (
    (points_within_polygons, mclp_output_file),
    (greedy_data, greedy_output_file),
):
    layer.to_file(destination, driver='GPKG')

# Report both destinations once the two files are written.
print(
    f"'total_supply' attribute added to points and saved to {mclp_output_file}.",
    f"p > 1 polygons saved to {greedy_output_file}.",
    sep="\n",
)


# Recorded cell output:
# p = 1 polygons saved to /home/ojin/working_space/SIG/sehoon전달/01. Preprocessing_result/suburban/suburban_mclp.gpkg.
# p > 1 polygons saved to /home/ojin/working_space/SIG/sehoon전달/01. Preprocessing_result/suburban/suburban_greedy.gpkg.
