In [1]:
# Import Libaires
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import Point
from shapely import wkt
import json
import os
import time
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from pathlib import Path

# Load project configuration
with open('config.json', 'r') as f:
    config = json.load(f)

# Extract paths
data_raw = Path(config['paths']['data_raw'])
data_processed = Path(config['paths']['data_processed'])
climate_path = Path(config['paths']['climate'])
socialvulnerability_path = Path(config['paths']['socialvulnerability'])
infrastructure_path = Path(config['paths']['infrastructure'])
shapefiles_path = Path(config['paths']['shapefiles'])
redlining_path = Path(config['paths']['redlining'])
Outputs_path = Path(config['paths']['outputs'])

In [None]:
# Configurations
class Config:
    GEOCODE_CACHE = 'geocode_cache.json'
    OUTPUT_MAP = Outputs_path / 'Figures' / 'nyc_infrastructure_access.png'
    WEIGHTS = {'height': 0.25, 'age': 0.25, 'area': 0.25, 'elevation': 0.25}

def calculate_heat_index(gdf):
    score = pd.Series(0.0, index=gdf.index)
    if 'Height_Roof' in gdf.columns:
        score += (gdf['Height_Roof'].fillna(0) / 100).clip(0, 1) * Config.WEIGHTS['height'] * 100
    if 'Construction_Year' in gdf.columns:
        age = 2026 - gdf['Construction_Year'].fillna(2026)
        score += (age / 100).clip(0, 1) * Config.WEIGHTS['age'] * 100
    if 'SHAPE_AREA' in gdf.columns:
        score += (gdf['SHAPE_AREA'].fillna(0) / 500).clip(0, 1) * Config.WEIGHTS['area'] * 100
    if 'Ground_Elevation' in gdf.columns:
        elev = (1 - (gdf['Ground_Elevation'].fillna(100) / 200)).clip(0, 1)
        score += elev * Config.WEIGHTS['elevation'] * 100
    return score

In [None]:
# GEOCODING WITH CACHE (ADDRESS TO LAT/LON)

def get_geocoder():
    geolocator = Nominatim(
        user_agent="nyc_heat",
        timeout=10
    )

    return RateLimiter(
        geolocator.geocode,
        min_delay_seconds=2,
        max_retries=3,
        error_wait_seconds=5,
        swallow_exceptions=True
    )


def geocode_with_cache(df, address_col='Address', borough_col='Borough'):

    # Load cache
    cache = {}
    if os.path.exists(Config.GEOCODE_CACHE):
        with open(Config.GEOCODE_CACHE, 'r') as f:
            cache = json.load(f)

    geocode = get_geocoder()

    lats = []
    lons = []

    for _, row in df.iterrows():

        addr = f"{row[address_col]}, {row[borough_col]}, NY"

        if addr in cache:
            lat = cache[addr]['lat']
            lon = cache[addr]['lon']

        else:
            try:
                loc = geocode(addr)

                if loc:
                    lat = loc.latitude
                    lon = loc.longitude

                    cache[addr] = {
                        'lat': lat,
                        'lon': lon
                    }

                    # SAVE CACHE IMMEDIATELY (critical fix)
                    with open(Config.GEOCODE_CACHE, 'w') as f:
                        json.dump(cache, f)

                else:
                    lat, lon = None, None

            except Exception:
                lat, lon = None, None

        lats.append(lat)
        lons.append(lon)

    df['latitude'] = lats
    df['longitude'] = lons

    return df



In [None]:
def main():
    try:
        # 1. Load and Geocode Indoor Centers
        df_in = pd.read_csv(data_processed / 'inside_cooling_centers_final.csv')

        # Only geocode missing coordinates
        if 'latitude' not in df_in.columns or df_in['latitude'].isna().any():
            print("Geocoding indoor cooling centers...")
            df_in = geocode_with_cache(df_in)
            # Save permanently so never geocode again
            df_in.to_csv(data_processed / 'inside_cooling_centers_final.csv', index=False)
        else:
            print("Using cached coordinates for indoor centers.")

        gdf_in = gpd.GeoDataFrame(
            df_in,
            geometry=gpd.points_from_xy(df_in.longitude, df_in.latitude),
            crs="EPSG:4326"
        )
 
        # 2. Load Outdoor Features (FIXED: Handling commas in coordinates)
        print("Loading outdoor cooling centers...")
        df_out = pd.read_csv(data_processed / 'outside_cooling_centers_final.csv')
        
        # Clean x and y columns: remove commas and convert to float
        df_out['x'] = df_out['x'].astype(str).str.replace(',', '').astype(float)
        df_out['y'] = df_out['y'].astype(str).str.replace(',', '').astype(float)
        
        # Note: If these are State Plane coordinates (like NY Long Island), 
        # they need to be converted to EPSG:4326. 
        # Assuming they are already in EPSG:4326 based on the original script's intent.
        gdf_out = gpd.GeoDataFrame(df_out, geometry=gpd.points_from_xy(df_out.x, df_out.y), crs="EPSG:4326")
       
        # 3. Load Green Infrastructure
        print("Loading green infrastructure...")
        df_green = pd.read_csv(data_processed / 'green_spaces_final.csv')
        df_green['geometry'] = df_green['the_geom'].apply(wkt.loads)
        gdf_green = gpd.GeoDataFrame(df_green, geometry='geometry', crs="EPSG:4326")
       
        # 4. Load Urban Design (Buildings)
        print("Loading urban design data...")
        df_urban = pd.read_csv(data_processed / 'urban_design_final.csv')
        df_urban['geometry'] = df_urban['the_geom'].apply(wkt.loads)
        gdf_urban = gpd.GeoDataFrame(df_urban, geometry='geometry', crs="EPSG:4326")
        gdf_urban['heat_index'] = calculate_heat_index(gdf_urban)
       
        # 5. Load Neighborhoods for context and aggregation
        print("Loading neighborhood boundaries...")
        neighborhoods = gpd.read_file(shapefiles_path / 'nyc_neighborhoods.geojson').to_crs("EPSG:4326")

        # 6. Aggregate Urban Heat Index to Neighborhood Level (FIXED: Dots to Color Scale)
        print("Aggregating heat index to neighborhood level...")
        # Spatial join buildings to neighborhoods
        urban_with_neigh = gpd.sjoin(gdf_urban, neighborhoods, how="left", predicate="within")
        
        # Calculate mean heat index per neighborhood
        # Assuming 'neighborhood' is the name column in the geojson
        neigh_col = 'neighborhood' if 'neighborhood' in neighborhoods.columns else neighborhoods.columns[0]
        neigh_heat = urban_with_neigh.groupby(neigh_col)['heat_index'].mean().reset_index()
        
        # Merge back to neighborhoods gdf
        neighborhoods_heat = neighborhoods.merge(neigh_heat, on=neigh_col, how='left')
        # Fill NaN for neighborhoods with no buildings
        neighborhoods_heat['heat_index'] = neighborhoods_heat['heat_index'].fillna(0)

        # 7. Visualization
        print("Generating maps...")
        fig, axes = plt.subplots(1, 3, figsize=(24, 8))
       
        # Panel 1: Cooling Infrastructure (FIXED Panel A)
        neighborhoods.plot(ax=axes[0], color='whitesmoke', edgecolor='black', linewidth=0.5)
        if not gdf_in.empty:
            gdf_in.plot(ax=axes[0], color='blue', markersize=50, label='Indoor', alpha=0.8)
        if not gdf_out.empty:
            gdf_out.plot(ax=axes[0], color='cyan', markersize=30, label='Outdoor', alpha=0.6)
        axes[0].set_title("Cooling Infrastructure Access")
        axes[0].legend()

        # Panel 2: Green Infrastructure
        neighborhoods.plot(ax=axes[1], color='whitesmoke', edgecolor='black', linewidth=0.5)
        if not gdf_green.empty:
            gdf_green.plot(ax=axes[1], color='green', markersize=20, alpha=0.5)
        axes[1].set_title("Nature-Based Solutions (NbS)")

        # Panel 3: Heat Vulnerability (Urban Design) (FIXED: Neighborhood Color Scale)
        neighborhoods_heat.plot(
            column='heat_index', 
            ax=axes[2], 
            cmap='YlOrRd', 
            legend=True,
            edgecolor='black',
            linewidth=0.3,
            legend_kwds={'label': "Mean Heat Index"}
        )
        axes[2].set_title("Urban Heat Index (Neighborhood Scale)")

        plt.tight_layout()
        Config.OUTPUT_MAP.parent.mkdir(parents=True, exist_ok=True)

        plt.savefig(Config.OUTPUT_MAP, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"Analysis complete. Map saved to {Config.OUTPUT_MAP}")

    except Exception as e:
        import traceback
        print(f"Error: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()


Geocoding indoor cooling centers...


RateLimiter caught an error, retrying (0/3 tries). Called with (*('475 53 St, Brooklyn, NY',), **{}).
Traceback (most recent call last):
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\geopy\geocoders\base.py", line 368, in _call_geocoder
    result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\geopy\adapters.py", line 472, in get_json
    resp = self._request(url, timeout=timeout, headers=headers)
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\geopy\adapters.py", line 500, in _request
    raise AdapterHTTPError(
geopy.adapters.AdapterHTTPError: Non-successful status code 503

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\geopy\extra\rate_limiter.py", line 136, in _retries_g

Error: could not convert string to float: '989,406.723142808'


Traceback (most recent call last):
  File "C:\Users\Jackson\AppData\Local\Temp\ipykernel_9036\3934615512.py", line 24, in main
    gdf_out = gpd.GeoDataFrame(df_out, geometry=gpd.points_from_xy(df_out.x, df_out.y), crs="EPSG:4326")
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\geopandas\array.py", line 307, in points_from_xy
    x = np.asarray(x, dtype="float64")
  File "c:\Users\Jackson\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\series.py", line 1031, in __array__
    arr = np.asarray(values, dtype=dtype)
ValueError: could not convert string to float: '989,406.723142808'
