成功！預留

Kepler.gl 延伸範圍

In [119]:
import geopandas as gpd
import networkx as nx
import os
from shapely.geometry import Point, LineString
import numpy as np
from tqdm import tqdm
import pandas as pd
import pickle
import pyproj
from keplergl import KeplerGl
from shapely.ops import unary_union, linemerge

# Define checkpoint directory
CHECKPOINT_DIR = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\checkpoints"

if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Mapbox Access Token (Replace with your valid token)
MAPBOX_ACCESS_TOKEN = "your_valid_mapbox_access_token_here"  # Replace with your token

# Helper function to convert TWD97 (EPSG:3826) to WGS84 (EPSG:4326)
def twd97_to_wgs84(x, y):
    transformer = pyproj.Transformer.from_crs("EPSG:3826", "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(x, y)
    return lat, lon

# Helper function to convert a polyline into a series of points
def polyline_to_points(linestring, spacing=7):
    """
    Convert a LineString to a list of Points spaced at a given interval (in meters).
    
    Parameters:
    - linestring: shapely LineString object
    - spacing: distance between points in meters (default: 7m, based on Taiwan's minimum tree interval)
    
    Returns:
    - List of shapely Point objects
    """
    points = []
    # Ensure the linestring is a single LineString (merge if MultiLineString)
    if linestring.geom_type == 'MultiLineString':
        linestring = linemerge(linestring)
    if linestring.geom_type != 'LineString':
        return points  # Return empty list if not a valid LineString

    total_length = linestring.length
    if total_length <= 0:
        return points

    # Generate points along the line at regular intervals
    distance = 0
    while distance <= total_length:
        point = linestring.interpolate(distance)
        points.append(point)
        distance += spacing
    
    return points

In [120]:
# 1. Load Data with Checkpoints and Progress Bar
def load_data():
    print("Checkpoint 1: Loading data for 和安里 and all neighborhoods within the covering radius...")
    landuse_ndvi_path = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\neighborhoods_with_ndvi_numerical.geojson"
    osm_buildings_path = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\Taipei_Buildings_fulldata.geojson"
    osm_roads_path = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\taipei_segments_cleaned_verified.geoparquet"
    osm_trees_path = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\taipei_land.geoparquet"
    osm_transit_path = r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data\taipei_infrastructure.geoparquet"

    for path in [landuse_ndvi_path, osm_buildings_path, osm_roads_path, osm_trees_path, osm_transit_path]:
        if not os.path.exists(path):
            raise FileNotFoundError(f"File not found: {path}")

    # Try loading with utf-8-sig to handle BOM, fall back to ignoring errors if needed
    try:
        with tqdm(total=5, desc="Loading files") as pbar:
            neighborhoods = gpd.read_file(landuse_ndvi_path, encoding='utf-8-sig')
            print("Columns in neighborhoods after loading:", neighborhoods.columns.tolist())
            pbar.update(1)
            buildings = gpd.read_file(osm_buildings_path, encoding='utf-8-sig')
            pbar.update(1)
            roads = gpd.read_parquet(osm_roads_path)
            pbar.update(1)
            # Load tree data and filter for subtype="tree"
            trees = gpd.read_parquet(osm_trees_path)
            trees = trees[trees['subtype'] == 'tree']
            pbar.update(1)
            # Load transit data and filter for class="stop_position" and "bus_stop"
            transit = gpd.read_parquet(osm_transit_path)
            transit = transit[transit['class'].isin(['stop_position', 'bus_stop'])]
            pbar.update(1)
    except UnicodeDecodeError as e:
        print(f"UTF-8-SIG decoding failed: {e}. Attempting with errors='ignore'...")
        try:
            with tqdm(total=5, desc="Loading files (fallback)") as pbar:
                neighborhoods = gpd.read_file(landuse_ndvi_path, encoding='utf-8-sig', errors='ignore')
                print("Columns in neighborhoods after loading (fallback):", neighborhoods.columns.tolist())
                pbar.update(1)
                buildings = gpd.read_file(osm_buildings_path, encoding='utf-8-sig', errors='ignore')
                pbar.update(1)
                roads = gpd.read_parquet(osm_roads_path)
                pbar.update(1)
                trees = gpd.read_parquet(osm_trees_path)
                trees = trees[trees['subtype'] == 'tree']
                pbar.update(1)
                transit = gpd.read_parquet(osm_transit_path)
                transit = transit[transit['class'].isin(['stop_position', 'bus_stop'])]
                pbar.update(1)
            print("Loaded with errors ignored. Some data may be incomplete.")
        except Exception as e2:
            print(f"Final loading attempt failed: {e2}. Please check file encoding or integrity.")
            raise

    # Ensure all data is in EPSG:3826 (TWD97)
    neighborhoods = neighborhoods.to_crs('EPSG:3826')
    buildings = buildings.to_crs('EPSG:3826')
    roads = roads.to_crs('EPSG:3826')
    trees = trees.to_crs('EPSG:3826')
    transit = transit.to_crs('EPSG:3826')

    # Filter invalid geometries and calculate areas for buildings
    neighborhoods = neighborhoods[neighborhoods.geometry.is_valid]
    buildings = buildings[buildings.geometry.is_valid].copy()
    roads = roads[roads.geometry.is_valid]
    trees = trees[trees.geometry.is_valid]
    transit = transit[transit.geometry.is_valid]

    # Calculate building areas with 1 decimal place precision
    buildings['area_m2'] = buildings.geometry.area.round(1)

    # Clean the 'building' column: only replace 'yes' and NaN with 'unknown'
    buildings['building'] = buildings['building'].fillna('unknown').replace('yes', 'unknown')

    # Separate tree points and polylines
    tree_points = trees[trees.geometry.geom_type == 'Point']
    tree_polylines = trees[trees.geometry.geom_type.isin(['LineString', 'MultiLineString'])]

    # Convert polylines to points (approximate trees every 7 meters)
    converted_points = []
    for _, row in tree_polylines.iterrows():
        geom = row.geometry
        points = polyline_to_points(geom, spacing=7)  # One tree every 7 meters based on Taiwan's minimum interval
        for point in points:
            converted_points.append({'geometry': point})

    # Create a GeoDataFrame for converted points
    if converted_points:
        converted_points_gdf = gpd.GeoDataFrame(converted_points, crs='EPSG:3826')
        # Combine with original tree points
        all_trees = pd.concat([tree_points[['geometry']], converted_points_gdf], ignore_index=True)
        all_trees = gpd.GeoDataFrame(all_trees, crs='EPSG:3826')
    else:
        all_trees = tree_points[['geometry']].copy()
        all_trees = gpd.GeoDataFrame(all_trees, crs='EPSG:3826')

    # Debug: Print distribution of building types, total neighborhoods, tree counts, and transit counts
    print("Building type distribution after cleaning:")
    print(buildings['building'].value_counts())
    print(f"Warren: Total number of neighborhoods in dataset: {len(neighborhoods)}")
    print(f"Total number of tree points loaded: {len(tree_points)}")
    print(f"Total number of tree polylines loaded: {len(tree_polylines)}")
    print(f"Total number of trees after converting polylines: {len(all_trees)}")
    print(f"Total number of transit points loaded: {len(transit)}")

    # Find 和安里 and select all neighborhoods within a covering radius
    central_neigh = neighborhoods[neighborhoods['LIE_NAME'] == '和安里']
    if central_neigh.empty:
        raise ValueError("Neighborhood '和安里' not found in the data.")
    
    central_neigh = central_neigh.iloc[0]
    central_centroid = central_neigh['geometry'].centroid
    
    # Calculate distances from 和安里's centroid to all neighborhoods' centroids (including 和安里)
    neighborhoods['distance_to_central'] = neighborhoods.geometry.centroid.apply(lambda x: x.distance(central_centroid))
    # Set 和安里's distance to 0.0 explicitly
    neighborhoods.loc[neighborhoods['LIE_NAME'] == '和安里', 'distance_to_central'] = 0.0
    
    # Define the covering radius (in meters, since TWD97 is in meters)
    covering_radius = 1000  # Increased from 500m to include more neighborhoods
    
    # Select all neighborhoods within the covering radius, sorted by distance
    within_radius = neighborhoods[neighborhoods['distance_to_central'] <= covering_radius]
    if len(within_radius) == 0:
        raise ValueError(f"No neighborhoods found within {covering_radius}m radius of 和安里.")
    print(f"Found {len(within_radius)} neighborhoods within {covering_radius}m radius.")
    
    # Sort by distance and take all within the radius
    selected_neighborhoods = within_radius.sort_values(by='distance_to_central').reset_index(drop=True)

    # Warn if the dataset might be too large
    if len(selected_neighborhoods) > 100:
        print(f"Warning: Large number of neighborhoods ({len(selected_neighborhoods)}) selected. This may impact performance during graph construction and map rendering.")

    # Debug: Print selected neighborhood names and their distances
    print("Selected neighborhoods and distances from 和安里 (in meters):")
    for idx, row in selected_neighborhoods.iterrows():
        distance = row['distance_to_central']
        print(f"- {row['LIE_NAME']}: {distance:.1f}m")

    # Integrate tree data: Count trees per neighborhood
    print("Integrating tree data with neighborhoods...")
    selected_neighborhoods['tree_count'] = 0

    for idx, neighborhood in selected_neighborhoods.iterrows():
        # Filter trees within the neighborhood boundary
        neighborhood_geom = neighborhood.geometry
        trees_in_neighborhood = all_trees[all_trees.intersects(neighborhood_geom)]
        selected_neighborhoods.at[idx, 'tree_count'] = len(trees_in_neighborhood)

    # Integrate transit data: Count transit points per neighborhood
    print("Integrating transit data with neighborhoods...")
    selected_neighborhoods['transit_count'] = 0

    for idx, neighborhood in selected_neighborhoods.iterrows():
        # Filter transit points within the neighborhood boundary
        neighborhood_geom = neighborhood.geometry
        transit_in_neighborhood = transit[transit.intersects(neighborhood_geom)]
        selected_neighborhoods.at[idx, 'transit_count'] = len(transit_in_neighborhood)

    # Debug: Print tree and transit counts per neighborhood
    print("Tree and transit counts per neighborhood:")
    for idx, row in selected_neighborhoods.iterrows():
        print(f"- {row['LIE_NAME']}: {row['tree_count']} trees, {row['transit_count']} transit points")

    # Combine boundaries of selected neighborhoods
    city_center_boundary = unary_union(selected_neighborhoods['geometry'])
    buffer_distance = 50
    buffered_geom = city_center_boundary.buffer(buffer_distance)

    # Filter buildings, roads, trees, and transit
    filtered_buildings = buildings[buildings.intersects(buffered_geom)]
    filtered_roads = roads[roads.intersects(buffered_geom)].copy()
    filtered_trees = all_trees[all_trees.intersects(buffered_geom)]
    filtered_transit = transit[transit.intersects(buffered_geom)]

    # Calculate road segment lengths
    filtered_roads['length_m'] = filtered_roads.geometry.length.round(1)
    # Debug: Print road length distribution
    print("Road length distribution (in meters):")
    print(filtered_roads['length_m'].describe())
    
    road_points = []
    for idx, row in tqdm(filtered_roads.iterrows(), total=len(filtered_roads), desc="Extracting road endpoints"):
        geom = row['geometry']
        if geom.geom_type == 'LineString':
            start_point = Point(geom.coords[0])
            end_point = Point(geom.coords[-1])
            road_points.extend([(f"road_start_{idx}", start_point), (f"road_end_{idx}", end_point)])
        elif geom.geom_type == 'MultiLineString':
            for i, line in enumerate(geom.geoms):
                start_point = Point(line.coords[0])
                end_point = Point(line.coords[-1])
                road_points.extend([(f"road_start_{idx}_{i}", start_point), (f"road_end_{idx}_{i}", end_point)])

    # Ensure filtered data has valid geometries
    filtered_buildings = filtered_buildings[filtered_buildings.geometry.is_valid]
    filtered_roads = filtered_roads[filtered_roads.geometry.is_valid]
    filtered_trees = filtered_trees[filtered_trees.geometry.is_valid]
    filtered_transit = filtered_transit[filtered_transit.geometry.is_valid]

    # Save checkpoints
    selected_neighborhoods.to_file(os.path.join(CHECKPOINT_DIR, "selected_neighborhoods_filtered.geojson"), driver='GeoJSON')
    filtered_buildings.to_file(os.path.join(CHECKPOINT_DIR, "selected_buildings_filtered.geojson"), driver='GeoJSON')
    filtered_roads.to_file(os.path.join(CHECKPOINT_DIR, "selected_roads_filtered.geojson"), driver='GeoJSON')
    filtered_trees.to_file(os.path.join(CHECKPOINT_DIR, "selected_trees_filtered.geojson"), driver='GeoJSON')
    filtered_transit.to_file(os.path.join(CHECKPOINT_DIR, "selected_transit_filtered.geojson"), driver='GeoJSON')

    print(f"Data loaded and filtered for 和安里 and all neighborhoods within {covering_radius}m radius. Neighborhoods: {len(selected_neighborhoods)}, Buildings: {len(filtered_buildings)}, Roads: {len(filtered_roads)}, Trees: {len(filtered_trees)}, Transit Points: {len(filtered_transit)}")
    return selected_neighborhoods, filtered_buildings, filtered_roads, road_points, filtered_trees, filtered_transit

In [121]:
# 2. Build Graph Network with Checkpoints
def build_graph(neighborhoods, buildings, roads, road_points, trees, transit):
    print("Checkpoint 2: Building graph network for selected neighborhoods...")
    G = nx.Graph()

    # Add Neighborhood Nodes (include tree data, transit data, and NDVI)
    print("Adding neighborhood nodes...")
    for idx, row in tqdm(neighborhoods.iterrows(), total=len(neighborhoods), desc="Adding neighborhoods"):
        node_id = f"neighborhood_{row['LIE_NAME']}"
        G.add_node(node_id,
                   type='neighborhood',
                   lie_name=row['LIE_NAME'],
                   sect_name=row['SECT_NAME'],
                   population=row['2024population'],
                   land_use_residential_percent=row['land_use_residential_percent'],
                   land_use_commercial_percent=row['land_use_commercial_percent'],
                   land_use_education_percent=row['land_use_education_percent'],
                   ndvi_mean=row['ndvi_mean'],
                   ndvi_median=row['ndvi_median'],
                   tree_count=row['tree_count'],
                   transit_count=row['transit_count'],
                   geometry=row['geometry'])

    # Add Building Nodes
    print("Adding building nodes...")
    for idx, row in tqdm(buildings.iterrows(), total=len(buildings), desc="Adding buildings"):
        node_id = f"building_{idx}"
        building_type = row['building'] if pd.notna(row['building']) else 'unknown'
        area_m2 = row['area_m2'] if pd.notna(row['area_m2']) else 0.0
        age = row['屋齡'] if pd.notna(row['屋齡']) else '<NA>'
        height = row['建物高度'] if pd.notna(row['建物高度']) else '<NA>'
        floors = row['地上層數'] if pd.notna(row['地上層數']) else '<NA>'
        structure_type = row['構造種類'] if pd.notna(row['構造種類']) else 'Unknown'
        usage_zone = row['使用分區'] if pd.notna(row['使用分區']) else 'Unknown'
        G.add_node(node_id,
                   type='building',
                   building_type=building_type,
                   area_m2=area_m2,
                   age=age,
                   height=height,
                   floors=floors,
                   structure_type=structure_type,
                   usage_zone=usage_zone,
                   geometry=row['geometry'])

    # Add Road Nodes
    print("Adding road nodes...")
    class_column = next((col for col in roads.columns if col.lower() in ['class', 'road_class', 'highway']), None)
    if class_column is None:
        print(f"Warning: No column resembling 'class', 'road_class', or 'highway' found in roads data. Available columns: {roads.columns.tolist()}")
        class_column = 'unknown'
    else:
        print(f"Using column '{class_column}' for road classification.")

    for node_id, geom in tqdm(road_points, total=len(road_points), desc="Adding road points"):
        road_idx = int(node_id.split('_')[2])
        road_class = roads.loc[road_idx, class_column] if pd.notna(roads.loc[road_idx, class_column]) else 'unknown'
        length_m = roads.loc[road_idx, 'length_m'] if pd.notna(roads.loc[road_idx, 'length_m']) else 0.0
        G.add_node(node_id, type='road', road_class=road_class, length_m=length_m, geometry=geom)

    # Add Tree Nodes
    print("Adding tree nodes...")
    for idx, row in tqdm(trees.iterrows(), total=len(trees), desc="Adding trees"):
        node_id = f"tree_{idx}"
        G.add_node(node_id, type='tree', geometry=row['geometry'])

    # Add Transit Nodes
    print("Adding transit nodes...")
    for idx, row in tqdm(transit.iterrows(), total=len(transit), desc="Adding transit points"):
        node_id = f"transit_{idx}"
        G.add_node(node_id, type='transit', **{'class': row['class'], 'geometry': row['geometry']})

    # Add Edges with optimized buffer distance
    buffer_distance = 200  # Reduced from 500 to improve performance
    print("Adding edges...")
    total_edges = (len([n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']) * 
                   len([n for n, d in G.nodes(data=True) if d['type'] == 'building'])) + \
                  (len([n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']) * 
                   len([n for n, d in G.nodes(data=True) if d['type'] == 'road'])) + \
                  (len([n for n, d in G.nodes(data=True) if d['type'] == 'building']) * 
                   len([n for n, d in G.nodes(data=True) if d['type'] == 'road'])) + \
                  (len([n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']) * 
                   len([n for n, d in G.nodes(data=True) if d['type'] == 'tree'])) + \
                  (len([n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']) * 
                   len([n for n, d in G.nodes(data=True) if d['type'] == 'transit'])) + \
                  max(0, len(road_points) * (len(road_points) - 1) // 2)
    with tqdm(total=total_edges, desc="Adding edges") as pbar:
        # Neighborhood to Building edges
        for neighborhood_node in [n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']:
            neigh_geom = G.nodes[neighborhood_node]['geometry']
            neigh_buffer = neigh_geom.buffer(buffer_distance)
            for building_node in [n for n, d in G.nodes(data=True) if d['type'] == 'building']:
                build_geom = G.nodes[building_node]['geometry']
                if neigh_buffer.intersects(build_geom):
                    distance = neigh_geom.distance(build_geom)
                    G.add_edge(neighborhood_node, building_node, weight=distance, type='walk')
                    pbar.update(1)

        # Neighborhood to Road edges
        for neighborhood_node in [n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']:
            neigh_geom = G.nodes[neighborhood_node]['geometry']
            neigh_buffer = neigh_geom.buffer(buffer_distance)
            for road_node in [n for n, d in G.nodes(data=True) if d['type'] == 'road']:
                road_geom = G.nodes[road_node]['geometry']
                if isinstance(road_geom, Point) and neigh_buffer.intersects(road_geom):
                    distance = neigh_geom.distance(road_geom)
                    G.add_edge(neighborhood_node, road_node, weight=distance, type='walk')
                    pbar.update(1)

        # Building to Road edges
        for building_node in [n for n, d in G.nodes(data=True) if d['type'] == 'building']:
            build_geom = G.nodes[building_node]['geometry']
            build_buffer = build_geom.buffer(50)  # Reduced from 100 to improve performance
            for road_node in [n for n, d in G.nodes(data=True) if d['type'] == 'road']:
                road_geom = G.nodes[road_node]['geometry']
                if isinstance(road_geom, Point) and build_buffer.intersects(road_geom):
                    distance = build_geom.distance(road_geom)
                    G.add_edge(building_node, road_node, weight=distance, type='walk')
                    pbar.update(1)

        # Neighborhood to Tree edges
        for neighborhood_node in [n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']:
            neigh_geom = G.nodes[neighborhood_node]['geometry']
            neigh_buffer = neigh_geom.buffer(buffer_distance)
            for tree_node in [n for n, d in G.nodes(data=True) if d['type'] == 'tree']:
                tree_geom = G.nodes[tree_node]['geometry']
                if neigh_buffer.intersects(tree_geom):
                    distance = neigh_geom.distance(tree_geom)
                    G.add_edge(neighborhood_node, tree_node, weight=distance, type='natural')
                    pbar.update(1)

        # Neighborhood to Transit edges
        for neighborhood_node in [n for n, d in G.nodes(data=True) if d['type'] == 'neighborhood']:
            neigh_geom = G.nodes[neighborhood_node]['geometry']
            neigh_buffer = neigh_geom.buffer(buffer_distance)
            for transit_node in [n for n, d in G.nodes(data=True) if d['type'] == 'transit']:
                transit_geom = G.nodes[transit_node]['geometry']
                if neigh_buffer.intersects(transit_geom):
                    distance = neigh_geom.distance(transit_geom)
                    G.add_edge(neighborhood_node, transit_node, weight=distance, type='transit')
                    pbar.update(1)

        # Road to Road edges
        if road_points:
            road_class_weights = {
                'footway': 0.5, 'pedestrian': 0.5, 'cycleway': 0.6, 'steps': 0.6, 'living_street': 0.6,
                'path': 0.7, 'track': 0.7, 'residential': 0.8, 'service': 1.0, 'unclassified': 1.2,
                'tertiary': 1.5, 'secondary': 2.0, 'primary': 2.5, 'highway': 3.5, 'motorway': 2.5,
                'trunk': 2.5, 'unknown': 1.0
            }
            print("Adding road-to-road edges...")
            tolerance = 10
            for i, (node1_id, geom1) in enumerate(road_points):
                for j, (node2_id, geom2) in enumerate(road_points[i+1:], start=i+1):
                    if geom1.distance(geom2) <= tolerance:
                        road1_idx = int(node1_id.split('_')[2])
                        road2_idx = int(node2_id.split('_')[2])
                        class1 = roads.loc[road1_idx, class_column] if pd.notna(roads.loc[road1_idx, class_column]) else 'unknown'
                        class2 = roads.loc[road2_idx, class_column] if pd.notna(roads.loc[road2_idx, class_column]) else 'unknown'
                        weight1 = road_class_weights.get(class1, road_class_weights['unknown'])
                        weight2 = road_class_weights.get(class2, road_class_weights['unknown'])
                        distance = geom1.distance(geom2)
                        weight = distance * (weight1 + weight2) / 2
                        G.add_edge(node1_id, node2_id, weight=weight, type='road')
                        pbar.update(1)

    with open(os.path.join(CHECKPOINT_DIR, "graph_with_edges_selected_neighborhoods.pkl"), 'wb') as f:
        pickle.dump(G, f)

    print(f"Graph built with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges for selected neighborhoods.")
    return G

In [122]:
# 3. Calculate Walkability Scores (Integrate Tree Data, Transit Data, and NDVI)
def calculate_walkability(G, neighborhoods):
    print("Checkpoint 3: Calculating walkability scores for selected neighborhoods...")
    for node in tqdm(G.nodes(), total=G.number_of_nodes(), desc="Calculating walkability"):
        if G.nodes[node]['type'] == 'neighborhood':
            # Handle NaN values in land use percentages and other metrics
            residential = G.nodes[node]['land_use_residential_percent'] if pd.notna(G.nodes[node]['land_use_residential_percent']) else 0.0
            commercial = G.nodes[node]['land_use_commercial_percent'] if pd.notna(G.nodes[node]['land_use_commercial_percent']) else 0.0
            education = G.nodes[node]['land_use_education_percent'] if pd.notna(G.nodes[node]['land_use_education_percent']) else 0.0
            ndvi = G.nodes[node]['ndvi_mean'] if pd.notna(G.nodes[node]['ndvi_mean']) else 0.0
            tree_count = G.nodes[node]['tree_count'] if pd.notna(G.nodes[node]['tree_count']) else 0
            transit_count = G.nodes[node]['transit_count'] if pd.notna(G.nodes[node]['transit_count']) else 0

            # Original walkability score components
            land_use_score = (residential * 0.4 + commercial * 0.3 + education * 0.2) / 100
            ndvi_score = ndvi * 0.5

            # Add tree and transit influence to walkability
            # Normalize tree count (e.g., assume 100 trees max for scaling)
            tree_score = min(1.0, tree_count / 100) * 0.2  # 20% weight for tree count
            # Normalize transit count (e.g., assume 20 transit points max for scaling)
            transit_score = min(1.0, transit_count / 20) * 0.2  # 20% weight for transit count

            # Combine scores (weights sum to 1: land_use=0.4, ndvi=0.2, trees=0.2, transit=0.2)
            walkability = min(1.0, land_use_score + (ndvi_score * 0.4) + tree_score + transit_score)
            G.nodes[node]['walkability'] = walkability

    for node, data in G.nodes(data=True):
        if data['type'] == 'neighborhood':
            neighborhoods.loc[neighborhoods['LIE_NAME'] == data['lie_name'], 'walkability'] = data['walkability']

    with open(os.path.join(CHECKPOINT_DIR, "walkability_scores_selected_neighborhoods.pkl"), 'wb') as f:
        pickle.dump(G, f)

    print("Walkability scores calculated for selected neighborhoods.")
    return G

In [123]:
# 4. Create Interactive Kepler.gl Map (Modified)
def create_interactive_map(G, neighborhoods, buildings, roads, trees, transit):
    print("Generating interactive Kepler.gl map for selected neighborhoods...")
    try:
        # Convert to WGS84 for Kepler.gl
        neighborhoods_wgs84 = neighborhoods.to_crs('EPSG:4326')
        buildings_wgs84 = buildings.to_crs('EPSG:4326')
        roads_wgs84 = roads.to_crs('EPSG:4326')
        trees_wgs84 = trees.to_crs('EPSG:4326')
        transit_wgs84 = transit.to_crs('EPSG:4326')

        # Prepare data for Kepler.gl
        land_use_cols = [col for col in neighborhoods_wgs84.columns if col.startswith('land_use_') and col.endswith('_percent')]
        display_cols = ['LIE_NAME', 'SECT_NAME', '2024population', 'ndvi_mean', 'tree_count', 'transit_count', 'walkability'] + land_use_cols
        neighborhoods_data = neighborhoods_wgs84[display_cols + ['geometry']].copy()

        # Update walkability and transit_count from the graph
        for node in G.nodes(data=True):
            if node[1]['type'] == 'neighborhood':
                lie_name = node[1]['lie_name']
                walkability = round(node[1].get('walkability', 0), 2)
                transit_count = node[1].get('transit_count', 0)
                lie_name_normalized = lie_name.strip()
                neighborhoods_data.loc[neighborhoods_data['LIE_NAME'].str.strip() == lie_name_normalized, 'walkability'] = walkability
                neighborhoods_data.loc[neighborhoods_data['LIE_NAME'].str.strip() == lie_name_normalized, 'transit_count'] = transit_count

        # Round numeric columns to 2 decimal places
        numeric_cols = ['2024population', 'ndvi_mean', 'tree_count', 'transit_count', 'walkability'] + land_use_cols
        for col in numeric_cols:
            neighborhoods_data[col] = pd.to_numeric(neighborhoods_data[col], errors='coerce').round(2)

        # Convert to GeoJSON-like structure with dynamic filtering
        geojson_data = {
            'type': 'FeatureCollection',
            'features': []
        }
        all_fields = set()  # To dynamically collect all non-zero/non-NaN fields across features
        for _, row in neighborhoods_data.iterrows():
            properties = row.drop('geometry').to_dict()
            # Filter out zero and NaN values
            filtered_properties = {k: v for k, v in properties.items() if pd.notna(v) and v != 0}
            feature = {
                'type': 'Feature',
                'properties': filtered_properties,
                'geometry': row['geometry'].__geo_interface__ if row['geometry'] is not None else None
            }
            geojson_data['features'].append(feature)
            all_fields.update(filtered_properties.keys())

        # Debug: Print GeoJSON properties for 和安里
        print("GeoJSON properties for 和安里:", [f for f in geojson_data['features'] if f['properties'].get('LIE_NAME') == '和安里'][0]['properties'])

        # Dynamically determine fields to show based on filtered properties
        fields_to_show = sorted(list(all_fields))  # Sort the fields alphabetically
        print("Fields to show (sorted):", fields_to_show)  # Debug: Print the sorted fields to verify

        # Prepare other layers (unchanged)
        buildings_data = buildings_wgs84[['building', 'area_m2', '屋齡', '建物高度', '地上層數', '構造種類', '使用分區', 'geometry']].copy()
        buildings_data['建物高度'] = pd.to_numeric(buildings_data['建物高度'], errors='coerce').fillna(10).round(1)
        buildings_data['地上層數'] = pd.to_numeric(buildings_data['地上層數'], errors='coerce').fillna(3).round(0)
        buildings_data.columns = ['Building Type', 'Area (m²)', 'Building Age', 'Building Height (m)', 'Number of Floors', 'Structure Type', 'Zoning Use', 'geometry']
        buildings_data = buildings_data[buildings_data['geometry'].notna()]

        roads_data = roads_wgs84[['class', 'length_m', 'geometry']].copy()
        roads_data['class'] = roads_data['class'].fillna('unknown')
        roads_data['length_m'] = roads_data['length_m'].round(1)
        roads_data = roads_data[roads_data['geometry'].notna()]

        trees_data = trees_wgs84[['geometry']].copy()
        trees_data['height_m'] = 10  # Default height for visualization
        trees_data = trees_data[trees_data['geometry'].notna()]

        transit_data = transit_wgs84[['class', 'geometry']].copy()
        transit_data['size'] = 10  # Default size for visualization
        transit_data = transit_data[transit_data['geometry'].notna()]

        # Configure Kepler.gl with dynamic and sorted fieldsToShow
        config = {
            'version': 'v1',
            'config': {
                'visState': {
                    'layers': [
                        {
                            'id': 'neighborhoods',
                            'type': 'geojson',
                            'config': {
                                'dataId': 'Neighborhoods',
                                'label': 'Neighborhoods',
                                'isVisible': True,
                                'visConfig': {
                                    'opacity': 0.8,
                                    'colorRange': {
                                        'name': 'Global Warming',
                                        'type': 'sequential',
                                        'category': 'Uber',
                                        'colors': ['#5A1846', '#900C3F', '#C70039', '#E3611C', '#F1920E', '#FFC300']
                                    },
                                    'filled': True
                                }
                            }
                        },
                        # ... (other layers remain unchanged)
                    ],
                    'interactionConfig': {
                        'tooltip': {
                            'fieldsToShow': {
                                'Neighborhoods': fields_to_show,  # Use dynamically sorted fields
                                'Buildings': ['Building Type', 'Area (m²)', 'Building Age', 'Building Height (m)', 'Number of Floors', 'Structure Type', 'Zoning Use'],
                                'Roads': ['class', 'length_m'],
                                'Trees': ['height_m'],
                                'Transit': ['class', 'size']
                            },
                            'enabled': True
                        }
                    }
                },
                'mapState': {
                    'latitude': 25.0224406,
                    'longitude': 121.51855980000002,
                    'zoom': 14,
                    'pitch': 0,
                    'bearing': 0
                }
            }
        }

        # Initialize Kepler.gl map with configuration
        map_1 = KeplerGl(
            height=600,
            mapbox_api_access_token=MAPBOX_ACCESS_TOKEN,
            config=config
        )

        # Add data to the map
        map_1.add_data(data=geojson_data, name="Neighborhoods")
        map_1.add_data(data=buildings_data, name="Buildings")
        map_1.add_data(data=roads_data, name="Roads")
        map_1.add_data(data=trees_data, name="Trees")
        map_1.add_data(data=transit_data, name="Transit")

        # Save to HTML
        output_path = os.path.join(r"D:\IAAC\Thesis\Python\MLloading\Geojson\GNN_Read_data", "walkability_map_selected_neighborhoods.html")
        map_1.save_to_html(file_name=output_path)
        print(f"Interactive Kepler.gl map saved successfully at {output_path}")

    except Exception as e:
        print(f"Error generating Kepler.gl map: {e}")

In [124]:
# 5. Main Execution
def main():
    print("Starting walkability graph network analysis for 和安里 and all neighborhoods within the covering radius...")
    try:
        neighborhoods, buildings, roads, road_points, trees, transit = load_data()
        G = build_graph(neighborhoods, buildings, roads, road_points, trees, transit)
        G = calculate_walkability(G, neighborhoods)
        create_interactive_map(G, neighborhoods, buildings, roads, trees, transit)
        print("Analysis completed successfully for 和安里 and all neighborhoods within the covering radius.")
    except Exception as e:
        print(f"Error during analysis: {e}")

if __name__ == "__main__":
    main()

Starting walkability graph network analysis for 和安里 and all neighborhoods within the covering radius...
Checkpoint 1: Loading data for 和安里 and all neighborhoods within the covering radius...


Loading files:  20%|██        | 1/5 [00:00<00:01,  3.61it/s]

Columns in neighborhoods after loading: ['LIE_NAME', 'SECT_NAME', '2024population', 'land_use_city_open_area_count', 'land_use_city_open_area_area_m2', 'land_use_city_open_area_percent', 'land_use_commercial_count', 'land_use_commercial_area_m2', 'land_use_commercial_percent', 'land_use_infrastructure_count', 'land_use_infrastructure_area_m2', 'land_use_infrastructure_percent', 'land_use_government_count', 'land_use_government_area_m2', 'land_use_government_percent', 'land_use_public_transportation_count', 'land_use_public_transportation_area_m2', 'land_use_public_transportation_percent', 'land_use_education_count', 'land_use_education_area_m2', 'land_use_education_percent', 'land_use_medical_count', 'land_use_medical_area_m2', 'land_use_medical_percent', 'land_use_amenity_count', 'land_use_amenity_area_m2', 'land_use_amenity_percent', 'land_use_road_count', 'land_use_road_area_m2', 'land_use_road_percent', 'land_use_pedestrian_count', 'land_use_pedestrian_area_m2', 'land_use_pedestria

Loading files: 100%|██████████| 5/5 [00:04<00:00,  1.21it/s]


Building type distribution after cleaning:
building
apartments     23519
residential    18958
unknown        17957
house           4305
school          1569
               ...  
social             1
civil              1
entrance           1
supermarket        1
r                  1
Name: count, Length: 80, dtype: int64
Warren: Total number of neighborhoods in dataset: 456
Total number of tree points loaded: 2289
Total number of tree polylines loaded: 1106
Total number of trees after converting polylines: 17786
Total number of transit points loaded: 6845
Found 16 neighborhoods within 1000m radius.
Selected neighborhoods and distances from 和安里 (in meters):
- 和安里: 0.0m
- 仁慈里: 412.0m
- 龍圖里: 459.4m
- 義村里: 461.4m
- 民炤里: 549.6m
- 德安里: 674.6m
- 住安里: 675.5m
- 新龍里: 708.3m
- 仁愛里: 715.3m
- 誠安里: 843.3m
- 民輝里: 850.9m
- 昌隆里: 857.5m
- 龍雲里: 872.8m
- 龍陣里: 876.9m
- 龍門里: 976.1m
- 敦安里: 995.1m
Integrating tree data with neighborhoods...
Integrating transit data with neighborhoods...
Tree and transit counts 

Extracting road endpoints: 100%|██████████| 2063/2063 [00:00<00:00, 21030.75it/s]


Data loaded and filtered for 和安里 and all neighborhoods within 1000m radius. Neighborhoods: 16, Buildings: 2848, Roads: 2063, Trees: 2085, Transit Points: 179
Checkpoint 2: Building graph network for selected neighborhoods...
Adding neighborhood nodes...


Adding neighborhoods: 100%|██████████| 16/16 [00:00<00:00, 7999.63it/s]


Adding building nodes...


Adding buildings: 100%|██████████| 2848/2848 [00:00<00:00, 26872.17it/s]


Adding road nodes...
Using column 'class' for road classification.


Adding road points: 100%|██████████| 4126/4126 [00:00<00:00, 50313.99it/s]


Adding tree nodes...


Adding trees: 100%|██████████| 2085/2085 [00:00<00:00, 53629.35it/s]


Adding transit nodes...


Adding transit points: 100%|██████████| 179/179 [00:00<00:00, 43611.99it/s]


Adding edges...


Adding edges:   0%|          | 49748/20408531 [01:10<31:38, 10726.31it/s] 

Adding road-to-road edges...


Adding edges:   0%|          | 55255/20408531 [02:00<12:17:57, 459.67it/s]


Graph built with 9254 nodes and 55255 edges for selected neighborhoods.
Checkpoint 3: Calculating walkability scores for selected neighborhoods...


Calculating walkability: 100%|██████████| 9254/9254 [00:00<00:00, 2312151.62it/s]


Walkability scores calculated for selected neighborhoods.
Generating interactive Kepler.gl map for selected neighborhoods...
GeoJSON properties for 和安里: {'LIE_NAME': '和安里', 'SECT_NAME': '大安區', '2024population': 6933, 'ndvi_mean': 0.28, 'tree_count': 144, 'transit_count': 15, 'walkability': 0.69, 'land_use_city_open_area_percent': 1.89, 'land_use_commercial_percent': 10.13, 'land_use_government_percent': 0.37, 'land_use_education_percent': 44.77, 'land_use_natural_percent': 2.24, 'land_use_special_zone_percent': 0.44, 'land_use_residential_percent': 40.17}
Fields to show (sorted): ['2024population', 'LIE_NAME', 'SECT_NAME', 'land_use_amenity_percent', 'land_use_city_open_area_percent', 'land_use_commercial_percent', 'land_use_education_percent', 'land_use_government_percent', 'land_use_infrastructure_percent', 'land_use_medical_percent', 'land_use_military_percent', 'land_use_natural_percent', 'land_use_public_transportation_percent', 'land_use_residential_percent', 'land_use_road_perce