In [2]:
# =============================================================================
# 第一部分：基础库导入
# =============================================================================
import geopandas as gpd
from shapely.geometry import MultiPolygon, Polygon, box
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# =============================================================================
# 第二部分：数据加载与预处理
# =============================================================================
def load_data():
    """Load the NYC boundary and road layers and derive the grid extent.

    Reads ``dataset/nyc_boundary.shp`` and ``dataset/nyc_roads.geojson``,
    replaces every road geometry with a thin buffer polygon (so the roads can
    be used as a clip mask), and computes the boundary's bounding box.

    Returns:
        A 3-tuple ``(nyc_boundary, nyc_roads, bounds)`` where ``bounds`` is
        ``(x_min, y_min, x_max, y_max)`` taken from
        ``nyc_boundary.total_bounds``.
    """
    nyc_boundary = gpd.read_file(r'dataset/nyc_boundary.shp')
    nyc_roads = gpd.read_file(r'dataset/nyc_roads.geojson')

    # The bounds returned here are later combined with the roads layer's CRS
    # (generate_grids builds the grid with crs=road_data.crs), so both layers
    # must be expressed in the same CRS. Without this alignment a CRS mismatch
    # between the two files would silently yield a grid in the wrong
    # coordinates. No-op when the CRSs already agree or one is undefined.
    if (nyc_boundary.crs is not None
            and nyc_roads.crs is not None
            and nyc_boundary.crs != nyc_roads.crs):
        nyc_boundary = nyc_boundary.to_crs(nyc_roads.crs)

    # Turn the road geometries into polygons for later clipping. The buffer
    # distance is expressed in the units of the roads layer's CRS.
    # NOTE(review): if that CRS is geographic the distance is in degrees and
    # geopandas warns that buffer results are approximate - confirm that is
    # acceptable for this use.
    nyc_roads['geometry'] = nyc_roads.geometry.buffer(0.00005)

    # Extent of the city boundary, used as the outer frame of the grid.
    x_min, y_min, x_max, y_max = nyc_boundary.total_bounds
    return nyc_boundary, nyc_roads, (x_min, y_min, x_max, y_max)

# =============================================================================
# 第三部分：网格生成函数
# =============================================================================
def generate_grids(bounds, grid_size, road_data):
    """
    Build a regular rectangular grid over ``bounds`` and clip it to the roads.

    Parameters:
        bounds: extent as (x_min, y_min, x_max, y_max)
        grid_size: number of cells along each axis as (nx, ny)
        road_data: GeoDataFrame used as the clip mask; its CRS is assigned
            to the grid
    Returns:
        GeoDataFrame of the clipped cells with a fresh 0..n-1 index
    """
    cols, rows = grid_size
    west, south, east, north = bounds

    def _cell_boxes():
        # Column-major walk: every column yields all of its rows before the
        # next column starts, so cell order is (ix, iy) with iy fastest.
        for col in range(cols):
            width = (east - west) / cols
            left = west + col * width
            right = left + width
            for row in range(rows):
                height = (north - south) / rows
                bottom = south + row * height
                yield box(left, bottom, right, bottom + height)

    # Tag the raw grid with the roads' CRS, then keep only the parts of each
    # cell that overlap the road mask.
    base_grid = gpd.GeoDataFrame(geometry=list(_cell_boxes()), crs=road_data.crs)
    clipped = gpd.clip(base_grid, road_data)
    return clipped.reset_index(drop=True)

# =============================================================================
# 第五部分：网格数据存储
# =============================================================================
def save_grid_data(grid_gdf, prefix):
    """Write each grid cell's bounding extent to ``save/NYC_grid_<prefix>.csv``.

    Parameters:
        grid_gdf: frame whose rows carry a ``geometry`` exposing a ``bounds``
            tuple ``(minx, miny, maxx, maxy)``.
        prefix: suffix used in the output file name (e.g. 'c').

    The CSV has one row per cell with the columns ``grid_id`` (the row's index
    label), ``Longitude Range`` as ``(minx, maxx)`` and ``Latitude Range`` as
    ``(miny, maxy)``. The ``save`` directory is created when missing.
    """
    import os

    # to_csv refuses to write into a directory that does not exist.
    os.makedirs('save', exist_ok=True)

    records = []
    for idx, row in grid_gdf.iterrows():
        # Read the bounds once per cell instead of once per coordinate.
        min_x, min_y, max_x, max_y = row.geometry.bounds
        records.append({
            "grid_id": idx,
            "Longitude Range": (min_x, max_x),
            "Latitude Range": (min_y, max_y),
        })

    # Explicit columns keep the header row even when the grid is empty.
    columns = ["grid_id", "Longitude Range", "Latitude Range"]
    pd.DataFrame(records, columns=columns).to_csv(
        f'save/NYC_grid_{prefix}.csv', index=False
    )

# =============================================================================
# 第六部分：道路数据分析
# =============================================================================
def analyze_road_conditions(grid_gdf, road_data, grid_type):
    """
    Compute road density and pavement-rating statistics per grid cell and
    write them to ``save/grid_road_<grid_type>.csv``.

    Parameters:
        grid_gdf: grid cells; each row's ``geometry`` is used as a clip mask
            and the row's index label is written as ``grid_id``
        road_data: rating table with a WKT column ``the_geom``, a
            ``SegmentID`` column and a ``ManualRati`` column; it is copied,
            so the caller's frame is not modified
        grid_type: grid identifier used in the file name ('c'/'f'/'uf')

    Returns:
        None. The CSV has one row per cell with ``grid_id``,
        ``road_density`` (clipped road length divided by the cell geometry's
        area, both in raw coordinate units since no CRS is assigned to the
        parsed roads) and the min/max/mean/median of ``ManualRati``. Cells
        without any road keep 0 for every metric.
    """
    import os

    # Create the output directory before the expensive per-cell loop so a
    # missing directory cannot discard the finished computation at the end.
    os.makedirs('save', exist_ok=True)

    # Parse the WKT geometries and keep one row per road segment.
    roads = road_data.copy()
    roads['geometry'] = gpd.GeoSeries.from_wkt(roads['the_geom'])
    roads = gpd.GeoDataFrame(roads, geometry='geometry').drop_duplicates('SegmentID')

    results = []
    for _, grid_row in grid_gdf.iterrows():
        grid_poly = grid_row.geometry

        # Road pieces that fall inside this cell.
        roads_in_grid = gpd.clip(roads, grid_poly)

        # Defaults used when the cell contains no road at all.
        stats = {
            'grid_id': grid_row.name,
            'road_density': 0,
            'rating_min': 0,
            'rating_max': 0,
            'rating_mean': 0,
            'rating_median': 0
        }

        if not roads_in_grid.empty:
            ratings = roads_in_grid['ManualRati']
            total_length = roads_in_grid.geometry.length.sum()
            # NOTE(review): when grid_gdf comes from generate_grids, each
            # geometry is the cell already clipped to the buffered roads, so
            # this area is the clipped area rather than the full rectangle -
            # confirm that is the intended denominator.
            grid_area = grid_poly.area
            stats.update({
                'road_density': total_length / grid_area,
                'rating_min': ratings.min(),
                'rating_max': ratings.max(),
                'rating_mean': ratings.mean(),
                'rating_median': ratings.median()
            })

        results.append(stats)

    # Persist one row per grid cell.
    pd.DataFrame(results).to_csv(f'save/grid_road_{grid_type}.csv', index=False)


In [3]:
# =============================================================================
# 调用部分
# =============================================================================
# Load the boundary layer, the buffered road polygons and the boundary's
# bounding box (x_min, y_min, x_max, y_max).
nyc_boundary, nyc_roads, bounds = load_data()

# Build the coarse 10x10 grid clipped to the roads. Only this granularity is
# generated here; the finer grids live in the disabled cells that follow.
grid_coarse = generate_grids(bounds, (10, 10), nyc_roads)
print('粗网格生成完成')
print(f"[系统消息] 网格处理完成：\n"
      f"粗粒度(10x10): {len(grid_coarse)}个\n")



  nyc_roads['geometry'] = nyc_roads.geometry.buffer(0.00005)


粗网格生成完成
[系统消息] 网格处理完成：
粗粒度(10x10): 65个



In [4]:
# grid_fine = generate_grids(bounds, (20, 20), nyc_roads)
# print('细网格生成完成')
# print(f"[系统消息] 网格处理完成：\n"
#       f"细粒度(20x20): {len(grid_fine)}个\n")

In [5]:
# grid_ultrafine = generate_grids(bounds, (40, 40), nyc_roads)
# print('超细网格生成完成')
# print(f"[系统消息] 网格处理完成：\n"
#       f"超细粒度(40x40): {len(grid_ultrafine)}个")

In [6]:
# print(f"[系统消息] 网格处理完成：\n"
#       f"粗粒度(10x10): {len(grid_coarse)}个\n"
#       f"细粒度(20x20): {len(grid_fine)}个\n"
#       f"超细粒度(40x40): {len(grid_ultrafine)}个")

In [7]:
# Save the grid extents to CSV. Only the coarse grid is active; the fine and
# ultrafine calls stay disabled together with their generation cells.
save_grid_data(grid_coarse, 'c')      # 10x10
# save_grid_data(grid_fine, 'f')        # 20x20
# save_grid_data(grid_ultrafine, 'uf')  # 40x40 (the disabled generate_grids call uses (40, 40))

In [8]:
# Load the pavement-rating table.
# NOTE(review): the saved cell output shows a warning raised on this
# read_csv line, but its text is not preserved - presumably a mixed-dtype
# warning; confirm and, if so, pass explicit dtype= for the affected columns.
road_rating = pd.read_csv(r'dataset/Street_Pavement_Rating.csv')

# Run the per-cell road analysis. Only the coarse grid is active; the fine
# and ultrafine calls stay disabled together with their generation cells.
analyze_road_conditions(grid_coarse, road_rating, 'c')     # 10x10
# analyze_road_conditions(grid_fine, road_rating, 'f')       # 20x20
# analyze_road_conditions(grid_ultrafine, road_rating, 'uf') # 40x40 (the disabled generate_grids call uses (40, 40))

  road_rating = pd.read_csv(r'dataset/Street_Pavement_Rating.csv')
