In [3]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

def create_grid(boundary_path, roads_path, grid_size, road_buffer=0.00005):
    """
    Build a regular grid over the boundary extent and clip it to the buffered roads.

    :param boundary_path: path of the boundary file (read with geopandas)
    :param roads_path: path of the roads file (read with geopandas)
    :param grid_size: number of cells along x and y, as (nx, ny)
    :param road_buffer: distance handed to ``buffer`` on the road geometries,
        in the units of the roads CRS; defaults to 0.00005, the value that was
        previously hard-coded
    :return: GeoDataFrame of the clipped cells with a fresh 0..n-1 index and an
        ``original_index`` column holding each cell's position in the full
        grid (``ix * ny + iy``)
    """
    # Load both layers
    boundary = gpd.read_file(boundary_path)
    roads = gpd.read_file(roads_path)
    # NOTE(review): if the roads layer uses a geographic CRS this distance is in
    # degrees (geopandas warns about that) -- confirm this is intended.
    roads['geometry'] = roads.geometry.buffer(road_buffer)

    # The grid is labelled with the roads CRS below, so the extent it is built
    # from has to be expressed in that same CRS.
    if boundary.crs is not None and roads.crs is not None and boundary.crs != roads.crs:
        boundary = boundary.to_crs(roads.crs)

    nx, ny = grid_size
    x_min, y_min, x_max, y_max = boundary.total_bounds

    # Cell sizes do not change between iterations, so compute them once.
    x_step = (x_max - x_min) / nx
    y_step = (y_max - y_min) / ny

    # Cells are appended column by column (ix outer, iy inner), which is what
    # makes the default index equal to ix * ny + iy.
    cells = []
    for ix in range(nx):
        x1 = x_min + ix * x_step
        x2 = x1 + x_step
        for iy in range(ny):
            y1 = y_min + iy * y_step
            y2 = y1 + y_step
            cells.append(box(x1, y1, x2, y2))

    # Remember each cell's full-grid position before clipping drops/reorders rows
    grid_gdf = gpd.GeoDataFrame(geometry=cells, crs=roads.crs)
    grid_gdf['original_index'] = grid_gdf.index
    return gpd.clip(grid_gdf, roads).reset_index(drop=True)

# def preprocess_fine_grid(fine_grid, coarse_grid):
#     """
#     预处理细粒度网格，删除不在任何粗粒度网格内的细粒度网格，并重置索引。
#     参数:
#         fine_grid: 细粒度网格GeoDataFrame
#         coarse_grid: 粗粒度网格GeoDataFrame
#     返回:
#         预处理后的细粒度网格
#     """
#     def is_inside_coarse_grid(fine_bounds):
#         f_minx, f_miny, f_maxx, f_maxy = fine_bounds
#         for _, c_row in coarse_grid.iterrows():
#             c_minx, c_miny, c_maxx, c_maxy = c_row.geometry.bounds
#             if (f_minx >= c_minx) and (f_maxx <= c_maxx) and \
#                (f_miny >= c_miny) and (f_maxy <= c_maxy):
#                 return True
#         return False
    
#     # 记录原始网格数量
#     original_count = len(fine_grid)

#     # 筛选出所有在粗粒度网格内的细粒度网格
#     valid_fine_grid = fine_grid[fine_grid.geometry.bounds.apply(lambda x: is_inside_coarse_grid(x), axis=1)]
    
#     # 计算被筛选掉的网格数量
#     filtered_count = original_count - len(valid_fine_grid)
#     print(f"被筛选掉的细粒度网格数量: {filtered_count}")
#     # 重置索引
#     valid_fine_grid.reset_index(drop=True, inplace=True)

#     return valid_fine_grid

def generate_mapping_matrix(grid_gdf, full_grid_size):
    """
    Build the one-hot matrix linking full-grid cells to the retained grid nodes.

    Entry ``[original_index, k]`` is 1 for the k-th row of ``grid_gdf`` (counted
    by position) and 0 everywhere else.

    :param grid_gdf: table with an integer ``original_index`` column
    :param full_grid_size: number of cells in the full grid (nx * ny)
    :return: int array of shape (full_grid_size, len(grid_gdf))
    """
    node_num = len(grid_gdf)
    map_matrix = np.zeros((full_grid_size, node_num), dtype=int)
    print('node_num', node_num)

    # Address columns by row position rather than by index label, so the result
    # is also correct when the frame's index is not 0..n-1.
    original_indices = np.asarray(grid_gdf['original_index'], dtype=int)
    map_matrix[original_indices, np.arange(node_num)] = 1
    return map_matrix

def convert_to_4d(data_path, grid_gdf, grid_dimensions, time_slots):
    """
    Scatter per-grid, per-time-slot feature rows from a CSV into a 4D tensor.

    Every column other than ``grid_id`` and ``time_slot`` is treated as a
    feature, in CSV column order. A row's ``grid_id`` is looked up in
    ``grid_gdf`` (index -> ``original_index``) and the resulting position is
    split into ``(original_index // W, original_index % W)``.

    :param data_path: path of the CSV file to read
    :param grid_gdf: table whose index holds the grid ids used in the CSV and
        whose ``original_index`` column holds the full-grid positions
    :param grid_dimensions: grid dimensions (H, W)
    :param time_slots: total number of time slots (size of the first axis)
    :return: float32 array of shape (time_slots, n_features, H, W); cells that
        no CSV row refers to stay 0
    :raises KeyError: if a ``grid_id`` from the CSV is not in ``grid_gdf.index``
    """
    df = pd.read_csv(data_path)
    feature_cols = [col for col in df.columns if col not in ['grid_id', 'time_slot']]
    print(feature_cols)

    H, W = grid_dimensions
    tensor = np.zeros((time_slots, len(feature_cols), H, W), dtype=np.float32)

    # grid id used in the CSV -> position of that cell in the full grid
    new_to_old = dict(zip(grid_gdf.index, grid_gdf['original_index']))

    # Pull the values out by column name so the result does not depend on
    # where grid_id / time_slot sit among the CSV columns.
    grid_ids = df['grid_id'].to_numpy()
    slots = df['time_slot'].to_numpy()
    values = df[feature_cols].to_numpy(dtype=np.float32)

    for grid_id, time_slot, feature_values in zip(grid_ids, slots, values):
        old_index = new_to_old[int(grid_id)]
        tensor[int(time_slot), :, old_index // W, old_index % W] = feature_values
    return tensor

def process_grid_pipeline(config):
    """
    Run the full grid pipeline: build the grid, save its mapping matrix, then
    convert the spatio-temporal CSV to a 4D tensor and save that too.

    Two files are written into ``config['output_dir']`` (created if missing):
    ``new_grid_node_map_<grid_type>.npy`` and ``new_grid_data_<grid_type>_4d.npy``.

    :param config: configuration dict with the keys
        - boundary_path: path of the boundary file
        - roads_path: path of the roads file
        - grid_type: grid type tag used in the output file names (e.g. 'f')
        - grid_size: grid size (nx, ny)
        - data_path: path of the spatio-temporal CSV
        - output_dir: output directory
    """
    output_dir = config['output_dir']

    # Build the clipped grid
    processed_grid = create_grid(
        config['boundary_path'],
        config['roads_path'],
        config['grid_size']
    )
    print(config['grid_type'])

    # Mapping matrix between the full grid and the retained nodes
    nx, ny = config['grid_size']
    mapping_matrix = generate_mapping_matrix(
        processed_grid,
        full_grid_size=nx * ny
    )

    # np.save does not create missing folders, so make sure the target exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    np.save(f"{output_dir}/new_grid_node_map_{config['grid_type']}.npy", mapping_matrix)

    # Time slots are taken to be 0-based, hence max + 1; only that one column
    # is needed here, and int() keeps the value usable as an array dimension.
    time_slot_column = pd.read_csv(config['data_path'], usecols=['time_slot'])['time_slot']
    time_slots = int(time_slot_column.max()) + 1
    tensor_4d = convert_to_4d(
        config['data_path'],
        processed_grid,
        grid_dimensions=config['grid_size'],
        time_slots=time_slots
    )
    np.save(f"{output_dir}/new_grid_data_{config['grid_type']}_4d.npy", tensor_4d)

In [4]:
def generate_trans(path, output_path='save/trans_f.npy'):
    """
    Build and save the 0/1 matrix linking fine grid ids to coarse grid ids.

    The CSV must have ``grid_id`` and ``Coarse Grid ID`` columns. The matrix has
    one row per distinct ``grid_id`` and one column per distinct
    ``Coarse Grid ID``; entry ``[grid_id, coarse_id]`` is 1 for every pair that
    appears in the file. Ids are used directly as positions, so they are
    expected to be 0-based and contiguous; an id that is not smaller than the
    number of distinct ids raises IndexError.

    :param path: path of the CSV file to read
    :param output_path: where the matrix is saved with ``np.save``; defaults to
        'save/trans_f.npy', the location that was previously hard-coded
    :return: None
    """
    df = pd.read_csv(path)

    # Integer arrays taken straight from the columns: row-wise iteration would
    # upcast the ids to float whenever the file has a float column, and floats
    # are not valid array indices.
    fine_grid_ids = df['grid_id'].astype(int).to_numpy()
    coarse_grid_ids = df['Coarse Grid ID'].astype(int).to_numpy()

    num_fine_grids = len(np.unique(fine_grid_ids))
    num_coarse_grids = len(np.unique(coarse_grid_ids))
    print(num_coarse_grids)

    # Start from all zeros and mark every (fine, coarse) pair in one assignment
    trans = np.zeros((num_fine_grids, num_coarse_grids), dtype=int)
    trans[fine_grid_ids, coarse_grid_ids] = 1

    print(trans.shape)

    # np.save does not create missing folders
    output_folder = os.path.dirname(output_path)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    np.save(output_path, trans)

In [5]:
    # Fine-grid run: 20 x 20 cells, data read from save/grid_data_f_6.csv
process_grid_pipeline(dict(
    boundary_path='dataset/nyc_boundary.shp',
    roads_path='dataset/nyc_roads.geojson',
    grid_type='f',
    grid_size=(20, 20),
    data_path='save/grid_data_f_6.csv',
    output_dir='/root/autodl-tmp/data/npy_new_diffup',
))

# process_grid_pipeline({
#         'boundary_path': 'dataset/nyc_boundary.shp',
#         'roads_path': 'dataset/nyc_roads.geojson',
#         'grid_type': 'f',
#         'grid_size': (20, 20),
#         'data_path': 'save/grid_data_f_5.csv',
#         'output_dir': '/root/autodl-tmp/npy_new_diffdown'
#     })
    
    # Coarse-grid run: 10 x 10 cells, data read from save/grid_data_c_6.csv
process_grid_pipeline(dict(
    boundary_path='dataset/nyc_boundary.shp',
    roads_path='dataset/nyc_roads.geojson',
    grid_type='c',
    grid_size=(10, 10),
    data_path='save/grid_data_c_6.csv',
    output_dir='/root/autodl-tmp/data/npy_new_diffup',
))


# #     # 细网格处理
# process_grid_pipeline({
#         'boundary_path': 'dataset/nyc_boundary.shp',
#         'roads_path': 'dataset/nyc_roads.geojson',
#         'grid_type': 'f',
#         'grid_size': (20, 20),
#         'data_path': 'save/grid_data_f_2.csv',
#         'output_dir': '/root/autodl-tmp/npy_new_nodiff'
#     })
    
#     # 粗网格处理 
# process_grid_pipeline({
#         'boundary_path': 'dataset/nyc_boundary.shp',
#         'roads_path': 'dataset/nyc_roads.geojson',
#         'grid_type': 'c',
#         'grid_size': (10, 10),
#         'data_path': 'save/grid_data_c_2.csv',
#         'output_dir': '/root/autodl-tmp/npy_new_nodiff'
#     })

#     # 超细网格处理 
# process_grid_pipeline({
#         'boundary_path': 'dataset/nyc_boundary.shp',
#         'roads_path': 'dataset/nyc_roads.geojson',
#         'grid_type': 'uf',
#         'grid_size': (30, 30),
#         'data_path': 'save/grid_data_uf_2.csv',
#         'output_dir': 'save'
#     })




  roads['geometry'] = roads.geometry.buffer(0.00005)


f
node_num 218
['risk_label', 'weather_type', 'risk_value', 'inflow', 'outflow', 'precipitation', 'wind_speed_10m']



  roads['geometry'] = roads.geometry.buffer(0.00005)


c
node_num 65
['risk_label', 'risk_value', 'inflow', 'outflow', 'precipitation', 'wind_speed_10m', 'weather_type']


In [6]:
# # generate_trans('save/NYC_grid_f.csv')
# import numpy as np

# # 读取 .npy 文件
# data = np.load('save/grid_node_map_f.npy')
# # 输出数组的形状
# print(data.shape)
# for i in range(400):
#     for j in range(200):
#         if data[i,j]==0:
#             continue
#         else:
#             print(i,j,data[i,j])



In [7]:
# import os
# import numpy as np

# folder_path = "npy"  # 文件夹路径

# # 检查文件夹是否存在
# if not os.path.exists(folder_path):
#     raise FileNotFoundError(f"文件夹 {folder_path} 不存在")

# # 遍历文件夹中的所有文件
# file_shapes = {}
# for filename in os.listdir(folder_path):
#     if filename.endswith(".npy"):
#         file_path = os.path.join(folder_path, filename)
#         try:
#             # 读取npy文件的形状
#             array = np.load(file_path)
#             file_shapes[filename] = array.shape
#             print(f"文件名: {filename} \t 形状: {array.shape}")
#         except Exception as e:
#             print(f"读取 {filename} 失败: {str(e)}")

# # 如果需要将结果保存为字典
# print("\n完整结果:")
# print(file_shapes)

In [8]:
# def print_matrix_from_npy(file_path):
#     """
#     加载 .npy 文件并完全打印矩阵内容。

#     参数:
#     file_path (str): .npy 文件的路径
#     """
#     # 加载 .npy 文件
    
#     matrix = np.load(file_path)
#     # print(matrix.shape)

#     # 设置 numpy 打印选项，确保完全打印矩阵
#     np.set_printoptions(threshold=np.inf, linewidth=np.inf)

#     # 打印矩阵
#     print(f"文件 {file_path} 的矩阵内容：")
#     print(matrix[1300,6,:,:])
# all_data_f_path1 =  'save/grid_data_c_4d.npy'
# all_data_f_path2 = 'save/grid_data_f_4d.npy'
# print_matrix_from_npy(all_data_f_path1)
# print_matrix_from_npy(all_data_f_path2)