In [2]:
import pandas as pd
import numpy as np

def generate_green_grid_data(seed=2025, output_dir="."):
    """Generate a deliberately noisy synthetic siting dataset and save it as CSV.

    Two files are written into ``output_dir``:

    * ``City_Sites.csv`` -- 20 candidate sites with columns ``Site_ID``
      (``S01``..``S20``), ``X``, ``Y`` (uniform in [10, 90)), ``Zone_Type``
      (``Residential`` with p=0.6, ``Commercial`` with p=0.4), ``Land_Cost``
      (100-199 for commercial, 50-99 for residential) and ``Grid_Capacity_kW``
      (200-999). Three distinct, randomly chosen sites have their capacity
      replaced by NaN.
    * ``City_Demands.csv`` -- 50 demand points with columns ``Demand_ID``
      (``D01``..``D50``), ``X``, ``Y`` (uniform in [5, 95)) and ``Daily_Traffic``
      (30-149). Three distinct, randomly chosen points have their traffic
      replaced by values drawn from ``[-10, 0, -99]``.

    A short summary is printed after the files are written.

    Args:
        seed: Value passed to ``np.random.seed``. The default (2025) reproduces
            the original dataset. Note that this reseeds NumPy's *global*
            legacy RNG as a side effect.
        output_dir: Directory that receives the two CSV files; created if it
            does not exist. Defaults to the current working directory.

    Returns:
        None. The dataset is only available through the written CSV files.
    """
    from pathlib import Path  # function-scope import keeps the block self-contained

    # Lock the random seed for reproducibility. Every np.random call below is
    # made in a fixed order; reordering any of them changes the generated data.
    np.random.seed(seed)

    # === 1. Candidate sites ===
    # Simulates 20 vacant plots in the city.
    num_sites = 20
    site_ids = [f"S{i+1:02d}" for i in range(num_sites)]

    # Zone type (residential / commercial) and remaining grid capacity per site.
    zone_types = np.random.choice(['Residential', 'Commercial'], num_sites, p=[0.6, 0.4])
    grid_caps = np.random.randint(200, 1000, num_sites).astype(float)

    # [Injected dirty data 1] Missing values: 3 distinct sites lose their capacity.
    missing_idx = np.random.choice(num_sites, 3, replace=False)
    grid_caps[missing_idx] = np.nan

    site_x = np.random.uniform(10, 90, num_sites)
    site_y = np.random.uniform(10, 90, num_sites)
    # Commercial plots draw from a higher land-cost range than residential ones.
    # Drawn one value per site (not vectorised) so the random stream is unchanged.
    land_costs = [
        np.random.randint(100, 200) if zone == 'Commercial' else np.random.randint(50, 100)
        for zone in zone_types
    ]

    df_sites = pd.DataFrame({
        'Site_ID': site_ids,
        'X': site_x,
        'Y': site_y,
        'Zone_Type': zone_types,
        'Land_Cost': land_costs,
        'Grid_Capacity_kW': grid_caps
    })

    # === 2. Traffic hotspot demand ===
    # Simulates 50 points where traffic flow converges.
    num_demands = 50
    traffic = np.random.randint(30, 150, num_demands).astype(float)

    # [Injected dirty data 2] Sensor faults: negative or zero traffic readings.
    # The replacement values are drawn BEFORE the target indices, which is the
    # order Python evaluates ``arr[idx_expr] = value_expr`` in (right-hand side first).
    faulty_values = np.random.choice([-10, 0, -99], 3)
    faulty_idx = np.random.choice(num_demands, 3, replace=False)
    traffic[faulty_idx] = faulty_values

    df_demands = pd.DataFrame({
        'Demand_ID': [f"D{i+1:02d}" for i in range(num_demands)],
        'X': np.random.uniform(5, 95, num_demands),
        'Y': np.random.uniform(5, 95, num_demands),
        'Daily_Traffic': traffic
    })

    # Save the files (index column omitted).
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    df_sites.to_csv(out_path / 'City_Sites.csv', index=False)
    df_demands.to_csv(out_path / 'City_Demands.csv', index=False)

    print("✅ 数据集构建完成 (含噪声):")
    print(f"1. City_Sites.csv: {len(df_sites)} rows (Contains NaN in Grid_Capacity)")
    print(f"2. City_Demands.csv: {len(df_demands)} rows (Contains Outliers in Traffic)")

# Entry point: build and save the noisy dataset when this code runs as the main module.
if __name__ == "__main__":
    generate_green_grid_data()

✅ 数据集构建完成 (含噪声):
1. City_Sites.csv: 20 rows (Contains NaN in Grid_Capacity)
2. City_Demands.csv: 50 rows (Contains Outliers in Traffic)


In [3]:
import pandas as pd
import numpy as np

def generate_feasible_greengrid_data(seed=2026, output_dir="."):
    """Generate a supply-rich synthetic siting dataset and save it as CSV.

    Compared with a fully random layout, grid capacities are drawn from a much
    higher range and every demand point is placed 3-12 distance units away from
    a randomly chosen candidate site ("satellite" placement), then clipped to
    the [0, 100] square.

    Two files are written into ``output_dir`` (overwriting existing ones):

    * ``City_Sites.csv`` -- 25 sites with columns ``Site_ID``, ``X``, ``Y``
      (uniform in [5, 95)), ``Zone_Type``, ``Land_Cost`` (100-299) and
      ``Grid_Capacity_kW`` (2500-5999). Two distinct, randomly chosen sites
      have their capacity replaced by NaN.
    * ``City_Demands.csv`` -- 40 demand points with columns ``Demand_ID``,
      ``X``, ``Y`` and ``Daily_Traffic`` (20-39). Two distinct, randomly chosen
      points have their traffic replaced by -10.

    A summary (average capacity, average demand) is printed afterwards.

    Args:
        seed: Value passed to ``np.random.seed``. The default (2026) reproduces
            the original dataset. Note that this reseeds NumPy's *global*
            legacy RNG as a side effect.
        output_dir: Directory that receives the two CSV files; created if it
            does not exist. Defaults to the current working directory.

    Returns:
        None. The dataset is only available through the written CSV files.

    NOTE(review): three caveats are visible in the code and left unchanged so
    the generated data and printed summary stay identical:
      * a demand point's centre site may be one of the two NaN-capacity sites;
      * the printed "Avg Demand" averages over the injected -10 values too;
      * "Feasibility: Guaranteed." is printed unconditionally -- no
        feasibility check is performed here.
    """
    from pathlib import Path  # function-scope import keeps the block self-contained

    # Every np.random call below is made in a fixed order; reordering any of
    # them changes the generated data for a given seed.
    np.random.seed(seed)

    # === 1. Parameters (chosen so that supply is ample) ===
    NUM_SITES = 25
    NUM_DEMANDS = 40
    # Enlarged capacity: a single site should host at least 50 cars
    # (i.e. ~17 fast chargers -> ~2000 kW of capacity).
    MIN_GRID_CAP = 2500
    MAX_GRID_CAP = 6000
    # Constants behind the "Supports ~N cars" estimate in the summary.
    # Presumably 120 kW per fast charger and 3 cars served per charger, which
    # matches the sizing note above (50 cars ~ 17 chargers ~ 2000 kW) -- confirm.
    KW_PER_CHARGER = 120
    CARS_PER_CHARGER = 3

    # === 2. Candidate sites (supply) ===
    site_x = np.random.uniform(5, 95, NUM_SITES)
    site_y = np.random.uniform(5, 95, NUM_SITES)
    site_ids = [f"S{i+1:02d}" for i in range(NUM_SITES)]

    # Much higher grid capacity so that single-site supply exceeds single-point demand.
    grid_caps = np.random.randint(MIN_GRID_CAP, MAX_GRID_CAP, NUM_SITES).astype(float)

    # Inject a few NaN values (to exercise downstream cleaning).
    missing_idx = np.random.choice(NUM_SITES, 2, replace=False)
    grid_caps[missing_idx] = np.nan

    zone_types = np.random.choice(['Residential', 'Commercial'], NUM_SITES)
    land_costs = np.random.randint(100, 300, NUM_SITES)

    df_sites = pd.DataFrame({
        'Site_ID': site_ids,
        'X': site_x,
        'Y': site_y,
        'Zone_Type': zone_types,
        'Land_Cost': land_costs,
        'Grid_Capacity_kW': grid_caps
    })

    # === 3. Demand points -- satellite placement ===
    # Not fully random: each point is generated around a site to avoid isolated "islands".
    demand_x, demand_y = [], []

    for _ in range(NUM_DEMANDS):
        # Pick a random site as the centre.
        center_idx = np.random.randint(0, NUM_SITES)
        # Place the point 3-12 km from that site (inside a 15 km service radius).
        angle = np.random.uniform(0, 2*np.pi)
        dist = np.random.uniform(3, 12)

        # Sites lie in [5, 95], so clipping to [0, 100] can only pull the point
        # closer to its centre site, never further away.
        demand_x.append(np.clip(site_x[center_idx] + dist*np.cos(angle), 0, 100))
        demand_y.append(np.clip(site_y[center_idx] + dist*np.sin(angle), 0, 100))

    # Keep per-point traffic low (20-39) so it stays below a single site's supply.
    traffic = np.random.randint(20, 40, NUM_DEMANDS).astype(float)

    # Inject a few invalid readings.
    outlier_idx = np.random.choice(NUM_DEMANDS, 2, replace=False)
    traffic[outlier_idx] = -10

    df_demands = pd.DataFrame({
        'Demand_ID': [f"D{i+1:02d}" for i in range(NUM_DEMANDS)],
        'X': demand_x,
        'Y': demand_y,
        'Daily_Traffic': traffic
    })

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    df_sites.to_csv(out_path / 'City_Sites.csv', index=False)
    df_demands.to_csv(out_path / 'City_Demands.csv', index=False)

    avg_cap = np.nanmean(grid_caps)  # NaN-capacity sites are ignored in the average
    print("✅ [V7.1 Fix] 数据集已重置: 供需失衡与孤岛效应已修复。")
    print(f"   -> Avg Grid Cap: {avg_cap:.0f} kW (Supports ~{avg_cap/KW_PER_CHARGER*CARS_PER_CHARGER:.0f} cars)")
    # NOTE(review): this mean still includes the injected -10 readings.
    print(f"   -> Avg Demand: {np.mean(traffic):.0f} cars")
    print("   -> Feasibility: Guaranteed.")

# Entry point: regenerate the dataset files (same file names as the earlier generator)
# when this code runs as the main module.
if __name__ == "__main__":
    generate_feasible_greengrid_data()

✅ [V7.1 Fix] 数据集已重置: 供需失衡与孤岛效应已修复。
   -> Avg Grid Cap: 4390 kW (Supports ~110 cars)
   -> Avg Demand: 25 cars
   -> Feasibility: Guaranteed.
