In [21]:
import geopandas as gpd
import libpysal
from spreg import GM_Lag
import pandas as pd
import numpy as np
import time
import pandas as pd
from mgwr.gwr import MGWR
from mgwr.sel_bw import Sel_BW
from sklearn.preprocessing import StandardScaler

In [8]:
gdb_path = r"D:\008RA\gis_process\CN_City36\CN_City36_t2.gdb"
layer_name = "全国网格_绩效t7"

target = 'performance_score'

features = [
    'buildingCount', 'totalHeight', 'avgHeight', 'basePerimeterTotal',
    'basePerimeterAvg', 'compactness', 'footprintAreaTotal', 'totalArea',
    'avgBuildingArea', 'FAR', 'coverageRatio',
    'roadDensity', 'intersectionDensity', 'poiDensity', 'poiDiversity',
    'FVC', 'permeableRatio', 'maxHeight', 'minHeight', 'heightRange', 'heightIndex',
    'heightDensity', 'heightOtherness', 'cornerCountTotal', 'basePerimeterMax',
    'basePerimeterMin', 'shapeComplexity', 'areaVariance', 'parcelArea',
    'largestPatchIndex', 'shape3DIndex', 'evennessIndex', 'cornerCountAvg',
    'buildingProximity', 'buildingMinDist', 'buildingMaxDist',
    'buildingDistAvg', 'buildingDistVar', 'SVF', 'streetRatio'
]

In [9]:
try:
    gdf = gpd.read_file(gdb_path, layer=layer_name)
    print(f"[{time.strftime('%H:%M:%S')}] 数据读取成功，共 {len(gdf)} 条记录。")
    
except Exception as e:
    print("读取 GDB 失败。如果报错提示驱动问题，请尝试先用 ArcGIS 将图层导出为 Shapefile 再读取。")
    print(f"错误信息: {e}")
    exit()

# 检查是否有缺失值 (NaN)，空间计量模型对 NaN 非常敏感
print(f"[{time.strftime('%H:%M:%S')}] 正在检查并清理缺失值...")
original_len = len(gdf)
# 只检查我们用到的列
cols_to_check = [target] + features
gdf = gdf.dropna(subset=cols_to_check)
if len(gdf) < original_len:
    print(f"警告：删除了 {original_len - len(gdf)} 行含有缺失值的记录。")

[15:03:22] 数据读取成功，共 196770 条记录。
[15:03:22] 正在检查并清理缺失值...
警告：删除了 1829 行含有缺失值的记录。


In [10]:
gdf.shape

(194941, 56)

In [15]:
gdf.head()

Unnamed: 0,Global_ID,City_x,FID_1,Shape_Length,Shape_Area,City_y,NTL2023,NTL2023_focal,VIT202311,UHIDAY2020_07_inv,...,buildingDistAvg,buildingDistVar,cluster_label,log_NTL2023_focal,log_VIT202311,distance_to_surface,performance_score,geometry,centroid_x,centroid_y
0,0.0,Beijing,0.0,2000.0,250000.0,Beijing,6.185,7.03,1649.857143,0.472765,...,268.089122,2565.568805,5.0,0.355727,0.665412,0.211892,0.754223,"MULTIPOLYGON (((12945906.677 4793764.368, 1294...",12946160.0,4794014.0
1,1.0,Beijing,1.0,2000.0,250000.0,Beijing,5.74,7.226111,155.857143,0.472765,...,300.087841,2010.844167,5.0,0.359848,0.454023,0.353838,0.589307,"MULTIPOLYGON (((12946406.677 4793764.368, 1294...",12946660.0,4794014.0
2,2.0,Beijing,2.0,2000.0,250000.0,Beijing,11.32,13.616667,57.142857,0.472765,...,283.843343,2044.728701,5.0,0.45801,0.364892,0.401524,0.533904,"MULTIPOLYGON (((12945906.677 4794264.368, 1294...",12946160.0,4794514.0
3,3.0,Beijing,3.0,2000.0,250000.0,Beijing,11.73,14.361667,243.571429,0.472765,...,232.312568,1896.14338,5.0,0.466499,0.493915,0.305031,0.646011,"MULTIPOLYGON (((12946406.677 4794264.368, 1294...",12946660.0,4794514.0
4,4.0,Beijing,4.0,2000.0,250000.0,Beijing,8.45,9.19,0.0,0.472765,...,0.0,0.0,1.0,0.396406,0.0,0.608873,0.293002,"MULTIPOLYGON (((12948906.677 4794264.368, 1294...",12949160.0,4794514.0


In [22]:
# CRITICAL: Replace 'City' with the actual column name in your GDB that identifies cities
# e.g., 'city_name', 'CITY_CN', or 'adcode' (if using codes like 330100 for Hangzhou)
hangzhou_gdf = gdf[gdf['City_x'] == 'Hangzhou'].copy()

# Drop NaNs to prevent errors
hangzhou_gdf = hangzhou_gdf.dropna(subset=[target] + features)

# --- 2. Prepare Data for MGWR ---
# MGWR requires inputs as numpy arrays, not DataFrames

# Coordinates (X, Y)
coords = list(zip(hangzhou_gdf.geometry.centroid.x, hangzhou_gdf.geometry.centroid.y))
coords = np.array(coords)

# Variables
X = hangzhou_gdf[features].values
y = hangzhou_gdf[target].values.reshape((-1, 1)) # Reshape is required for target

# Standardization (Highly recommended for GWR/MGWR to ensure convergence)
scaler = StandardScaler()
X_std = scaler.fit_transform(X)
y_std = scaler.fit_transform(y)

# --- 3. Run MGWR ---
print("Selecting optimal bandwidths (this may take time)...")

# 'multi=True' enables Multi-scale (different bandwidth for each variable)
selector = Sel_BW(coords, y_std, X_std, multi=True)
bw = selector.search(verbose=True)

print("Bandwidths selected:", bw)

print("Fitting MGWR model...")
model = MGWR(coords, y_std, X_std, selector, fixed=False, kernel='bisquare')
results = model.fit()

# --- 4. Process Results ---
results.summary()

# Save local parameter estimates back to GeoDataFrame
# The order of columns in 'results.params' matches your 'features' list + intercept (if constant=True)
# Note: mgwr adds the Intercept as the last column by default if not manually handled, 
# but usually, we map the variables we care about.

# Example: Mapping the coefficient of the first feature
hangzhou_gdf['coef_' + features[0]] = results.params[:, 0]
hangzhou_gdf.plot(column='coef_' + features[0], legend=True, cmap='RdBu')

Selecting optimal bandwidths (this may take time)...


LinAlgError: Matrix is singular.