In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
from mgwr.gwr import GWR
from sklearn.preprocessing import StandardScaler

# 1. Load the listings and keep only rows with a strictly positive price.
df = pd.read_csv("UK_airbnb.csv")
df = df[df['price'] > 0]
# Build the point geometries from longitude/latitude in one vectorised call
# (positional, so the filtered, non-contiguous index of `df` does not matter),
# then reproject from EPSG:4326 to EPSG:27700 so coordinates are planar.
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326').to_crs(epsg=27700)

# 2. Keep only the model variables and drop rows with any missing value.
fields = ['price', 'reviews_per_month', 'availability_365', 'calculated_host_listings_count']
gdf_ols = gdf[fields + ['geometry']].dropna().copy()

# 3. Draw a reproducible sample of (at most) 1000 rows.
# Capping n at the number of available rows keeps DataFrame.sample from
# raising ValueError when fewer than 1000 complete rows remain after cleaning.
gdf_sample = gdf_ols.sample(n=min(1000, len(gdf_ols)), random_state=42)

# 4. Build the model inputs.
y = gdf_sample[['price']].values  # double brackets -> 2-D (n, 1) response column
X = gdf_sample[['reviews_per_month', 'availability_365', 'calculated_host_listings_count']].values
# (n, 2) array of projected x/y coordinates, taken straight from the GeoSeries.
coords = np.column_stack([gdf_sample.geometry.x, gdf_sample.geometry.y])

# 5. Standardise the predictors (zero mean, unit variance per column).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6. Fit GWR with a manually chosen bandwidth.
# NOTE(review): with mgwr's default kernel settings the bandwidth is presumably
# a nearest-neighbour count rather than a distance -- confirm against the docs.
gwr_model = GWR(coords, y, X_scaled, bw=150)
gwr_results = gwr_model.fit()

# 7. Show the summary.
# summary() prints the report itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
gwr_results.summary()


Model type                                                         Gaussian
Number of observations:                                                1000
Number of covariates:                                                     4

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                       49319216.142
Log-likelihood:                                                   -6821.973
AIC:                                                              13651.946
AICc:                                                             13654.006
BIC:                                                           49312336.018
R2:                                                                   0.093
Adj. R2:                                                              0.090

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [3]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
from mgwr.gwr import GWR
from mgwr.sel_bw import Sel_BW
from sklearn.preprocessing import StandardScaler

# Load the listings and keep only rows with a strictly positive price.
df = pd.read_csv("UK_airbnb.csv")
df = df[df['price'] > 0]
# Build the point geometries from longitude/latitude in one vectorised call
# (positional, so the filtered, non-contiguous index of `df` does not matter),
# then reproject from EPSG:4326 to EPSG:27700 so coordinates are planar.
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326').to_crs(epsg=27700)

# Keep only the model variables and drop rows with any missing value.
fields = ['price', 'reviews_per_month', 'availability_365', 'calculated_host_listings_count']
gdf_ols = gdf[fields + ['geometry']].dropna().copy()

# Draw a reproducible sample of (at most) 1000 records.
# Capping n at the number of available rows keeps DataFrame.sample from
# raising ValueError when fewer than 1000 complete rows remain after cleaning.
gdf_sample = gdf_ols.sample(n=min(1000, len(gdf_ols)), random_state=42)

# Prepare the model inputs.
y = gdf_sample[['price']].values  # double brackets -> 2-D (n, 1) response column
X = gdf_sample[['reviews_per_month', 'availability_365', 'calculated_host_listings_count']].values
# (n, 2) array of projected x/y coordinates, taken straight from the GeoSeries.
coords = np.column_stack([gdf_sample.geometry.x, gdf_sample.geometry.y])

# Standardise the predictors (zero mean, unit variance per column).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Let mgwr choose the bandwidth using Sel_BW's default search settings.
selector = Sel_BW(coords, y, X_scaled)
bw_optimal = selector.search()
print("optimal bandwidth：", bw_optimal)

# Fit the GWR model with the selected bandwidth.
gwr_model = GWR(coords, y, X_scaled, bw=bw_optimal)
gwr_results = gwr_model.fit()

# Show the summary.
# summary() prints the report itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
gwr_results.summary()


optimal bandwidth： 162.0
Model type                                                         Gaussian
Number of observations:                                                1000
Number of covariates:                                                     4

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                       49319216.142
Log-likelihood:                                                   -6821.973
AIC:                                                              13651.946
AICc:                                                             13654.006
BIC:                                                           49312336.018
R2:                                                                   0.093
Adj. R2:                                                              0.090

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---