In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import mean_squared_error, r2_score

print("sklearn version:", sklearn.__version__)
print("mean_squared_error location:", mean_squared_error.__module__)

import matplotlib.pyplot as plt

# ================== 数据读取 ==================
data_path = r"E:\Dissertation\CASA0004\merged_no_geom_unique_with_correct_homo_manual_clean_noNA_normaldens_forXGBOOST.csv"
df = pd.read_csv(data_path)

# ================== 特征选择 ==================
features = [
    "pop_density_x",
    "estrato_1", "estrato_2", "estrato_3", "estrato_4", "estrato_5", "estrato_6",
    "street_total_length", "street_ratio_CL", "street_ratio_KR", "street_ratio_AC",
    "fence_ratio", "wall_ratio", "road_ratio", "sidewalk_ratio",
    "building_ratio", "vegetation_ratio", "sky_ratio",
    "person_count", "motorcyclist_count", "street_light_count"
]

target = "murder_rate_per_1000"

# 取自变量和因变量
X = df[features]
y = df[target]

# ================== 训练/测试集划分 ==================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ================== 训练 XGBoost ==================
model = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

model.fit(X_train, y_train)

# ================== 预测与评估 ==================
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)

print(f"测试集 RMSE: {rmse:.4f}")
print(f"测试集 R²:   {r2:.4f}")

# ================== 特征重要性可视化 ==================
plt.figure(figsize=(10, 6))
xgb.plot_importance(model, importance_type="gain", height=0.5, max_num_features=15)
plt.title("XGBoost 特征重要性（基于 Gain）")
plt.show()


sklearn version: 1.7.1
mean_squared_error location: sklearn.metrics._regression


TypeError: got an unexpected keyword argument 'squared'

In [9]:
print("mean_squared_error location:", mean_squared_error.__module__)

mean_squared_error location: sklearn.metrics._regression


In [7]:
print(sklearn.__version__)

1.7.0


In [10]:
import inspect
print(inspect.getsourcefile(mean_squared_error))

c:\Users\wengqc\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\_param_validation.py


In [11]:
import inspect
from sklearn.metrics import mean_squared_error

print(inspect.signature(mean_squared_error))


(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average')
