In [None]:
# 空间分析笔记本

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# 导入自定义模块
import sys
sys.path.append('../')
from src.data.data_loader import DataLoader
from src.models.spatial_model import SpatialModel
from src.visualization.plot_utils import VisualizationUtils

# 1. Data preparation
print("=== 空间分析 ===")

# Load the panel data through the project loader.
loader = DataLoader()
panel_data = loader.load_panel_data()

# Keep only the 2023 cross-section; copy so later edits never touch panel_data.
data_2023 = panel_data[panel_data['年份'] == 2023].copy()

# City coordinates. The original author marks these as placeholder values that
# should be replaced with authoritative coordinates in a real application.
# NOTE(review): pairs look like [longitude, latitude] -- confirm before reuse.
city_coordinates = {
    '香港': [114.1694, 22.3193],
    '澳门': [113.5491, 22.1987],
    '广州': [113.2644, 23.1291],
    '深圳': [114.0579, 22.5431],
    '珠海': [113.5767, 22.2707],
    '佛山': [113.1224, 23.0215],
    '惠州': [114.4162, 23.1118],
    '东莞': [113.7518, 23.0207],
    '中山': [113.3928, 22.5176],
    '江门': [113.0819, 22.5787],
    '肇庆': [112.4650, 23.0471],
}

# Build the coordinate array and the matching label list, in the order the
# cities first appear in the 2023 data. Row i of `coordinates` always belongs
# to city_labels[i].
available_cities = data_2023['城市'].unique()
city_labels = [city for city in available_cities if city in city_coordinates]

# Cities without a known coordinate cannot take part in the spatial analysis;
# report them instead of dropping them silently.
skipped_cities = [
    city for city in available_cities if city not in city_coordinates
]
if skipped_cities:
    print(f"警告: 以下城市缺少坐标，已从空间分析中排除: {skipped_cities}")

# reshape(-1, 2) keeps a well-formed (0, 2) array even when no city matched.
coordinates = np.array(
    [city_coordinates[city] for city in city_labels], dtype=float
).reshape(-1, 2)

print(f"城市数量: {len(coordinates)}")
print(f"坐标形状: {coordinates.shape}")

# 2. Initialise the spatial model
print("\n=== 空间模型初始化 ===")
spatial_model = SpatialModel()

# 3. Build the spatial weights matrix from the city coordinates
#    (k-nearest-neighbours method with k=3).
print("\n1. 创建空间权重矩阵...")
knn_settings = {'method': 'knn', 'k': 3}
weights_matrix = spatial_model.create_weights_matrix(coordinates, **knn_settings)

# Only report neighbour statistics when a usable weights object came back.
if weights_matrix:
    print("权重矩阵创建成功")
    neighbor_counts = list(dict(weights_matrix.cardinalities).values())
    print(f"邻居数量统计: {neighbor_counts}")

# 4. Spatial autocorrelation analysis
print("\n2. 空间自相关分析...")

# Variables to test for global spatial autocorrelation.
analysis_vars = ['GDP_亿元', '跨境数据传输总量_TB', '研发经费投入_亿元']

for var in analysis_vars:
    if var not in data_2023.columns:
        continue

    print(f"\n分析变量: {var}")

    # Collect one value per city, in city_labels order, so that position i
    # lines up with row i of `coordinates`. Cities with no row get NaN.
    var_data = []
    for city in city_labels:
        city_value = data_2023[data_2023['城市'] == city][var].values
        var_data.append(city_value[0] if len(city_value) > 0 else np.nan)
    var_data = np.array(var_data)

    # Drop missing values; at least 4 valid observations are required.
    valid_indices = ~np.isnan(var_data)
    if valid_indices.sum() <= 3:
        print(f"  有效数据不足")
        continue

    valid_data = var_data[valid_indices]

    if valid_indices.all():
        # Nothing was dropped: the model's full weights matrix matches the data.
        valid_weights = spatial_model.weights_matrix
    else:
        # Some cities were dropped, so the full weights matrix no longer has
        # the same number of observations as valid_data. Build weights for the
        # remaining cities only. A throwaway SpatialModel is used so the
        # weights held by `spatial_model` are not replaced (assumes
        # create_weights_matrix may store its result on the instance).
        print(f"  存在缺失值，使用{int(valid_indices.sum())}个有效城市重建权重矩阵")
        valid_weights = SpatialModel().create_weights_matrix(
            coordinates[valid_indices], method='knn', k=3
        )

    if not valid_weights:
        print("  计算失败: 空间权重矩阵不可用")
        continue

    # Global Moran's I
    moran_results = spatial_model.calculate_moran_i(valid_data, valid_weights)

    if 'error' not in moran_results:
        print(f"  全局莫兰指数 I = {moran_results['I']:.3f}")
        print(f"  Z-score = {moran_results['z_norm']:.3f}")
        print(f"  P-value = {moran_results['p_norm']:.3f}")
        print(f"  是否显著: {moran_results['significant']}")
    else:
        print(f"  计算失败: {moran_results['error']}")

# 5. Spatial regression analysis
print("\n3. 空间回归分析...")

# Model specification: GDP is the dependent variable.
y_var = 'GDP_亿元'
x_vars = ['跨境数据传输总量_TB', '研发经费投入_亿元', '5G基站数量']

# Keep only the regressors that are actually present in the data.
valid_x_vars = [var for var in x_vars if var in data_2023.columns]

if y_var in data_2023.columns and len(valid_x_vars) > 0:
    print(f"因变量: {y_var}")
    print(f"自变量: {valid_x_vars}")

    # Collect complete observations in city_labels order. valid_positions
    # remembers which row of `coordinates` each kept observation belongs to.
    y_data = []
    X_data = []
    valid_cities = []
    valid_positions = []

    for position, city in enumerate(city_labels):
        city_data = data_2023[data_2023['城市'] == city]
        if city_data.empty:
            continue

        y_value = city_data[y_var].values[0]
        x_values = [city_data[var].values[0] for var in valid_x_vars]

        # Skip any city with a missing dependent or independent value.
        if np.isnan(y_value) or any(np.isnan(x) for x in x_values):
            continue

        y_data.append(y_value)
        X_data.append(x_values)
        valid_cities.append(city)
        valid_positions.append(position)

    if len(y_data) > 5:  # at least 6 observations are required
        y = np.array(y_data)
        X = np.array(X_data)

        print(f"有效观测值: {len(y)}个")

        if len(valid_cities) == len(city_labels):
            # Every city was kept: the existing weights match y and X.
            regression_weights = weights_matrix
        else:
            # Some cities were dropped, so the full weights matrix would have
            # more observations than y/X. Build weights for the kept cities
            # only, on a throwaway SpatialModel so the weights held by
            # `spatial_model` are not replaced (assumes create_weights_matrix
            # may store its result on the instance).
            print(f"存在缺失值，使用{len(valid_cities)}个有效城市重建权重矩阵")
            regression_weights = SpatialModel().create_weights_matrix(
                coordinates[valid_positions], method='knn', k=3
            )

        # Fit the spatial lag model (SLM)
        print("\n拟合空间滞后模型(SLM)...")
        slm_results = spatial_model.fit_spatial_lag_model(y, X, regression_weights)

        if 'error' not in slm_results:
            print(f"  R平方: {slm_results['r2']:.3f}")
            print(f"  MSE: {slm_results['mse']:.3f}")

            # Coefficients
            # NOTE(review): assumes coefficients are ordered like valid_x_vars
            # with the spatial lag term last -- confirm against SpatialModel.
            print("  系数:")
            for i, var in enumerate(valid_x_vars):
                print(f"    {var}: {slm_results['coefficients'][i]:.3f}")
            print(f"    空间滞后项: {slm_results['coefficients'][-1]:.3f}")
        else:
            print(f"  SLM拟合失败: {slm_results['error']}")

        # Fit the remaining spatial models
        print("\n拟合其他空间模型...")
        all_results = spatial_model.perform_spatial_regression(y, X, regression_weights)

        for model_name, results in all_results.items():
            if 'error' not in results:
                print(f"  {model_name}: R平方 = {results.get('r2', 'N/A')}")
    else:
        print("有效数据不足进行回归分析")

# 6. Visualisation
import os

print("\n4. 可视化空间分析结果...")
viz = VisualizationUtils()

# Make sure the figure directory exists before anything is written to it.
# NOTE(review): harmless if plot_spatial_distribution already creates it.
os.makedirs('../outputs/figures', exist_ok=True)

# Spatial distribution maps, limited to the first two analysis variables.
for var in analysis_vars[:2]:
    if var in data_2023.columns:
        viz.plot_spatial_distribution(
            data_2023,
            value_col=var,
            year=2023,
            title=f'2023年{var}空间分布',
            save_path=f'../outputs/figures/spatial_{var}.png'
        )

# 7. Model summary
import os

print("\n5. 获取模型摘要...")
model_summary = spatial_model.get_model_summary()

if not model_summary.empty:
    print("\n空间模型摘要:")
    print(model_summary.to_string(index=False))

    # Save the summary. DataFrame.to_csv does not create missing parent
    # directories, so create the output directory first.
    os.makedirs('../outputs/tables', exist_ok=True)
    model_summary.to_csv('../outputs/tables/spatial_models_summary.csv', index=False)
    print("模型摘要已保存至: ../outputs/tables/spatial_models_summary.csv")

# 8. Save the spatial analysis results
import os

print("\n=== 保存结果 ===")

# DataFrame.to_csv does not create missing parent directories.
os.makedirs('../outputs/tables', exist_ok=True)

# Number of tables written, so the final report reflects what was produced
# instead of a hard-coded figure.
saved_table_count = 0

# The model summary table is written by the summary step only when the
# summary is non-empty; count it under the same condition.
if not model_summary.empty:
    saved_table_count += 1

# Save the Moran's I results (the 'global' entry of the model's results).
if spatial_model.moran_results:
    moran_df = pd.DataFrame([spatial_model.moran_results.get('global', {})])
    moran_df.to_csv('../outputs/tables/moran_results.csv', index=False)
    saved_table_count += 1
    print("莫兰指数结果已保存至: ../outputs/tables/moran_results.csv")

# Save one summary row per successfully fitted spatial regression model.
spatial_results = []
for model_name, results in spatial_model.models.items():
    if 'error' not in results:
        result_summary = {
            '模型': model_name,
            '类型': results.get('model_type', '未知'),
            'R平方': results.get('r2', np.nan),
            'MSE': results.get('mse', np.nan),
            '样本数': results.get('nobs', 0),
            '变量数': results.get('nvar', 0)
        }
        spatial_results.append(result_summary)

if spatial_results:
    spatial_df = pd.DataFrame(spatial_results)
    spatial_df.to_csv('../outputs/tables/spatial_regression_results.csv', index=False)
    saved_table_count += 1
    print("空间回归结果已保存至: ../outputs/tables/spatial_regression_results.csv")

# Figures are only requested for the first two analysis variables that exist
# as columns in the 2023 data.
figure_count = sum(1 for name in analysis_vars[:2] if name in data_2023.columns)

print("\n=== 空间分析完成 ===")
print(f"生成图表数量: {figure_count}张")
print(f"生成表格数量: {saved_table_count}个")
print(f"所有输出已保存至 ../outputs/ 目录")