# In [None]:  (Jupyter notebook cell marker left over from the export)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MaxNLocator, MultipleLocator
import warnings
warnings.filterwarnings('ignore')

# ==============================
# 1. Configure the JAG journal figure style
# ==============================

# Fonts: sans-serif body text (Arial first, DejaVu Sans as fallback)
# and the STIX font set for math text.
mpl.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'mathtext.fontset': 'stix',
})

# Figure size is specified in centimetres; matplotlib expects inches
# (1 inch = 2.54 cm).
fig_width_cm = 18
fig_height_cm = 10
fig_width_inch, fig_height_inch = (
    size_cm / 2.54 for size_cm in (fig_width_cm, fig_height_cm)
)

# Line weights, marker size, font sizes and default figure resolution.
mpl.rcParams.update({
    'axes.linewidth': 1.0,
    'lines.linewidth': 2.0,
    'lines.markersize': 6,
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'xtick.labelsize': 9,
    'ytick.labelsize': 10,
    'legend.fontsize': 9,
    'figure.dpi': 300,
})

# ==============================
# 2. Load the data
# ==============================

print("Loading global vs local vs transfer data...")
csv_path = '/content/drive/MyDrive/global_vs_local_vs_transfer.csv'
df = pd.read_csv(csv_path)

# Quick overview of what was loaded: dimensions, column names, first rows.
data_shape = df.shape
column_names = df.columns.tolist()
print(f"Data shape: {data_shape}")
print(f"Columns: {column_names}")
print(f"\nFirst 5 rows:")
print(df.head())

# ==============================
# 3. Define the adjustment factors
# ==============================

# Two previously supplied sets of UA_original values (35 entries each).
ua_orig_naive = np.array([
    0.922, 0.900, 0.923, 0.880, 0.903, 0.921, 0.969, 0.957, 0.988, 0.951,
    0.946, 0.963, 0.964, 0.969, 0.969, 0.958, 0.972, 0.933, 0.972, 0.962,
    0.953, 0.917, 0.954, 0.977, 0.929, 0.918, 0.912, 0.943, 0.909, 0.963,
    0.908, 0.902, 0.899, 0.970, 0.971
])

ua_orig_lobo = np.array([
    0.756, 0.743, 0.774, 0.865, 0.797, 0.662, 0.610, 0.808, 0.697, 0.603,
    0.962, 0.798, 0.876, 0.947, 0.773, 0.777, 0.963, 0.961, 0.971, 0.901,
    0.879, 0.897, 0.949, 0.921, 0.918, 0.929, 0.787, 0.903, 0.854, 0.857,
    0.597, 0.775, 0.743, 0.736, 0.805
])

# Spatial LOBO adjustment factor: element-wise ratio of the LOBO values
# to the naive values.
spatial_adjustment_factors = np.divide(ua_orig_lobo, ua_orig_naive)

# Summarise the factors before they are applied.
factor_count = len(spatial_adjustment_factors)
factor_mean = np.mean(spatial_adjustment_factors)
factor_low = np.min(spatial_adjustment_factors)
factor_high = np.max(spatial_adjustment_factors)

print(f"\nSpatial LOBO adjustment factors calculated:")
print(f"  Number of factors: {factor_count}")
print(f"  Mean factor: {factor_mean:.4f}")
print(f"  Range: [{factor_low:.4f}, {factor_high:.4f}]")

# ==============================
# 4. Apply the spatial LOBO adjustment
# ==============================

print("\nApplying spatial LOBO adjustments to all models...")

# The factors are matched to the table purely by row position, so the table
# must contain exactly one row per factor. Check this up front and fail with
# an explicit message rather than an opaque length-mismatch error later on.
n_factors = len(spatial_adjustment_factors)
if len(df) != n_factors:
    raise ValueError(
        f"Expected {n_factors} rows (one per ecoregion adjustment factor), "
        f"but the loaded data has {len(df)} rows."
    )

# Ecoregion identifiers E01, E02, ...
# Assumes the row order of the data matches the order of the factors
# -- TODO confirm against the source table.
ecoregion_nums = [f"E{i+1:02d}" for i in range(n_factors)]

# Tag every row with its ecoregion identifier.
df['ecoregion'] = ecoregion_nums

# Columns to adjust: every metric of every model.
metrics_to_adjust = ['OA', 'F1', 'PA', 'UA']
models = ['national', 'ECF-TST', 'transfer']

# Verify all input columns exist before touching the frame, so a missing
# column cannot leave `df` half-adjusted.
missing_cols = [
    f"{metric}_{model}"
    for model in models
    for metric in metrics_to_adjust
    if f"{metric}_{model}" not in df.columns
]
if missing_cols:
    raise KeyError(f"Missing expected columns in input data: {missing_cols}")

# NOTE(review): the same per-row factor (computed from the UA_original
# arrays) is applied to OA, F1, PA and UA of every model -- confirm this is
# the intended methodology.
for model in models:
    for metric in metrics_to_adjust:
        original_col = f"{metric}_{model}"
        adjusted_col = f"{metric}_{model}_LOBO"

        # Row-wise scaling by the adjustment factor.
        df[adjusted_col] = df[original_col].values * spatial_adjustment_factors

# ==============================
# 5. Create the model performance difference figure (optimized version)
# ==============================

print("\nCreating optimized model differences figure...")

# Colour scheme
color_ecf = '#E69F00'            # orange: ECF-TST model
color_transfer = '#56B4E9'       # blue: transfer model
line_color = '#666666'           # grey lines
grid_color = '#DDDDDD'           # light grey grid
zero_line_color = '#FF4444'      # red zero line, to make it stand out

# 2 x 2 grid of panels with explicit spacing between them.
fig, axes = plt.subplots(nrows=2, ncols=2,
                         figsize=(fig_width_inch, fig_height_inch))
fig.subplots_adjust(hspace=0.35, wspace=0.25)

# Order the rows by ecoregion identifier and derive the bar x positions.
df_sorted = df.sort_values(by='ecoregion').reset_index(drop=True)
ecoregions_sorted = df_sorted['ecoregion'].tolist()
x_positions = np.arange(len(ecoregions_sorted))

# Panel letters: (a), (b), (c), (d)
subplot_labels = [f'({letter})' for letter in 'abcd']

# Draw one subplot per metric
for idx, (metric, ax) in enumerate(zip(['OA', 'F1', 'PA', 'UA'], axes.flat)):
    # Differences of each model relative to the national model
    diff_ecf_national = df_sorted[f"{metric}_ECF-TST_LOBO"].values - df_sorted[f"{metric}_national_LOBO"].values
    diff_transfer_national = df_sorted[f"{metric}_transfer_LOBO"].values - df_sorted[f"{metric}_national_LOBO"].values

    # Data range across both series, used to size the y axis
    all_diffs = np.concatenate([diff_ecf_national, diff_transfer_national])
    y_min = np.min(all_diffs)
    y_max = np.max(all_diffs)

    # Pad the y range by 15% on each side (fixed 0.1 if the data is constant)
    y_range = y_max - y_min
    y_margin = y_range * 0.15 if y_range > 0 else 0.1
    y_min_adj = y_min - y_margin
    y_max_adj = y_max + y_margin

    # Make sure zero lies inside the y range so the reference line is visible
    if y_min_adj > 0:
        y_min_adj = -0.01  # all values positive: extend slightly below zero
    if y_max_adj < 0:
        y_max_adj = 0.01   # all values negative: extend slightly above zero

    # Bar width; the two series sit side by side around each x position
    bar_width = 0.35

    # Bars use an edge in the same colour as the fill
    bars_ecf_diff = ax.bar(x_positions - bar_width/2, diff_ecf_national,
                          width=bar_width, color=color_ecf, alpha=0.8,
                          label='ECF-TST - National',
                          edgecolor=color_ecf, linewidth=0.5)

    bars_transfer_diff = ax.bar(x_positions + bar_width/2, diff_transfer_national,
                               width=bar_width, color=color_transfer, alpha=0.8,
                               label='Transfer - National',
                               edgecolor=color_transfer, linewidth=0.5)

    # Axis labels
    ax.set_xlabel('Ecoregion', fontsize=10, labelpad=8)
    ax.set_ylabel(f'{metric} Difference', fontsize=10, labelpad=8)

    # Horizontal grid lines only
    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5, axis='y', color=grid_color)

    # X ticks: label every 5th ecoregion to keep the axis readable
    show_indices = list(range(0, len(ecoregions_sorted), 5))
    ax.set_xticks(show_indices)
    ax.set_xticklabels([ecoregions_sorted[i] for i in show_indices], rotation=45, fontsize=9)

    # Apply the computed y range
    ax.set_ylim(y_min_adj, y_max_adj)

    # Choose the y tick spacing from the size of the displayed range
    y_range_adj = y_max_adj - y_min_adj

    if y_range_adj < 0.1:  # very small range
        ax.yaxis.set_major_locator(MultipleLocator(0.05))  # tick every 0.05
    elif y_range_adj < 0.5:  # medium range
        ax.yaxis.set_major_locator(MultipleLocator(0.1))   # tick every 0.1
    elif y_range_adj < 1.0:  # larger range
        ax.yaxis.set_major_locator(MaxNLocator(6)) if False else ax.yaxis.set_major_locator(MultipleLocator(0.2))   # tick every 0.2
    else:  # large range
        ax.yaxis.set_major_locator(MaxNLocator(6))         # at most 6 ticks

    # Make sure a tick at zero exists. Tick positions are floats, so compare
    # with a tolerance rather than exact membership.
    y_ticks = ax.get_yticks()
    if not np.any(np.isclose(y_ticks, 0.0, atol=1e-9)):
        ax.set_yticks(sorted([*y_ticks, 0.0]))
        # set_yticks may widen the view limits to show every tick it was
        # given; restore the range computed above.
        ax.set_ylim(y_min_adj, y_max_adj)

    # Y tick label size
    ax.tick_params(axis='y', labelsize=9)

    # Zero reference line in a prominent colour (zorder=0 keeps it behind the bars)
    ax.axhline(y=0, color=zero_line_color, linestyle='-', linewidth=1.5, alpha=0.9, zorder=0)

    # Panel letter (a)-(d) in the top-left corner; the bbox is fully transparent
    ax.text(0.02, 0.98, subplot_labels[idx], transform=ax.transAxes,
           fontsize=10,  va='top', ha='left',
           bbox=dict(boxstyle='round', facecolor='white', alpha=0,
                   edgecolor=grid_color, linewidth=0))

# Add a shared legend at the bottom centre of the figure, using the
# handles of the first panel (all panels share the same two series).
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=2,
           fontsize=9, framealpha=0.95, frameon=True,
           fancybox=True, edgecolor=grid_color, facecolor='white',
           bbox_to_anchor=(0.5, 0.01))

# Adjust the layout, reserving space at the bottom for the legend
plt.tight_layout(rect=[0, 0.05, 1, 0.98])



# Save the figure. The format must agree with the '.tif' extension:
# writing PNG data under a '.tif' name produces a mislabeled file that
# TIFF readers may reject.
output_path = '/content/fig_model_differences_optimized_yaxis.tif'
plt.savefig(output_path, dpi=500, bbox_inches='tight',
           facecolor='white', edgecolor='none', format='tiff')
print(f"\nOptimized model differences figure saved to: {output_path}")

# Print the key summary statistics
banner = "=" * 60
print("\n" + banner)
print("SUMMARY STATISTICS (LOBO-Adjusted Differences)")
print(banner)

for metric in ('OA', 'F1', 'PA', 'UA'):
    # Differences of each model relative to the national model, keyed by
    # the label used in the printout.
    national_vals = df_sorted[f"{metric}_national_LOBO"].values
    diffs_by_label = {
        'ECF-TST': df_sorted[f"{metric}_ECF-TST_LOBO"].values - national_vals,
        'Transfer': df_sorted[f"{metric}_transfer_LOBO"].values - national_vals,
    }

    # Mean, standard deviation and range of each difference series
    print(f"\n{metric} Differences:")
    for label, diff in diffs_by_label.items():
        print(f"  {label} - National: Mean = {np.mean(diff):.4f}, Std = {np.std(diff):.4f}, "
              f"Range = [{np.min(diff):.4f}, {np.max(diff):.4f}]")

    # Share of rows where the model beats the national model
    total = len(national_vals)
    positive_parts = []
    for label, diff in diffs_by_label.items():
        n_positive = np.sum(diff > 0)
        positive_parts.append(
            f"{label} = {n_positive}/{total} ({n_positive/total*100:.1f}%)"
        )
    print("  Positive differences: " + ", ".join(positive_parts))

plt.show()

