# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
from scipy import stats
import re
import os

# ================== Tunable parameters ==================
# Font sizes
# NOTE(review): the plotting code visible in this file passes literal font
# sizes (20-26) and does not reference these five constants.
TITLE_FONTSIZE = 18       # main title font size
AXIS_LABEL_FONTSIZE = 16  # axis label font size
TICK_LABEL_FONTSIZE = 14  # tick label font size
LEGEND_FONTSIZE = 20      # legend font size
PIE_FONTSIZE = 12         # pie chart font size

# Line widths
LINE_WIDTH = 4.0          # width of the main data lines
TRENDLINE_WIDTH = 3.5     # width of the fitted trend lines
BAR_EDGEWIDTH = 1.0       # width of the bar outlines

# Figure sizes
# NOTE(review): the figures in the visible code are created with literal
# figsize=(22, 10) and figsize=(20, 10); these two constants are not referenced.
COMBO_FIGSIZE = (22, 9)   # combined (pie + bar) figure size
TS_FIGSIZE = (16, 8)      # time-series figure size

# DPI
DPI_VALUE = 500           # resolution used when saving figures

# Colours (shared by the pie chart and the stacked bars so both agree)
COLORS = {
    'Both': '#2E86AB',          # dark blue
    'Hansen only': '#A23B72',   # magenta-purple
    'ECF-TST only': '#F18F01',  # orange
    'Neither': 'white',         # white
    'Hansen line': '#2E86AB',   # Hansen time-series line colour
    'ECF-TST line': '#F18F01'   # ECF-TST time-series line colour
}

# Analysis periods (start year, end year), with the background colour and
# display name of each period at the same index.
PERIODS = [(2001, 2008), (2008, 2016), (2016, 2020)]
PERIOD_COLORS = ['#FFE5D9', '#D4EDDA', '#D6EAF8']  # period background colours
PERIOD_NAMES = ['2001-2008', '2008-2016', '2016-2020']

# Apply the seaborn style FIRST: seaborn's style presets carry their own
# 'font.sans-serif' entry, so calling set_style after the assignments below
# would overwrite the journal font list chosen here.
sns.set_style("whitegrid")

# Fonts and line weights commonly used for journal figures
plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica', 'DejaVu Sans']
plt.rcParams['font.size'] = 10
plt.rcParams['axes.linewidth'] = 0.5
plt.rcParams['xtick.major.width'] = 0.5
plt.rcParams['ytick.major.width'] = 0.5
plt.rcParams['lines.linewidth'] = 1.5

# ================== Data loading and preparation ==================
# 1. Read the input tables
annual_data = pd.read_csv('/content/drive/MyDrive/Forest_Comparison_Paper/annual_loss_statistics.csv')
ecoregion_data = pd.read_csv('/content/drive/MyDrive/Forest_Comparison_Paper/forest_loss_statistics_eco_regions_corrected2.csv')

# 2. Map the ecoregion names found in the table to standard ecoregion codes
ecoregion_mapping = {
    'A01': 'I01', 'A02': 'I02', 'A03': 'I03', 'A04': 'I04', 'A05': 'I05',
    'A07': 'I07', 'I-08': 'I08', 'I-10': 'I10', 'A11': 'I11', 'I-12': 'I12',
    'I-14': 'I14', 'A15': 'I15', 'I-16': 'I16', 'I-17': 'I17', 'A18': 'I18',
    'I-19': 'I19', 'I-20': 'I20', 'I-21': 'I21', 'A22': 'I22', 'A23': 'I23',
    'A24': 'I24', 'I-25': 'I25', 'A26': 'I26', 'A27': 'I27', 'I-28': 'I28',
    'I-29': 'I29', 'A30': 'I30', 'A31': 'I31', 'A32': 'I32', 'A33': 'I33',
    'II-01': 'II01', 'II-05': 'II05', 'II-07': 'II07',
    'III-07': 'III07', 'III-09': 'III09'
}

ecoregion_data['ecoregion_code'] = ecoregion_data['eco_region_name'].map(ecoregion_mapping)

# Series.map leaves NaN for every name that is not a key of the dict. Report
# those rows explicitly instead of letting them flow on silently as NaN codes.
_unmapped_mask = ecoregion_data['ecoregion_code'].isna()
if _unmapped_mask.any():
    _unmapped_names = ecoregion_data.loc[_unmapped_mask, 'eco_region_name'].unique().tolist()
    print(f"WARNING: {len(_unmapped_names)} eco_region_name value(s) are missing from "
          f"ecoregion_mapping (ecoregion_code left as NaN): {_unmapped_names}")

# 3. Assign each ecoregion code its climate zone (used later for ordering)
climate_zone_order = {
    'I01': 'Cold temperate', 'I02': 'Mid temperate', 'I03': 'Mid temperate',
    'I04': 'Mid temperate', 'I05': 'Mid temperate', 'I07': 'Mid to warm temperate transition',
    'I08': 'Warm temperate', 'I10': 'Warm temperate', 'I11': 'Warm temperate',
    'I12': 'Warm temperate semi-arid', 'I14': 'Northern subtropical',
    'I15': 'Transitional subtropical', 'I16': 'Central subtropical',
    'I17': 'Central subtropical', 'I18': 'Central subtropical',
    'I19': 'Central subtropical', 'I20': 'Central subtropical',
    'I21': 'Central subtropical', 'I22': 'Central subtropical',
    'I23': 'Central subtropical', 'I24': 'Central subtropical',
    'I25': 'Central subtropical', 'I26': 'Southern subtropical',
    'I27': 'Southern subtropical', 'I28': 'Southern subtropical',
    'I29': 'Southern subtropical', 'I30': 'Tropical monsoon',
    'I31': 'Tropical monsoon', 'I32': 'Tropical maritime',
    'I33': 'Tropical montane', 'II01': 'Temperate continental arid',
    'II05': 'Cold/mid temperate', 'II07': 'Mid temperate continental',
    'III07': 'Plateau cold temperate', 'III09': 'Plateau tropical monsoon'
}

ecoregion_data['climate_zone'] = ecoregion_data['ecoregion_code'].map(climate_zone_order)

# Build the sort key used to order ecoregions by climate zone, then by code.
# Display order of the climate zones; a zone's position is its primary rank.
_CLIMATE_ZONE_RANK = {
    zone: rank for rank, zone in enumerate((
        'Cold temperate', 'Mid temperate', 'Mid to warm temperate transition',
        'Warm temperate', 'Warm temperate semi-arid', 'Northern subtropical',
        'Transitional subtropical', 'Central subtropical', 'Southern subtropical',
        'Tropical monsoon', 'Tropical maritime', 'Tropical montane',
        'Temperate continental arid', 'Cold/mid temperate', 'Mid temperate continental',
        'Plateau cold temperate', 'Plateau tropical monsoon',
    ))
}
_CODE_DIGITS_RE = re.compile(r'\d+')
_CODE_PREFIX_RE = re.compile(r'[A-Z]+')


def create_sort_key(row):
    """Return the ``(zone_index, prefix, number)`` sort key for one ecoregion row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``'climate_zone'`` and ``'ecoregion_code'``.

    Returns:
        A tuple ``(zone_index, prefix_str, num)`` where ``zone_index`` is the
        position of the climate zone in the display order (zones that are not
        listed, including missing values, sort last), ``prefix_str`` is the
        first run of upper-case letters in the code (``''`` if none) and
        ``num`` is the first run of digits in the code as an int (``0`` if none).
    """
    zone = row['climate_zone']
    code = row['ecoregion_code']

    # Unmapped rows carry NaN (a float) here; the regex search would raise
    # TypeError on it, so treat any non-string code as an empty code.
    if not isinstance(code, str):
        code = ''

    zone_index = _CLIMATE_ZONE_RANK.get(zone, len(_CLIMATE_ZONE_RANK))
    digits = _CODE_DIGITS_RE.search(code)
    num = int(digits.group()) if digits else 0
    prefix = _CODE_PREFIX_RE.search(code)
    prefix_str = prefix.group() if prefix else ''
    return (zone_index, prefix_str, num)

# Compute the composite key for every row, then order the ecoregions by it.
ecoregion_data['sort_key'] = ecoregion_data.apply(
    create_sort_key,
    axis=1,
)
ecoregion_data_sorted = ecoregion_data.sort_values(by='sort_key')

# Make sure the output folder for the figures exists.
save_dir = '/content/drive/MyDrive/Forest_Comparison_Paper/High_DPI_Figures'
os.makedirs(name=save_dir, exist_ok=True)

# ================== Figures 1 + 2: pie chart inset on a stacked bar chart ==================
fig_combined, ax2 = plt.subplots(figsize=(22, 10))  # large canvas for the high-DPI export

# ---------- Figure 2: stacked bars (main axes) ----------
n_regions = len(ecoregion_data_sorted)
x = np.arange(n_regions)
width = 0.8

# Layers from the bottom of the stack to the top: (percentage column, COLORS key).
stack_layers = (
    ('area_no_loss_percent', 'Neither'),
    ('area_multi_only_percent', 'ECF-TST only'),
    ('area_hansen_only_percent', 'Hansen only'),
    ('area_both_percent', 'Both'),
)

# `bottom` accumulates the height of every layer already drawn.
bottom = np.zeros(n_regions)
layer_bars = []
column_below = None
for column, color_key in stack_layers:
    if column_below is not None:
        bottom += ecoregion_data_sorted[column_below]
    layer_bars.append(
        ax2.bar(x, ecoregion_data_sorted[column], width,
                bottom=bottom, color=COLORS[color_key],
                edgecolor='black', linewidth=BAR_EDGEWIDTH)
    )
    column_below = column
bars_neither, bars_ecf, bars_hansen, bars_both = layer_bars

# Axis titles
ax2.set_xlabel('Ecoregion', fontsize=26, fontweight='bold')
ax2.set_ylabel('Percentage (%)', fontsize=26, fontweight='bold')

# One tick per ecoregion, labelled with its code
ax2.set_xticks(x)
ax2.set_xticklabels(ecoregion_data_sorted['ecoregion_code'], rotation=90,
                    ha='center', fontsize=24)
ax2.tick_params(axis='both', labelsize=24)

# Thin vertical separator wherever two neighbouring bars are in different climate zones
zone_sequence = ecoregion_data_sorted['climate_zone'].tolist()
for position, (zone_before, zone_here) in enumerate(zip(zone_sequence, zone_sequence[1:]), start=1):
    if zone_here != zone_before:
        ax2.axvline(x=position - 0.5, color='gray', linestyle='-', linewidth=0.5, alpha=0.5)

# ---------- Figure 2 legend (lower right) ----------
legend_elements = [
    Patch(facecolor=COLORS[class_name], edgecolor='black', label=class_name)
    for class_name in ('Both', 'Hansen only', 'ECF-TST only', 'Neither')
]
ax2.legend(handles=legend_elements, loc='lower right',
           frameon=True, fontsize=22,
           title='Classification', title_fontsize=22)

# ---------- Figure 1: pie chart (inset, lower left) ----------
ax_pie = ax2.inset_axes([-0.08, 0.02, 0.45, 0.45])  # [x, y, width, height]

# Mean of each percentage column over all ecoregion rows
mean_both, mean_hansen_only, mean_multi_only, mean_no_loss = (
    ecoregion_data[percent_column].mean()
    for percent_column in ('area_both_percent', 'area_hansen_only_percent',
                           'area_multi_only_percent', 'area_no_loss_percent')
)

pie_labels = ['Both', 'Hansen only', 'ECF-TST only', 'Neither']
pie_sizes = [mean_both, mean_hansen_only, mean_multi_only, mean_no_loss]
pie_colors = [COLORS[label] for label in pie_labels]

# Wedges show their percentage only; the classes are named by the bar-chart legend
wedges, texts, autotexts = ax_pie.pie(
    pie_sizes,
    colors=pie_colors,
    autopct='%1.1f%%',
    startangle=90,
    textprops={'fontsize': 20, 'color': 'black'},
)

# Outline every wedge
for wedge in wedges:
    wedge.set(edgecolor='black', linewidth=0.8)

# Pie title, drawn on a white rounded box
ax_pie.set_title('Mean confusion matrix',
                 fontsize=20, fontweight='bold', pad=5, y=0,
                 bbox=dict(facecolor='white', alpha=0.8,
                           edgecolor='white', boxstyle='round,pad=0.5'))

# Keep the pie circular
ax_pie.set_aspect('equal')

# ---------- Grid and spines ----------
ax2.grid(True, axis='y', linestyle='--', alpha=0.7)
for side in ('top', 'right'):
    ax2.spines[side].set_visible(False)

fig_combined.tight_layout()

# Save the combined figure to Google Drive (500 DPI)
combined_chart_path = os.path.join(save_dir, 'Figure_1_2_combined_500dpi.png')
plt.savefig(combined_chart_path, dpi=DPI_VALUE, bbox_inches='tight', facecolor='white', edgecolor='none')
print(f"组合图已保存到: {combined_chart_path}")
plt.show()

# ================== Figure 3: national annual forest-loss time series (with per-period trend lines) ==================
fig3, ax3 = plt.subplots(figsize=(20, 10))  # large canvas for the high-DPI export

# Annual series (thick lines)
hansen_line, = ax3.plot(annual_data['year'], annual_data['hansen_loss_area_km2'],
                        marker='o', label='Hansen', linewidth=LINE_WIDTH,
                        markersize=14, color=COLORS['Hansen line'], markeredgecolor='black')
ecf_line, = ax3.plot(annual_data['year'], annual_data['multi_loss_area_km2'],
                     marker='s', label='ECF-TST', linewidth=LINE_WIDTH,
                     markersize=14, color=COLORS['ECF-TST line'], markeredgecolor='black')

# Fit and draw a linear trend for each period
trendline_equations = []  # per-period growth rates, reused by the summary printout

for i, (start_year, end_year) in enumerate(PERIODS):
    # Shade the period.
    # NOTE(review): consecutive PERIODS share their boundary year and the span
    # is padded by 0.5 on each side, so neighbouring shaded bands overlap.
    ax3.axvspan(start_year - 0.5, end_year + 0.5, alpha=0.3,
                color=PERIOD_COLORS[i], label=PERIOD_NAMES[i])

    # Rows of this period (both boundary years included)
    period_mask = (annual_data['year'] >= start_year) & (annual_data['year'] <= end_year)
    period_data = annual_data[period_mask]

    # A regression needs at least two points
    if len(period_data) > 1:
        # Hansen: linear fit and growth rate.
        # Growth rate = slope * number_of_years / period mean * 100.
        # NOTE(review): the fitted line spans len - 1 year intervals; confirm
        # that multiplying by len (not len - 1) is the intended definition.
        hansen_x = period_data['year'].values
        hansen_y = period_data['hansen_loss_area_km2'].values
        hansen_slope, hansen_intercept, hansen_r_value, hansen_p_value, hansen_std_err = stats.linregress(hansen_x, hansen_y)
        hansen_trend = hansen_slope * hansen_x + hansen_intercept
        hansen_growth_rate = hansen_slope * len(hansen_x) / hansen_y.mean() * 100 if hansen_y.mean() > 0 else 0

        # ECF-TST: same computation on the same years
        ecf_y = period_data['multi_loss_area_km2'].values
        ecf_slope, ecf_intercept, ecf_r_value, ecf_p_value, ecf_std_err = stats.linregress(hansen_x, ecf_y)
        ecf_trend = ecf_slope * hansen_x + ecf_intercept
        ecf_growth_rate = ecf_slope * len(hansen_x) / ecf_y.mean() * 100 if ecf_y.mean() > 0 else 0

        # Keep the growth rates for the summary printout
        trendline_equations.append({
            'period': PERIOD_NAMES[i],
            'hansen_growth': hansen_growth_rate,
            'ecf_growth': ecf_growth_rate
        })

        # Dashed trend lines (thick)
        ax3.plot(hansen_x, hansen_trend, '--', color=COLORS['Hansen line'],
                 alpha=0.8, linewidth=TRENDLINE_WIDTH, label=f'Hansen trend ({PERIOD_NAMES[i]})')
        ax3.plot(hansen_x, ecf_trend, '--', color=COLORS['ECF-TST line'],
                 alpha=0.8, linewidth=TRENDLINE_WIDTH, label=f'ECF-TST trend ({PERIOD_NAMES[i]})')

        # Annotation position: centre of the period, at 85 % of the larger
        # of the two series' maxima within the period
        mid_year = (start_year + end_year) / 2
        y_pos = max(period_data['hansen_loss_area_km2'].max(),
                   period_data['multi_loss_area_km2'].max()) * 0.85

        # Text box showing only the growth rates
        text_box = f'Hansen: {hansen_growth_rate:.1f}%\n' \
                   f'ECF-TST: {ecf_growth_rate:.1f}%'

        ax3.text(mid_year, y_pos, text_box, ha='center', va='center',
                 fontsize=22, fontweight='bold', color='#A23B72',
                 bbox=dict(boxstyle="round,pad=0.6", facecolor="white",
                           alpha=0.8, edgecolor='gray'))

# Axis titles
ax3.set_xlabel('Year', fontsize=26, fontweight='bold')
ax3.set_ylabel('Annual forest loss area (km²)', fontsize=26, fontweight='bold')

# One x tick per year in the data, rendered as an integer
all_years = annual_data['year'].tolist()
ax3.set_xticks(all_years)
ax3.set_xticklabels([str(int(year)) for year in all_years],
                    rotation=45, ha='right', fontsize=24)

# x-axis range
ax3.set_xlim(2000.5, 2020.5)

# Tick label sizes
ax3.tick_params(axis='x', labelsize=24)
ax3.tick_params(axis='y', labelsize=24)

# ---------- Figure 3 legend (lower right) ----------
# Explicit handles are passed, so only the two data lines and the period
# patches appear; the labels given to axvspan and the trend lines are not shown.
lines = [hansen_line, ecf_line]
# One patch per configured period (previously hard-coded to exactly three).
period_patches = [Patch(facecolor=PERIOD_COLORS[i], alpha=0.3, label=PERIOD_NAMES[i])
                  for i in range(len(PERIODS))]
all_handles = lines + period_patches

ax3.legend(handles=all_handles, loc='lower right', frameon=True, fontsize=22)

# Grid and spines
ax3.grid(True, axis='both', linestyle='--', alpha=0.7)
ax3.spines['top'].set_visible(False)
ax3.spines['right'].set_visible(False)

plt.tight_layout()

# Save the time-series figure to Google Drive (500 DPI)
time_series_path = os.path.join(save_dir, 'Figure_3_time_series_500dpi.png')
plt.savefig(time_series_path, dpi=DPI_VALUE, bbox_inches='tight', facecolor='white', edgecolor='none')
print(f"时间序列图已保存到: {time_series_path}")
plt.show()

# ================== Print key summary statistics ==================
print("="*80)
print("SUMMARY STATISTICS")
print("="*80)

# Cumulative loss: sum of the annual areas over all rows of annual_data
hansen_cumulative = annual_data['hansen_loss_area_km2'].sum()
ecf_cumulative = annual_data['multi_loss_area_km2'].sum()

print("\nCumulative forest loss (2001-2020):")
print(f"Hansen dataset: {hansen_cumulative:,.0f} km²")
print(f"ECF-TST dataset: {ecf_cumulative:,.0f} km²")
print(f"Difference: {ecf_cumulative - hansen_cumulative:,.0f} km² ({((ecf_cumulative/hansen_cumulative)-1)*100:.1f}%)")

print("\nMean confusion matrix statistics:")
print(f"Both datasets detected: {mean_both:.2f}%")
print(f"Hansen only: {mean_hansen_only:.2f}%")
print(f"ECF-TST only: {mean_multi_only:.2f}%")
print(f"Neither detected: {mean_no_loss:.2f}%")

print("\nTrendline growth rates by period:")
for eq in trendline_equations:
    print(f"\n{eq['period']}:")
    print(f"  Hansen growth rate: {eq['hansen_growth']:.1f}%")
    print(f"  ECF-TST growth rate: {eq['ecf_growth']:.1f}%")

# Per-ecoregion detection difference: each dataset's total is the area both
# detected plus the area only that dataset detected.
ecoregion_data_sorted['hansen_total'] = ecoregion_data_sorted['area_both_km2'] + ecoregion_data_sorted['area_hansen_only_km2']
ecoregion_data_sorted['ecf_total'] = ecoregion_data_sorted['area_both_km2'] + ecoregion_data_sorted['area_multi_only_km2']
# A zero Hansen total would make the relative difference infinite and push
# that region to the top of the ranking; turn it into NaN so it is treated
# as "undefined" instead.
ecoregion_data_sorted['diff_percent'] = (
    (ecoregion_data_sorted['ecf_total'] - ecoregion_data_sorted['hansen_total'])
    / ecoregion_data_sorted['hansen_total'].replace(0, np.nan) * 100
)

print("\nEcoregions with largest differences (ECF-TST vs Hansen):")
top_diff = ecoregion_data_sorted.nlargest(5, 'diff_percent')[['ecoregion_code', 'diff_percent']]
for region_code, region_diff in zip(top_diff['ecoregion_code'], top_diff['diff_percent']):
    print(f"  {region_code}: {region_diff:.1f}%")

# Report the DPI actually passed to savefig rather than a fixed number
print(f"\nFigures saved with {DPI_VALUE} DPI to:")
print(f"1. {combined_chart_path}")
print(f"2. {time_series_path}")
print(f"\nSave directory: {save_dir}")

print(f"\nDPI setting used: {DPI_VALUE}")
print("Figure sizes:")
print("  Combined chart: 22x10 inches")
print("  Time series chart: 20x10 inches")