In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy.interpolate import make_interp_spline

In [None]:
# Load data and prepare GDP matching using fuzzy matching
from difflib import get_close_matches
import re

# Names of the scenario cost columns used throughout the notebook
cost_columns = ["total_cost_0", "total_cost_2020", "total_cost_2050"]
df = pd.read_csv('output_scenario.csv')

# GDP table as read from disk
gdp_df = pd.read_csv('gdp.csv')

# Keep only the country label and the 2020 GDP-per-capita column, under shorter names
gdp_2020 = gdp_df.loc[:, ['Country Name', '2020']].copy()
gdp_2020 = gdp_2020.rename(
    columns={'Country Name': 'Country', '2020': 'GDP_per_capita_2020'}
)
# Entries that cannot be parsed as numbers become NaN instead of raising
gdp_2020['GDP_per_capita_2020'] = pd.to_numeric(
    gdp_2020['GDP_per_capita_2020'], errors='coerce'
)

# Clean country names function
def clean_country_name(name):
    """Normalise a country name so that two spellings can be compared.

    The value is converted to ``str``, trimmed, lower-cased, stripped of the
    phrases "republic of", "kingdom of", "state of" and "federation of", and
    has runs of whitespace collapsed to single spaces.

    Returns an empty string for missing values (anything ``pd.isna`` accepts).
    """
    if pd.isna(name):
        return ""

    lowered = str(name).strip().lower()
    # Drop formal-title phrases wherever they appear as whole words
    without_titles = re.sub(
        r'\b(republic of|kingdom of|state of|federation of)\b', '', lowered
    )
    # Removing a phrase can leave double or leading/trailing blanks behind
    return re.sub(r'\s+', ' ', without_titles).strip()

# Function to fuzzy match country names
def fuzzy_match_country(country, gdp_countries, threshold=0.6):
    """Return the entry of ``gdp_countries`` that best matches ``country``.

    Matching is attempted on cleaned names (see ``clean_country_name``) in
    three stages, stopping at the first hit:

    1. exact equality of the cleaned names;
    2. ``difflib.get_close_matches`` with ``cutoff=threshold``;
    3. substring containment in either direction.

    Parameters
    ----------
    country : str or missing value
        Name to look up. If it contains ``';'`` only the part before the
        first semicolon is used.
    gdp_countries : iterable of str
        Candidate names; the returned value is one of these, unmodified.
    threshold : float, default 0.6
        Similarity cutoff passed to ``get_close_matches``.

    Returns
    -------
    str or None
        The matching candidate, or ``None`` when ``country`` is missing,
        cleans to an empty string, or nothing matches.
    """
    if pd.isna(country):
        return None

    # Multi-country rows ("A; B; C") are matched on their first entry only.
    first_country = str(country).split(';')[0]
    clean_country = clean_country_name(first_country)
    if not clean_country:
        return None

    # Map cleaned candidate -> original spelling. Candidates that clean to an
    # empty string (NaN, blanks) are skipped: "" is a substring of every name,
    # so keeping them would make the containment stage match anything.
    clean_to_original = {}
    for original in gdp_countries:
        cleaned = clean_country_name(original)
        if cleaned:
            clean_to_original[cleaned] = original

    # Stage 1: exact match on cleaned names
    if clean_country in clean_to_original:
        return clean_to_original[clean_country]

    # Stage 2: closest candidate above the similarity cutoff
    matches = get_close_matches(
        clean_country, list(clean_to_original), n=1, cutoff=threshold
    )
    if matches:
        return clean_to_original[matches[0]]

    # Stage 3: one name contained in the other (first candidate in input order wins)
    for clean_gdp, orig_gdp in clean_to_original.items():
        if clean_country in clean_gdp or clean_gdp in clean_country:
            return orig_gdp

    return None

# Match countries using fuzzy matching
gdp_countries = gdp_2020['Country'].dropna().tolist()
# Resolve every scenario row to the GDP-table spelling of its country (None if unmatched)
df['GDP_Country'] = df['Country'].apply(fuzzy_match_country, args=(gdp_countries,))

# Left join keeps every scenario row; unmatched rows get NaN GDP.
# The GDP table's own 'Country' column arrives as 'Country_gdp'.
df = pd.merge(
    df,
    gdp_2020[['Country', 'GDP_per_capita_2020']],
    how='left',
    left_on='GDP_Country',
    right_on='Country',
    suffixes=('', '_gdp'),
)

# Turn the cost columns into per-person costs by dividing each row by its 'pop'
df[cost_columns] = df[cost_columns].divide(df['pop'], axis='index')

# EIBC = per-capita cost / GDP per capita (NaN wherever GDP is missing)
eibc_sources = {
    'EIBC_0': 'total_cost_0',
    'EIBC_2020': 'total_cost_2020',
    'EIBC_2050': 'total_cost_2050',
}
for eibc_col, cost_col in eibc_sources.items():
    df[eibc_col] = df[cost_col] / df['GDP_per_capita_2020']

has_gdp = df['GDP_per_capita_2020'].notna()
print(f"Successfully matched GDP data for {has_gdp.sum()} out of {len(df)} countries")
print(f"Countries with missing GDP data: {df.loc[~has_gdp, 'Country'].unique()}")

In [None]:
def _render_cost_marker_image(color, dpi=300):
    """Rasterise one map marker (a black-rimmed disc filled with ``color``) to an RGBA array."""
    marker_fig = plt.figure(figsize=(1, 1), frameon=False, dpi=dpi)
    try:
        marker_fig.patch.set_alpha(0)  # transparent figure background
        marker_ax = marker_fig.add_subplot(111)
        marker_ax.set_aspect('equal')
        marker_ax.patch.set_alpha(0)

        # A slightly larger black disc underneath shows as the rim
        marker_ax.add_patch(plt.Circle((0.5, 0.5), 0.18, color='black', alpha=1))
        # The inner disc carries the data colour
        marker_ax.add_patch(plt.Circle((0.5, 0.5), 0.15, color=color, alpha=1))

        marker_ax.set_xlim(0, 1)
        marker_ax.set_ylim(0, 1)
        marker_ax.axis('off')

        marker_fig.tight_layout(pad=0)
        marker_fig.canvas.draw()
        # Copy the pixel buffer; it is not valid once the figure is closed
        return np.array(marker_fig.canvas.renderer.buffer_rgba())
    finally:
        # Always release the temporary figure, even if rendering failed
        plt.close(marker_fig)


def visualize_cost(data, vmax, column, lon_col='Long', lat_col='Lat'):
    """Draw a world map of ``column``: a smoothed density layer plus one marker per row.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``lon_col``, ``lat_col`` (degrees) and ``column``.
        The frame is not modified. Rows with a missing coordinate or value
        are ignored.
    vmax : float
        Upper end of the marker colour scale (the lower end is 0); also the
        value at which the marker size scale reaches ``max_size``.
    column : str
        Name of the value column.
    lon_col, lat_col : str, default 'Long' / 'Lat'
        Names of the coordinate columns.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox
    from scipy.ndimage import gaussian_filter

    plt.rcParams['font.family'] = 'Times New Roman'

    # Work on a copy so the caller's frame does not gain a 'geometry' column,
    # and drop rows whose NaN would otherwise spread over the whole density grid.
    points = data.dropna(subset=[lon_col, lat_col, column]).copy()
    if points.empty:
        print(f"No valid '{column}' data to plot!")
        return

    # Point geometries in WGS84
    points['geometry'] = [
        Point(lon, lat) for lon, lat in zip(points[lon_col], points[lat_col])
    ]
    geo_df = gpd.GeoDataFrame(points, geometry='geometry')
    geo_df = geo_df.set_crs(epsg=4326)

    # Figure with a cartopy Robinson projection
    fig = plt.figure(figsize=(14, 6), dpi=500)
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())

    # Background and base-map features
    ax.set_facecolor("#FFFEFE")
    ax.add_feature(cfeature.LAND, color="#CECECE", alpha=0.4)
    ax.add_feature(cfeature.OCEAN, color="#FFFFFF", alpha=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.7)
    ax.add_feature(cfeature.BORDERS, linestyle=':', linewidth=0.5, alpha=0.3)

    # Regular lon/lat grid for the density layer
    resolution = 1.0  # grid step in degrees
    lons = np.arange(-180, 181, resolution)
    lats = np.arange(-90, 91, resolution)
    lon_grid, lat_grid = np.meshgrid(lons, lats)
    density_grid = np.zeros_like(lon_grid)

    # Gaussian kernel width in degrees; influence is cut off at 3 bandwidths
    bandwidth = 3.0

    # Accumulate each point's value-weighted Gaussian footprint on the grid.
    # Distances are plain Euclidean distances in degrees (no dateline wrap-around).
    for lon, lat, value in zip(
        geo_df.geometry.x.values, geo_df.geometry.y.values, geo_df[column].values
    ):
        dist = np.sqrt((lon_grid - lon) ** 2 + (lat_grid - lat) ** 2)
        within = dist <= 3 * bandwidth
        density_grid[within] += np.exp(-0.5 * (dist[within] / bandwidth) ** 2) * value

    # Light extra smoothing of the accumulated field
    density_grid = gaussian_filter(density_grid, sigma=1.0)

    # Hide cells below 8% of the peak so only areas with enough density are drawn
    threshold = np.max(density_grid) * 0.08
    mask = density_grid < threshold

    # Cap the colour range at the 90th percentile of the visible cells so that
    # a few extreme cells do not dominate the colour distribution
    visible = density_grid[~mask]
    upper_limit = np.percentile(visible, 90) if visible.size else threshold

    # Skip the layer when there is no usable value range (e.g. all values <= 0)
    if upper_limit > threshold:
        density_grid_capped = np.clip(density_grid, 0, upper_limit)
        density_grid_masked = np.ma.array(density_grid_capped, mask=mask)

        # Semi-transparent version of the marker palette (alpha encoded in the hex codes)
        heatmap_colors = ['#012f4830', '#669aba40', '#fbf0d950', '#be142060', '#7a010170']
        heatmap_cmap = mpl.colors.LinearSegmentedColormap.from_list(
            'heatmap_palette', heatmap_colors
        )
        heatmap_norm = mpl.colors.Normalize(vmin=threshold, vmax=upper_limit)

        ax.contourf(
            lon_grid, lat_grid, density_grid_masked,
            transform=ccrs.PlateCarree(),
            cmap=heatmap_cmap,
            norm=heatmap_norm,
            levels=8,
            alpha=0.7,
            zorder=2,
            extend='neither'
        )

    # Marker colour scale, from blue to red
    sci_colors = ['#012f48', '#669aba', '#fbf0d9', '#be1420', '#7a0101']
    custom_cmap = mpl.colors.LinearSegmentedColormap.from_list('sci_palette', sci_colors)
    norm = mpl.colors.Normalize(vmin=0, vmax=vmax)

    # Marker size range
    min_size = 20
    max_size = 50

    # Draw small values first so that large values end up on top
    geo_df = geo_df.sort_values(by=column)

    # Off-map dummy scatter whose only purpose is to feed the colour bar
    fake_scatter = ax.scatter(
        [-1000], [-1000],
        c=[0],
        cmap=custom_cmap,
        vmin=0,
        vmax=vmax,
        s=1
    )

    for _, row in geo_df.iterrows():
        lon, lat = row.geometry.x, row.geometry.y
        value = row[column]

        # Size grows linearly with value/vmax; the fraction is floored at 0 so a
        # negative value cannot produce a negative size (and a NaN zoom below)
        size = min_size + max(value / vmax, 0.0) * (max_size - min_size)

        point_img = _render_cost_marker_image(custom_cmap(norm(value)))

        # Geographic -> projected coordinates of the map axes
        x, y = ax.projection.transform_point(lon, lat, src_crs=ccrs.PlateCarree())

        imagebox = OffsetImage(point_img, zoom=np.sqrt(size) / 100)
        imagebox.image.axes = ax

        # zorder=10 keeps the markers above the density layer
        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False, pad=0, zorder=10))

    # Horizontal colour bar under the map
    cbar = fig.colorbar(
        fake_scatter,
        ax=ax,
        orientation='horizontal',
        shrink=0.6,  # bar length relative to the axes
        pad=0.03,    # gap between map and bar
        aspect=50    # length/thickness ratio (smaller = thicker)
    )
    cbar.set_label('Per Capita Investment Growth Rate', fontsize=12)

    # Show the whole globe
    ax.set_global()

    # Remove ticks, frame and axis decorations
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.axis('off')

    # Minimise margins; act on this figure explicitly instead of pyplot's "current" one
    fig.tight_layout(pad=0)

    plt.show()

In [None]:
def create_longitude_profile(data, column, ax=None):
    """Plot the mean of ``column`` per 5-degree longitude band as a smoothed, filled curve.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'lon'`` column (degrees) and ``column``.
    column : str
        Name of the value column to average.
    ax : matplotlib axes, optional
        Axes to draw on. A new 14x1 inch, 500 dpi figure is created when omitted.

    Returns
    -------
    The axes that were drawn on.

    Notes
    -----
    Rows whose longitude or value is NaN/inf are ignored. Bands without any
    usable row contribute 0 to the curve. Bands are ``[lo, hi)`` except the
    last one, which also includes ``lon == 180``.
    """
    plt.rcParams['font.family'] = 'Times New Roman'
    if ax is None:
        _, ax = plt.subplots(figsize=(14, 1), dpi=500)

    # 5-degree bands and their mid-points
    lon_bins = np.arange(-180, 181, 5)
    lon_centers = (lon_bins[:-1] + lon_bins[1:]) / 2

    lon = data['lon'].to_numpy(dtype=float, na_value=np.nan)
    values = data[column].to_numpy(dtype=float, na_value=np.nan)
    # Non-finite values would make the band mean NaN and the spline fit raise
    usable = np.isfinite(lon) & np.isfinite(values)

    # Band mean = weighted histogram (sum) / plain histogram (count).
    # np.histogram treats the last bin as closed, so lon == 180 is kept.
    counts, _ = np.histogram(lon[usable], bins=lon_bins)
    sums, _ = np.histogram(lon[usable], bins=lon_bins, weights=values[usable])
    lon_values = np.divide(
        sums, counts, out=np.zeros(len(lon_centers)), where=counts > 0
    )

    # Cubic interpolating spline through the band means, sampled at 300 points
    lon_smooth = np.linspace(lon_centers.min(), lon_centers.max(), 300)
    spline = make_interp_spline(lon_centers, lon_values, k=3)
    # The spline can undershoot next to empty bands; clip so the curve never goes negative
    lon_values_smooth = np.maximum(spline(lon_smooth), 0)

    # Curve plus translucent fill down to zero
    ax.plot(lon_smooth, lon_values_smooth, color='#7a0101', linewidth=1.5)
    ax.fill_between(lon_smooth, 0, lon_values_smooth, alpha=0.3, color="#a37070")

    # Full longitude range; y axis starts at zero
    ax.set_xlim(-180, 180)
    ax.set_ylim(0, None)

    # Minimal styling: no x ticks, only the left spine remains
    ax.set_xticks([])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)

    return ax

def create_latitude_profile(data, column, ax=None):
    """Plot the mean of ``column`` per 2-degree latitude band as a smoothed, filled curve.

    The profile is drawn sideways: values on the x axis, latitude on the y axis.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'lat'`` column (degrees) and ``column``.
    column : str
        Name of the value column to average.
    ax : matplotlib axes, optional
        Axes to draw on. A new 2x10 inch, 500 dpi figure is created when omitted.

    Returns
    -------
    The axes that were drawn on.

    Notes
    -----
    Rows whose latitude or value is NaN/inf are ignored. Bands without any
    usable row contribute 0 to the curve. Bands are ``[lo, hi)`` except the
    last one, which also includes ``lat == 90``.
    """
    # Same font as the other plotting helpers in this notebook
    plt.rcParams['font.family'] = 'Times New Roman'
    if ax is None:
        _, ax = plt.subplots(figsize=(2, 10), dpi=500)

    # 2-degree bands and their mid-points
    lat_bins = np.arange(-90, 91, 2)
    lat_centers = (lat_bins[:-1] + lat_bins[1:]) / 2

    lat = data['lat'].to_numpy(dtype=float, na_value=np.nan)
    values = data[column].to_numpy(dtype=float, na_value=np.nan)
    # Non-finite values would make the band mean NaN and the spline fit raise
    usable = np.isfinite(lat) & np.isfinite(values)

    # Band mean = weighted histogram (sum) / plain histogram (count).
    # np.histogram treats the last bin as closed, so lat == 90 is kept.
    counts, _ = np.histogram(lat[usable], bins=lat_bins)
    sums, _ = np.histogram(lat[usable], bins=lat_bins, weights=values[usable])
    lat_values = np.divide(
        sums, counts, out=np.zeros(len(lat_centers)), where=counts > 0
    )

    # Cubic interpolating spline through the band means, sampled at 300 points
    lat_smooth = np.linspace(lat_centers.min(), lat_centers.max(), 300)
    spline = make_interp_spline(lat_centers, lat_values, k=3)
    # The spline can undershoot next to empty bands; clip so the curve never goes negative
    lat_values_smooth = np.maximum(spline(lat_smooth), 0)

    # Curve plus translucent fill towards x = 0
    ax.plot(lat_values_smooth, lat_smooth, color='#7a0101', linewidth=1.5)
    ax.fill_betweenx(lat_smooth, 0, lat_values_smooth, alpha=0.3, color='#a37070')

    # Full latitude range; x axis starts at zero
    ax.set_ylim(-90, 90)
    ax.set_xlim(0, None)

    # Minimal styling: no y ticks, only the bottom spine remains
    ax.set_yticks([])
    ax.spines['left'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    return ax

In [None]:
# Marginal EIBC_0 profiles; no axes is passed, so each helper opens its own figure
create_longitude_profile(df, 'EIBC_0')
create_latitude_profile(df, 'EIBC_0')

In [None]:
# Create separate EIBC visualizations for 2020 and 2050
def _render_eibc_marker_image(color, dpi=200):
    """Rasterise one map marker (a black-rimmed disc filled with ``color``) to an RGBA array."""
    marker_fig = plt.figure(figsize=(1, 1), frameon=False, dpi=dpi)
    try:
        marker_fig.patch.set_alpha(0)  # transparent figure background
        marker_ax = marker_fig.add_subplot(111)
        marker_ax.set_aspect('equal')
        marker_ax.patch.set_alpha(0)

        # Outer disc (border) first, inner disc (data colour) on top
        marker_ax.add_patch(plt.Circle((0.5, 0.5), 0.18, color='black', alpha=1))
        marker_ax.add_patch(plt.Circle((0.5, 0.5), 0.15, color=color, alpha=1))

        marker_ax.set_xlim(0, 1)
        marker_ax.set_ylim(0, 1)
        marker_ax.axis('off')

        marker_fig.tight_layout(pad=0)
        marker_fig.canvas.draw()
        # Copy the pixel buffer; it is not valid once the figure is closed
        return np.array(marker_fig.canvas.renderer.buffer_rgba())
    finally:
        # Always release the temporary figure, even if rendering failed
        plt.close(marker_fig)


def visualize_eibc_single(data, year, column, vmax=None):
    """Draw a world map with one marker per row, coloured and sized by ``column``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'lon'``, ``'lat'`` (degrees) and ``column``. Rows where
        ``column`` is NaN are dropped; the frame itself is not modified.
    year : str
        Label used only in the printed messages.
    column : str
        Name of the EIBC column to plot.
    vmax : float, optional
        Upper end of the colour/size scale. Defaults to the 95th percentile of
        the valid values so that outliers do not stretch the scale.

    Returns
    -------
    matplotlib.figure.Figure or None
        The map figure, or ``None`` when there is no valid value to plot.

    Notes
    -----
    Rows with a value <= 0 are counted in the printed statistics but get no marker.
    """
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox

    plt.rcParams['font.family'] = 'Times New Roman'

    # Keep only rows with a usable value
    data_valid = data.dropna(subset=[column])
    if len(data_valid) == 0:
        print(f"No valid EIBC data found for {year}!")
        return None

    print(f"Processing {len(data_valid)} countries for EIBC {year}")

    if vmax is None:
        vmax = np.percentile(data_valid[column], 95)

    # Point geometries in WGS84 (built on a copy so the caller's frame is untouched)
    data_valid = data_valid.copy()
    data_valid['geometry'] = data_valid.apply(
        lambda row: Point(row['lon'], row['lat']), axis=1
    )
    geo_df = gpd.GeoDataFrame(data_valid, geometry='geometry')
    geo_df = geo_df.set_crs(epsg=4326)

    # Figure with a cartopy Robinson projection
    fig = plt.figure(figsize=(16, 8), dpi=300)
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())

    # Background and base-map features
    ax.set_facecolor("#FFFFFF")
    ax.add_feature(cfeature.LAND, color="#CECECE", alpha=0.4)
    ax.add_feature(cfeature.OCEAN, color="#FFFFFF", alpha=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.7)
    ax.add_feature(cfeature.BORDERS, linestyle=':', linewidth=0.5, alpha=0.3)

    # Blue-to-red colour scale over [0, vmax]
    sci_colors = ['#012f48', '#669aba', '#fbf0d9', '#be1420', '#7a0101']
    custom_cmap = mpl.colors.LinearSegmentedColormap.from_list('sci_palette', sci_colors)
    norm = mpl.colors.Normalize(vmin=0, vmax=vmax)

    # Marker size range
    min_size = 25
    max_size = 200

    # Off-map dummy scatter whose only purpose is to feed the colour bar
    fake_scatter = ax.scatter([-1000], [-1000], c=[0], cmap=custom_cmap,
                              vmin=0, vmax=vmax, s=1)

    # Ascending order: small values are drawn first, large values end up on top
    for _, row in geo_df.sort_values(by=column, ascending=True).iterrows():
        lon, lat = row.geometry.x, row.geometry.y
        value = row[column]

        if pd.isna(value) or value <= 0:
            continue

        # Size grows linearly with value/vmax and saturates at max_size
        size_factor = min(value / vmax, 1.0)
        size = min_size + (size_factor * (max_size - min_size))

        point_img = _render_eibc_marker_image(custom_cmap(norm(value)))

        # Geographic -> projected coordinates of the map axes
        x, y = ax.projection.transform_point(lon, lat, src_crs=ccrs.PlateCarree())

        imagebox = OffsetImage(point_img, zoom=np.sqrt(size) / 80)
        imagebox.image.axes = ax

        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False, pad=0, zorder=10))

    # Horizontal colour bar under the map
    cbar = fig.colorbar(fake_scatter, ax=ax, orientation='horizontal',
                        shrink=0.6, pad=0.05, aspect=50)
    cbar.set_label('EIBC (Energy Investment to GDP Ratio)', fontsize=14)

    # Show the whole globe and strip ticks and frame
    ax.set_global()
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Act on this figure explicitly rather than on pyplot's "current" figure,
    # which has been switched around by the temporary marker figures
    fig.tight_layout()
    plt.show()

    # Summary statistics over all valid rows
    print(f"\nEIBC {year} Statistics:")
    print(f"  Valid countries: {len(data_valid)}")
    print(f"  Mean EIBC: {data_valid[column].mean():.4f}")
    print(f"  Median EIBC: {data_valid[column].median():.4f}")
    print(f"  95th percentile: {np.percentile(data_valid[column], 95):.4f}")
    print(f"  Max EIBC: {data_valid[column].max():.4f}")

    return fig

# Determine consistent vmax for both years
# Non-missing EIBC values of each scenario
eibc_0_valid = df['EIBC_0'].dropna()
eibc_2020_valid = df['EIBC_2020'].dropna()
eibc_2050_valid = df['EIBC_2050'].dropna()

if eibc_0_valid.empty or eibc_2020_valid.empty or eibc_2050_valid.empty:
    print("Error: No valid EIBC data found for one or both years!")
else:
    # Shared colour-scale ceiling: 95th percentile of the pooled 2020 and 2050 values.
    # NOTE(review): the EIBC_0 values are not part of the pool although the same
    # ceiling is applied to the EIBC_0 map below - confirm this is intended.
    combined_eibc = pd.concat([eibc_2020_valid, eibc_2050_valid])
    vmax_consistent = np.percentile(combined_eibc, 95)

    print(f"Using consistent vmax: {vmax_consistent:.4f} for both years")

    # One map per scenario, all on the same colour scale
    print("\n=== Creating EIBC 0 Visualization ===")
    fig_0 = visualize_eibc_single(df, '0', 'EIBC_0', vmax=vmax_consistent)

    print("\n=== Creating EIBC 2020 Visualization ===")
    fig_2020 = visualize_eibc_single(df, '2020', 'EIBC_2020', vmax=vmax_consistent)

    print("\n=== Creating EIBC 2050 Visualization ===")
    fig_2050 = visualize_eibc_single(df, '2050', 'EIBC_2050', vmax=vmax_consistent)



In [None]:
# Create scatter plot analysis
def create_eibc_scatter_plot(data, x_col='EIBC_0', y_col='EIBC_2020',
                             x_label='EIBC 0 (%)', y_label='EIBC 2020 (%)'):
    """Scatter one EIBC scenario against another, marking increases and decreases.

    Values are multiplied by 100 and shown as percentages. Rows where
    ``y_col > x_col`` are drawn as red triangles ("EIBC Increased"); all other
    rows, including unchanged ones, as blue circles ("EIBC Decreased"). A dashed
    y = x line marks "no change".

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``x_col`` and ``y_col``; rows with NaN in either are ignored.
    x_col, y_col : str
        Columns plotted on the x and y axis. The defaults compare the
        ``EIBC_0`` scenario with ``EIBC_2020``.
    x_label, y_label : str
        Axis labels.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure, or ``None`` when no row has both values.
    """
    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows with both values can be compared
    data_valid = data.dropna(subset=[x_col, y_col])
    if len(data_valid) == 0:
        print("No valid EIBC data found for comparison!")
        return None

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)

    # Ratios -> percentages for display
    baseline_pct = data_valid[x_col] * 100
    target_pct = data_valid[y_col] * 100

    # Two marker styles: strictly increased vs. everything else
    increased_mask = target_pct > baseline_pct
    decreased_mask = target_pct <= baseline_pct

    if increased_mask.any():
        ax.scatter(baseline_pct[increased_mask], target_pct[increased_mask],
                   alpha=0.7, s=60, c='#be1420', marker='^',
                   edgecolors='black', linewidth=0.5, label='EIBC Increased')

    if decreased_mask.any():
        ax.scatter(baseline_pct[decreased_mask], target_pct[decreased_mask],
                   alpha=0.7, s=60, c='#012f48', marker='o',
                   edgecolors='black', linewidth=0.5, label='EIBC Decreased')

    # y = x reference line spanning the larger of the two data ranges
    max_val = max(baseline_pct.max(), target_pct.max())
    ax.plot([0, max_val], [0, max_val], 'k--', alpha=0.5, linewidth=1, label='y=x (No Change)')

    ax.set_xlabel(x_label, fontsize=16)
    ax.set_ylabel(y_label, fontsize=16)
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=16, edgecolor='black')
    ax.tick_params(axis='both', labelsize=14)

    # Same range on both axes, starting at 0, with 5% headroom
    ax.set_xlim(0, max_val * 1.05)
    ax.set_ylim(0, max_val * 1.05)

    plt.tight_layout()
    plt.show()

    return fig

# Create change analysis with focused view
def create_eibc_change_analysis(data, base_col='EIBC_0', target_col='EIBC_2020',
                                change_label='EIBC Change (2020 - 0) (%)'):
    """Histogram of the EIBC change between two scenarios, zoomed on small changes.

    The change is ``target_col - base_col``, multiplied by 100 (percentage).
    The main histogram only covers a symmetric window around zero whose
    half-width is the smaller of 30% of the 90th percentile of the absolute
    change and 0.05. An inset in the upper-left corner repeats the histogram
    for the positive changes inside that window, with a y-limit chosen so the
    tallest bars may be cut off to reveal the smaller ones. A text box reports
    the largest increase and decrease over all rows, not just the window.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``base_col`` and ``target_col``; rows with NaN in either
        are ignored.
    base_col, target_col : str
        Scenario columns to compare. The defaults compare ``EIBC_0`` with
        ``EIBC_2020``.
    change_label : str
        x-axis label of the main histogram.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure, or ``None`` when no row has both values.
    """
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows with both values can be compared
    data_valid = data.dropna(subset=[base_col, target_col])
    if len(data_valid) == 0:
        print("No valid EIBC data found for comparison!")
        return None

    # Change in percentage points of the ratio
    eibc_change_pct = (data_valid[target_col] - data_valid[base_col]) * 100

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)

    # Symmetric zoom window around zero, deliberately narrow to emphasise small changes
    change_90th = np.percentile(np.abs(eibc_change_pct), 90)
    xlim_max = min(change_90th * 0.3, 0.05)
    xlim_min = -xlim_max

    in_window = (eibc_change_pct >= xlim_min) & (eibc_change_pct <= xlim_max)
    focused_changes = eibc_change_pct[in_window]

    # Main histogram of the zoom window
    counts, _, _ = ax.hist(focused_changes, bins=80, alpha=1, color='#de6a69',
                           edgecolor='black', linewidth=0.3)
    ax.axvline(0, color='black', linestyle='--', alpha=0.7, linewidth=2)
    ax.set_xlabel(change_label, fontsize=16)
    ax.set_ylabel('Frequency', fontsize=16)
    ax.tick_params(axis='both', labelsize=14)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(xlim_min, xlim_max)

    # Show every bar in full, with 5% headroom (fall back to 1 if the window is empty)
    max_count = np.max(counts)
    ax.set_ylim(0, max_count * 1.05 if max_count > 0 else 1)

    # Extremes over the full data set, not just the zoom window
    max_positive_change_pct = eibc_change_pct.max()
    max_negative_change_pct = eibc_change_pct.min()

    info_text = f'Max Increase: {max_positive_change_pct:.4f}%\n'
    info_text += f'Max Decrease: {max_negative_change_pct:.4f}%'

    ax.text(0.80, 0.95, info_text, transform=ax.transAxes, fontsize=16,
            verticalalignment='top', horizontalalignment='left',
            bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))

    # Inset (upper left): positive changes inside the zoom window
    axins = inset_axes(ax, width="35%", height="35%", loc='upper left', borderpad=4)
    positive_changes = focused_changes[focused_changes > 0]

    if len(positive_changes) > 0:
        counts_pos, _, _ = axins.hist(positive_changes, bins=30, alpha=1, color='#982b2d',
                                      edgecolor='black', linewidth=0.5)
        axins.set_xlabel('Positive EIBC Change (%)', fontsize=16)
        axins.set_ylabel('Frequency', fontsize=16)
        axins.grid(True, alpha=0.3)
        axins.tick_params(labelsize=9)

        # Pick a y-limit that may cut the tallest bars so the smaller ones stay readable
        max_pos_count = np.max(counts_pos)
        sorted_counts = np.sort(counts_pos[counts_pos > 0])  # heights of non-empty bars

        if len(sorted_counts) > 1:
            # Bar height at the 80% position of the sorted non-empty bars
            suggested_limit = sorted_counts[int(len(sorted_counts) * 0.8)]

            # 20% padding above that height, kept between 50 and 120
            target_limit = max(50, min(120, suggested_limit * 1.2))

            # If that would show (nearly) the tallest bar anyway, cut lower instead
            if target_limit > max_pos_count * 0.9:
                target_limit = min(80, max_pos_count * 0.7)

            axins.set_ylim(0, target_limit)
        else:
            # Single non-empty bar: show it with 10% headroom, capped at 80
            axins.set_ylim(0, min(80, max_pos_count * 1.1))
    else:
        # Nothing positive inside the zoom window: say so instead of drawing an empty plot
        axins.text(0.5, 0.5, 'No positive\nchanges in\nfocused range',
                   transform=axins.transAxes, ha='center', va='center', fontsize=10)
        axins.set_xticks([])
        axins.set_yticks([])

    fig.tight_layout()
    plt.show()

    # Detailed statistics (all in percent)
    print(f"\n=== EIBC Change Analysis ===")
    print(f"Total countries with valid data: {len(data_valid)}")
    print(f"Countries in focused range ({xlim_min:.4f}% to {xlim_max:.4f}%): {len(focused_changes)}")

    print(f"\nOverall Change Statistics:")
    print(f"  Mean change: {eibc_change_pct.mean():.4f}%")
    print(f"  Median change: {eibc_change_pct.median():.4f}%")
    print(f"  Max increase: {max_positive_change_pct:.4f}%")
    print(f"  Max decrease: {max_negative_change_pct:.4f}%")
    print(f"  Countries with increased EIBC: {(eibc_change_pct > 0).sum()} ({(eibc_change_pct > 0).mean()*100:.1f}%)")
    print(f"  Countries with decreased EIBC: {(eibc_change_pct < 0).sum()} ({(eibc_change_pct < 0).mean()*100:.1f}%)")

    print(f"\nFocused Range Statistics:")
    print(f"  Mean change (focused): {focused_changes.mean():.4f}%")
    print(f"  Median change (focused): {focused_changes.median():.4f}%")
    print(f"  Tallest bar height: {max_count} countries")
    print(f"  All bars displayed completely")

    if len(positive_changes) > 0:
        print(f"\nPositive Changes in Focused Range:")
        print(f"  Count: {len(positive_changes)}")
        print(f"  Mean: {positive_changes.mean():.4f}%")
        print(f"  Median: {positive_changes.median():.4f}%")
        print(f"  Max: {positive_changes.max():.4f}%")
        print(f"  Inset y-limit set to: {axins.get_ylim()[1]:.0f} (to focus on smaller bars)")

    return fig

# Scenario-vs-scenario scatter plot
print("\n=== Creating EIBC Scatter Plot ===")
fig_scatter = create_eibc_scatter_plot(data=df)

# Distribution of the change between the two scenarios
print("\n=== Creating EIBC Change Analysis ===")
fig_change = create_eibc_change_analysis(data=df)

In [None]:
# Create scatter plot analysis
def create_eibc_scatter_plot(data):
    """Scatter each row's 2050 EIBC against its 2020 EIBC (both shown in %).

    Rows missing ``EIBC_2020`` or ``EIBC_2050`` are dropped. Rows whose 2050
    value is strictly larger than the 2020 value are drawn as red triangles;
    every other row (ties included) is drawn as a dark-blue circle. A dashed
    y=x line marks "no change".

    Args:
        data: DataFrame providing ``EIBC_2020`` and ``EIBC_2050`` columns.

    Returns:
        The matplotlib figure, or ``None`` when no row has both values.

    Side effects:
        Sets the global ``font.family`` rcParam and shows the figure.
    """
    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows that have a value for both years can be compared.
    paired = data.dropna(subset=['EIBC_2020', 'EIBC_2050'])
    if len(paired) == 0:
        print("No valid EIBC data found for comparison!")
        return None

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)

    # Values are multiplied by 100 so the axes read as percentages.
    pct_2020 = paired['EIBC_2020'] * 100
    pct_2050 = paired['EIBC_2050'] * 100

    # (row selector, fill colour, marker, legend label), drawn in this order.
    point_groups = (
        (pct_2050 > pct_2020, '#be1420', '^', 'EIBC Increased'),
        (pct_2050 <= pct_2020, '#012f48', 'o', 'EIBC Decreased'),
    )
    for selector, fill, shape, legend_label in point_groups:
        if not selector.any():
            continue  # skip empty groups so they do not add a legend entry
        ax.scatter(
            pct_2020[selector],
            pct_2050[selector],
            alpha=0.7,
            s=60,
            c=fill,
            marker=shape,
            edgecolors='black',
            linewidth=0.5,
            label=legend_label,
        )

    # Reference diagonal: points on it did not change between the two years.
    upper = max(pct_2020.max(), pct_2050.max())
    diagonal = [0, upper]
    ax.plot(diagonal, diagonal, 'k--', alpha=0.5, linewidth=1, label='y=x (No Change)')

    ax.set_xlabel('EIBC 2020 (%)', fontsize=16)
    ax.set_ylabel('EIBC 2050 (%)', fontsize=16)
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=16, edgecolor='black')
    ax.tick_params(axis='both', labelsize=14)

    # Both axes share the same numeric range, starting at 0 with 5% headroom
    # (the aspect ratio itself is left to the figure size).
    axis_top = upper * 1.05
    ax.set_xlim(0, axis_top)
    ax.set_ylim(0, axis_top)

    plt.tight_layout()
    plt.show()

    return fig

# Create change analysis with focused view
def create_eibc_change_analysis(data):
    """Plot a histogram of small EIBC changes (2050 - 2020) with a zoomed inset.

    The change ``EIBC_2050 - EIBC_2020`` is multiplied by 100 and reported as a
    percentage. The main histogram only covers a narrow window around zero,
    ``+/- min(0.3 * P90(|change|), 0.05)``. An inset in the upper-left corner
    repeats the positive part of that window with a clipped y-axis so that the
    short bars stay readable. Summary statistics are printed after the figure
    has been shown.

    Args:
        data: DataFrame providing ``EIBC_2020`` and ``EIBC_2050`` columns.

    Returns:
        The matplotlib figure, or ``None`` when no row has both values.

    Side effects:
        Sets the global ``font.family`` rcParam, shows the figure and prints
        statistics to stdout.
    """
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows that have a value for both years can be compared.
    data_valid = data.dropna(subset=['EIBC_2020', 'EIBC_2050'])
    if len(data_valid) == 0:
        print("No valid EIBC data found for comparison!")
        return None

    # Change between the two years, scaled by 100 for display as a percentage.
    eibc_change_pct = (data_valid['EIBC_2050'] - data_valid['EIBC_2020']) * 100
    max_positive_change_pct = eibc_change_pct.max()
    max_negative_change_pct = eibc_change_pct.min()

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)

    # Symmetric window around zero: 30% of the 90th percentile of |change|,
    # capped at 0.05, so the plot concentrates on the smallest changes.
    change_90th = np.percentile(np.abs(eibc_change_pct), 90)
    xlim_max = min(change_90th * 0.3, 0.05)
    xlim_min = -xlim_max
    in_window = (eibc_change_pct >= xlim_min) & (eibc_change_pct <= xlim_max)
    focused_changes = eibc_change_pct[in_window]

    # Main histogram of the changes inside the window.
    counts, _, _ = ax.hist(focused_changes, bins=80, alpha=1, color='#de6a69',
                           edgecolor='black', linewidth=0.3)
    ax.axvline(0, color='black', linestyle='--', alpha=0.7, linewidth=2)
    ax.set_xlabel('EIBC Change (2050 - 2020) (%)', fontsize=16)
    ax.set_ylabel('Frequency', fontsize=16)
    ax.tick_params(axis='both', labelsize=14)
    ax.grid(True, alpha=0.3)

    # A zero-width window (at least 90% of rows unchanged) would ask matplotlib
    # for identical limits; keep the automatic limits in that case.
    if xlim_max > 0:
        ax.set_xlim(xlim_min, xlim_max)

    # Show every bar in full, with 5% headroom above the tallest one.
    max_count = np.max(counts)
    if max_count > 0:
        ax.set_ylim(0, max_count * 1.05)

    # Info box with the extremes of the full (unwindowed) data set.
    info_text = f'Max Increase: {max_positive_change_pct:.4f}%\n'
    info_text += f'Max Decrease: {max_negative_change_pct:.4f}%'
    ax.text(0.80, 0.95, info_text, transform=ax.transAxes, fontsize=16,
            verticalalignment='top', horizontalalignment='left',
            bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))

    # Inset in the upper-left corner: positive changes inside the window only.
    axins = inset_axes(ax, width="35%", height="35%", loc='upper left', borderpad=4)
    positive_changes = focused_changes[focused_changes > 0]

    if len(positive_changes) > 0:
        counts_pos, _, _ = axins.hist(positive_changes, bins=30, alpha=1, color='#982b2d',
                                      edgecolor='black', linewidth=0.5)
        axins.set_xlabel('Positive EIBC Change (%)', fontsize=16)
        axins.set_ylabel('Frequency', fontsize=16)
        axins.grid(True, alpha=0.3)
        axins.tick_params(labelsize=9)

        # Clip the y-axis so a few very tall bars do not flatten the rest.
        axins.set_ylim(0, _inset_ylim_for_small_bars(counts_pos))
    else:
        # Nothing to draw: show a short message instead of an empty histogram.
        axins.text(0.5, 0.5, 'No positive\nchanges in\nfocused range',
                   transform=axins.transAxes, ha='center', va='center', fontsize=10)
        axins.set_xticks([])
        axins.set_yticks([])

    plt.tight_layout()
    plt.show()

    # Detailed statistics (all values in percent).
    print(f"\n=== EIBC Change Analysis ===")
    print(f"Total countries with valid data: {len(data_valid)}")
    print(f"Countries in focused range ({xlim_min:.4f}% to {xlim_max:.4f}%): {len(focused_changes)}")

    increased = eibc_change_pct > 0
    decreased = eibc_change_pct < 0
    print(f"\nOverall Change Statistics:")
    print(f"  Mean change: {eibc_change_pct.mean():.4f}%")
    print(f"  Median change: {eibc_change_pct.median():.4f}%")
    print(f"  Max increase: {max_positive_change_pct:.4f}%")
    print(f"  Max decrease: {max_negative_change_pct:.4f}%")
    print(f"  Countries with increased EIBC: {increased.sum()} ({increased.mean()*100:.1f}%)")
    print(f"  Countries with decreased EIBC: {decreased.sum()} ({decreased.mean()*100:.1f}%)")

    print(f"\nFocused Range Statistics:")
    print(f"  Mean change (focused): {focused_changes.mean():.4f}%")
    print(f"  Median change (focused): {focused_changes.median():.4f}%")
    # Histogram counts come back as floats; report the bar height as a whole number.
    print(f"  Tallest bar height: {int(max_count)} countries")
    print(f"  All bars displayed completely")

    if len(positive_changes) > 0:
        print(f"\nPositive Changes in Focused Range:")
        print(f"  Count: {len(positive_changes)}")
        print(f"  Mean: {positive_changes.mean():.4f}%")
        print(f"  Median: {positive_changes.median():.4f}%")
        print(f"  Max: {positive_changes.max():.4f}%")
        print(f"  Inset y-limit set to: {axins.get_ylim()[1]:.0f} (to focus on smaller bars)")

    return fig


def _inset_ylim_for_small_bars(bar_counts):
    """Return the upper y-limit for the inset histogram, clipping the tallest bars.

    Args:
        bar_counts: numpy array of histogram bar heights with at least one
            non-zero entry.

    Returns:
        With a single non-empty bar: ``min(80, 1.1 * tallest)``. Otherwise the
        non-empty bar at index ``int(0.8 * n)`` of the ascending heights, plus
        20%, clamped to [50, 120]; if that lands above 90% of the tallest bar
        the limit falls back to ``min(80, 0.7 * tallest)``.
    """
    tallest = np.max(bar_counts)
    nonzero_heights = np.sort(bar_counts[bar_counts > 0])

    if len(nonzero_heights) <= 1:
        return min(80, tallest * 1.1)

    # int(0.8 * n) is always a valid index for n >= 1, so no bounds check is needed.
    reference_height = nonzero_heights[int(len(nonzero_heights) * 0.8)]
    limit = max(50, min(120, reference_height * 1.2))
    if limit > tallest * 0.9:
        # Too close to the tallest bar to clip anything: use a lower limit.
        limit = min(80, tallest * 0.7)
    return limit

# Draw the 2020-vs-2050 scatter figure from the module-level frame `df`; the
# function returns None (and `fig_scatter` stays None) when no row has both values.
print("\n=== Creating EIBC Scatter Plot ===")
fig_scatter = create_eibc_scatter_plot(df)

# Draw the EIBC change (2050 - 2020) histogram from the same frame and keep
# the returned figure in `fig_change`.
print("\n=== Creating EIBC Change Analysis ===")
fig_change = create_eibc_change_analysis(df)

In [None]:
# Create EIBC difference visualization (2020 - 0)
def visualize_eibc_difference(data, vmax=None, vmin=None,
                              base_col='EIBC_0', target_col='EIBC_2020'):
    """Draw the per-row EIBC difference ``target_col - base_col`` on a world map.

    Every row with both values becomes a black-rimmed disc at (``lon``, ``lat``)
    on a Robinson-projection map. The disc colour encodes the signed difference
    on a blue-white-red scale and the disc size grows with the absolute
    difference (saturating at ``vmax``). Rows are drawn in ascending order of
    absolute difference, so the largest changes end up on top. Summary
    statistics are printed after the figure has been shown.

    NOTE: a later cell of this notebook defines a function with the same name
    (comparing EIBC_2020 with EIBC_2050); whichever cell ran last is the one
    that gets called.

    Args:
        data: DataFrame with ``lon``, ``lat``, ``base_col`` and ``target_col``.
        vmax: Upper colour limit. When omitted it mirrors ``vmin`` if that was
            given, otherwise it is the 99th percentile of the absolute
            difference.
        vmin: Lower colour limit. When omitted it is ``-vmax`` so that zero
            maps to the white centre of the colour scale.
        base_col: Column subtracted from ``target_col`` (default ``EIBC_0``).
        target_col: Column the baseline is subtracted from (default
            ``EIBC_2020``).

    Returns:
        The matplotlib figure, or ``None`` when no row has both values.

    Side effects:
        Sets the global ``font.family`` rcParam, shows the figure and prints
        statistics to stdout.
    """
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox

    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows that have a value in both columns can be compared.
    data_valid = data.dropna(subset=[base_col, target_col])
    if len(data_valid) == 0:
        print("No valid EIBC data found for difference calculation!")
        return None

    # Work on a copy so the caller's frame does not gain the helper column.
    data_valid = data_valid.copy()
    data_valid['EIBC_diff'] = data_valid[target_col] - data_valid[base_col]
    diff = data_valid['EIBC_diff']

    print(f"Processing {len(data_valid)} countries for EIBC difference")

    # Fill in only the limits the caller left out, keeping the range symmetric
    # around zero. The 99th percentile keeps outliers from stretching the scale.
    if vmax is None:
        vmax = np.percentile(np.abs(diff), 99) if vmin is None else abs(vmin)
    if vmin is None:
        vmin = -vmax

    # Robinson-projection world map with a light land mask.
    fig = plt.figure(figsize=(16, 8), dpi=300)
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())
    ax.set_facecolor("#FFFFFF")
    ax.add_feature(cfeature.LAND, color="#CECECE", alpha=0.4)
    ax.add_feature(cfeature.OCEAN, color="#FFFFFF", alpha=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.7)
    ax.add_feature(cfeature.BORDERS, linestyle=':', linewidth=0.5, alpha=0.3)

    # Diverging blue-white-red colour scale for the signed difference.
    diff_colors = ['#012f48', '#669aba', '#ffffff', '#be1420', '#7a0101']
    diff_cmap = mpl.colors.LinearSegmentedColormap.from_list('diff_palette', diff_colors)
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    # Marker size range; the size is linear in |difference| / vmax, capped at 1.
    min_size = 25
    max_size = 150

    # Smallest absolute differences first, so large changes are drawn on top.
    ordered = data_valid.sort_values(by='EIBC_diff', key=abs, ascending=True)
    plate_carree = ccrs.PlateCarree()

    # Rendering a marker costs a whole temporary figure, and the image depends
    # only on its colour, so render each distinct colour once and reuse it.
    marker_images = {}

    for lon, lat, diff_value in zip(ordered['lon'], ordered['lat'], ordered['EIBC_diff']):
        if pd.isna(diff_value):
            continue

        # A non-positive vmax cannot scale sizes; fall back to the minimum size
        # instead of dividing by zero.
        size_factor = min(abs(diff_value) / vmax, 1.0) if vmax > 0 else 0.0
        size = min_size + size_factor * (max_size - min_size)

        color = diff_cmap(norm(diff_value))
        color_key = tuple(color)
        if color_key not in marker_images:
            marker_images[color_key] = _render_bordered_marker(color)

        # Place the marker image at the projected map position.
        x, y = ax.projection.transform_point(lon, lat, src_crs=plate_carree)
        imagebox = OffsetImage(marker_images[color_key], zoom=np.sqrt(size) / 80)
        imagebox.image.axes = ax
        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False, pad=0, zorder=10))

    # Colorbar built from the norm/colormap pair directly, so no placeholder
    # point has to be plotted on the map.
    mappable = mpl.cm.ScalarMappable(norm=norm, cmap=diff_cmap)
    mappable.set_array([])  # needed by older matplotlib releases
    cbar = fig.colorbar(mappable, ax=ax, orientation='horizontal',
                        shrink=0.6, pad=0.05, aspect=50)
    # Label derived from the column suffixes, e.g. 'EIBC_2020' -> '2020'.
    target_tag = str(target_col).split('_', 1)[-1]
    base_tag = str(base_col).split('_', 1)[-1]
    cbar.set_label(f'EIBC Change ({target_tag} - {base_tag})', fontsize=14)

    # Whole-globe extent, no ticks, no frame.
    ax.set_global()
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Lay out the map figure explicitly rather than "the current figure".
    fig.tight_layout()
    plt.show()

    # Summary statistics of the difference column.
    print(f"\nEIBC Difference Statistics:")
    print(f"  Valid countries: {len(data_valid)}")
    print(f"  Mean change: {diff.mean():.4f}")
    print(f"  Median change: {diff.median():.4f}")
    print(f"  Std change: {diff.std():.4f}")
    print(f"  Min change: {diff.min():.4f}")
    print(f"  Max change: {diff.max():.4f}")

    increases = (diff > 0).sum()
    decreases = (diff < 0).sum()
    unchanged = (diff == 0).sum()

    print(f"  Countries with increased EIBC: {increases} ({increases/len(data_valid)*100:.1f}%)")
    print(f"  Countries with decreased EIBC: {decreases} ({decreases/len(data_valid)*100:.1f}%)")
    print(f"  Countries with unchanged EIBC: {unchanged} ({unchanged/len(data_valid)*100:.1f}%)")

    return fig


def _render_bordered_marker(color):
    """Rasterise a black-rimmed disc filled with ``color`` and return it as an RGBA array."""
    temp_fig = plt.figure(figsize=(1, 1), frameon=False, dpi=200)
    try:
        temp_fig.patch.set_alpha(0)

        temp_ax = temp_fig.add_subplot(111)
        temp_ax.set_aspect('equal')
        temp_ax.patch.set_alpha(0)

        # The slightly larger black disc shows as a rim around the coloured one.
        temp_ax.add_patch(plt.Circle((0.5, 0.5), 0.18, color='black', alpha=1))
        temp_ax.add_patch(plt.Circle((0.5, 0.5), 0.15, color=color, alpha=1))

        temp_ax.set_xlim(0, 1)
        temp_ax.set_ylim(0, 1)
        temp_ax.axis('off')

        temp_fig.tight_layout(pad=0)
        temp_fig.canvas.draw()
        # Copy the pixels out before the figure is closed.
        return np.array(temp_fig.canvas.renderer.buffer_rgba())
    finally:
        # Always release the temporary figure, even if drawing failed.
        plt.close(temp_fig)

# Create EIBC difference visualization from the module-level frame `df`, with
# the colour limits passed explicitly as vmin=-0.05 and vmax=0.05.
# NOTE(review): a later cell assigns `fig_diff` again, so this handle is
# overwritten once that cell runs.
print("=== Creating EIBC Difference Visualization ===")
fig_diff = visualize_eibc_difference(df,vmax=0.05,vmin=-0.05)

In [None]:
# Create EIBC difference visualization (2050 - 2020)
def visualize_eibc_difference(data, vmax=None, vmin=None,
                              base_col='EIBC_2020', target_col='EIBC_2050'):
    """Draw the per-row EIBC difference ``target_col - base_col`` on a world map.

    Every row with both values becomes a black-rimmed disc at (``lon``, ``lat``)
    on a Robinson-projection map. The disc colour encodes the signed difference
    on a blue-white-red scale and the disc size grows with the absolute
    difference (saturating at ``vmax``). Rows are drawn in ascending order of
    absolute difference, so the largest changes end up on top. Summary
    statistics are printed after the figure has been shown.

    NOTE: an earlier cell of this notebook defines a function with the same
    name (comparing EIBC_0 with EIBC_2020); running this cell replaces it.

    Args:
        data: DataFrame with ``lon``, ``lat``, ``base_col`` and ``target_col``.
        vmax: Upper colour limit. When omitted it mirrors ``vmin`` if that was
            given, otherwise it is the 99th percentile of the absolute
            difference.
        vmin: Lower colour limit. When omitted it is ``-vmax`` so that zero
            maps to the white centre of the colour scale.
        base_col: Column subtracted from ``target_col`` (default ``EIBC_2020``).
        target_col: Column the baseline is subtracted from (default
            ``EIBC_2050``).

    Returns:
        The matplotlib figure, or ``None`` when no row has both values.

    Side effects:
        Sets the global ``font.family`` rcParam, shows the figure and prints
        statistics to stdout.
    """
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox

    plt.rcParams['font.family'] = 'Times New Roman'

    # Only rows that have a value in both columns can be compared.
    valid_rows = data.dropna(subset=[base_col, target_col])
    if len(valid_rows) == 0:
        print("No valid EIBC data found for difference calculation!")
        return None

    # Work on a copy so the caller's frame does not gain the helper column.
    valid_rows = valid_rows.copy()
    valid_rows['EIBC_diff'] = valid_rows[target_col] - valid_rows[base_col]
    eibc_diff = valid_rows['EIBC_diff']
    n_valid = len(valid_rows)

    print(f"Processing {n_valid} countries for EIBC difference")

    # Fill in only the limits the caller left out, keeping the range symmetric
    # around zero. The 99th percentile keeps outliers from stretching the scale.
    if vmax is None:
        vmax = np.percentile(np.abs(eibc_diff), 99) if vmin is None else abs(vmin)
    if vmin is None:
        vmin = -vmax

    # Robinson-projection world map with a light land mask.
    fig = plt.figure(figsize=(16, 8), dpi=300)
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())
    ax.set_facecolor("#FFFFFF")
    ax.add_feature(cfeature.LAND, color="#CECECE", alpha=0.4)
    ax.add_feature(cfeature.OCEAN, color="#FFFFFF", alpha=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.7)
    ax.add_feature(cfeature.BORDERS, linestyle=':', linewidth=0.5, alpha=0.3)

    # Diverging blue-white-red colour scale for the signed difference.
    diff_colors = ['#012f48', '#669aba', '#ffffff', '#be1420', '#7a0101']
    diff_cmap = mpl.colors.LinearSegmentedColormap.from_list('diff_palette', diff_colors)
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    # Marker size range; the size is linear in |difference| / vmax, capped at 1.
    min_size = 25
    max_size = 150

    # Smallest absolute differences first, so large changes are drawn on top.
    draw_order = valid_rows.sort_values(by='EIBC_diff', key=abs, ascending=True)
    source_crs = ccrs.PlateCarree()

    # Rendering a marker costs a whole temporary figure, and the image depends
    # only on its colour, so render each distinct colour once and reuse it.
    rendered_markers = {}

    for lon, lat, diff_value in zip(draw_order['lon'], draw_order['lat'], draw_order['EIBC_diff']):
        if pd.isna(diff_value):
            continue

        # A non-positive vmax cannot scale sizes; fall back to the minimum size
        # instead of dividing by zero.
        if vmax > 0:
            size_factor = min(abs(diff_value) / vmax, 1.0)
        else:
            size_factor = 0.0
        size = min_size + size_factor * (max_size - min_size)

        color = diff_cmap(norm(diff_value))
        cache_key = tuple(color)
        marker_img = rendered_markers.get(cache_key)
        if marker_img is None:
            marker_img = _render_bordered_marker(color)
            rendered_markers[cache_key] = marker_img

        # Place the marker image at the projected map position.
        x, y = ax.projection.transform_point(lon, lat, src_crs=source_crs)
        imagebox = OffsetImage(marker_img, zoom=np.sqrt(size) / 80)
        imagebox.image.axes = ax
        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False, pad=0, zorder=10))

    # Colorbar built from the norm/colormap pair directly, so no placeholder
    # point has to be plotted on the map.
    colorbar_source = mpl.cm.ScalarMappable(norm=norm, cmap=diff_cmap)
    colorbar_source.set_array([])  # needed by older matplotlib releases
    cbar = fig.colorbar(colorbar_source, ax=ax, orientation='horizontal',
                        shrink=0.6, pad=0.05, aspect=50)
    # Label derived from the column suffixes, e.g. 'EIBC_2050' -> '2050'.
    target_tag = str(target_col).split('_', 1)[-1]
    base_tag = str(base_col).split('_', 1)[-1]
    cbar.set_label(f'EIBC Change ({target_tag} - {base_tag})', fontsize=14)

    # Whole-globe extent, no ticks, no frame.
    ax.set_global()
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Lay out the map figure explicitly rather than "the current figure".
    fig.tight_layout()
    plt.show()

    # Summary statistics of the difference column.
    print(f"\nEIBC Difference Statistics:")
    print(f"  Valid countries: {n_valid}")
    print(f"  Mean change: {eibc_diff.mean():.4f}")
    print(f"  Median change: {eibc_diff.median():.4f}")
    print(f"  Std change: {eibc_diff.std():.4f}")
    print(f"  Min change: {eibc_diff.min():.4f}")
    print(f"  Max change: {eibc_diff.max():.4f}")

    increases = (eibc_diff > 0).sum()
    decreases = (eibc_diff < 0).sum()
    unchanged = (eibc_diff == 0).sum()

    print(f"  Countries with increased EIBC: {increases} ({increases/n_valid*100:.1f}%)")
    print(f"  Countries with decreased EIBC: {decreases} ({decreases/n_valid*100:.1f}%)")
    print(f"  Countries with unchanged EIBC: {unchanged} ({unchanged/n_valid*100:.1f}%)")

    return fig


def _render_bordered_marker(color):
    """Rasterise a black-rimmed disc filled with ``color`` and return it as an RGBA array."""
    temp_fig = plt.figure(figsize=(1, 1), frameon=False, dpi=200)
    try:
        temp_fig.patch.set_alpha(0)

        temp_ax = temp_fig.add_subplot(111)
        temp_ax.set_aspect('equal')
        temp_ax.patch.set_alpha(0)

        # The slightly larger black disc shows as a rim around the coloured one.
        temp_ax.add_patch(plt.Circle((0.5, 0.5), 0.18, color='black', alpha=1))
        temp_ax.add_patch(plt.Circle((0.5, 0.5), 0.15, color=color, alpha=1))

        temp_ax.set_xlim(0, 1)
        temp_ax.set_ylim(0, 1)
        temp_ax.axis('off')

        temp_fig.tight_layout(pad=0)
        temp_fig.canvas.draw()
        # Copy the pixels out before the figure is closed.
        return np.array(temp_fig.canvas.renderer.buffer_rgba())
    finally:
        # Always release the temporary figure, even if drawing failed.
        plt.close(temp_fig)

# Create EIBC difference visualization from the module-level frame `df`, with
# the colour limits passed explicitly as vmin=-0.05 and vmax=0.05.
# NOTE(review): an earlier cell also assigns `fig_diff`; this assignment
# replaces that handle.
print("=== Creating EIBC Difference Visualization ===")
fig_diff = visualize_eibc_difference(df,vmax=0.05,vmin=-0.05)