get data

In [None]:
import cdsapi
import logging
from pathlib import Path
import time
import xarray as xr
import numpy as np

class ERA5Retriever:
    """Download monthly ERA5 files through the CDS API and sanity-check them.

    Each month is written to ``<output_dir>/era5_<year>_<month>.nc``.
    Progress and problems are logged to ``era5_retrieval.log`` inside the
    output directory and to the console.
    """

    def __init__(self, output_dir='era5_data'):
        """Initialise the ERA5 retriever.

        Parameters
        ----------
        output_dir : str or Path
            Directory for the downloaded files and the log file; created
            (including parents) when missing.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.client = cdsapi.Client()
        self.setup_logging()

        # Central-European study area as [North, West, South, East];
        # covers Germany, eastern France, Poland and neighbouring regions.
        self.study_area = [55, 5, 45, 20]

        # Requested variables: CDS request name -> short description.
        # NOTE(review): the two "*_at_<level>hpa" names are sent to the
        # 'reanalysis-era5-single-levels' dataset in retrieve_data(); confirm
        # the catalogue accepts them - pressure-level fields may have to be
        # requested from the pressure-levels dataset instead.
        self.variables = {
            # Basic forecast variables
            "2m_temperature": "基本温度场",
            "total_precipitation": "降水",
            "mean_sea_level_pressure": "海平面气压",

            # Circulation variables
            "10m_u_component_of_wind": "纬向风",
            "10m_v_component_of_wind": "经向风",
            "geopotential_at_500hpa": "500hPa位势高度",

            # Thermodynamic and moisture variables
            "relative_humidity_at_850hpa": "850hPa相对湿度",
            "total_column_water_vapour": "整层水汽含量",

            # Boundary-layer variables
            "boundary_layer_height": "边界层高度",
            "surface_pressure": "地面气压",
        }

    def setup_logging(self):
        """Attach a file handler and a console handler to this class's logger.

        The handlers are added to the named logger directly rather than via
        ``logging.basicConfig``: ``basicConfig`` does nothing once the root
        logger already has handlers, so a second class configured in the same
        interpreter would silently never get its log file. Calling this method
        repeatedly does not duplicate handlers.
        """
        logger = logging.getLogger('ERA5Retriever')
        logger.setLevel(logging.INFO)
        # Do not forward to the root logger, otherwise every record is printed
        # twice whenever the root logger has been configured as well.
        logger.propagate = False
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        # delay=True: the file is only opened on the first record, so building
        # a candidate handler just to compare paths has no side effect.
        file_handler = logging.FileHandler(
            self.output_dir / 'era5_retrieval.log', delay=True
        )
        already_attached = any(
            isinstance(h, logging.FileHandler)
            and h.baseFilename == file_handler.baseFilename
            for h in logger.handlers
        )
        if already_attached:
            file_handler.close()
        else:
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        # FileHandler subclasses StreamHandler, hence the exact type test.
        if not any(type(h) is logging.StreamHandler for h in logger.handlers):
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)

        self.logger = logger

    def retrieve_data(self, year, month, output_file=None):
        """Download one month of ERA5 data unless the target file already exists.

        Parameters
        ----------
        year : int
            Year to request.
        month : int
            Month to request (formatted as two digits in the request).
        output_file : str or Path, optional
            Target file. Defaults to ``era5_<year>_<month>.nc`` inside the
            output directory.

        Returns
        -------
        Path or None
            Path of the existing or freshly downloaded file, or ``None`` when
            the download or the validation raised (the error is logged).
        """
        if output_file is None:
            output_file = self.output_dir / f'era5_{year}_{month:02d}.nc'
        else:
            # Accept plain strings as documented; .exists() needs a Path.
            output_file = Path(output_file)

        if output_file.exists():
            self.logger.info(f"File {output_file} already exists, skipping...")
            return output_file

        try:
            # Build the request
            request = {
                "format": "netcdf",
                "product_type": "reanalysis",
                "variable": list(self.variables.keys()),
                "year": str(year),
                "month": f"{month:02d}",
                "day": [f"{day:02d}" for day in range(1, 32)],
                "time": [f"{hour:02d}:00" for hour in range(0, 24, 6)],  # every 6 hours
                "area": self.study_area,
                "pressure_level": ["500", "850"],  # pressure levels of interest
            }

            # Fetch the data
            self.logger.info(f"Retrieving data for {year}-{month:02d}")
            self.client.retrieve(
                'reanalysis-era5-single-levels',
                request,
                str(output_file)
            )

            # Check what was downloaded
            self._validate_data(output_file)

            self.logger.info(f"Successfully downloaded and validated {output_file}")
            time.sleep(5)  # pause between requests

            return output_file

        except Exception as e:
            self.logger.error(f"Error retrieving data for {year}-{month:02d}: {str(e)}")
            return None

    def _validate_data(self, file_path):
        """Log warnings about missing variables and missing values in a file.

        The dataset is opened in a ``with`` block so it is closed even when a
        check raises. Any exception is logged and re-raised.
        """
        try:
            with xr.open_dataset(file_path) as ds:
                # Requested names that are not present as data variables.
                # NOTE(review): the analysis code in this file reads variables
                # by short names such as 't2m'; if the files use short names,
                # this comparison reports every requested variable as missing.
                missing_vars = set(self.variables.keys()) - set(ds.data_vars)
                if missing_vars:
                    self.logger.warning(f"Missing variables in {file_path}: {missing_vars}")

                # Share of missing values per variable
                for var in ds.data_vars:
                    missing_ratio = float(ds[var].isnull().mean())
                    if missing_ratio > 0:
                        self.logger.warning(f"Variable {var} has {missing_ratio*100:.2f}% missing values")

        except Exception as e:
            self.logger.error(f"Error validating {file_path}: {str(e)}")
            raise

def main():
    """Download the winter months (Nov-Mar) of ten years of ERA5 data.

    The list of (year, month) requests is built first so that the progress
    total matches what is actually requested, and a month only counts as
    completed when ``retrieve_data`` returned a file (it returns ``None`` on
    failure instead of raising).
    """
    retriever = ERA5Retriever()

    # Time range: 2012-2021
    years = range(2012, 2022)
    # Winter months (Nov-Dec of the previous year and Jan-Mar of the year)
    months = [11, 12, 1, 2, 3]

    requests = []
    for year in years:
        for month in months:
            # Handle the year boundary
            if month in (1, 2, 3):
                if year == years[-1]:
                    continue  # Jan-Mar of the last year are skipped
                requests.append((year, month))
            else:
                # Nov-Dec belong to the winter that started the year before
                requests.append((year - 1, month))

    total_files = len(requests)
    completed_files = 0

    for current_year, month in requests:
        try:
            result = retriever.retrieve_data(current_year, month)
        except Exception as e:
            print(f"Failed to retrieve data for {current_year}-{month}: {e}")
            continue

        if result is None:
            # retrieve_data already logged the underlying error.
            print(f"Failed to retrieve data for {current_year}-{month}: see log for details")
            continue

        completed_files += 1
        print(f"Progress: {completed_files}/{total_files} files completed")

    print("Data retrieval completed!")

if __name__ == "__main__":
    main()

describe data

In [None]:
class ClimateAnalyzer:
    """Compute descriptive statistics of the downloaded ERA5 files and write a text report."""

    def __init__(self, data_dir='era5_data', output_dir='analysis_results'):
        """Initialise the climate analyser.

        Parameters
        ----------
        data_dir : str or Path
            Directory searched for ``era5_*.nc`` files.
        output_dir : str or Path
            Directory for the log file and the report; created (including
            parents) when missing.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.setup_logging()

    def setup_logging(self):
        """Attach a file handler and a console handler to this class's logger.

        ``logging.basicConfig`` is avoided on purpose: it is a no-op once the
        root logger has handlers, so this class would never get its own log
        file when another class configured logging first. Repeated calls do
        not duplicate handlers.
        """
        logger = logging.getLogger('ClimateAnalyzer')
        logger.setLevel(logging.INFO)
        # Keep records away from the root logger to avoid duplicated output.
        logger.propagate = False
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        # delay=True: the file is opened lazily, so a candidate handler that
        # turns out to be a duplicate leaves nothing behind.
        file_handler = logging.FileHandler(
            self.output_dir / 'climate_analysis.log', delay=True
        )
        already_attached = any(
            isinstance(h, logging.FileHandler)
            and h.baseFilename == file_handler.baseFilename
            for h in logger.handlers
        )
        if already_attached:
            file_handler.close()
        else:
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        # FileHandler subclasses StreamHandler, hence the exact type test.
        if not any(type(h) is logging.StreamHandler for h in logger.handlers):
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)

        self.logger = logger

    def load_data(self):
        """Open every ``era5_*.nc`` file in ``data_dir`` as one dataset.

        Returns
        -------
        xarray.Dataset

        Raises
        ------
        FileNotFoundError
            When no file matches the pattern.
        Exception
            Anything raised while opening the files is logged and re-raised.
        """
        try:
            self.logger.info("Loading ERA5 data...")
            data_files = sorted(self.data_dir.glob('era5_*.nc'))
            if not data_files:
                raise FileNotFoundError(
                    f"No files matching 'era5_*.nc' found in {self.data_dir}"
                )
            ds = xr.open_mfdataset(data_files, combine='by_coords')

            # Record basic information
            self.logger.info(f"Data loaded: {len(data_files)} files")
            self.logger.info(f"Time range: {ds.valid_time.min().values} to {ds.valid_time.max().values}")
            return ds
        except Exception as e:
            self.logger.error(f"Error loading data: {str(e)}")
            raise

    def calculate_basic_statistics(self, ds):
        """Compute temporal, per-variable, regional and seasonal statistics.

        Returns a dict with the keys ``'temporal'``, ``'variables'``,
        ``'regional'`` and ``'seasonal'``. Errors are logged and re-raised.
        """
        try:
            self.logger.info("Calculating basic statistics...")
            stats = {}

            # 1. Time coverage. Timestamps are floored to the day before
            #    counting so that several time steps on the same date count as
            #    one day.
            time_coord = ds.valid_time
            stats['temporal'] = {
                'total_days': len(np.unique(time_coord.dt.floor('D'))),
                'years_covered': len(np.unique(time_coord.dt.year)),
                'seasons_covered': len(np.unique(time_coord.dt.season))
            }

            # 2. Per-variable statistics
            stats['variables'] = {}
            for var in ds.data_vars:
                field = ds[var]
                stats['variables'][var] = {
                    'mean': float(field.mean()),
                    'std': float(field.std()),
                    'min': float(field.min()),
                    'max': float(field.max()),
                    'missing_ratio': float(field.isnull().mean()),
                    'spatial_variability': float(field.std(dim=['latitude', 'longitude']).mean())
                }

            # 3. Regional averages
            stats['regional'] = self.calculate_regional_statistics(ds)

            # 4. Seasonal characteristics
            stats['seasonal'] = self.calculate_seasonal_statistics(ds)

            return stats

        except Exception as e:
            self.logger.error(f"Error calculating statistics: {str(e)}")
            raise

    @staticmethod
    def _bounds_slice(coord_values, low, high):
        """Return a label slice from ``low`` to ``high`` oriented like the coordinate.

        Label-based slicing follows the coordinate's order, so a descending
        coordinate needs ``slice(high, low)``; otherwise the selection is empty.
        """
        values = np.asarray(coord_values)
        descending = values.size > 1 and values[0] > values[-1]
        return slice(high, low) if descending else slice(low, high)

    def calculate_regional_statistics(self, ds):
        """Compute mean, std and 5th/95th percentiles per variable for three latitude bands.

        Regions that contain no grid points in ``ds`` are skipped with a
        warning. Returns ``{region: {variable: {...}}}``.
        """
        # (low, high) bounds in degrees.
        # NOTE(review): the 'south' band (40-45) lies almost entirely outside
        # the [55, 5, 45, 20] area requested by ERA5Retriever - confirm the
        # intended band limits.
        regions = {
            'north': {'lat': (50, 55), 'lon': (5, 20)},
            'central': {'lat': (45, 50), 'lon': (5, 20)},
            'south': {'lat': (40, 45), 'lon': (5, 20)}
        }

        lat_values = ds['latitude'].values
        lon_values = ds['longitude'].values

        regional_stats = {}
        for region_name, bounds in regions.items():
            # Select on the dataset's actual dimension names.
            region_ds = ds.sel(
                latitude=self._bounds_slice(lat_values, *bounds['lat']),
                longitude=self._bounds_slice(lon_values, *bounds['lon']),
            )
            if region_ds.sizes['latitude'] == 0 or region_ds.sizes['longitude'] == 0:
                self.logger.warning(f"Region {region_name} contains no grid points, skipping")
                continue

            stats = {}
            for var in ds.data_vars:
                # Load the regional subset once: quantile() cannot reduce over
                # a dimension that is split into several dask chunks, which is
                # the normal case for a multi-file dataset.
                values = region_ds[var].compute()
                stats[var] = {
                    'mean': float(values.mean()),
                    'std': float(values.std()),
                    'extremes': {
                        'p95': float(values.quantile(0.95)),
                        'p05': float(values.quantile(0.05))
                    }
                }
            regional_stats[region_name] = stats

        return regional_stats

    def calculate_seasonal_statistics(self, ds):
        """Compute mean, std and mean spatial std per variable for each season label."""
        seasonal_stats = {}
        # Group the time steps by season label
        for season, season_ds in ds.groupby('valid_time.season'):
            per_variable = {}
            for var in ds.data_vars:
                field = season_ds[var]
                per_variable[var] = {
                    'mean': float(field.mean()),
                    'std': float(field.std()),
                    'variability': float(field.std(dim=['latitude', 'longitude']).mean())
                }
            seasonal_stats[season] = per_variable

        return seasonal_stats

    def generate_summary_report(self, stats):
        """Write the statistics dict to ``statistical_analysis_report.txt``.

        Parameters
        ----------
        stats : dict
            Structure as returned by ``calculate_basic_statistics``.

        Returns
        -------
        Path
            Location of the written report.
        """
        report_path = self.output_dir / 'statistical_analysis_report.txt'

        # Explicit encoding so the result does not depend on the platform default.
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("ERA5 Data Statistical Analysis Report\n")
            f.write("===================================\n\n")

            # 1. Data overview
            f.write("1. Data Overview\n")
            f.write("--------------\n")
            f.write(f"Total days analyzed: {stats['temporal']['total_days']}\n")
            f.write(f"Years covered: {stats['temporal']['years_covered']}\n")
            f.write(f"Seasons covered: {stats['temporal']['seasons_covered']}\n\n")

            # 2. Variable statistics
            f.write("2. Variable Statistics\n")
            f.write("-------------------\n")
            for var, var_stats in stats['variables'].items():
                f.write(f"\n{var}:\n")
                for stat, value in var_stats.items():
                    f.write(f"  {stat}: {value:.4f}\n")

            # 3. Regional characteristics
            f.write("\n3. Regional Characteristics\n")
            f.write("-------------------------\n")
            for region, region_stats in stats['regional'].items():
                f.write(f"\n{region.upper()}:\n")
                for var, var_stats in region_stats.items():
                    f.write(f"  {var}:\n")
                    for stat, value in var_stats.items():
                        if isinstance(value, dict):
                            # Nested group such as 'extremes'
                            f.write(f"    {stat}:\n")
                            for sub_stat, sub_value in value.items():
                                f.write(f"      {sub_stat}: {sub_value:.4f}\n")
                        else:
                            f.write(f"    {stat}: {value:.4f}\n")

            # 4. Seasonal patterns
            f.write("\n4. Seasonal Patterns\n")
            f.write("------------------\n")
            for season, season_stats in stats['seasonal'].items():
                f.write(f"\n{season}:\n")
                for var, var_stats in season_stats.items():
                    f.write(f"  {var}:\n")
                    for stat, value in var_stats.items():
                        f.write(f"    {stat}: {value:.4f}\n")

        self.logger.info(f"Statistical analysis report generated: {report_path}")
        return report_path

def main():
    """Run the statistical analysis end to end.

    Returns ``(dataset, statistics)`` on success and ``(None, None)`` when any
    step raises; the error message is printed.
    """
    try:
        analyzer = ClimateAnalyzer()
        dataset = analyzer.load_data()
        statistics = analyzer.calculate_basic_statistics(dataset)
        saved_to = analyzer.generate_summary_report(statistics)
        print(f"Analysis completed. Report saved to: {saved_to}")
    except Exception as err:
        print(f"Error in analysis: {str(err)}")
        return None, None

    return dataset, statistics

if __name__ == "__main__":
    ds, stats = main()

model

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cartopy.crs as ccrs
from matplotlib.gridspec import GridSpec
from pathlib import Path

class ClimateVisualizer:
    """Produce overview figures (mean field, time series, regional and extreme-value plots) for 2 m temperature."""

    def __init__(self, data_dir='era5_data', output_dir='visualization_results'):
        """Initialise the visualiser and the global plotting style.

        Parameters
        ----------
        data_dir : str or Path
            Stored on the instance; not read by the methods of this class.
        output_dir : str or Path
            Directory for the saved figures; created (including parents)
            when missing.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Plot style
        plt.style.use('default')
        sns.set_theme(style="whitegrid")
        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams['font.size'] = 12

    @staticmethod
    def _bounds_slice(coord_values, low, high):
        """Return a label slice from ``low`` to ``high`` oriented like the coordinate.

        Label-based slicing follows the coordinate's order, so a descending
        coordinate needs ``slice(high, low)``; otherwise the selection is empty.
        """
        values = np.asarray(coord_values)
        descending = values.size > 1 and values[0] > values[-1]
        return slice(high, low) if descending else slice(low, high)

    def create_spatial_temporal_analysis(self, ds):
        """Build a figure with the mean field (top), time series and seasonal cycle (bottom)."""
        fig = plt.figure(figsize=(15, 12))
        gs = GridSpec(2, 2, figure=fig)

        # 1. Time-mean field on a map
        ax1 = fig.add_subplot(gs[0, :], projection=ccrs.PlateCarree())
        self._plot_mean_field(ds, ax1)

        # 2. Time series
        ax2 = fig.add_subplot(gs[1, 0])
        self._plot_time_series(ds, ax2)

        # 3. Seasonal cycle
        ax3 = fig.add_subplot(gs[1, 1])
        self._plot_seasonal_cycle(ds, ax3)

        plt.tight_layout()
        return fig

    def _plot_mean_field(self, ds, ax):
        """Draw the time-mean 't2m' field on a map axis."""
        mean_temp = ds['t2m'].mean(dim='valid_time')
        mean_temp.plot(
            ax=ax,
            transform=ccrs.PlateCarree(),
            cmap='RdBu_r',
            robust=True
        )
        ax.coastlines()
        ax.gridlines()
        ax.set_title('Mean Temperature Distribution')
        return ax

    def _plot_time_series(self, ds, ax):
        """Draw the area-mean 't2m' time series."""
        time_series = ds['t2m'].mean(dim=['latitude', 'longitude'])
        time_series.plot(ax=ax)
        ax.set_title('Temperature Time Series')
        ax.set_xlabel('Time')
        ax.set_ylabel('Temperature (K)')
        return ax

    def _plot_seasonal_cycle(self, ds, ax):
        """Draw the area-mean 't2m' averaged per calendar month."""
        monthly_means = ds['t2m'].groupby('valid_time.month').mean()
        monthly_means.mean(dim=['latitude', 'longitude']).plot(ax=ax, marker='o')
        ax.set_title('Seasonal Cycle')
        ax.set_xlabel('Month')
        ax.set_ylabel('Temperature (K)')
        return ax

    def create_climate_patterns_plot(self, ds):
        """Build a three-row figure: temperature vs pressure, regional comparison, seasonal means."""
        fig = plt.figure(figsize=(15, 15))
        gs = GridSpec(3, 1, figure=fig)

        # 1. Temperature-pressure relationship
        ax1 = fig.add_subplot(gs[0])
        self._plot_temp_pressure_relationship(ds, ax1)

        # 2. Regional comparison
        ax2 = fig.add_subplot(gs[1])
        self._plot_regional_comparison(ds, ax2)

        # 3. Seasonal means
        ax3 = fig.add_subplot(gs[2])
        self._plot_seasonal_trends(ds, ax3)

        plt.tight_layout()
        return fig

    def _plot_temp_pressure_relationship(self, ds, ax):
        """Scatter area-mean 't2m' against area-mean 'msl' expressed in hPa."""
        temp = ds['t2m'].mean(dim=['latitude', 'longitude'])
        msl = ds['msl']
        pressure = msl.mean(dim=['latitude', 'longitude'])
        # The axis is labelled in hPa; ERA5 stores mean sea level pressure in
        # Pa, so convert unless the file declares a different unit.
        if msl.attrs.get('units', 'Pa') == 'Pa':
            pressure = pressure / 100.0
        ax.scatter(temp, pressure, alpha=0.5)
        ax.set_title('Temperature-Pressure Relationship')
        ax.set_xlabel('Temperature (K)')
        ax.set_ylabel('Mean Sea Level Pressure (hPa)')
        return ax

    def _plot_regional_comparison(self, ds, ax):
        """Draw the area-mean 't2m' series of three latitude bands on one axis.

        Bands without any grid point in ``ds`` are left out.
        """
        # (low, high) bounds in degrees.
        # NOTE(review): the 'South' band (40-45) lies almost entirely outside
        # the [55, 5, 45, 20] area requested by ERA5Retriever - confirm the
        # intended band limits.
        regions = {
            'North': {'lat': (50, 55), 'lon': (5, 20)},
            'Central': {'lat': (45, 50), 'lon': (5, 20)},
            'South': {'lat': (40, 45), 'lon': (5, 20)}
        }

        lat_values = ds['latitude'].values
        lon_values = ds['longitude'].values

        for region_name, bounds in regions.items():
            # Select on the dataset's actual dimension names.
            region = ds['t2m'].sel(
                latitude=self._bounds_slice(lat_values, *bounds['lat']),
                longitude=self._bounds_slice(lon_values, *bounds['lon']),
            )
            if region.size == 0:
                continue
            temp = region.mean(dim=['latitude', 'longitude'])
            temp.plot(ax=ax, label=region_name)

        ax.set_title('Regional Temperature Comparison')
        ax.legend()
        return ax

    def _plot_seasonal_trends(self, ds, ax):
        """Draw the area-mean 't2m' averaged per season label."""
        seasonal = ds['t2m'].groupby('valid_time.season').mean()
        seasonal.mean(dim=['latitude', 'longitude']).plot(ax=ax)
        ax.set_title('Seasonal Temperature Trends')
        return ax

    def create_extreme_events_analysis(self, ds):
        """Build a figure with the temperature histogram, the 95th-percentile map and the exceedance series."""
        fig = plt.figure(figsize=(15, 10))
        gs = GridSpec(2, 2, figure=fig)

        # 1. Temperature distribution
        ax1 = fig.add_subplot(gs[0, 0])
        self._plot_temperature_distribution(ds, ax1)

        # 2. Spatial distribution of the extreme threshold
        ax2 = fig.add_subplot(gs[0, 1], projection=ccrs.PlateCarree())
        self._plot_extreme_events_spatial(ds, ax2)

        # 3. Exceedances over time
        ax3 = fig.add_subplot(gs[1, :])
        self._plot_extreme_events_temporal(ds, ax3)

        plt.tight_layout()
        return fig

    def _plot_temperature_distribution(self, ds, ax):
        """Draw a 50-bin histogram of all 't2m' values."""
        temp = ds['t2m'].values.flatten()
        sns.histplot(temp, bins=50, ax=ax)
        ax.set_title('Temperature Distribution')
        ax.set_xlabel('Temperature (K)')
        return ax

    def _plot_extreme_events_spatial(self, ds, ax):
        """Draw the per-grid-point 95th percentile of 't2m' on a map axis."""
        # Load first: quantile() cannot reduce over a dimension that is split
        # into several dask chunks, as is usual for a multi-file dataset.
        extreme_temp = ds['t2m'].compute().quantile(0.95, dim='valid_time')
        extreme_temp.plot(
            ax=ax,
            transform=ccrs.PlateCarree(),
            cmap='RdBu_r',
            robust=True
        )
        ax.coastlines()
        ax.set_title('95th Percentile Temperature')
        return ax

    def _plot_extreme_events_temporal(self, ds, ax):
        """Draw a 0/1 series marking time steps whose area-mean 't2m' exceeds its 95th percentile."""
        # compute() for the same chunking reason as in the spatial plot.
        temp = ds['t2m'].mean(dim=['latitude', 'longitude']).compute()
        q95 = temp.quantile(0.95)
        extreme_days = (temp > q95).astype(int)
        extreme_days.plot(ax=ax)
        ax.set_title('Extreme Temperature Days')
        return ax

    def generate_all_plots(self, ds):
        """Create the three figures and save them as PNG files in ``output_dir``.

        Any exception stops the remaining figures and is printed, not raised.
        A figure that was built is closed even if saving it fails.
        """
        plot_jobs = (
            (self.create_spatial_temporal_analysis, 'spatial_temporal_analysis.png'),
            (self.create_climate_patterns_plot, 'climate_patterns.png'),
            (self.create_extreme_events_analysis, 'extreme_events.png'),
        )

        try:
            for build_figure, filename in plot_jobs:
                fig = build_figure(ds)
                try:
                    fig.savefig(
                        self.output_dir / filename,
                        dpi=300,
                        bbox_inches='tight'
                    )
                finally:
                    plt.close(fig)

            print(f"\nAll figures have been generated and saved in: {self.output_dir}")

        except Exception as e:
            print(f"Error generating plots: {str(e)}")

def main():
    """Open all ERA5 files and render every figure.

    Returns the opened dataset, or ``None`` when opening the data or creating
    the visualiser raises (the message is printed).
    """
    try:
        # Load the data
        file_pattern = str(Path('era5_data') / 'era5_*.nc')
        dataset = xr.open_mfdataset(file_pattern, combine='by_coords')

        # Build the visualiser and produce all figures
        ClimateVisualizer().generate_all_plots(dataset)
    except Exception as err:
        print(f"Error in main execution: {str(err)}")
        return None

    return dataset

if __name__ == "__main__":
    ds = main()

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from eofs.xarray import Eof

class ClimateDiagnostics:
    """Statistics, circulation metrics, EOF modes and extreme-event counts for the ERA5 dataset."""

    # Mean Earth radius in metres, used to turn angular derivatives into
    # derivatives per metre.
    EARTH_RADIUS_M = 6.371e6

    def __init__(self, data_dir='era5_data', output_dir='analysis_results'):
        """Initialise the diagnostics object.

        Parameters
        ----------
        data_dir : str or Path
            Directory holding the ``era5_*.nc`` files.
        output_dir : str or Path
            Directory for the log file and the report; created (including
            parents) when missing.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.setup_logging()

    def setup_logging(self):
        """Attach a file handler and a console handler to this class's logger.

        ``logging.basicConfig`` is avoided on purpose: it is a no-op once the
        root logger has handlers, so this class would never get its own log
        file when another class configured logging first. Repeated calls do
        not duplicate handlers.
        """
        logger = logging.getLogger('ClimateDiagnostics')
        logger.setLevel(logging.INFO)
        # Keep records away from the root logger to avoid duplicated output.
        logger.propagate = False
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        # delay=True: the file is opened lazily, so a candidate handler that
        # turns out to be a duplicate leaves nothing behind.
        file_handler = logging.FileHandler(
            self.output_dir / 'climate_diagnostics.log', delay=True
        )
        already_attached = any(
            isinstance(h, logging.FileHandler)
            and h.baseFilename == file_handler.baseFilename
            for h in logger.handlers
        )
        if already_attached:
            file_handler.close()
        else:
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        # FileHandler subclasses StreamHandler, hence the exact type test.
        if not any(type(h) is logging.StreamHandler for h in logger.handlers):
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)

        self.logger = logger

    def basic_statistics(self, ds):
        """Compute temporal, spatial, seasonal and extreme statistics per variable.

        Returns
        -------
        dict or None
            ``{variable: {'temporal_statistics', 'spatial_statistics',
            'seasonal_means', 'extremes'}}``, or ``None`` when a computation
            raises (the error is logged).
        """
        self.logger.info("Computing basic statistics...")
        stats = {}

        try:
            for var in ds.data_vars:
                field = ds[var]

                # Mean / std over time
                temporal_mean = field.mean(dim='valid_time')
                temporal_std = field.std(dim='valid_time')

                # Mean / std over space
                spatial_mean = field.mean(dim=['latitude', 'longitude'])
                spatial_std = field.std(dim=['latitude', 'longitude'])

                # Mean per season label
                seasonal = field.groupby('valid_time.season').mean()

                # Extremes. Load first: quantile() cannot reduce over a
                # dimension that is split into several dask chunks, as is
                # usual for a multi-file dataset.
                loaded = field.compute()
                extremes = {
                    'max': float(loaded.max()),
                    'min': float(loaded.min()),
                    'p95': float(loaded.quantile(0.95)),
                    'p05': float(loaded.quantile(0.05))
                }

                stats[var] = {
                    'temporal_statistics': {
                        'mean': temporal_mean,
                        'std': temporal_std
                    },
                    'spatial_statistics': {
                        'mean': spatial_mean,
                        'std': spatial_std
                    },
                    'seasonal_means': seasonal,
                    'extremes': extremes
                }

            return stats

        except Exception as e:
            self.logger.error(f"Error in basic statistics: {str(e)}")
            return None

    def circulation_analysis(self, ds):
        """Compute circulation metrics from the 10 m wind and, if present, 'z'.

        Divergence and relative vorticity are evaluated on the sphere,

            div  = [du/dlon + d(v cos(lat))/dlat] / (R cos(lat))
            vort = [dv/dlon - d(u cos(lat))/dlat] / (R cos(lat))

        with derivatives taken per radian and ``R = EARTH_RADIUS_M``, so the
        result is per second when the wind is in m/s. ``'gph_gradient'`` is
        the derivative of 'z' along latitude per degree.

        Returns
        -------
        dict or None
            Keys ``'gph_gradient'``, ``'divergence'``, ``'vorticity'`` for the
            inputs that are available; ``None`` when a computation raises.
        """
        self.logger.info("Analyzing circulation patterns...")
        circulation = {}

        try:
            # Meridional gradient of 'z'
            if 'z' in ds:
                circulation['gph_gradient'] = ds['z'].differentiate('latitude')

            if all(var in ds for var in ['u10', 'v10']):
                u_wind = ds['u10']
                v_wind = ds['v10']

                # differentiate() works in coordinate units (degrees here);
                # dividing by rad_per_deg converts to a per-radian derivative.
                rad_per_deg = np.pi / 180.0
                cos_lat = np.cos(ds['latitude'] * rad_per_deg)
                metric = self.EARTH_RADIUS_M * cos_lat

                def d_dlon(field):
                    return field.differentiate('longitude') / rad_per_deg

                def d_dlat(field):
                    return field.differentiate('latitude') / rad_per_deg

                # Horizontal divergence
                circulation['divergence'] = (
                    d_dlon(u_wind) + d_dlat(v_wind * cos_lat)
                ) / metric

                # Relative vorticity
                circulation['vorticity'] = (
                    d_dlon(v_wind) - d_dlat(u_wind * cos_lat)
                ) / metric

            return circulation

        except Exception as e:
            self.logger.error(f"Error in circulation analysis: {str(e)}")
            return None

    def teleconnection_analysis(self, ds):
        """Compute the first three EOF modes of 't2m'.

        Returns
        -------
        dict or None
            Keys ``'eofs'``, ``'pcs'`` and ``'variance_explained'``; ``None``
            when the computation raises (the error is logged).
        """
        self.logger.info("Analyzing teleconnection patterns...")

        try:
            # Flatten the two spatial dimensions into one
            # NOTE(review): no latitude weighting is passed to the solver -
            # confirm whether area weighting is wanted for this grid.
            data = ds['t2m'].stack(spatial=['latitude', 'longitude'])

            # EOF decomposition
            solver = Eof(data)

            # Keep the leading three modes
            n_modes = 3
            eofs = solver.eofs(neofs=n_modes)
            pcs = solver.pcs(npcs=n_modes)
            variance_fractions = solver.varianceFraction(neigs=n_modes)

            return {
                'eofs': eofs,
                'pcs': pcs,
                'variance_explained': variance_fractions
            }

        except Exception as e:
            self.logger.error(f"Error in teleconnection analysis: {str(e)}")
            return None

    def _daily_area_mean(self, field):
        """Reduce a gridded field to an in-memory 1-D series of daily means of the area average.

        Calendar days without any time step become NaN, so gaps in the record
        never compare as exceedances and cannot join two spells.
        """
        area_mean = field.mean(dim=['latitude', 'longitude'])
        return area_mean.resample(valid_time='1D').mean().compute()

    def extreme_event_analysis(self, ds):
        """Detect hot spells, cold spells and heavy-precipitation events.

        Events are identified on the daily mean of the area-averaged series,
        so that a duration of three really means three consecutive days and
        the spell finder receives the 1-D series it expects. Thresholds are
        the 95th / 5th percentile of that series.

        Returns
        -------
        dict or None
            ``{'temperature': {'hot_spells', 'cold_spells'},
            'precipitation': {'heavy_events'}}`` for the variables present;
            each entry is a list of ``{'start', 'duration'}`` dicts with
            ``start`` an index into the daily series. ``None`` when a
            computation raises (the error is logged).
        """
        self.logger.info("Analyzing extreme events...")
        extremes = {}

        try:
            # Temperature extremes
            if 't2m' in ds:
                temp = self._daily_area_mean(ds['t2m'])
                q95 = temp.quantile(0.95)
                q05 = temp.quantile(0.05)

                # Hot spell: at least 3 consecutive days above the 95th percentile
                hot_days = (temp > q95).astype(int)
                hot_spells = self._find_persistent_events(hot_days, min_duration=3)

                # Cold spell: at least 3 consecutive days below the 5th percentile
                cold_days = (temp < q05).astype(int)
                cold_spells = self._find_persistent_events(cold_days, min_duration=3)

                extremes['temperature'] = {
                    'hot_spells': hot_spells,
                    'cold_spells': cold_spells
                }

            # Precipitation extremes
            if 'tp' in ds:
                precip = self._daily_area_mean(ds['tp'])
                p95 = precip.quantile(0.95)

                # Heavy-precipitation days
                heavy_precip = (precip > p95).astype(int)
                precip_events = self._find_persistent_events(heavy_precip, min_duration=1)

                extremes['precipitation'] = {
                    'heavy_events': precip_events
                }

            return extremes

        except Exception as e:
            self.logger.error(f"Error in extreme event analysis: {str(e)}")
            return None

    def _find_persistent_events(self, binary_series, min_duration):
        """Find runs of consecutive non-zero entries that last at least ``min_duration`` steps.

        Parameters
        ----------
        binary_series : array-like
            1-D sequence of 0/1 flags (anything ``numpy.asarray`` accepts).
        min_duration : int
            Minimum run length to report.

        Returns
        -------
        list of dict
            One ``{'start': index, 'duration': length}`` per run, in order.
            A run that reaches the end of the series is reported as well.

        Raises
        ------
        ValueError
            If the input is not one-dimensional.
        """
        flags = np.asarray(binary_series).astype(bool)
        if flags.ndim != 1:
            raise ValueError(
                f"binary_series must be one-dimensional, got {flags.ndim} dimensions"
            )

        events = []
        run_start = None

        for idx, active in enumerate(flags):
            if active:
                if run_start is None:
                    run_start = idx
                continue
            if run_start is not None:
                duration = idx - run_start
                if duration >= min_duration:
                    events.append({'start': int(run_start), 'duration': int(duration)})
                run_start = None

        # Flush a run that is still open when the series ends.
        if run_start is not None:
            duration = len(flags) - run_start
            if duration >= min_duration:
                events.append({'start': int(run_start), 'duration': int(duration)})

        return events

    def generate_report(self, stats, circulation, teleconnections, extremes):
        """Write the diagnostics to ``climate_diagnostics_report.txt``.

        Every argument may be ``None`` or empty (the analysis methods return
        ``None`` on failure); the matching section then contains only its
        heading.

        Returns
        -------
        Path
            Location of the written report.
        """
        report_path = self.output_dir / 'climate_diagnostics_report.txt'

        # Explicit encoding so the result does not depend on the platform default.
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("Climate Diagnostics Report\n")
            f.write("========================\n\n")

            # Basic statistics
            f.write("1. Basic Statistics\n")
            f.write("----------------\n")
            if stats:
                for var, var_stats in stats.items():
                    f.write(f"\nVariable: {var}\n")
                    f.write(f"Mean: {float(var_stats['temporal_statistics']['mean'].mean()):.2f}\n")
                    f.write(f"Std: {float(var_stats['temporal_statistics']['std'].mean()):.2f}\n")
                    f.write("Seasonal Variation:\n")
                    # 'seasonal_means' is a DataArray indexed by 'season';
                    # walk its coordinate instead of calling dict methods.
                    seasonal = var_stats['seasonal_means']
                    for season in seasonal['season'].values:
                        value = seasonal.sel(season=season)
                        f.write(f"  {season}: {float(value.mean()):.2f}\n")

            # Circulation characteristics
            f.write("\n2. Circulation Characteristics\n")
            f.write("---------------------------\n")
            if circulation:
                for metric, value in circulation.items():
                    f.write(f"\n{metric}:\n")
                    # Scientific notation: divergence and vorticity are
                    # orders of magnitude below 0.01.
                    f.write(f"Mean: {float(value.mean()):.3e}\n")
                    f.write(f"Std: {float(value.std()):.3e}\n")

            # Teleconnection patterns
            f.write("\n3. Teleconnection Patterns\n")
            f.write("-------------------------\n")
            if teleconnections:
                for i, var in enumerate(teleconnections['variance_explained']):
                    f.write(f"EOF {i+1} explains {float(var)*100:.1f}% of variance\n")

            # Extreme events
            f.write("\n4. Extreme Events\n")
            f.write("----------------\n")
            if extremes:
                if 'temperature' in extremes:
                    f.write("\nTemperature Extremes:\n")
                    f.write(f"Hot spells: {len(extremes['temperature']['hot_spells'])}\n")
                    f.write(f"Cold spells: {len(extremes['temperature']['cold_spells'])}\n")

                if 'precipitation' in extremes:
                    f.write("\nPrecipitation Extremes:\n")
                    f.write(f"Heavy events: {len(extremes['precipitation']['heavy_events'])}\n")

        return report_path

def main():
    """Run all diagnostics and write the report.

    Returns a dict with the keys ``'statistics'``, ``'circulation'``,
    ``'teleconnections'`` and ``'extremes'``, or ``None`` when a step raises
    (the message is printed).
    """
    try:
        # Set up the analyser
        analyzer = ClimateDiagnostics()

        # Load the data
        dataset = xr.open_mfdataset(str(analyzer.data_dir / 'era5_*.nc'))

        # Run the analyses in order; dict entries are evaluated top to bottom
        results = {
            'statistics': analyzer.basic_statistics(dataset),
            'circulation': analyzer.circulation_analysis(dataset),
            'teleconnections': analyzer.teleconnection_analysis(dataset),
            'extremes': analyzer.extreme_event_analysis(dataset)
        }

        # Write the report
        analyzer.generate_report(
            results['statistics'],
            results['circulation'],
            results['teleconnections'],
            results['extremes']
        )

        print("Analysis completed successfully!")
        return results

    except Exception as err:
        print(f"Error in main execution: {str(err)}")
        return None

if __name__ == "__main__":
    results = main()

In [None]:
class ForecastSkillAnalyzer:
    """Evaluate pseudo-forecast skill of the ``'t2m'`` variable per region.

    The "forecast" used by every metric is not a model forecast: it is the
    observed series itself shifted by ``lead_time`` steps along axis 0
    (see ``_observed_and_forecast``). Reports and figures are written to
    ``output_dir``.
    """

    _LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'

    def __init__(self, data_dir='era5_data', output_dir='skill_assessment'):
        """Create the output directory and configure logging.

        Parameters
        ----------
        data_dir : str or Path
            Directory expected to contain the input data files.
        output_dir : str or Path
            Directory for the log file, report and figures. Missing parent
            directories are created as well.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.logger = self._setup_logger()

    def _setup_logger(self):
        """Return the ``'ForecastSkillAnalyzer'`` logger, writing to the log file.

        ``logging.basicConfig`` does nothing once the root logger already
        has handlers (for example after another class in the same session
        configured logging). In that case the file handler is attached to
        the named logger instead, so ``skill_assessment.log`` is still
        written. A handler for the same file is never attached twice.
        """
        import os

        log_path = self.output_dir / 'skill_assessment.log'
        logger = logging.getLogger('ForecastSkillAnalyzer')
        logger.setLevel(logging.INFO)
        root = logging.getLogger()

        if not root.handlers:
            # First configuration in this process: configure the root logger.
            logging.basicConfig(
                level=logging.INFO,
                format=self._LOG_FORMAT,
                handlers=[
                    logging.FileHandler(log_path),
                    logging.StreamHandler()
                ]
            )
            return logger

        # FileHandler stores its target as an absolute path in baseFilename.
        target = os.path.abspath(os.fspath(log_path))
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target
            for handler in root.handlers + logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_path)
            file_handler.setFormatter(logging.Formatter(self._LOG_FORMAT))
            logger.addHandler(file_handler)
        return logger

    @staticmethod
    def _select_region(ds, region_coords):
        """Select a region, orienting label slices to the coordinate direction.

        A label slice such as ``slice(45, 55)`` selects nothing when the
        coordinate is stored in descending order (as ERA5 latitude usually
        is). Slices with both bounds set are flipped when their direction
        disagrees with the coordinate's; all other selectors pass through
        unchanged.
        """
        indexers = {}
        for dim, selector in region_coords.items():
            if (
                isinstance(selector, slice)
                and selector.start is not None
                and selector.stop is not None
                and dim in ds.coords
            ):
                axis = ds[dim].values
                if axis.size > 1:
                    axis_ascending = axis[0] <= axis[-1]
                    slice_ascending = selector.start <= selector.stop
                    if axis_ascending != slice_ascending:
                        selector = slice(
                            selector.stop, selector.start, selector.step
                        )
            indexers[dim] = selector
        return ds.sel(**indexers)

    @staticmethod
    def _observed_and_forecast(ds, lead_time):
        """Return ``(obs, fcst)`` where ``fcst`` is ``obs`` shifted by ``lead_time``.

        NOTE(review): the shift is circular (``np.roll``), so the last
        ``lead_time`` steps are paired with the start of the series, and it
        assumes time is axis 0 of ``'t2m'`` - TODO confirm both are intended.
        """
        obs = ds['t2m'].values
        fcst = np.roll(obs, -lead_time, axis=0)  # simulated forecast
        return obs, fcst

    def calculate_forecast_skill(self, ds, regions, lead_times=range(1, 36)):
        """Compute ACC, RMSE and CRPS for every region and lead time.

        Parameters
        ----------
        ds : dataset supporting ``.sel`` and containing ``'t2m'``
        regions : dict
            Maps region name to the keyword selectors for that region.
        lead_times : iterable of int
            Shifts (in time steps) to evaluate.
            NOTE(review): the report labels these as days, which is only
            right for daily data - TODO confirm the data frequency.

        Returns
        -------
        dict or None
            ``{region: {lead_time: {'ACC', 'RMSE', 'CRPS'}}}``; ``None`` if
            an exception occurred (it is logged). Individual metric values
            are ``None`` when that metric failed.
        """
        self.logger.info("Calculating forecast skill metrics...")
        skill_metrics = {}

        try:
            for region_name, region_coords in regions.items():
                skill_metrics[region_name] = {}

                # Extract the data for this region.
                region_data = self._select_region(ds, region_coords)

                for lead_time in lead_times:
                    skill_metrics[region_name][lead_time] = {
                        'ACC': self._calculate_acc(region_data, lead_time),
                        'RMSE': self._calculate_rmse(region_data, lead_time),
                        'CRPS': self._calculate_crps(region_data, lead_time)
                    }

            return skill_metrics

        except Exception as e:
            self.logger.error(f"Error calculating forecast skill: {str(e)}")
            return None

    def _calculate_acc(self, ds, lead_time):
        """Compute the Anomaly Correlation Coefficient (ACC).

        Anomalies are taken relative to the day-of-year mean of ``'t2m'``
        (grouped on ``valid_time``). Observation and simulated forecast at
        the same time step use the same climatological value.

        Returns the correlation, or ``None`` if an exception occurred.
        """
        try:
            t2m = ds['t2m']
            obs, fcst = self._observed_and_forecast(ds, lead_time)

            # Day-of-year climatology and the anomalies relative to it. The
            # groupby arithmetic aligns each time step with its own
            # day-of-year mean; transpose restores the original axis order.
            by_day = t2m.groupby('valid_time.dayofyear')
            climatology = by_day.mean()
            obs_anom = (by_day - climatology).transpose(*t2m.dims).values

            # Climatology expanded onto the time axis, reused for the forecast.
            clim_on_time_axis = obs - obs_anom
            fcst_anom = fcst - clim_on_time_axis

            acc = np.corrcoef(obs_anom.ravel(), fcst_anom.ravel())[0, 1]

            return acc

        except Exception as e:
            self.logger.error(f"Error calculating ACC: {str(e)}")
            return None

    def _calculate_rmse(self, ds, lead_time):
        """Compute the Root Mean Square Error between obs and simulated forecast.

        Returns the RMSE, or ``None`` if an exception occurred.
        """
        try:
            obs, fcst = self._observed_and_forecast(ds, lead_time)

            rmse = np.sqrt(np.mean((obs - fcst) ** 2))

            return rmse

        except Exception as e:
            self.logger.error(f"Error calculating RMSE: {str(e)}")
            return None

    def _calculate_crps(self, ds, lead_time):
        """Compute the Continuous Ranked Probability Score (CRPS).

        A 10-member ensemble is built by adding Gaussian noise (standard
        deviation 0.1, drawn from NumPy's global RNG) to the simulated
        forecast. The score is the ensemble estimator
        ``mean|x_i - y| - 0.5 * mean|x_i - x_j|`` averaged over all points.

        Returns the CRPS, or ``None`` if an exception occurred.
        """
        try:
            obs, fcst = self._observed_and_forecast(ds, lead_time)

            # Build the forecast ensemble.
            ensemble_size = 10
            noise = np.random.normal(0, scale=0.1, size=(ensemble_size,) + fcst.shape)
            ensemble_fcst = fcst[np.newaxis, :] + noise

            # Accuracy term: mean absolute error of the members.
            abs_error = np.mean(np.abs(ensemble_fcst - obs), axis=0)

            # Spread term: mean pairwise member distance. With members sorted,
            # sum_ij |x_i - x_j| = 2 * sum_k (2k - m - 1) * x_(k), which avoids
            # materialising an m x m array per grid point.
            ranked = np.sort(ensemble_fcst, axis=0)
            weights = 2 * np.arange(1, ensemble_size + 1) - ensemble_size - 1
            weights = weights.reshape((ensemble_size,) + (1,) * fcst.ndim)
            mean_pair_dist = 2.0 * np.sum(weights * ranked, axis=0) / ensemble_size ** 2

            crps = np.mean(abs_error - 0.5 * mean_pair_dist)

            return crps

        except Exception as e:
            self.logger.error(f"Error calculating CRPS: {str(e)}")
            return None

    def analyze_predictability_sources(self, ds, regions):
        """Analyse the sources of forecast skill for every region.

        Returns
        -------
        dict or None
            ``{region: {'seasonal_dependence', 'teleconnection_impact',
            'initial_condition_impact'}}``; ``None`` if an exception
            occurred (it is logged).
        """
        self.logger.info("Analyzing predictability sources...")
        predictability = {}

        try:
            for region_name, region_coords in regions.items():
                region_data = self._select_region(ds, region_coords)

                # NOTE(review): the three helpers called below are not defined
                # in this class. Unless they are supplied elsewhere, the call
                # raises AttributeError and this method returns None - TODO
                # implement or remove.

                # Seasonal dependence of skill.
                seasonal_skill = self._analyze_seasonal_dependence(region_data)

                # Relationship with teleconnection modes.
                teleconnection_impact = self._analyze_teleconnection_impact(region_data)

                # Influence of the initial conditions.
                initial_condition_impact = self._analyze_initial_conditions(region_data)

                predictability[region_name] = {
                    'seasonal_dependence': seasonal_skill,
                    'teleconnection_impact': teleconnection_impact,
                    'initial_condition_impact': initial_condition_impact
                }

            return predictability

        except Exception as e:
            self.logger.error(f"Error analyzing predictability: {str(e)}")
            return None

    @staticmethod
    def _format_score(value):
        """Format a score with three decimals; ``None`` becomes ``'N/A'``."""
        if value is None:
            return 'N/A'
        return f"{value:.3f}"

    def generate_skill_report(self, skill_metrics, predictability):
        """Write the forecast skill assessment report as plain text.

        Either argument may be ``None`` (the value the analysis methods
        return on failure); the section then reads ``(not available)``.
        Individual scores that are ``None`` are written as ``N/A``.

        Returns
        -------
        Path
            Location of ``forecast_skill_report.txt`` in the output directory.
        """
        report_path = self.output_dir / 'forecast_skill_report.txt'
        source_sections = (
            ("Seasonal Dependence", 'seasonal_dependence'),
            ("Teleconnection Impact", 'teleconnection_impact'),
            ("Initial Condition Impact", 'initial_condition_impact'),
        )

        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("Forecast Skill Assessment Report\n")
            f.write("==============================\n\n")

            # Skill scores.
            f.write("1. Skill Metrics by Region\n")
            f.write("------------------------\n")
            if skill_metrics is None:
                f.write("\n(not available)\n")
            else:
                for region, metrics in skill_metrics.items():
                    f.write(f"\n{region}:\n")
                    for lead_time, scores in metrics.items():
                        f.write(f"\nLead Time {lead_time} days:\n")
                        for metric, value in scores.items():
                            f.write(f"  {metric}: {self._format_score(value)}\n")

            # Sources of predictability.
            f.write("\n2. Sources of Predictability\n")
            f.write("--------------------------\n")
            if predictability is None:
                f.write("\n(not available)\n")
            else:
                for region, sources in predictability.items():
                    f.write(f"\n{region}:\n")
                    for title, key in source_sections:
                        f.write(f"\n{title}:\n")
                        # A helper may have produced None instead of a mapping.
                        for name, score in (sources[key] or {}).items():
                            f.write(f"  {name}: {self._format_score(score)}\n")

        self.logger.info(f"Skill assessment report generated: {report_path}")
        return report_path

    def plot_skill_metrics(self, skill_metrics):
        """Plot ACC against lead time for every region and save the figure.

        The figure is saved as ``skill_lead_time_skill.png`` in the output
        directory and closed afterwards. ACC values of ``None`` are drawn
        as gaps.

        Returns
        -------
        dict or None
            ``{'lead_time_skill': figure}``; ``None`` if an exception
            occurred (it is logged).
        """
        self.logger.info("Generating skill visualization...")
        figures = {}
        fig1 = None

        try:
            # 1. Lead time versus skill.
            fig1, ax1 = plt.subplots(figsize=(12, 8))
            for region, metrics in skill_metrics.items():
                lead_times = list(metrics.keys())
                acc_values = [
                    np.nan if m['ACC'] is None else m['ACC']
                    for m in metrics.values()
                ]
                ax1.plot(lead_times, acc_values, marker='o', label=region)

            ax1.set_xlabel('Lead Time (days)')
            ax1.set_ylabel('ACC')
            ax1.set_title('Forecast Skill by Lead Time')
            ax1.legend()
            ax1.grid(True)

            figures['lead_time_skill'] = fig1

            # Save the figures.
            for name, fig in figures.items():
                fig.savefig(self.output_dir / f'skill_{name}.png', dpi=300, bbox_inches='tight')
                plt.close(fig)

            return figures

        except Exception as e:
            self.logger.error(f"Error plotting skill metrics: {str(e)}")
            # Do not leave a half-built figure registered with pyplot.
            if fig1 is not None:
                plt.close(fig1)
            return None

def main():
    """Run the forecast-skill assessment workflow end to end.

    Opens every file matching ``era5_*.nc`` in the analyzer's data
    directory, computes skill metrics and predictability sources for three
    European regions, then writes the text report and the skill figure.

    Returns:
        dict | None: Results under the keys ``'skill_metrics'``,
        ``'predictability'`` and ``'figures'``, or ``None`` if any step
        raised (the error is printed).
    """
    try:
        assessor = ForecastSkillAnalyzer()

        # Combine all monthly files into a single dataset.
        era5 = xr.open_mfdataset(str(assessor.data_dir / 'era5_*.nc'))

        # Region bounds as (lat_start, lat_stop, lon_start, lon_stop).
        bounds = {
            'north_europe': (55, 65, -10, 30),
            'central_europe': (45, 55, -5, 20),
            'south_europe': (35, 45, -10, 30),
        }
        regions = {
            region: {
                'latitude': slice(lat_start, lat_stop),
                'longitude': slice(lon_start, lon_stop),
            }
            for region, (lat_start, lat_stop, lon_start, lon_stop) in bounds.items()
        }

        skill_metrics = assessor.calculate_forecast_skill(era5, regions)
        predictability = assessor.analyze_predictability_sources(era5, regions)

        # Report first, then the figure; the report path is not part of the result.
        assessor.generate_skill_report(skill_metrics, predictability)
        figures = assessor.plot_skill_metrics(skill_metrics)

        print("Forecast skill assessment completed!")
        return dict(
            skill_metrics=skill_metrics,
            predictability=predictability,
            figures=figures,
        )

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        return None

if __name__ == "__main__":
    results = main()