ERA5

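- Mean, median, standard deviation, minimum and maximum of the temperature variable (`t2m`), computed for every grid cell (latitude/longitude), once per year and once per year-month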

- maps for both results


In [4]:
import xarray as xr
import pandas as pd
import os

# Yearly per-grid-cell temperature statistics.
#
# Every NetCDF file in the folder is reduced over its 'valid_time' dimension
# (mean, median, std, max and min of 't2m'); the per-file tables are then
# stacked into `df_final`, indexed by (Year, latitude, longitude).

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))
if not file_list:
    # Fail early with a readable message; pd.concat on an empty list would
    # otherwise raise an opaque "No objects to concatenate" error.
    raise FileNotFoundError(f"No .nc files found in {folder_path}")

# One DataFrame per file is collected here and concatenated once at the end
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # Open lazily in chunks; the context manager closes the file handle as
    # soon as the statistics for this file have been computed.
    with xr.open_dataset(file_path, chunks={'valid_time': 500}) as ds:
        # The year label comes from the first time step, so each file is
        # assumed to hold a single calendar year.
        year = ds['valid_time'].dt.year.values[0]

        t2m = ds['t2m']

        # Build all reductions lazily, then evaluate them together with one
        # compute() call so the file is read once instead of once per
        # statistic. This must happen before the dataset is closed.
        stats = xr.Dataset({
            'Mean': t2m.mean(dim='valid_time'),
            'Median': t2m.median(dim='valid_time'),
            'Std': t2m.std(dim='valid_time'),
            'Max': t2m.max(dim='valid_time'),
            'Min': t2m.min(dim='valid_time'),
        }).compute()

    # Flatten to a table and index it by year and grid cell
    stats_df = (
        stats.to_dataframe()
        .reset_index()
        .assign(Year=year)
        .set_index(['Year', 'latitude', 'longitude'])
    )

    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


                         number       Mean     Median       Std        Max  \
Year latitude longitude                                                      
1979 66.0     -12.00          0   2.255776   2.729889  3.252714   8.246246   
              -11.75          0   2.289954   2.743164  3.252387   8.375153   
              -11.50          0   2.330913   2.761765  3.262226   8.488434   
              -11.25          0   2.373601   2.771454  3.270666   8.593903   
              -11.00          0   2.420143   2.807159  3.273652   8.683746   
...                         ...        ...        ...       ...        ...   
2024 34.0      35.00          0  22.002697  21.498932  4.703490  30.666901   
               35.25          0  22.159876  21.750397  4.771335  30.389313   
               35.50          0  20.894886  20.781891  5.831027  33.362213   
               35.75          0  16.874861  16.751373  7.154267  32.466705   
               36.00          0  14.095114  13.774689  8.582670 

In [5]:
# Save the yearly statistics table to CSV.
#
# `df_final` is assigned by both the yearly and the monthly statistics cells,
# so running cells out of order could silently write the monthly table into
# the yearly file. Check the index layout before writing.
yearly_index = ['Year', 'latitude', 'longitude']
if list(df_final.index.names) != yearly_index:
    raise ValueError(
        f"df_final is indexed by {list(df_final.index.names)}, expected {yearly_index}; "
        "re-run the yearly statistics cell before saving."
    )
df_final.to_csv(r"E:\IPMA\ERA5\Temperature\yearly_temperature_stats.csv")

In [6]:
import xarray as xr
import pandas as pd
import os

# Monthly per-grid-cell temperature statistics.
#
# Every NetCDF file in the folder is grouped by (year, month) of its
# 'valid_time' coordinate and reduced (mean, median, std, max and min of
# 't2m'); the per-file tables are then stacked into `df_final`, indexed by
# (Year, Month, latitude, longitude).

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))
if not file_list:
    # Fail early with a readable message; pd.concat on an empty list would
    # otherwise raise an opaque "No objects to concatenate" error.
    raise FileNotFoundError(f"No .nc files found in {folder_path}")

# One DataFrame per file is collected here and concatenated once at the end
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # Open lazily in chunks; the context manager closes the file handle as
    # soon as the statistics for this file have been computed.
    with xr.open_dataset(file_path, chunks={'valid_time': 500}) as ds:
        # Attach year and month as coordinates along 'valid_time' so they
        # can be used as grouping keys.
        ds.coords['year'] = ds['valid_time'].dt.year
        ds.coords['month'] = ds['valid_time'].dt.month

        # Build the (year, month) grouping once and reuse it for every statistic
        by_year_month = ds['t2m'].groupby(['year', 'month'])

        # Build all reductions lazily, then evaluate them together with one
        # compute() call so the file is read once instead of once per
        # statistic. This must happen before the dataset is closed.
        stats = xr.Dataset({
            'Mean': by_year_month.mean(dim='valid_time'),
            'Median': by_year_month.median(dim='valid_time'),
            'Std': by_year_month.std(dim='valid_time'),
            'Max': by_year_month.max(dim='valid_time'),
            'Min': by_year_month.min(dim='valid_time'),
        }).compute()

    # Flatten to a table and index it by year, month and grid cell.
    # The lower-case 'year'/'month' columns are copied rather than renamed,
    # so they remain as regular columns and the table layout is unchanged.
    stats_df = (
        stats.to_dataframe()
        .reset_index()
        .assign(Year=lambda table: table['year'], Month=lambda table: table['month'])
        .set_index(['Year', 'Month', 'latitude', 'longitude'])
    )

    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


                               year  month  number       Mean     Median  \
Year Month latitude longitude                                              
1979 1     66.0     -12.00     1979      1       0  -0.956760  -0.553314   
                    -11.75     1979      1       0  -0.942214  -0.516647   
                    -11.50     1979      1       0  -0.933165  -0.515823   
                    -11.25     1979      1       0  -0.918887  -0.511261   
                    -11.00     1979      1       0  -0.893903  -0.472977   
...                             ...    ...     ...        ...        ...   
2024 12    34.0      35.00     2024     12       0  17.479179  17.610245   
                     35.25     2024     12       0  17.684513  17.800568   
                     35.50     2024     12       0  15.112290  15.030884   
                     35.75     2024     12       0   9.835276   9.426178   
                     36.00     2024     12       0   5.694470   5.228882   

           

In [7]:
# Save the monthly statistics table to CSV.
#
# `df_final` is assigned by both the yearly and the monthly statistics cells,
# so running cells out of order could silently write the yearly table into
# the monthly file. Check the index layout before writing.
monthly_index = ['Year', 'Month', 'latitude', 'longitude']
if list(df_final.index.names) != monthly_index:
    raise ValueError(
        f"df_final is indexed by {list(df_final.index.names)}, expected {monthly_index}; "
        "re-run the monthly statistics cell before saving."
    )
df_final.to_csv(r"E:\IPMA\ERA5\Temperature\monthly_temperature_stats.csv")