ERA5

- Mean, median, standard deviation, minimum and maximum of the temperature variable (`t2m`) at each grid point (latitude/longitude), aggregated per year and per year-month

- maps for both results


Yearly

In [24]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Folder containing the NetCDF files; the loop below treats each file as one year
folder_path = r"E:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"

# Sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))
if not file_list:
    # Fail early: pd.concat() on an empty list would raise a far less helpful error later
    raise FileNotFoundError(f"No .nc files found in {folder_path}")

# One DataFrame of per-grid-point statistics per year
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager releases the file handle as soon as the data is read.
    # No dask chunking is requested: `.values` below materialises the whole
    # array in memory anyway, so chunked (lazy) loading would bring no benefit.
    with xr.open_dataset(file_path) as ds:
        # Year label is taken from the first time step of the file
        year = int(ds['valid_time'].dt.year.values[0])

        # Order the dimensions by name so that axis 0 is guaranteed to be time
        temp_data = ds['t2m'].transpose('valid_time', 'latitude', 'longitude').values

        latitude = ds['latitude'].values
        longitude = ds['longitude'].values

    # NaN-aware statistics over the time axis -> one 2-D (latitude, longitude) field each
    mean = np.nanmean(temp_data, axis=0)
    median = np.nanmedian(temp_data, axis=0)
    std = np.nanstd(temp_data, axis=0)
    max_ = np.nanmax(temp_data, axis=0)
    min_ = np.nanmin(temp_data, axis=0)

    # Combine results into a new dataset on the original grid
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_)
    }, coords={'latitude': latitude, 'longitude': longitude})

    # Convert to a DataFrame indexed by (Year, latitude, longitude)
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])

    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame (pandas truncates the printed view automatically)
print(df_final)


                              Mean     Median       Std        Max        Min  \
Year latitude longitude                                                         
1979 66.0     -12.00      2.255764   2.729889  3.252716   8.246246  -7.719116   
              -11.75      2.289942   2.743164  3.252384   8.375153  -7.681305   
              -11.50      2.330900   2.761765  3.262223   8.488434  -7.620758   
              -11.25      2.373588   2.771454  3.270669   8.593903  -7.581696   
              -11.00      2.420127   2.807159  3.273650   8.683746  -7.583649   
...                            ...        ...       ...        ...        ...   
2024 34.0      35.00     22.002655  21.498932  4.703494  30.666901  11.472931   
               35.25     22.159859  21.750397  4.771343  30.389313  11.465302   
               35.50     20.894817  20.781891  5.831040  33.362213   7.023712   
               35.75     16.874855  16.751373  7.154270  32.466705  -0.911835   
               36.00     14.

In [26]:
# Rebuild the (Year, latitude, longitude) index explicitly, then let pandas
# turn each index level into an xarray dimension.
yearly_index_cols = ['Year', 'latitude', 'longitude']
yearly_indexed = df_final.reset_index().set_index(yearly_index_cols)
df_final_xr = yearly_indexed.to_xarray()

# Write the gridded yearly statistics to disk as NetCDF
output_file_path = r"E:\IPMA\ERA5\Temperature\yearly_temperature_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Dataset saved to: E:\IPMA\ERA5\Temperature\yearly_temperature_stats.nc


Monthly

In [27]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Folder containing the NetCDF files (one file per year of data)
folder_path = r"E:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"

# Sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))
if not file_list:
    # Fail early: pd.concat() on an empty list would raise a far less helpful error later
    raise FileNotFoundError(f"No .nc files found in {folder_path}")

# Output column name -> NaN-aware reducer applied along the time axis
stat_funcs = {
    'Mean': np.nanmean,
    'Median': np.nanmedian,
    'Std': np.nanstd,
    'Max': np.nanmax,
    'Min': np.nanmin,
}

# One DataFrame of per-grid-point statistics per (year, month)
df_list = []

# Process each file
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager releases the file handle as soon as the data is read.
    # No dask chunking is requested: `.values` below materialises the whole
    # array in memory anyway, so chunked (lazy) loading would bring no benefit.
    with xr.open_dataset(file_path) as ds:
        # Order the dimensions by name so that axis 0 is guaranteed to be time
        temp_data = ds['t2m'].transpose('valid_time', 'latitude', 'longitude').values

        # Calendar year and month of every time step
        years = ds['valid_time'].dt.year.values
        months = ds['valid_time'].dt.month.values

        latitude = ds['latitude'].values
        longitude = ds['longitude'].values

    # Reduce each calendar month separately. Reducing the whole file at once
    # would yield yearly statistics mislabelled with the first month.
    for year, month in sorted(set(zip(years.tolist(), months.tolist()))):
        in_month = (years == year) & (months == month)
        month_data = temp_data[in_month]

        # One 2-D (latitude, longitude) field per statistic
        stats = xr.Dataset(
            {
                name: (['latitude', 'longitude'], func(month_data, axis=0))
                for name, func in stat_funcs.items()
            },
            coords={'latitude': latitude, 'longitude': longitude},
        )

        # Convert to a DataFrame indexed by (Year, Month, latitude, longitude)
        stats_df = stats.to_dataframe().reset_index()
        stats_df['Year'] = year
        stats_df['Month'] = month
        stats_df = stats_df.set_index(['Year', 'Month', 'latitude', 'longitude'])

        df_list.append(stats_df)

# Concatenate all months of all years into a single DataFrame
df_final = pd.concat(df_list)

# Each (Year, Month, latitude, longitude) must occur exactly once; duplicates
# would mean two files contain time steps of the same calendar month.
if not df_final.index.is_unique:
    raise ValueError(
        "Duplicate (Year, Month, latitude, longitude) entries: "
        "the same calendar month appears in more than one input file"
    )

# Display the DataFrame (pandas truncates the printed view automatically)
print(df_final)

# Convert the DataFrame to an xarray Dataset. The index already holds all four
# levels, so Month becomes a dimension alongside Year, latitude and longitude.
df_final_xr = df_final.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\ERA5\Temperature\monthly_temperature_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Dataset saved to: E:\IPMA\ERA5\Temperature\monthly_temperature_stats.nc
