ERA5

- mean, median, standard deviation, min, max and total of the precipitation values (`tp`) per grid cell, by year, by year-month and by day

- maps of total precipitation for the yearly and monthly results


Yearly

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Yearly statistics: for every NetCDF file in the folder, reduce the 'tp'
# variable along its first axis and collect one table of per-grid-cell
# statistics per file.
# NOTE(review): assumes each file holds exactly one year and that 'tp' is laid
# out as (valid_time, latitude, longitude) -- confirm against the raw files.

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Precipitation\1raw_year_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))

# Fail early with a clear message instead of an obscure
# "No objects to concatenate" error from pd.concat at the end of the cell.
if not file_list:
    raise FileNotFoundError(f"No NetCDF (.nc) files found in: {folder_path}")

# One DataFrame of statistics per input file
df_list = []

for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager closes the file handle as soon as the arrays are in
    # memory. No dask rechunking is done: '.values' loads the whole variable
    # anyway, so chunking beforehand only added overhead.
    with xr.open_dataset(file_path) as ds:
        # The label for this file is the year of its first time step
        year = int(ds['valid_time'].dt.year.values[0])
        precip_data = ds['tp'].values  # all time steps as a numpy array
        latitude = ds['latitude'].values
        longitude = ds['longitude'].values

    # NaN-aware reductions along axis 0 (the time axis, see note above)
    stats = xr.Dataset(
        {
            'Mean': (['latitude', 'longitude'], np.nanmean(precip_data, axis=0)),
            'Median': (['latitude', 'longitude'], np.nanmedian(precip_data, axis=0)),
            'Std': (['latitude', 'longitude'], np.nanstd(precip_data, axis=0)),
            'Max': (['latitude', 'longitude'], np.nanmax(precip_data, axis=0)),
            'Min': (['latitude', 'longitude'], np.nanmin(precip_data, axis=0)),
            'Total_Precipitation': (['latitude', 'longitude'], np.nansum(precip_data, axis=0)),
        },
        coords={
            'latitude': ('latitude', latitude),
            'longitude': ('longitude', longitude),
        },
    )

    # Flatten to a table indexed by (Year, latitude, longitude)
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])

    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame (pandas truncates the printed output)
print(df_final)


In [None]:
# Destination of the yearly statistics file
output_file_path = r"E:\IPMA\ERA5\Precipitation\yearly_precipitation_stats.nc"

# Re-establish the (Year, latitude, longitude) index and pivot the table into
# an xarray Dataset with one dimension per index level
index_levels = ['Year', 'latitude', 'longitude']
df_final_xr = df_final.reset_index().set_index(index_levels).to_xarray()

# Write the Dataset to NetCDF
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Yearly maps: one PNG of 'Total_Precipitation' per value of the 'Year'
# coordinate, all drawn with the same colour scale so they are comparable.

# File path
file_path = r"E:\IPMA\ERA5\Precipitation\yearly_precipitation_stats.nc"
output_folder = r"E:\IPMA\ERA5\Precipitation\maps_year"
os.makedirs(output_folder, exist_ok=True)

# Variable to plot
var = 'Total_Precipitation'
title = 'Total Precipitation (m)'

# Colormap
cmap = 'Blues'

# Projection (assume data is in PlateCarree, common for lat/lon grids)
data_crs = ccrs.PlateCarree()

# Load what is needed and close the file straight away, so the NetCDF file is
# not kept locked (an open handle prevents the file from being rewritten).
with xr.open_dataset(file_path) as ds:
    field = ds[var].load()
    lon = ds['longitude'].values
    lat = ds['latitude'].values

# Global color scale limits (shared by every map)
vmin = field.min().item()
vmax = field.max().item()

# Loop through years
for year in field['Year'].values:
    fig, ax = plt.subplots(figsize=(8, 6),
                           subplot_kw={'projection': ccrs.PlateCarree()})

    # Select data for current year
    data = field.sel(Year=year)

    # Plot the data
    im = ax.pcolormesh(lon, lat, data,
                       cmap=cmap, vmin=vmin, vmax=vmax,
                       shading='auto', transform=data_crs)

    # Add features (land, ocean, borders)
    ax.coastlines(resolution='10m')
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.LAND, facecolor='lightgray')
    ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
    ax.set_extent([-12, 36, 34, 66], crs=data_crs)  # Adjust to the working area

    # Gridlines (bottom and left only)
    gl = ax.gridlines(draw_labels=True, linewidth=0)
    gl.top_labels = False
    gl.right_labels = False
    gl.bottom_labels = True
    gl.left_labels = True

    # Title
    ax.set_title(f'{title} - {year}', fontsize=12)

    # Horizontal colorbar below the map. (The former y-axis tick placement
    # call was dropped: a horizontal colorbar has no ticks on its y axis.)
    fig.colorbar(im, ax=ax, orientation='horizontal', label='m', shrink=0.7, pad=0.1)

    # Save the plot and release the figure to keep memory flat in the loop
    fig.tight_layout()
    fig.savefig(os.path.join(output_folder, f"total_precipitation_{year}.png"), dpi=150)
    plt.close(fig)

# The message previously announced "Mean temperature maps", which is not what
# this cell produces.
print("✅ Yearly total precipitation maps generated!")


Monthly

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Monthly statistics: for every NetCDF file in the folder, reduce the 'tp'
# variable along its first axis and collect one table of per-grid-cell
# statistics per file.
# NOTE(review): assumes each file holds exactly one calendar month and that
# 'tp' is laid out as (valid_time, latitude, longitude) -- confirm against the
# raw files. The Year/Month labels come from the first time step only.

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Precipitation\1raw_month_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))

# Fail early with a clear message instead of an obscure
# "No objects to concatenate" error from pd.concat at the end of the cell.
if not file_list:
    raise FileNotFoundError(f"No NetCDF (.nc) files found in: {folder_path}")

# One DataFrame of statistics per input file
df_list = []

for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager closes the file handle as soon as the arrays are in
    # memory. No dask rechunking is done: '.values' loads the whole variable
    # anyway, so chunking beforehand only added overhead.
    with xr.open_dataset(file_path) as ds:
        first_step = ds['valid_time'].dt
        year = int(first_step.year.values[0])
        month = int(first_step.month.values[0])
        precip_data = ds['tp'].values  # all time steps as a numpy array
        latitude = ds['latitude'].values
        longitude = ds['longitude'].values

    # NaN-aware reductions along axis 0 (the time axis, see note above)
    stats = xr.Dataset(
        {
            'Mean': (['latitude', 'longitude'], np.nanmean(precip_data, axis=0)),
            'Median': (['latitude', 'longitude'], np.nanmedian(precip_data, axis=0)),
            'Std': (['latitude', 'longitude'], np.nanstd(precip_data, axis=0)),
            'Max': (['latitude', 'longitude'], np.nanmax(precip_data, axis=0)),
            'Min': (['latitude', 'longitude'], np.nanmin(precip_data, axis=0)),
            'Total_Precipitation': (['latitude', 'longitude'], np.nansum(precip_data, axis=0)),
        },
        coords={
            'latitude': ('latitude', latitude),
            'longitude': ('longitude', longitude),
        },
    )

    # Flatten to a table indexed by (Year, Month, latitude, longitude)
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year
    stats_df['Month'] = month
    stats_df = stats_df.set_index(['Year', 'Month', 'latitude', 'longitude'])

    df_list.append(stats_df)

# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame (pandas truncates the printed output)
print(df_final)


In [None]:
# Destination of the monthly statistics file
output_file_path = r"E:\IPMA\ERA5\Precipitation\monthly_precipitation_stats.nc"

# Re-establish the (Year, Month, latitude, longitude) index and pivot the
# table into an xarray Dataset with one dimension per index level
index_levels = ['Year', 'Month', 'latitude', 'longitude']
df_final_xr = df_final.reset_index().set_index(index_levels).to_xarray()

# Write the Dataset to NetCDF
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Monthly maps: one PNG of 'Total_Precipitation' per (Year, Month) pair that
# contains data, all drawn with the same colour scale.

# File path
file_path = r"E:\IPMA\ERA5\Precipitation\monthly_precipitation_stats.nc"
output_folder = r"E:\IPMA\ERA5\Precipitation\maps_month"
os.makedirs(output_folder, exist_ok=True)

# Variable to plot
var = 'Total_Precipitation'
title = 'Total Precipitation (m)'

# Colormap
cmap = 'Blues'

# Load what is needed and close the file straight away, so the NetCDF file is
# not kept locked (an open handle prevents the file from being rewritten).
with xr.open_dataset(file_path) as ds:
    field = ds[var].load()
    lat = ds['latitude'].values
    lon = ds['longitude'].values

# Global color scale limits (shared by every map)
vmin = field.min().item()
vmax = field.max().item()

# Loop through all years and months
for year in field['Year'].values:
    for month in field['Month'].values:
        data = field.sel(Year=year, Month=month)

        # Skip (Year, Month) combinations that hold no data at all
        if bool(data.isnull().all()):
            continue

        # Plotting
        fig, ax = plt.subplots(figsize=(7, 6),
                               subplot_kw={'projection': ccrs.PlateCarree()})
        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'{title} - {year} / {month:02d}', fontsize=12)
        ax.coastlines(resolution='10m', linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = True
        gl.left_labels = True

        # Colorbar
        cbar = fig.colorbar(im, ax=ax, orientation='horizontal', shrink=0.7, pad=0.1)
        cbar.set_label('m')
        cbar.ax.tick_params(labelsize=8)

        # Save figure and release it to keep memory flat in the loop
        fig.tight_layout()
        filename = f"total_precipitation_{year}{month:02d}.png"
        fig.savefig(os.path.join(output_folder, filename), dpi=150)
        plt.close(fig)

# The message previously announced "temperature maps", which is not what this
# cell produces.
print("✅ Monthly total precipitation maps generated for all years and months!")


In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Yearly 3x4 grids: one PNG per year with a panel per calendar month of
# 'Total_Precipitation', sharing a single colour scale and colorbar.

# File path
file_path = r"E:\IPMA\ERA5\Precipitation\monthly_precipitation_stats.nc"
output_folder = r"E:\IPMA\ERA5\Precipitation\maps_month"
os.makedirs(output_folder, exist_ok=True)

# Variable to plot
var = 'Total_Precipitation'
title = 'Total Precipitation (m)'

# Colormap
cmap = 'Blues'

# Load what is needed and close the file straight away, so the NetCDF file is
# not kept locked (an open handle prevents the file from being rewritten).
with xr.open_dataset(file_path) as ds:
    field = ds[var].load()
    lat = ds['latitude'].values
    lon = ds['longitude'].values

# Global color scale limits (shared by every panel of every figure)
vmin = field.min().item()
vmax = field.max().item()

# Months actually present on the 'Month' coordinate
available_months = {int(m) for m in field['Month'].values}

# Loop through all years
for year in field['Year'].values:
    fig, axs = plt.subplots(3, 4, figsize=(15, 10),
                            subplot_kw={'projection': ccrs.PlateCarree()})
    fig.suptitle(f'{title} - {year}', fontsize=16)

    # Mesh used to build the shared colorbar; stays None when nothing is drawn
    # for this year, so a mesh from a previous year is never reused.
    last_mesh = None

    # Panel k always shows calendar month k + 1, so a month keeps the same
    # position in every figure even if the 'Month' coordinate is incomplete.
    for panel, ax in enumerate(axs.flat):
        month = panel + 1
        row, col = divmod(panel, 4)

        # Hide panels for months that are absent or hold no data
        if month not in available_months:
            ax.set_visible(False)
            continue

        data = field.sel(Year=year, Month=month)
        if bool(data.isnull().all()):
            ax.set_visible(False)
            continue

        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        last_mesh = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                                  shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'Month {month:02d}', fontsize=10)
        ax.coastlines(resolution='10m', linewidth=0.6)
        ax.add_feature(cfeature.BORDERS, linewidth=0.4)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (labels only on the bottom row and the left column)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = row == 2
        gl.left_labels = col == 0

    # Nothing to show for this year: skip it instead of failing on the colorbar
    if last_mesh is None:
        plt.close(fig)
        continue

    # Add a single colorbar for all subplots
    cbar_ax = fig.add_axes([0.92, 0.25, 0.015, 0.5])
    cbar = fig.colorbar(last_mesh, cax=cbar_ax)
    cbar.set_label('m')

    # Save figure and release it to keep memory flat in the loop
    plt.tight_layout(rect=[0, 0, 0.9, 0.95])
    filename = f"total_precipitation_year_{year}.png"
    fig.savefig(os.path.join(output_folder, filename), dpi=150)
    plt.close(fig)

# The message previously announced "temperature grids", which is not what this
# cell produces.
print("✅ Yearly total precipitation grids generated (3x4 months per year)!")


Daily

In [7]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Daily statistics: for every NetCDF file of 1999-2024, group the time steps
# by calendar date and reduce 'tp' over each date's time steps.
#
# Time steps are grouped by full calendar date (year-month-day). Grouping by
# day-of-month alone would pool e.g. the 1st of every month found in a file
# into a single group labelled with the first month, which is not a daily
# statistic when a file spans more than one month.
# NOTE(review): assumes 'valid_time' decodes to numpy datetime64 values and
# that 'tp' is laid out as (valid_time, latitude, longitude) -- confirm.

# Define the folder containing NetCDF files
folder_path = r"D:\IPMA\ERA5\Precipitation\1raw_year_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))

# Filter only years 1999–2024 (the year is read from the first four characters
# of the last '_'-separated part of the file name)
file_list = [f for f in file_list if 1999 <= int(f.split('_')[-1][:4]) <= 2024]

print(f"Processing {len(file_list)} files from 1999 to 2024...")

# Fail early with a clear message instead of an obscure
# "No objects to concatenate" error from pd.concat at the end of the cell.
if not file_list:
    raise FileNotFoundError(f"No NetCDF (.nc) files for 1999-2024 found in: {folder_path}")

# One DataFrame of statistics per calendar day
df_list = []

for file in file_list:
    file_path = os.path.join(folder_path, file)

    # Keep the file open only while its days are processed. Each day's time
    # steps are read with a positional selection, so only that slice is
    # loaded; no dask rechunking is needed.
    with xr.open_dataset(file_path) as ds:
        # Calendar date (midnight-normalised timestamp) of every time step
        calendar_days = pd.DatetimeIndex(ds['valid_time'].values).normalize()
        grid_coords = {'latitude': ds['latitude'], 'longitude': ds['longitude']}

        for calendar_day in calendar_days.unique():
            # Positions of the time steps that fall on this calendar date
            step_positions = np.flatnonzero(calendar_days == calendar_day)
            precip_data = ds['tp'].isel(valid_time=step_positions).values

            # NaN-aware reductions along axis 0 (the time axis, see note above)
            stats = xr.Dataset(
                {
                    'Mean': (['latitude', 'longitude'], np.nanmean(precip_data, axis=0)),
                    'Median': (['latitude', 'longitude'], np.nanmedian(precip_data, axis=0)),
                    'Std': (['latitude', 'longitude'], np.nanstd(precip_data, axis=0)),
                    'Max': (['latitude', 'longitude'], np.nanmax(precip_data, axis=0)),
                    'Min': (['latitude', 'longitude'], np.nanmin(precip_data, axis=0)),
                    'Total_Precipitation': (['latitude', 'longitude'], np.nansum(precip_data, axis=0)),
                },
                coords=grid_coords,
            )

            # Flatten to a table and label it with this day's own date parts
            stats_df = stats.to_dataframe().reset_index()
            stats_df['Year'] = int(calendar_day.year)
            stats_df['Month'] = int(calendar_day.month)
            stats_df['Day'] = int(calendar_day.day)

            # Set multi-index
            stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])

            df_list.append(stats_df)

# Concatenate all daily results
df_final = pd.concat(df_list)

Processing 26 files from 1999 to 2024...


In [8]:
# Destination of the daily statistics file
output_file_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024.nc"

# Re-establish the (Year, Month, Day, latitude, longitude) index and pivot the
# table into an xarray Dataset with one dimension per index level
index_levels = ['Year', 'Month', 'Day', 'latitude', 'longitude']
df_final_xr = df_final.reset_index().set_index(index_levels).to_xarray()

# Write the Dataset to NetCDF
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

Dataset saved to: D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024.nc
