Precipitation

Daily Statistics

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from tqdm import tqdm  # For progress bars

# Daily precipitation statistics.
#
# For every yearly NetCDF file in `folder_path` whose last '_'-separated name
# token starts with a year between 2003 and 2024, compute per-grid-cell
# statistics of `tp` over all time steps of each calendar day, and write one
# NetCDF file per year indexed by (Year, Month, Day, latitude, longitude).
#
# NOTE(review): if `tp` follows the ERA5 hourly-accumulation convention (value =
# accumulation over the hour ending at the timestamp), the 00:00 step belongs
# to the previous day - confirm whether a one-hour shift is wanted here.

# Define the folder containing NetCDF files
folder_path = r"D:\IPMA\ERA5\Precipitation\1raw_year_1979_2024"
output_folder = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly"

os.makedirs(output_folder, exist_ok=True)

# Map each NetCDF file name to the year encoded in it. The year is parsed once
# here; files whose last token does not start with four digits are skipped
# instead of aborting the whole cell with a ValueError.
year_by_file = {}
for name in sorted(os.listdir(folder_path)):
    year_token = name.split('_')[-1][:4]
    if name.endswith(".nc") and year_token.isdecimal():
        year_by_file[name] = int(year_token)

# Filter only years 2003–2024 (dict order is the sorted file-name order)
file_list = [f for f, year in year_by_file.items() if 2003 <= year <= 2024]

print(f"Processing {len(file_list)} files from 2003 to 2024...")

# Loop over files with a progress bar
for file in tqdm(file_list, desc="Files processed"):
    file_path = os.path.join(folder_path, file)
    year_val = year_by_file[file]
    print(f"\n📂 Processing year {year_val}...")

    # Per-day DataFrames for this year
    df_list = []

    # The context manager releases the file handle when the year is done,
    # including when one of the steps below raises.
    with xr.open_dataset(file_path) as ds_raw:
        ds = ds_raw.chunk({'valid_time': 500})

        # Calendar month/day of every time step, extracted once per file
        months = ds['valid_time'].dt.month.values
        days = ds['valid_time'].dt.day.values

        # Put the time axis first so that axis=0 below is always `valid_time`;
        # this raises if `tp` has dimensions other than these three.
        tp = ds['tp'].transpose('valid_time', 'latitude', 'longitude')

        # Loop over each unique month in this file
        for month_val in np.unique(months):
            in_month = months == month_val
            days_in_month = np.unique(days[in_month])

            # Loop over each day in the month with a progress bar.
            # Days come from the data itself, so every selection is non-empty.
            for day_val in tqdm(days_in_month, desc=f"Month {month_val}", leave=False):
                step_idx = np.flatnonzero(in_month & (days == day_val))
                precip_data = tp.isel(valid_time=step_idx).values

                # Compute daily statistics over the time axis.
                # np.nansum yields 0.0 (not NaN) where every step is NaN.
                mean = np.nanmean(precip_data, axis=0)
                median = np.nanmedian(precip_data, axis=0)
                std = np.nanstd(precip_data, axis=0)
                max_ = np.nanmax(precip_data, axis=0)
                min_ = np.nanmin(precip_data, axis=0)
                total = np.nansum(precip_data, axis=0)

                # Create Dataset for this day's stats
                stats = xr.Dataset({
                    'Mean': (['latitude', 'longitude'], mean),
                    'Median': (['latitude', 'longitude'], median),
                    'Std': (['latitude', 'longitude'], std),
                    'Max': (['latitude', 'longitude'], max_),
                    'Min': (['latitude', 'longitude'], min_),
                    'Total_Precipitation': (['latitude', 'longitude'], total)
                }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

                # Convert to DataFrame
                stats_df = stats.to_dataframe().reset_index()

                # Add time labels
                stats_df['Year'] = year_val
                stats_df['Month'] = month_val
                stats_df['Day'] = int(day_val)

                # Set multi-index
                stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])
                df_list.append(stats_df)

    # Save this year's results
    if df_list:  # Only save if data exists
        # The concatenated frame already carries the 5-level index, so it can
        # be converted directly; index levels become the output dimensions.
        df_final = pd.concat(df_list)
        df_final_xr = df_final.to_xarray()

        output_file_path = os.path.join(output_folder, f"daily_precipitation_stats_{year_val}.nc")
        df_final_xr.to_netcdf(output_file_path)
        print(f"✅ Saved {output_file_path}")

print("\n🎉 All yearly files saved successfully!")


Regrid latitude and longitude from 0.25° to 0.75° resolution

In [None]:
import xarray as xr
import numpy as np
import os
from tqdm import tqdm

# Regrid the yearly daily-precipitation statistics: average over 3x3 blocks of
# grid cells, then linearly interpolate onto a fixed 0.75-degree target grid.
#
# NOTE(review): every statistic (including Max, Min, Median and Std) is
# block-averaged, so the result is the mean of the per-cell statistic, not the
# statistic of the block. Linear extrapolation at the grid edges can also
# produce values outside the input range (e.g. negative precipitation) -
# confirm that both are acceptable.

# Input/output folders
input_folder = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly"
output_folder = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
os.makedirs(output_folder, exist_ok=True)

# Get list of input NetCDF files
file_list = sorted([f for f in os.listdir(input_folder) if f.endswith(".nc")])
print(f"Found {len(file_list)} yearly files to regrid...")

# Regrid parameters
factor_lat = 3
factor_lon = 3

# Target grid
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Loop-invariant settings, built once
stat_vars = ["Mean", "Median", "Std", "Max", "Min", "Total_Precipitation"]
coarsen_args = dict(latitude=factor_lat, longitude=factor_lon, boundary="trim")

# Loop through files
for file in tqdm(file_list, desc="Regridding yearly files"):
    file_path = os.path.join(input_folder, file)
    year_val = file.split('_')[-1][:4]  # extract year from filename
    out_file = os.path.join(output_folder, f"daily_precipitation_stats_{year_val}_regrid.nc")

    # The context manager closes the input file once the output is written,
    # including when a step below raises.
    with xr.open_dataset(file_path) as ds:

        # Step 1: Coarsen (aggregation); "trim" drops leftover edge cells that
        # do not fill a complete block
        ds_coarse = xr.Dataset()
        for var in stat_vars:
            if var in ds:
                ds_coarse[var] = ds[var].coarsen(**coarsen_args).mean(skipna=True)

        # Handle "number" only if it's in the dataset AND has lat/lon dims
        if "number" in ds and {"latitude", "longitude"}.issubset(ds["number"].dims):
            ds_coarse["number"] = ds["number"].coarsen(**coarsen_args).sum(skipna=True)

        # Copy non-spatial coords (if they align)
        for coord in ["Year", "Month", "Day"]:
            if coord in ds:
                ds_coarse[coord] = ds[coord]

        # Step 2: Interpolate onto target grid
        ds_final = ds_coarse.interp(
            latitude=lat_target,
            longitude=lon_target,
            method="linear",
            kwargs={"fill_value": "extrapolate"}
        )

        # Save output while the input file is still open (data is read lazily)
        ds_final.to_netcdf(out_file)

    print(f"✅ Saved {out_file}")

print("\n🎉 All yearly files regridded and saved successfully!")


Temperature

Daily Statistics

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from tqdm import tqdm  # For progress bars

# Daily temperature statistics.
#
# For every yearly NetCDF file in `folder_path` whose last '_'-separated name
# token starts with a year between 2003 and 2024, compute per-grid-cell
# statistics of `t2m` over all time steps of each calendar day, and write one
# NetCDF file per year indexed by (Year, Month, Day, latitude, longitude).

# Define the folder containing NetCDF files
folder_path = r"D:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"
output_folder = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly"

os.makedirs(output_folder, exist_ok=True)

# Map each NetCDF file name to the year encoded in it. The year is parsed once
# here; files whose last token does not start with four digits are skipped
# instead of aborting the whole cell with a ValueError.
year_by_file = {}
for name in sorted(os.listdir(folder_path)):
    year_token = name.split('_')[-1][:4]
    if name.endswith(".nc") and year_token.isdecimal():
        year_by_file[name] = int(year_token)

# Filter only years 2003–2024 (dict order is the sorted file-name order)
file_list = [f for f, year in year_by_file.items() if 2003 <= year <= 2024]

print(f"Processing {len(file_list)} files from 2003 to 2024...")

# Loop over files with a progress bar
for file in tqdm(file_list, desc="Files processed"):
    file_path = os.path.join(folder_path, file)
    year_val = year_by_file[file]
    print(f"\n📂 Processing year {year_val}...")

    # Per-day DataFrames for this year
    df_list = []

    # The context manager releases the file handle when the year is done,
    # including when one of the steps below raises.
    with xr.open_dataset(file_path) as ds_raw:
        ds = ds_raw.chunk({'valid_time': 500})

        # Calendar month/day of every time step, extracted once per file
        months = ds['valid_time'].dt.month.values
        days = ds['valid_time'].dt.day.values

        # Put the time axis first so that axis=0 below is always `valid_time`;
        # this raises if `t2m` has dimensions other than these three.
        t2m = ds['t2m'].transpose('valid_time', 'latitude', 'longitude')

        # Loop over each unique month in this file
        for month_val in np.unique(months):
            in_month = months == month_val
            days_in_month = np.unique(days[in_month])

            # Loop over each day in the month with a progress bar.
            # Days come from the data itself, so every selection is non-empty.
            for day_val in tqdm(days_in_month, desc=f"Month {month_val}", leave=False):
                step_idx = np.flatnonzero(in_month & (days == day_val))
                temp_data = t2m.isel(valid_time=step_idx).values

                # Compute daily statistics over the time axis
                mean = np.nanmean(temp_data, axis=0)
                median = np.nanmedian(temp_data, axis=0)
                std = np.nanstd(temp_data, axis=0)
                max_ = np.nanmax(temp_data, axis=0)
                min_ = np.nanmin(temp_data, axis=0)

                # Create Dataset for this day's stats
                stats = xr.Dataset({
                    'Mean': (['latitude', 'longitude'], mean),
                    'Median': (['latitude', 'longitude'], median),
                    'Std': (['latitude', 'longitude'], std),
                    'Max': (['latitude', 'longitude'], max_),
                    'Min': (['latitude', 'longitude'], min_)
                }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

                # Convert to DataFrame
                stats_df = stats.to_dataframe().reset_index()

                # Add time labels
                stats_df['Year'] = year_val
                stats_df['Month'] = month_val
                stats_df['Day'] = int(day_val)

                # Set multi-index
                stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])
                df_list.append(stats_df)

    # Save this year's results
    if df_list:  # Only save if data exists
        # The concatenated frame already carries the 5-level index, so it can
        # be converted directly; index levels become the output dimensions.
        df_final = pd.concat(df_list)
        df_final_xr = df_final.to_xarray()

        output_file_path = os.path.join(output_folder, f"daily_temperature_stats_{year_val}.nc")
        df_final_xr.to_netcdf(output_file_path)
        print(f"✅ Saved {output_file_path}")

print("\n🎉 All yearly files saved successfully!")


Regrid latitude and longitude from 0.25° to 0.75° resolution

In [None]:
import xarray as xr
import numpy as np
import os
from tqdm import tqdm

# Regrid the yearly daily-temperature statistics: average over 3x3 blocks of
# grid cells, then linearly interpolate onto a fixed 0.75-degree target grid.
#
# NOTE(review): every statistic (including Max, Min, Median and Std) is
# block-averaged, so the result is the mean of the per-cell statistic, not the
# statistic of the block - confirm that this is the intended definition.

# Input/output folders
input_folder = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly"
output_folder = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
os.makedirs(output_folder, exist_ok=True)

# Get list of input NetCDF files
file_list = sorted([f for f in os.listdir(input_folder) if f.endswith(".nc")])
print(f"Found {len(file_list)} yearly files to regrid...")

# Regrid parameters
factor_lat = 3
factor_lon = 3

# Target grid
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Loop-invariant settings, built once
stat_vars = ["Mean", "Median", "Std", "Max", "Min"]
coarsen_args = dict(latitude=factor_lat, longitude=factor_lon, boundary="trim")

# Loop through files
for file in tqdm(file_list, desc="Regridding yearly files"):
    file_path = os.path.join(input_folder, file)
    year_val = file.split('_')[-1][:4]  # extract year from filename
    out_file = os.path.join(output_folder, f"daily_temperature_stats_{year_val}_regrid.nc")

    # The context manager closes the input file once the output is written,
    # including when a step below raises.
    with xr.open_dataset(file_path) as ds:

        # Step 1: Coarsen (aggregation); "trim" drops leftover edge cells that
        # do not fill a complete block
        ds_coarse = xr.Dataset()
        for var in stat_vars:
            if var in ds:
                ds_coarse[var] = ds[var].coarsen(**coarsen_args).mean(skipna=True)

        # Handle "number" only if it's in the dataset AND has lat/lon dims
        if "number" in ds and {"latitude", "longitude"}.issubset(ds["number"].dims):
            ds_coarse["number"] = ds["number"].coarsen(**coarsen_args).sum(skipna=True)

        # Copy non-spatial coords (if they align)
        for coord in ["Year", "Month", "Day"]:
            if coord in ds:
                ds_coarse[coord] = ds[coord]

        # Step 2: Interpolate onto target grid
        ds_final = ds_coarse.interp(
            latitude=lat_target,
            longitude=lon_target,
            method="linear",
            kwargs={"fill_value": "extrapolate"}
        )

        # Save output while the input file is still open (data is read lazily)
        ds_final.to_netcdf(out_file)

    print(f"✅ Saved {out_file}")

print("\n🎉 All yearly temperature files regridded and saved successfully!")


Wind

In [None]:
# Calculate wind speed and direction from the u and v wind components

import xarray as xr
import numpy as np
import os
from glob import glob

# Derive 10 m wind speed and direction from the u10/v10 components of every
# "ERA5_hourly_uv_*.nc" file and write them to one output file per input file.
#
# NOTE(review): this cell writes to E:\...\2wind_speed_direction, while the
# daily-statistics cell that follows reads D:\...\2wind_speed_direction -
# confirm the files are copied between drives or align the two paths.

# Define folders
input_folder = r"E:\IPMA\ERA5\UV_wind\1raw_year_1979_2024"
output_folder = r"E:\IPMA\ERA5\UV_wind\2wind_speed_direction"
os.makedirs(output_folder, exist_ok=True)

# Find all relevant NetCDF files
nc_files = sorted(glob(os.path.join(input_folder, "ERA5_hourly_uv_*.nc")))

for file_path in nc_files:
    print(f"Processing {os.path.basename(file_path)}")

    # Create output filename, e.g., ERA5_hourly_wind_1979.nc
    year_str = os.path.basename(file_path).split('_')[-1].split('.')[0]
    out_filename = f"ERA5_hourly_wind_{year_str}.nc"
    out_path = os.path.join(output_folder, out_filename)

    # Keep the opened dataset under its own name and let the context manager
    # close it: calling close() on the result of `assign` (a derived copy) is
    # not guaranteed to release the file, and a plain close() at the end of
    # the loop body is skipped whenever an earlier step raises.
    with xr.open_dataset(file_path) as ds:
        u10 = ds['u10']
        v10 = ds['v10']

        # Calculate wind speed
        wind_speed = np.sqrt(u10**2 + v10**2)

        # Calculate wind direction (degrees, meteorological convention):
        # arctan2(u, v) is the bearing the wind blows towards, measured
        # clockwise from north; adding 180 gives the bearing it comes from.
        wind_dir = (180 + np.degrees(np.arctan2(u10, v10))) % 360

        # Add to a derived dataset
        ds_wind = ds.assign(wind_speed=wind_speed, wind_direction=wind_dir)

        # Add metadata
        ds_wind['wind_speed'].attrs['units'] = 'm/s'
        ds_wind['wind_speed'].attrs['description'] = '10m wind speed calculated from u10 and v10'
        ds_wind['wind_direction'].attrs['units'] = 'degrees'
        ds_wind['wind_direction'].attrs['description'] = 'Wind direction (from which wind blows, 0°=North, clockwise)'

        # Save only the wind_speed and wind_direction variables; this must
        # happen while the input file is still open because data is read lazily
        ds_wind[['wind_speed', 'wind_direction']].to_netcdf(out_path)

print("✅ All files processed and saved.")


Daily Statistics

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from tqdm import tqdm  # For progress bars

# Daily wind-speed statistics.
#
# For every yearly NetCDF file in `folder_path` whose last '_'-separated name
# token starts with a year between 2003 and 2024, compute per-grid-cell
# statistics of `wind_speed` over all time steps of each calendar day, and
# write one NetCDF file per year indexed by
# (Year, Month, Day, latitude, longitude).
#
# NOTE(review): the cell that produces the wind speed/direction files writes
# to E:\...\2wind_speed_direction, but this cell reads the same folder name on
# D:\ - confirm the files are copied between drives or align the two paths.

# Define the folder containing NetCDF files
folder_path = r"D:\IPMA\ERA5\UV_wind\2wind_speed_direction"
output_folder = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly"

os.makedirs(output_folder, exist_ok=True)

# Map each NetCDF file name to the year encoded in it. The year is parsed once
# here; files whose last token does not start with four digits are skipped
# instead of aborting the whole cell with a ValueError.
year_by_file = {}
for name in sorted(os.listdir(folder_path)):
    year_token = name.split('_')[-1][:4]
    if name.endswith(".nc") and year_token.isdecimal():
        year_by_file[name] = int(year_token)

# Filter only years 2003–2024 (dict order is the sorted file-name order)
file_list = [f for f, year in year_by_file.items() if 2003 <= year <= 2024]

print(f"Processing {len(file_list)} files from 2003 to 2024...")

# Loop over files with a progress bar
for file in tqdm(file_list, desc="Files processed"):
    file_path = os.path.join(folder_path, file)
    year_val = year_by_file[file]
    print(f"\n📂 Processing year {year_val}...")

    # Per-day DataFrames for this year
    df_list = []

    # The context manager releases the file handle when the year is done,
    # including when one of the steps below raises.
    with xr.open_dataset(file_path) as ds_raw:
        ds = ds_raw.chunk({'valid_time': 500})

        # Calendar month/day of every time step, extracted once per file
        months = ds['valid_time'].dt.month.values
        days = ds['valid_time'].dt.day.values

        # Put the time axis first so that axis=0 below is always `valid_time`;
        # this raises if `wind_speed` has dimensions other than these three.
        speed = ds['wind_speed'].transpose('valid_time', 'latitude', 'longitude')

        # Loop over each unique month in this file
        for month_val in np.unique(months):
            in_month = months == month_val
            days_in_month = np.unique(days[in_month])

            # Loop over each day in the month with a progress bar.
            # Days come from the data itself, so every selection is non-empty.
            for day_val in tqdm(days_in_month, desc=f"Month {month_val}", leave=False):
                step_idx = np.flatnonzero(in_month & (days == day_val))
                wind_data = speed.isel(valid_time=step_idx).values

                # Compute daily statistics over the time axis
                mean = np.nanmean(wind_data, axis=0)
                median = np.nanmedian(wind_data, axis=0)
                std = np.nanstd(wind_data, axis=0)
                max_ = np.nanmax(wind_data, axis=0)
                min_ = np.nanmin(wind_data, axis=0)

                # Create Dataset for this day's stats
                stats = xr.Dataset({
                    'Mean': (['latitude', 'longitude'], mean),
                    'Median': (['latitude', 'longitude'], median),
                    'Std': (['latitude', 'longitude'], std),
                    'Max': (['latitude', 'longitude'], max_),
                    'Min': (['latitude', 'longitude'], min_)
                }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

                # Convert to DataFrame
                stats_df = stats.to_dataframe().reset_index()

                # Add time labels
                stats_df['Year'] = year_val
                stats_df['Month'] = month_val
                stats_df['Day'] = int(day_val)

                # Set multi-index
                stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])
                df_list.append(stats_df)

    # Save this year's results
    if df_list:  # Only save if data exists
        # The concatenated frame already carries the 5-level index, so it can
        # be converted directly; index levels become the output dimensions.
        df_final = pd.concat(df_list)
        df_final_xr = df_final.to_xarray()

        output_file_path = os.path.join(output_folder, f"daily_wind_speed_stats_{year_val}.nc")
        df_final_xr.to_netcdf(output_file_path)
        print(f"✅ Saved {output_file_path}")

print("\n🎉 All yearly files saved successfully!")


Regrid latitude and longitude from 0.25° to 0.75° resolution

In [None]:
import xarray as xr
import numpy as np
import os
from tqdm import tqdm

# Regrid the yearly daily-wind-speed statistics: average over 3x3 blocks of
# grid cells, then linearly interpolate onto a fixed 0.75-degree target grid.
#
# NOTE(review): every statistic (including Max, Min, Median and Std) is
# block-averaged, so the result is the mean of the per-cell statistic, not the
# statistic of the block. Linear extrapolation at the grid edges can also
# produce values outside the input range (e.g. negative wind speed) - confirm
# that both are acceptable.

# Input/output folders
input_folder = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly"
output_folder = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"
os.makedirs(output_folder, exist_ok=True)

# Get list of input NetCDF files
file_list = sorted([f for f in os.listdir(input_folder) if f.endswith(".nc")])
print(f"Found {len(file_list)} yearly files to regrid...")

# Regrid parameters
factor_lat = 3
factor_lon = 3

# Target grid
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Loop-invariant settings, built once
stat_vars = ["Mean", "Median", "Std", "Max", "Min"]
coarsen_args = dict(latitude=factor_lat, longitude=factor_lon, boundary="trim")

# Loop through files
for file in tqdm(file_list, desc="Regridding yearly files"):
    file_path = os.path.join(input_folder, file)
    year_val = file.split('_')[-1][:4]  # extract year from filename
    out_file = os.path.join(output_folder, f"daily_wind_speed_stats_{year_val}_regrid.nc")

    # The context manager closes the input file once the output is written,
    # including when a step below raises.
    with xr.open_dataset(file_path) as ds:

        # Step 1: Coarsen (aggregation); "trim" drops leftover edge cells that
        # do not fill a complete block
        ds_coarse = xr.Dataset()
        for var in stat_vars:
            if var in ds:
                ds_coarse[var] = ds[var].coarsen(**coarsen_args).mean(skipna=True)

        # Handle "number" only if it's in the dataset AND has lat/lon dims
        if "number" in ds and {"latitude", "longitude"}.issubset(ds["number"].dims):
            ds_coarse["number"] = ds["number"].coarsen(**coarsen_args).sum(skipna=True)

        # Copy non-spatial coords (if they align)
        for coord in ["Year", "Month", "Day"]:
            if coord in ds:
                ds_coarse[coord] = ds[coord]

        # Step 2: Interpolate onto target grid
        ds_final = ds_coarse.interp(
            latitude=lat_target,
            longitude=lon_target,
            method="linear",
            kwargs={"fill_value": "extrapolate"}
        )

        # Save output while the input file is still open (data is read lazily)
        ds_final.to_netcdf(out_file)

    print(f"✅ Saved {out_file}")

print("\n🎉 All yearly wind speed files regridded and saved successfully!")
