Precipitation

Regrid latitude and longitude from 0.25° to 0.75° resolution

In [22]:
import xarray as xr
import numpy as np

# Load the daily precipitation statistics that will be regridded.
ds = xr.open_dataset(r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024.nc")

# Number of source cells merged into one coarse cell along each axis.
factor_lat = 3
factor_lon = 3

# Step 1: block-aggregate the spatial grid.
# Incomplete windows at the grid edge are dropped (boundary="trim").
window = dict(latitude=factor_lat, longitude=factor_lon, boundary="trim")
aggregation_plan = (
    ("mean", ["Mean", "Median", "Std", "Max", "Min"]),  # averaged over each window
    ("sum", ["Total_Precipitation", "number"]),         # summed over each window
)

ds_coarse = xr.Dataset()
for reducer, names in aggregation_plan:
    for var in names:
        ds_coarse[var] = getattr(ds[var].coarsen(**window), reducer)()

# Re-attach the calendar entries from the source dataset.
for coord_name in ("Year", "Month", "Day"):
    ds_coarse[coord_name] = ds[coord_name]

# Step 2: fixed 0.75-spaced target grid; the small epsilon keeps the end point.
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Step 3: linear interpolation of the coarsened fields onto the target grid.
# NOTE(review): target points lying outside the coarsened coordinate range
# would come back as NaN from a plain linear interp - confirm the output edges.
ds_final = ds_coarse.interp(
    {"latitude": lat_target, "longitude": lon_target},
    method="linear",
)

# Write the regridded dataset next to the input file.
out_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc"
ds_final.to_netcdf(out_path)
print("✅ Saved:", out_path)


✅ Saved: D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc


Temperature

Regrid latitude and longitude from 0.25° to 0.75° resolution

In [23]:
import xarray as xr
import numpy as np

# Load the daily temperature statistics that will be regridded.
ds = xr.open_dataset(r"D:\IPMA\ERA5\Temperature\daily_temperature_stats.nc")

# Number of source cells merged into one coarse cell along each axis.
factor_lat = 3
factor_lon = 3

# Step 1: block-aggregate the spatial grid.
# Incomplete windows at the grid edge are dropped (boundary="trim").
window = dict(latitude=factor_lat, longitude=factor_lon, boundary="trim")
aggregation_plan = (
    ("mean", ["Mean", "Median", "Std", "Max", "Min"]),  # averaged over each window
    ("sum", ["number"]),                                # summed over each window
)

ds_coarse = xr.Dataset()
for reducer, names in aggregation_plan:
    for var in names:
        ds_coarse[var] = getattr(ds[var].coarsen(**window), reducer)()

# Re-attach the calendar entries from the source dataset.
for coord_name in ("Year", "Month", "Day"):
    ds_coarse[coord_name] = ds[coord_name]

# Step 2: fixed 0.75-spaced target grid; the small epsilon keeps the end point.
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Step 3: linear interpolation of the coarsened fields onto the target grid.
# NOTE(review): target points lying outside the coarsened coordinate range
# would come back as NaN from a plain linear interp - confirm the output edges.
ds_final = ds_coarse.interp(
    {"latitude": lat_target, "longitude": lon_target},
    method="linear",
)

# Write the regridded dataset next to the input file.
out_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc"
ds_final.to_netcdf(out_path)
print("✅ Saved:", out_path)


✅ Saved: D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc


Wind

In [None]:
# Calculate wind speed and direction from the u and v wind components

import xarray as xr
import numpy as np
import os
from glob import glob

# Input: yearly files holding the u10/v10 components.
# Output: one file per year holding the derived wind speed and direction.
input_folder = r"E:\IPMA\ERA5\UV_wind\1raw_year_1979_2024"
output_folder = r"E:\IPMA\ERA5\UV_wind\2wind_speed_direction"
os.makedirs(output_folder, exist_ok=True)

# Find all relevant NetCDF files (sorted so they are processed in name order)
nc_files = sorted(glob(os.path.join(input_folder, "ERA5_hourly_uv_*.nc")))

for file_path in nc_files:
    print(f"Processing {os.path.basename(file_path)}")

    # Keep the opened file under its own name and let the context manager
    # close it. Previously `ds` was rebound to the result of `assign`, so the
    # trailing `ds.close()` acted on the derived dataset rather than on the
    # object returned by `open_dataset`, and it was skipped entirely whenever
    # an exception interrupted the loop body.
    with xr.open_dataset(file_path) as ds_raw:
        # Wind speed: magnitude of the (u10, v10) vector
        wind_speed = np.sqrt(ds_raw['u10']**2 + ds_raw['v10']**2)

        # Wind direction in degrees, meteorological convention: the direction
        # the wind blows FROM, 0 = North, increasing clockwise. Adding 180
        # flips the "towards" angle from arctan2(u, v) into a "from" angle.
        wind_dir = (180 + np.degrees(np.arctan2(ds_raw['u10'], ds_raw['v10']))) % 360

        # Add the derived fields to the dataset
        ds = ds_raw.assign(wind_speed=wind_speed, wind_direction=wind_dir)

        # Add metadata
        ds['wind_speed'].attrs['units'] = 'm/s'
        ds['wind_speed'].attrs['description'] = '10m wind speed calculated from u10 and v10'
        ds['wind_direction'].attrs['units'] = 'degrees'
        ds['wind_direction'].attrs['description'] = 'Wind direction (from which wind blows, 0°=North, clockwise)'

        # Create output filename, e.g., ERA5_hourly_wind_1979.nc
        # (the text after the last underscore, minus the extension)
        year_str = os.path.basename(file_path).split('_')[-1].split('.')[0]
        out_filename = f"ERA5_hourly_wind_{year_str}.nc"
        out_path = os.path.join(output_folder, out_filename)

        # Save only the derived variables. This must happen while the source
        # file is still open, because the data may be read lazily on write.
        ds[['wind_speed', 'wind_direction']].to_netcdf(out_path)

print("✅ All files processed and saved.")


Daily statistics

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Folder with the yearly wind speed/direction files
folder_path = r"D:\IPMA\ERA5\UV_wind\2wind_speed_direction"

# Get sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# One DataFrame of statistics per calendar day is collected here
df_list = []

# Loop over each file (one year per file)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    print(f"Processing: {file_path}")

    # Open lazily and make sure the file is closed again, even on errors
    with xr.open_dataset(file_path) as ds:
        # Chunk the time dimension so only one day's slice is loaded at a time
        wind_speed = ds['wind_speed'].chunk({'valid_time': 500})

        # Build one integer key (YYYYMMDD) per time step. Grouping on the full
        # date is essential: selecting on day-of-month alone would pool, e.g.,
        # the 1st of every month of the year into a single "day" and label it
        # with the first month only.
        valid_time = ds['valid_time']
        date_keys = (
            valid_time.dt.year.values.astype(np.int64) * 10000
            + valid_time.dt.month.values.astype(np.int64) * 100
            + valid_time.dt.day.values.astype(np.int64)
        )

        # Loop through each distinct calendar date in the file
        for date_key in np.unique(date_keys):
            time_idx = np.flatnonzero(date_keys == date_key)

            # assumes dims are ordered (valid_time, latitude, longitude) - TODO confirm
            wind_data = wind_speed.isel(valid_time=time_idx).values

            # Daily statistics over the time axis, ignoring NaNs
            mean = np.nanmean(wind_data, axis=0)
            median = np.nanmedian(wind_data, axis=0)
            std = np.nanstd(wind_data, axis=0)
            max_ = np.nanmax(wind_data, axis=0)
            min_ = np.nanmin(wind_data, axis=0)

            # Create Dataset for this day's stats
            stats = xr.Dataset({
                'Mean': (['latitude', 'longitude'], mean),
                'Median': (['latitude', 'longitude'], median),
                'Std': (['latitude', 'longitude'], std),
                'Max': (['latitude', 'longitude'], max_),
                'Min': (['latitude', 'longitude'], min_),
            }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

            # Convert to DataFrame
            stats_df = stats.to_dataframe().reset_index()

            # Decode the YYYYMMDD key back into its date parts
            year_val, month_day = divmod(int(date_key), 10000)
            month_val, day_val = divmod(month_day, 100)
            stats_df['Year'] = year_val
            stats_df['Month'] = month_val
            stats_df['Day'] = day_val

            # Set multi-index
            stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])

            df_list.append(stats_df)

# Concatenate all daily results
df_final = pd.concat(df_list)

# Convert the DataFrame back to an xarray Dataset. The frame is already
# indexed by (Year, Month, Day, latitude, longitude), which become the
# dataset dimensions.
df_final_xr = df_final.to_xarray()

# Save to NetCDF
output_file_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"✅ Daily wind speed dataset saved to: {output_file_path}")


Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2000.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2001.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2002.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2003.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2004.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2005.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2006.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2007.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2008.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2009.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2010.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_speed_direction\ERA5_hourly_wind_2011.nc
Processing: D:\IPMA\ERA5\UV_wind\2wind_s

Regrid latitude and longitude from 0.25° to 0.75° resolution

In [24]:
import xarray as xr
import numpy as np

# Open the daily wind speed statistics that will be regridded
ds = xr.open_dataset(r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats.nc")

# Number of source cells merged into one coarse cell along each axis
factor_lat = 3
factor_lon = 3

# Step 1: Coarsen (aggregation)
# Incomplete windows at the grid edge are dropped (boundary="trim").
ds_coarse = xr.Dataset()
for var in ["Mean", "Median", "Std", "Max", "Min"]:
    ds_coarse[var] = ds[var].coarsen(latitude=factor_lat, longitude=factor_lon, boundary="trim").mean()
for var in ["number"]:
    # `number` is not one of the statistics written explicitly by the daily
    # statistics step, so it may be missing from the file or not defined on the
    # latitude/longitude grid. Skip it in that case instead of aborting.
    if var not in ds.variables or not {"latitude", "longitude"} <= set(ds[var].dims):
        print(f"Skipping '{var}': not available on the latitude/longitude grid")
        continue
    ds_coarse[var] = ds[var].coarsen(latitude=factor_lat, longitude=factor_lon, boundary="trim").sum()

# Copy non-spatial coords
ds_coarse["Year"] = ds["Year"]
ds_coarse["Month"] = ds["Month"]
ds_coarse["Day"] = ds["Day"]

# Step 2: Define target grid (forced); the small epsilon keeps the end point
lat_target = np.arange(34.5, 66.0 + 0.001, 0.75)   # 43 lats
lon_target = np.arange(-12.0, 36.0 + 0.001, 0.75)  # 65 lons

# Step 3: Interpolate coarsened data onto target grid
# NOTE(review): target points lying outside the coarsened coordinate range
# would come back as NaN from a plain linear interp - confirm the output edges.
ds_final = ds_coarse.interp(latitude=lat_target, longitude=lon_target, method="linear")

# Save
out_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc"
ds_final.to_netcdf(out_path)
print("✅ Saved:", out_path)


✅ Saved: D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc
