ERA5 - uv_wind, temp, precip

Wind speed and direction; temperature conversion

## Wind/Temp/Precip

### Separate files by year when a download contains more than one year

In [None]:
# Split a downloaded file that contains more than one year into one file per year - DONE for 1979-2024

import xarray as xr
import pandas as pd

# Split one multi-year ERA5 file into one NetCDF file per calendar year.

# Multi-year source file
file_path = r"E:\IPMA\ERA5\UV_wind\ERA5_hourly_uv_2003_1999.nc"

# Destination folder to save yearly files
dest_folder = r"E:\IPMA\ERA5\UV_wind"

# Open through a context manager so the file handle is released even when a
# selection or a write fails part-way through the loop.
with xr.open_dataset(file_path) as ds:
    # Ensure valid_time is a datetime object
    ds['valid_time'] = pd.to_datetime(ds['valid_time'].values)

    # Get unique years in the dataset
    years = pd.Series(ds['valid_time'].dt.year.values).unique()

    for year in years:
        # Keep only the time steps that fall in this year
        yearly_ds = ds.sel(valid_time=ds.valid_time.dt.year == year)

        if yearly_ds.valid_time.size > 0:  # Only save if data exists for the year
            output_filename = rf"{dest_folder}\ERA5_hourly_uv_{year}.nc" #change accordingly to what file is being used
            yearly_ds.to_netcdf(output_filename)
            print(f"Saved {output_filename}")


### Separates files by month

In [None]:
# To separate monthly files when downloading the full year - DONE for 1979-2024

import os
import xarray as xr
import pandas as pd

# Split every yearly ERA5 file in a folder into one NetCDF file per month.

# Directory where all .nc files are located
file_path = r"E:\IPMA\ERA5\UV_wind\1raw_year_1979_2024"

# Destination folder for monthly files
dest_folder = r"E:\IPMA\ERA5\UV_wind\1raw_month_1979_2024"

# Create the destination up front so the writes below cannot fail on a
# missing folder.
os.makedirs(dest_folder, exist_ok=True)

# Iterate over each file in the directory
for filename in os.listdir(file_path):
    if not filename.endswith(".nc"):
        continue

    file_full_path = os.path.join(file_path, filename)

    # The context manager releases the file handle even if a write fails.
    with xr.open_dataset(file_full_path) as ds:
        # Ensure valid_time is a datetime object
        ds['valid_time'] = pd.to_datetime(ds['valid_time'].values)

        # Get unique years in the dataset
        years = pd.Series(ds['valid_time'].dt.year.values).unique()

        for year in years:
            # The year mask does not depend on the month, so build it once.
            in_year = ds.valid_time.dt.year == year

            for month in range(1, 13):
                # Filter dataset for the given year and month
                monthly_ds = ds.sel(valid_time=in_year & (ds.valid_time.dt.month == month))

                if monthly_ds.valid_time.size > 0:  # Only save if data exists for the month
                    output_filename = rf"{dest_folder}\ERA5_hourly_uv_{year}{month:02d}.nc" #change accordingly to what file is being used
                    monthly_ds.to_netcdf(output_filename)
                    print(f"Saved {output_filename}")


## Temperature

In [None]:
# To change Kelvin to Celsius - DONE for temperature

import xarray as xr
import os
import glob

# Convert the 't2m' variable of every NetCDF file in a folder from Kelvin to
# Celsius and write the result, under the same file name, to a second folder.

# Define the folder containing the NetCDF files
data_dir = r"E:\IPMA\ERA5\Temperature\1raw_year_1979_2024"
# Define the folder to save the converted files
output_dir = r"E:\IPMA\ERA5\Temperature\2conversion_year_1979_2024"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the directory
nc_files = glob.glob(os.path.join(data_dir, "*.nc"))

for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open inside a context manager: each input file is closed before the next
    # one is opened, including on the skip path and when a write fails.
    with xr.open_dataset(file_path) as ds:
        if "t2m" not in ds:
            print(f"Skipping {file_path}, 't2m' variable not found.")
            continue

        ds["t2m"] = ds["t2m"] - 273.15  # Convert to Celsius
        ds["t2m"].attrs["units"] = "Celsius"  # Update metadata

        # Same file name as the input, but in the output folder
        output_file_path = os.path.join(output_dir, os.path.basename(file_path))

        # Save the updated dataset to the new folder
        ds.to_netcdf(output_file_path, mode="w")

        print(f"Converted 't2m' to Celsius and saved to {output_file_path}")


## Wind

In [None]:
# Calculate wind speed and direction from the u and v wind components

import xarray as xr
import numpy as np
import os
from glob import glob

# Derive 10 m wind speed and direction from the u10/v10 components of every
# yearly ERA5 file and save them as ERA5_hourly_wind_<year>.nc.

# Define folders
input_folder = r"D:\IPMA\ERA5\UV_wind\1raw_year_1979_2024"
output_folder = r"D:\IPMA\ERA5\UV_wind\2wind_speed_direction"
os.makedirs(output_folder, exist_ok=True)

# Find all relevant NetCDF files
nc_files = sorted(glob(os.path.join(input_folder, "ERA5_hourly_uv_*.nc")))

for file_path in nc_files:
    print(f"Processing {os.path.basename(file_path)}")

    # The context manager closes the dataset that actually owns the file.
    # The derived dataset gets its own name so that `ds` keeps pointing at
    # the opened file.
    with xr.open_dataset(file_path) as ds:
        # Calculate wind speed
        wind_speed = np.sqrt(ds['u10']**2 + ds['v10']**2)

        # Calculate wind direction (degrees, meteorological convention)
        wind_dir = (180 + np.degrees(np.arctan2(ds['u10'], ds['v10']))) % 360

        # Add to a derived dataset
        wind_ds = ds.assign(wind_speed=wind_speed, wind_direction=wind_dir)

        # Add metadata
        wind_ds['wind_speed'].attrs['units'] = 'm/s'
        wind_ds['wind_speed'].attrs['description'] = '10m wind speed calculated from u10 and v10'
        wind_ds['wind_direction'].attrs['units'] = 'degrees'
        wind_ds['wind_direction'].attrs['description'] = 'Wind direction (from which wind blows, 0°=North, clockwise)'

        # Create output filename, e.g., ERA5_hourly_wind_1979.nc
        # (the year is the last underscore-separated token of the input name)
        year_str = os.path.basename(file_path).split('_')[-1].split('.')[0]
        out_filename = f"ERA5_hourly_wind_{year_str}.nc"
        out_path = os.path.join(output_folder, out_filename)

        # Save only the wind_speed and wind_direction variables
        wind_ds[['wind_speed', 'wind_direction']].to_netcdf(out_path)

print("✅ All files processed and saved.")


## SPEI

In [None]:
# To convert .npy to .nc files - RAQUEL SOURCE - DONE

import numpy as np
import xarray as xr
import os

# Batch-convert every .npy array in a folder into a NetCDF file holding a
# single "spi03" variable on (time, lat, lon).

# Source folder with the .npy arrays and the folder that receives the .nc files
input_folder = r"E:\IPMA\SPI\SPI3"
output_folder = r"E:\IPMA\SPI\SPI3\nc"

# Ensure the output directory exists
os.makedirs(output_folder, exist_ok=True)

# Only .npy entries are converted; anything else in the folder is ignored
npy_names = (entry for entry in os.listdir(input_folder) if entry.endswith(".npy"))

for filename in npy_names:
    values = np.load(os.path.join(input_folder, filename))  # Load the 3D array

    # Placeholder axes (adjust to the real data): an integer time index and
    # evenly spaced latitudes/longitudes spanning the study area.
    time_axis = np.arange(values.shape[0])
    lat_axis = np.linspace(34, 66, values.shape[1])
    lon_axis = np.linspace(-12, 36, values.shape[2])

    # Wrap the array with named dimensions and coordinates
    spi_array = xr.DataArray(
        values,
        dims=("time", "lat", "lon"),
        coords={"time": time_axis, "lat": lat_axis, "lon": lon_axis},
        name="spi03",
    )

    # Same base name as the input, with the .npy extension swapped for .nc
    output_filename = filename.replace(".npy", ".nc")
    spi_array.to_dataset(name="spi03").to_netcdf(
        os.path.join(output_folder, output_filename)
    )

    print(f"Converted {filename} -> {output_filename}")

print("Batch conversion complete!")


In [None]:
# To separate spei for 1979-2024 - WEB SOURCE - DONE

import pandas as pd
import xarray as xr

def extract_spei_data(input_file, start_year=1979, end_year=2024, output_dir=None):
    """
    Extract the years start_year..end_year from a NetCDF file into a new file.

    The output is named "<input stem>_<start_year>-<end_year>.nc".

    Args:
        input_file (str): Path to the input NetCDF file.
        start_year (int): Start year for extraction (default 1979).
        end_year (int): End year for extraction (default 2024).
        output_dir (str | None): Folder for the output file. When None
            (default) the file is written next to the input file; a bare
            file name therefore writes to the current working directory.
    """
    import os  # local import: this cell only imports pandas and xarray

    # Derive the name with os.path so both "/" and "\" separators work and a
    # dot inside a folder name cannot truncate the path.
    source_dir, source_name = os.path.split(input_file)
    base_filename = os.path.splitext(source_name)[0]  # e.g. "spei12"
    target_dir = source_dir if output_dir is None else output_dir
    output_file = os.path.join(target_dir, f"{base_filename}_{start_year}-{end_year}.nc")

    # The context manager releases the input file even if the write fails.
    with xr.open_dataset(input_file) as ds:
        # Ensure time is in datetime format (if it's not already in datetime format)
        ds['time'] = pd.to_datetime(ds['time'].values)

        # Keep only the time steps from 1 Jan of start_year to 31 Dec of end_year
        filtered_ds = ds.sel(time=slice(f"{start_year}-01-01", f"{end_year}-12-31"))

        # Save the filtered dataset to a new NetCDF file
        filtered_ds.to_netcdf(output_file)

    print(f"Saved filtered data to: {output_file}")

# Example usage: extract the default 1979-2024 range from the SPEI-12 file
input_file = r"E:\IPMA\SPEIbase_v2-10\SPEI12\spei12.nc"
extract_spei_data(input_file)


In [None]:
# To separate spei for each year between 1979-2024 - WEB SOURCE - DONE

import xarray as xr
import pandas as pd

def extract_spei_data_by_year(input_file, start_year=1979, end_year=2024, output_dir=None):
    """
    Split a NetCDF file into one file per year between start_year and end_year.

    Each output is named "<input stem>_<year>.nc". Years for which the input
    holds no time steps are skipped instead of producing an empty file.

    Args:
        input_file (str): Path to the input NetCDF file.
        start_year (int): Start year for extraction (default 1979).
        end_year (int): End year for extraction (default 2024), inclusive.
        output_dir (str | None): Folder for the output files. When None
            (default) the files are written next to the input file; a bare
            file name therefore writes to the current working directory.
    """
    import os  # local import: this cell only imports xarray and pandas

    # Derive the name with os.path so both "/" and "\" separators work and a
    # dot inside a folder name cannot truncate the path. It is the same for
    # every year, so compute it once.
    source_dir, source_name = os.path.split(input_file)
    base_filename = os.path.splitext(source_name)[0]  # e.g. "spei12"
    target_dir = source_dir if output_dir is None else output_dir

    # The context manager releases the input file even if a write fails.
    with xr.open_dataset(input_file) as ds:
        # Ensure time is in datetime format (if it's not already in datetime format)
        ds['time'] = pd.to_datetime(ds['time'].values)

        for year in range(start_year, end_year + 1):
            # Filter the dataset for the current year
            filtered_ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31"))

            # Do not write empty files for years outside the data's range
            if filtered_ds.time.size == 0:
                print(f"No data for {year}, skipping.")
                continue

            output_file = os.path.join(target_dir, f"{base_filename}_{year}.nc")

            # Save the filtered data for this year to a new NetCDF file
            filtered_ds.to_netcdf(output_file)
            print(f"Saved filtered data for {year} to: {output_file}")

# Example usage: write one file per year (default 1979-2024) from the SPEI-12 file
input_file = r"E:\IPMA\SPEIbase_v2-10\SPEI12\spei12.nc"
extract_spei_data_by_year(input_file)


In [None]:
# To crop to study area - WEB SOURCE - DONE for 1979-2023

import os
import xarray as xr

# Crop every NetCDF file in a folder to the study-area bounding box and save
# the result, under the same file name, to a second folder.

# Define your input and output directories
input_dir = r"E:\IPMA\SPEIbase_v2-10\SPEI12\1raw_1979_2023"
output_dir = r"E:\IPMA\SPEIbase_v2-10\SPEI12\2cropped_1979_2023"

# Define the latitude and longitude boundaries for your study area
lat_max, lon_min, lat_min, lon_max = 66, -12, 34, 36  # Study area for Europe

# Make sure the output directory exists
os.makedirs(output_dir, exist_ok=True)


def _bounds_slice(coord, low, high):
    """Return a label slice covering low..high in the coordinate's own order.

    A label slice has to run in the same direction as the coordinate values;
    on a descending coordinate, slice(low, high) would select nothing.
    """
    values = coord.values
    descending = values.size > 1 and values[0] > values[-1]
    return slice(high, low) if descending else slice(low, high)


# Loop through all NC files in the input directory
for file_name in os.listdir(input_dir):
    if not file_name.endswith(".nc"):  # Only NetCDF files are cropped
        continue

    input_file_path = os.path.join(input_dir, file_name)

    # Open the NetCDF file using xarray
    with xr.open_dataset(input_file_path) as ds:
        # Crop the dataset to include only the specified region
        ds_europe = ds.sel(
            lat=_bounds_slice(ds["lat"], lat_min, lat_max),
            lon=_bounds_slice(ds["lon"], lon_min, lon_max),
        )

        # Create the output file path
        output_file_path = os.path.join(output_dir, file_name)

        # Save the cropped data to a new NetCDF file
        ds_europe.to_netcdf(output_file_path)
        print(f"Saved cropped file: {output_file_path}")
