Main pollutants PM10 & PM2.5, NO2, CO2, O3


CAMS global reanalysis EAC4 - Data Preparation (according to what Virgilio gave in matlab scripts)

### Single level

#### Separates files by month

In [None]:
# To separate monthly files when downloading the full year - DONE for 2023-2024 nov

import xarray as xr
import pandas as pd

# Load the dataset
file_path = r"E:\IPMA\CAMS\chem_singlvl\0raw_2023_2024\CAMS_global_reanalysis_EAC4_chem_singlvl_2024.nc"
ds = xr.open_dataset(file_path)

# Ensure valid_time is a datetime object
ds['valid_time'] = pd.to_datetime(ds['valid_time'].values)

# Get unique years in the dataset
years = pd.Series(ds['valid_time'].dt.year.values).unique()

# Iterate through years and months
dest_folder = r"E:\IPMA\CAMS\chem_singlvl\0raw_2023_2024"
for year in years:
    for month in range(1, 13):
        # Filter dataset for the given year and month
        monthly_ds = ds.sel(valid_time=(ds.valid_time.dt.year == year) & (ds.valid_time.dt.month == month))

        if monthly_ds.valid_time.size > 0:  # Only save if data exists for the month
            output_filename = rf"{dest_folder}\CAMS_global_reanalysis_EAC4_chem_singlvl_{year}{month:02d}.nc"
            monthly_ds.to_netcdf(output_filename)
            print(f"Saved {output_filename}")

# Close the dataset
ds.close()

#### Changes name variable

In [None]:
# To change valid_time to time (so it matches) - DONE for 2023-2024 again

import xarray as xr
import os
import glob

# Path to your directory containing the NetCDF files
input_dir = r"D:\IPMA\CAMS\chem_singlvl\0raw_2023_2024"
output_dir = r"D:\IPMA\CAMS\chem_singlvl\1raw_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

# Loop through each file
for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open the NetCDF file
    ds = xr.open_dataset(file_path)

    # Check if 'valid_time' exists and rename it to 'time'
    if 'valid_time' in ds:
        ds = ds.rename({"valid_time": "time"})

    # Get the filename and create the output path in the new directory
    output_file = os.path.join(output_dir, os.path.basename(file_path))

    # Save the updated dataset to the output directory (overwrite original file in the new folder)
    ds.to_netcdf(output_file)

    print(f"Renamed 'valid_time' to 'time' and saved to {output_file}")

print("Renaming and saving to new folder complete!")


#### Converts coordinates

In [None]:
# To convert coords from 0 to 360 to -180 to 180 - DONE for 2003-2024 again

import os
import xarray as xr
import numpy as np

# Define input and output folders
input_folder = r"D:\IPMA\CAMS\chem_singlvl\1raw_2003_2024"
output_folder = r"D:\IPMA\CAMS\chem_singlvl\2transformation_2003_2024"

# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get list of NetCDF files in the input folder
nc_files = [f for f in os.listdir(input_folder) if f.endswith(".nc")]

# Process each file
for nc_file in nc_files:
    input_path = os.path.join(input_folder, nc_file)
    output_path = os.path.join(output_folder, nc_file)

    print(f"Processing: {nc_file}")

    # Open the dataset
    ds = xr.open_dataset(input_path)

    # Extract longitude and latitude
    lon = ds['longitude'].values  # (480,)
    lat = ds['latitude'].values   # (241,)

    # Convert longitude from 0-360 to -180 to 180
    lon2 = (lon + 180) % 360 - 180

    # Swap the first and second halves
    lon3 = np.copy(lon2)
    lon3[:240] = lon2[240:480]
    lon3[240:480] = lon2[:240]

    # Create a meshgrid (not strictly needed for saving, but useful)
    LON, LAT = np.meshgrid(lon3, lat)

    # Apply the same transformation to pm10, pm1, and pm2p5
    for var in ["pm10", "pm1", "pm2p5"]:
        if var in ds:
            data = ds[var].values  # Shape: (time, lat, lon)
            transformed_data = np.copy(data)

            # Swap the longitude axis (last axis)
            transformed_data[:, :, :240] = data[:, :, 240:480]
            transformed_data[:, :, 240:480] = data[:, :, :240]

            # Replace the dataset variable with the corrected data
            ds[var].values = transformed_data

    # Update longitude in the dataset
    ds = ds.assign_coords(longitude=lon3)

    # Save the modified dataset
    ds.to_netcdf(output_path)

    print(f"Saved transformed file: {output_path}")



#### Crop to Study Area

In [None]:
# To crop to study area - DONE for 2003-2024 again

import xarray as xr
import os
import glob

# Define your study area (lat_max, lon_min, lat_min, lon_max)
lat_max, lon_min, lat_min, lon_max = 66, -12, 34, 36

# Input and output directories
input_dir = r"D:\IPMA\CAMS\chem_singlvl\2transformation_2003_2024"
output_dir = r"D:\IPMA\CAMS\chem_singlvl\3cropped_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open the NetCDF file
    ds = xr.open_dataset(file_path)

    # Ensure latitude slicing is correct (since it decreases from 90 to -90)
    ds_cropped = ds.sel(
        latitude=slice(lat_max, lat_min),  # lat_max is greater than lat_min
        longitude=slice(lon_min, lon_max)  # lon_min is less than lon_max
    )

    # Define output file path
    output_file = os.path.join(output_dir, os.path.basename(file_path))

    # Save the cropped dataset
    ds_cropped.to_netcdf(output_file)
    print(f"Saved cropped file to {output_file}")

print("Processing complete!")

#### Converts units

In [None]:
# To convert PM from kg/m3 to ug/m3 - DONE again

import xarray as xr
import glob
import os

# Define input and output directories
input_folder = r"D:\IPMA\CAMS\chem_singlvl\3cropped_2003_2024"
output_folder = r"D:\IPMA\CAMS\chem_singlvl\4conversion_2003_2024"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# List of NetCDF files
nc_files = glob.glob(os.path.join(input_folder, "*.nc"))

for nc_file in nc_files:
    try:
        # Open the NetCDF file
        ds = xr.open_dataset(nc_file)

        # Convert PM values to µg/m³
        ds["pm1"] = ds["pm1"] * 1e9
        ds["pm2p5"] = ds["pm2p5"] * 1e9
        ds["pm10"] = ds["pm10"] * 1e9

        # Get the filename and construct output path
        filename = os.path.basename(nc_file)  # Extracts filename only
        output_file = os.path.join(output_folder, filename)  # Keeps same filename in new folder

        # Save the modified NetCDF file in the new folder
        ds.to_netcdf(output_file)
        ds.close()  # Close the dataset

    except Exception as e:
        print(f"Error processing {nc_file}: {e}")


#### Puts together files

In [None]:
# To put together files for each year - DONE for 2003-2024 (final step) again

import xarray as xr
import glob
import os

# Define the original path where the files are located
input_path = r"D:\IPMA\CAMS\chem_singlvl\4conversion_2003_2024"

# Define the new path where you want to save the compiled files
output_path_base = r"D:\IPMA\CAMS\chem_singlvl\5compile_2003_2024"

# Make sure the output directory exists
os.makedirs(output_path_base, exist_ok=True)

# Loop over each year from 2003 to 2024
for year in range(2003, 2025):
    # Create the file pattern for the specific year (e.g., 2003*)
    file_pattern = os.path.join(input_path, f"CAMS_global_reanalysis_EAC4_chem_singlvl_{year}*.nc")
    
    # Use glob to find all .nc files for the specified year
    files = glob.glob(file_pattern)
    
    if files:  # Only proceed if there are files for that year
        # Open all files for the year and concatenate along the 'time' dimension
        ds = xr.open_mfdataset(files, combine='by_coords')

        # Save the compiled dataset to the new path
        output_path = os.path.join(output_path_base, f"CAMS_global_reanalysis_EAC4_chem_singlvl_{year}.nc")
        ds.to_netcdf(output_path)

        print(f"Files for {year} have been compiled into: {output_path}")
    else:
        print(f"No files found for {year}.")



### Multi level

#### Separates files by month

In [None]:
# To separate monthly files when downloading the full year - DONE for 2023-2024 nov

import xarray as xr
import pandas as pd

# Load the dataset
file_path = r"E:\IPMA\CAMS\chem_multlvl\0raw_2023_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_2024.nc"
ds = xr.open_dataset(file_path)

# Ensure valid_time is a datetime object
ds['valid_time'] = pd.to_datetime(ds['valid_time'].values)

# Get unique years in the dataset
years = pd.Series(ds['valid_time'].dt.year.values).unique()

# Iterate through years and months
dest_folder = r"E:\IPMA\CAMS\chem_multlvl\0raw_2023_2024"
for year in years:
    for month in range(1, 13):
        # Filter dataset for the given year and month
        monthly_ds = ds.sel(valid_time=(ds.valid_time.dt.year == year) & (ds.valid_time.dt.month == month))

        if monthly_ds.valid_time.size > 0:  # Only save if data exists for the month
            output_filename = rf"{dest_folder}\CAMS_global_reanalysis_EAC4_chem_multlvl_{year}{month:02d}.nc"
            monthly_ds.to_netcdf(output_filename)
            print(f"Saved {output_filename}")

# Close the dataset
ds.close()

#### Changes name variable

In [11]:
# To change valid_time to time + remove pressure_level if present - DONE for 2023–2024 files again

import xarray as xr
import os
import glob

# Paths
input_dir = r"D:\IPMA\CAMS\chem_multlvl\0raw_2023_2024"
output_dir = r"D:\IPMA\CAMS\chem_multlvl\1raw_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

# Process each file
for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open dataset
    ds = xr.open_dataset(file_path)

    # Rename 'valid_time' to 'time' if needed
    if 'valid_time' in ds:
        ds = ds.rename({"valid_time": "time"})

    # Handle pressure_level if it's a dimension of size 1
    if "pressure_level" in ds.dims and ds.dims["pressure_level"] == 1:
        ds = ds.isel(pressure_level=0).squeeze(drop=True)

    # Drop pressure_level variable if it's still around (not a dimension)
    if "pressure_level" in ds.variables and "pressure_level" not in ds.dims:
        ds = ds.drop_vars("pressure_level")

    # Save cleaned dataset
    output_file = os.path.join(output_dir, os.path.basename(file_path))
    ds.to_netcdf(output_file)

    print(f"Saved cleaned file to: {output_file}")

print("✅ All files processed!")


Processing D:\IPMA\CAMS\chem_multlvl\0raw_2023_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc...


  if "pressure_level" in ds.dims and ds.dims["pressure_level"] == 1:


Saved cleaned file to: D:\IPMA\CAMS\chem_multlvl\1raw_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc
Processing D:\IPMA\CAMS\chem_multlvl\0raw_2023_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc...


  if "pressure_level" in ds.dims and ds.dims["pressure_level"] == 1:


Saved cleaned file to: D:\IPMA\CAMS\chem_multlvl\1raw_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc
✅ All files processed!


#### Converts coordinates

In [12]:
# To convert coords from 0 to 360 to -180 to 180 - DONE for 2003-2024 again

import os
import xarray as xr
import numpy as np

# Define input and output folders
input_folder = r"D:\IPMA\CAMS\chem_multlvl\1raw_2003_2024"
output_folder = r"D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024"

# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get list of NetCDF files in the input folder
nc_files = [f for f in os.listdir(input_folder) if f.endswith(".nc")]

# Process each file
for nc_file in nc_files:
    input_path = os.path.join(input_folder, nc_file)
    output_path = os.path.join(output_folder, nc_file)

    print(f"Processing: {nc_file}")

    # Open the dataset
    ds = xr.open_dataset(input_path)

    # Extract longitude and latitude
    lon = ds['longitude'].values  # (480,)
    lat = ds['latitude'].values   # (241,)

    # Convert longitude from 0-360 to -180 to 180
    lon2 = (lon + 180) % 360 - 180

    # Swap the first and second halves
    lon3 = np.copy(lon2)
    lon3[:240] = lon2[240:480]
    lon3[240:480] = lon2[:240]

    # Create a meshgrid (not strictly needed for saving, but useful)
    LON, LAT = np.meshgrid(lon3, lat)

    # Apply the same transformation to co, no2, and go3
    for var in ["co", "no2", "no", "go3"]:
        if var in ds:
            data = ds[var].values  # Shape: (time, lat, lon)
            transformed_data = np.copy(data)

            # Swap the longitude axis (last axis)
            transformed_data[:, :, :240] = data[:, :, 240:480]
            transformed_data[:, :, 240:480] = data[:, :, :240]

            # Replace the dataset variable with the corrected data
            ds[var].values = transformed_data

    # Update longitude in the dataset
    ds = ds.assign_coords(longitude=lon3)

    # Save the modified dataset
    ds.to_netcdf(output_path)

    print(f"Saved transformed file: {output_path}")



Processing: CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc
Saved transformed file: D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc
Processing: CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc
Saved transformed file: D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc


#### Crop to study area

In [13]:
# To crop to study area - DONE for 2003-2024 again

import xarray as xr
import os
import glob

# Define your study area (lat_max, lon_min, lat_min, lon_max)
lat_max, lon_min, lat_min, lon_max = 66, -12, 34, 36

# Input and output directories
input_dir = r"D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024"
output_dir = r"D:\IPMA\CAMS\chem_multlvl\3cropped_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open the NetCDF file
    ds = xr.open_dataset(file_path)

    # Ensure latitude slicing is correct (since it decreases from 90 to -90)
    ds_cropped = ds.sel(
        latitude=slice(lat_max, lat_min),  # lat_max is greater than lat_min
        longitude=slice(lon_min, lon_max)  # lon_min is less than lon_max
    )

    # Define output file path
    output_file = os.path.join(output_dir, os.path.basename(file_path))

    # Save the cropped dataset
    ds_cropped.to_netcdf(output_file)
    print(f"Saved cropped file to {output_file}")

print("Processing complete!")

Processing D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc...
Saved cropped file to D:\IPMA\CAMS\chem_multlvl\3cropped_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc
Processing D:\IPMA\CAMS\chem_multlvl\2transformation_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc...
Saved cropped file to D:\IPMA\CAMS\chem_multlvl\3cropped_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc
Processing complete!


#### Converts units

In [14]:
# To convert from kg/kg to kg/m3 to ug/m3 (co in mg/m3) again

import os
import xarray as xr
import numpy as np

# Paths
chem_folder = r"D:\IPMA\CAMS\chem_multlvl\3cropped_2003_2024"
temp_folder = r"D:\IPMA\CAMS\meteo_multlvl\3cropped_2003_2024"
output_folder = r"D:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024"
os.makedirs(output_folder, exist_ok=True)

# Constants
pressure = 1e5  # Pa equal to 1000 hPa
R = 287.0500676  # J/(kg·K)

# Loop through chemistry files
for file in os.listdir(chem_folder):
    if file.endswith('.nc'):
        chem_path = os.path.join(chem_folder, file)

        # Replace 'chem_multlvl' with 'meteo_multlvl' to find the corresponding temp file
        temp_filename = file.replace('chem_multlvl', 'meteo_multlvl')
        temp_path = os.path.join(temp_folder, temp_filename)

        if not os.path.exists(temp_path):
            print(f"⚠ Temperature file not found for {file}")
            continue

        # Open datasets
        ds_chem = xr.open_dataset(chem_path)
        ds_temp = xr.open_dataset(temp_path)

        # Ensure 't' exists in temperature file
        if 't' not in ds_temp.variables:
            print(f"⚠ No temperature variable 't' in {temp_filename}")
            continue

        # Align time and space
        ds_chem, ds_temp = xr.align(ds_chem, ds_temp, join='inner')
        t = ds_temp['t']

        # STEP 1: kg/kg → kg/m³
        co_kgm3 = ds_chem['co'] * pressure / (R * t)
        no2_kgm3 = ds_chem['no2'] * pressure / (R * t)
        no_kgm3 = ds_chem['no'] * pressure / (R * t)
        go3_kgm3 = ds_chem['go3'] * pressure / (R * t)

        # STEP 2: kg/m³ → final units
        co_final = co_kgm3 * 1e6    # mg/m³
        no2_final = no2_kgm3 * 1e9  # µg/m³
        no_final = no_kgm3 * 1e9    # µg/m³
        go3_final = go3_kgm3 * 1e9  # µg/m³

        # Create new dataset
        new_ds = xr.Dataset({
            'co': co_final,
            'no2': no2_final,
            'no': no_final,
            'go3': go3_final
        })

        # Assign coordinates and attributes
        for coord in ds_chem.coords:
            new_ds = new_ds.assign_coords({coord: ds_chem[coord]})
        new_ds.attrs = ds_chem.attrs

        # Optional: add units metadata
        new_ds['co'].attrs['units'] = 'mg m-3'
        new_ds['no2'].attrs['units'] = 'µg m-3'
        new_ds['no'].attrs['units'] = 'µg m-3'
        new_ds['go3'].attrs['units'] = 'µg m-3'

        # Save output
        output_path = os.path.join(output_folder, f"{file}")
        new_ds.to_netcdf(output_path)

        print(f"✔ Converted and saved: {output_path}")

✔ Converted and saved: D:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202411.nc
✔ Converted and saved: D:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_202412.nc


#### Puts together files

In [16]:
# To put together files for each year - DONE for 2003-2024 (final step) again

import xarray as xr
import glob
import os

# Define the original path where the files are located
input_path = r"D:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024"

# Define the new path where you want to save the compiled files
output_path_base = r"D:\IPMA\CAMS\chem_multlvl\5compile_2003_2024"

# Make sure the output directory exists
os.makedirs(output_path_base, exist_ok=True)

# Loop over each year from 2003 to 2024
for year in range(2003, 2025):
    # Create the file pattern for the specific year (e.g., 2003*)
    file_pattern = os.path.join(input_path, f"CAMS_global_reanalysis_EAC4_chem_multlvl_{year}*.nc")
    
    # Use glob to find all .nc files for the specified year
    files = glob.glob(file_pattern)
    
    if files:  # Only proceed if there are files for that year
        # Open all files for the year and concatenate along the 'time' dimension
        ds = xr.open_mfdataset(files, combine='by_coords')

        # Save the compiled dataset to the new path
        output_path = os.path.join(output_path_base, f"CAMS_global_reanalysis_EAC4_chem_multlvl_{year}.nc")
        ds.to_netcdf(output_path)

        print(f"Files for {year} have been compiled into: {output_path}")
    else:
        print(f"No files found for {year}.")



No files found for 2003.
No files found for 2004.
No files found for 2005.
No files found for 2006.
No files found for 2007.
No files found for 2008.
No files found for 2009.
No files found for 2010.
No files found for 2011.
No files found for 2012.
No files found for 2013.
No files found for 2014.
No files found for 2015.
No files found for 2016.
No files found for 2017.
No files found for 2018.
No files found for 2019.
No files found for 2020.
No files found for 2021.
No files found for 2022.
No files found for 2023.
Files for 2024 have been compiled into: D:\IPMA\CAMS\chem_multlvl\5compile_2003_2024\CAMS_global_reanalysis_EAC4_chem_multlvl_2024.nc


### Meteo multlvl - INTERMEDIATE STEP BEFORE RUNNING CONVERTION OF MULTLVL

#### Separates files by month

In [None]:
# To separate monthly files when downloading the full year - DONE for 2023-2024 nov

import xarray as xr
import pandas as pd

# Load the dataset
file_path = r"E:\IPMA\CAMS\meteo_multlvl\0raw_2023_2024\CAMS_global_reanalysis_EAC4_meteo_multlvl_2024.nc"
ds = xr.open_dataset(file_path)

# Ensure valid_time is a datetime object
ds['valid_time'] = pd.to_datetime(ds['valid_time'].values)

# Get unique years in the dataset
years = pd.Series(ds['valid_time'].dt.year.values).unique()

# Iterate through years and months
dest_folder = r"E:\IPMA\CAMS\meteo_multlvl\0raw_2023_2024"
for year in years:
    for month in range(1, 13):
        # Filter dataset for the given year and month
        monthly_ds = ds.sel(valid_time=(ds.valid_time.dt.year == year) & (ds.valid_time.dt.month == month))

        if monthly_ds.valid_time.size > 0:  # Only save if data exists for the month
            output_filename = rf"{dest_folder}\CAMS_global_reanalysis_EAC4_meteo_multlvl_{year}{month:02d}.nc"
            monthly_ds.to_netcdf(output_filename)
            print(f"Saved {output_filename}")

# Close the dataset
ds.close()

#### Changes name variable

In [None]:
# To change valid_time to time + remove pressure_level if present - DONE for 2023–2024 files again

import xarray as xr
import os
import glob

# Paths
input_dir = r"D:\IPMA\CAMS\meteo_multlvl\0raw_2023_2024"
output_dir = r"D:\IPMA\CAMS\meteo_multlvl\1raw_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

# Process each file
for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open dataset
    ds = xr.open_dataset(file_path)

    # Rename 'valid_time' to 'time' if needed
    if 'valid_time' in ds:
        ds = ds.rename({"valid_time": "time"})

    # Handle pressure_level if it's a dimension of size 1
    if "pressure_level" in ds.dims and ds.dims["pressure_level"] == 1:
        ds = ds.isel(pressure_level=0).squeeze(drop=True)

    # Drop pressure_level variable if it's still around (not a dimension)
    if "pressure_level" in ds.variables and "pressure_level" not in ds.dims:
        ds = ds.drop_vars("pressure_level")

    # Save cleaned dataset
    output_file = os.path.join(output_dir, os.path.basename(file_path))
    ds.to_netcdf(output_file)

    print(f"Saved cleaned file to: {output_file}")

print("✅ All files processed!")


#### Converts coordinates

In [None]:
# To convert coords from 0 to 360 to -180 to 180 - DONE for 2003-2024 again

import os
import xarray as xr
import numpy as np

# Define input and output folders
input_folder = r"D:\IPMA\CAMS\meteo_multlvl\1raw_2003_2024"
output_folder = r"D:\IPMA\CAMS\meteo_multlvl\2transformation_2003_2024"

# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get list of NetCDF files in the input folder
nc_files = [f for f in os.listdir(input_folder) if f.endswith(".nc")]

# Process each file
for nc_file in nc_files:
    input_path = os.path.join(input_folder, nc_file)
    output_path = os.path.join(output_folder, nc_file)

    print(f"Processing: {nc_file}")

    # Open the dataset
    ds = xr.open_dataset(input_path)

    # Extract longitude and latitude
    lon = ds['longitude'].values  # (480,)
    lat = ds['latitude'].values   # (241,)

    # Convert longitude from 0-360 to -180 to 180
    lon2 = (lon + 180) % 360 - 180

    # Swap the first and second halves
    lon3 = np.copy(lon2)
    lon3[:240] = lon2[240:480]
    lon3[240:480] = lon2[:240]

    # Create a meshgrid (not strictly needed for saving, but useful)
    LON, LAT = np.meshgrid(lon3, lat)

    # Apply the same transformation to co, no2, and go3
    for var in ["t", "q"]:
        if var in ds:
            data = ds[var].values  # Shape: (time, lat, lon)
            transformed_data = np.copy(data)

            # Swap the longitude axis (last axis)
            transformed_data[:, :, :240] = data[:, :, 240:480]
            transformed_data[:, :, 240:480] = data[:, :, :240]

            # Replace the dataset variable with the corrected data
            ds[var].values = transformed_data

    # Update longitude in the dataset
    ds = ds.assign_coords(longitude=lon3)

    # Save the modified dataset
    ds.to_netcdf(output_path)

    print(f"Saved transformed file: {output_path}")



#### Crop to study area

In [None]:
# To crop to study area - DONE for 2003-2024 again

import xarray as xr
import os
import glob

# Define your study area (lat_max, lon_min, lat_min, lon_max)
lat_max, lon_min, lat_min, lon_max = 66, -12, 34, 36

# Input and output directories
input_dir = r"D:\IPMA\CAMS\meteo_multlvl\2transformation_2003_2024"
output_dir = r"D:\IPMA\CAMS\meteo_multlvl\3cropped_2003_2024"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get all NetCDF files in the input directory
nc_files = glob.glob(os.path.join(input_dir, "*.nc"))

for file_path in nc_files:
    print(f"Processing {file_path}...")

    # Open the NetCDF file
    ds = xr.open_dataset(file_path)

    # Ensure latitude slicing is correct (since it decreases from 90 to -90)
    ds_cropped = ds.sel(
        latitude=slice(lat_max, lat_min),  # lat_max is greater than lat_min
        longitude=slice(lon_min, lon_max)  # lon_min is less than lon_max
    )

    # Define output file path
    output_file = os.path.join(output_dir, os.path.basename(file_path))

    # Save the cropped dataset
    ds_cropped.to_netcdf(output_file)
    print(f"Saved cropped file to {output_file}")

print("Processing complete!")