BI Goals:

i) investigar as ligações entre a atividade do fogo, conforme medido pelo FRP (Fire Radiative Power), e as concentrações de poluentes e avaliar a zona espacial e temporal de influência da atividade dos incêndios florestais.

(ii) investigar a utilização de FRP como ferramenta para filtrar a contribuição do fumo de biomassa para os registos de poluição atmosférica em bacias atmosféricas urbanas, nomeadamente as emissões de carbono resultantes de incêndios florestais graves.

(iii) desenvolver abordagens multirriscos para caracterizar o comportamento conjunto de múltiplos perigos e riscos consequentes e avaliar o papel desempenhado por condições anteriores e simultâneas de seca e/ou calor na exacerbação de incêndios rurais e consequentes ondas de fumo

Combine datasets (labeled dataset that contains info about labels and FRP & pollutants statistics) by day and pixel

In [21]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "pm2p5"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Greece"        # <-- change this to your region/country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_singlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_meteo_{country}.nc"

# Meteorological folders
precip_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
temp_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
wind_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"

# ==== Load datasets ====
print("📂 Loading pollutant, fire, and mask datasets...")
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)
mask = mask_ds["mask"]  # adjust if mask variable has a different name

# ==== Helper function to load yearly meteorological data ====
def load_meteorological_dataset(folder_path, prefix, variables_to_keep):
    """Load all yearly ERA5 NetCDF files, keep only selected variables, and flatten into a time dimension."""
    files = sorted([os.path.join(folder_path, f)
                    for f in os.listdir(folder_path)
                    if f.endswith(".nc")])
    if not files:
        raise FileNotFoundError(f"No NetCDF files found in {folder_path}")
    
    ds_list = []
    for f in files:
        ds = xr.open_dataset(f)
        # Keep only desired variables
        keep_vars = [v for v in ds.data_vars if v in variables_to_keep]
        ds = ds[keep_vars]
        # Rename to avoid variable name collisions (add prefix)
        rename_dict = {v: f"{prefix}_{v}" for v in keep_vars}
        ds = ds.rename(rename_dict)
        ds_list.append(ds)
    
    # Concatenate yearly datasets
    ds_all = xr.concat(ds_list, dim="Year")
    
    # Build valid date list
    years = ds_all['Year'].values
    months = ds_all['Month'].values
    days = ds_all['Day'].values
    ymd = pd.MultiIndex.from_product([years, months, days],
                                     names=['year', 'month', 'day']).to_frame(index=False)
    def is_valid_date(row):
        try:
            pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
            return True
        except ValueError:
            return False
    valid_mask = ymd.apply(is_valid_date, axis=1)
    ymd_valid = ymd[valid_mask].reset_index(drop=True)
    time_index = pd.to_datetime(ymd_valid)
    
    # Stack and assign new time coordinate
    ds_stack = ds_all.stack(time=('Year', 'Month', 'Day'))
    ds_stack = ds_stack.isel(time=valid_mask.values)
    ds_stack = ds_stack.assign_coords(time=time_index)
    return ds_stack

# ==== Load meteorological datasets ====
print("🌦 Loading meteorological datasets...")
ds_precip = load_meteorological_dataset(precip_path, "precip", ["Total_Precipitation", "Max", "Mean"])
ds_temp = load_meteorological_dataset(temp_path, "temp", ["Mean", "Max"])
ds_wind = load_meteorological_dataset(wind_path, "wind", ["Mean", "Max"])

# ==== Flatten pollutant dataset ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

ymd = pd.MultiIndex.from_product([years, months, days],
                                 names=['year', 'month', 'day']).to_frame(index=False)

def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)
time_index = pd.to_datetime(ymd_valid)

ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Align time ranges ====
start_time = max(
    ds_pollutant_stack.time.values[0],
    ds_fire.time.values[0],
    ds_precip.time.values[0],
    ds_temp.time.values[0],
    ds_wind.time.values[0]
)
end_time = min(
    ds_pollutant_stack.time.values[-1],
    ds_fire.time.values[-1],
    ds_precip.time.values[-1],
    ds_temp.time.values[-1],
    ds_wind.time.values[-1]
)

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))
ds_precip_sel = ds_precip.sel(time=slice(start_time, end_time))
ds_temp_sel = ds_temp.sel(time=slice(start_time, end_time))
ds_wind_sel = ds_wind.sel(time=slice(start_time, end_time))

print(f"✅ Common time range: {pd.to_datetime(start_time).date()} → {pd.to_datetime(end_time).date()}")
print(f"   Total valid days: {len(ds_pollutant_sel.time)}")

# ==== Spatial alignment checks ====
print("📏 Checking spatial consistency...")
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_precip_sel.latitude.values), "Latitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_precip_sel.longitude.values), "Longitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_temp_sel.latitude.values), "Latitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_temp_sel.longitude.values), "Longitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_wind_sel.latitude.values), "Latitude mismatch (wind)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_wind_sel.longitude.values), "Longitude mismatch (wind)"
print("✅ All datasets share identical lat/lon grids.")

# ==== Apply mask ====
print("🎭 Applying land/region mask...")
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)
ds_precip_masked = ds_precip_sel.where(mask == 1)
ds_temp_masked = ds_temp_sel.where(mask == 1)
ds_wind_masked = ds_wind_sel.where(mask == 1)

# ==== Merge everything ====
print("🔗 Merging all datasets...")
ds_merged = xr.merge([
    ds_pollutant_masked,
    ds_fire_masked,
    ds_precip_masked,
    ds_temp_masked,
    ds_wind_masked
])

# ==== Save final dataset ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset successfully saved to:\n   {output_path}")


📂 Loading pollutant, fire, and mask datasets...
🌦 Loading meteorological datasets...


  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Common time range: 2003-01-01 → 2024-12-25
   Total valid days: 8030
📏 Checking spatial consistency...
✅ All datasets share identical lat/lon grids.
🎭 Applying land/region mask...
🔗 Merging all datasets...

🎉 Merged dataset successfully saved to:
   D:\IPMA\Results\pm2p5_fire_meteo_Greece.nc


In [22]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "pm10"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Greece"        # <-- change this to your region/country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_singlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_meteo_{country}.nc"

# Meteorological folders
precip_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
temp_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
wind_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"

# ==== Load datasets ====
print("📂 Loading pollutant, fire, and mask datasets...")
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)
mask = mask_ds["mask"]  # adjust if mask variable has a different name

# ==== Helper function to load yearly meteorological data ====
def load_meteorological_dataset(folder_path, prefix, variables_to_keep):
    """Load all yearly ERA5 NetCDF files, keep only selected variables, and flatten into a time dimension."""
    files = sorted([os.path.join(folder_path, f)
                    for f in os.listdir(folder_path)
                    if f.endswith(".nc")])
    if not files:
        raise FileNotFoundError(f"No NetCDF files found in {folder_path}")
    
    ds_list = []
    for f in files:
        ds = xr.open_dataset(f)
        # Keep only desired variables
        keep_vars = [v for v in ds.data_vars if v in variables_to_keep]
        ds = ds[keep_vars]
        # Rename to avoid variable name collisions (add prefix)
        rename_dict = {v: f"{prefix}_{v}" for v in keep_vars}
        ds = ds.rename(rename_dict)
        ds_list.append(ds)
    
    # Concatenate yearly datasets
    ds_all = xr.concat(ds_list, dim="Year")
    
    # Build valid date list
    years = ds_all['Year'].values
    months = ds_all['Month'].values
    days = ds_all['Day'].values
    ymd = pd.MultiIndex.from_product([years, months, days],
                                     names=['year', 'month', 'day']).to_frame(index=False)
    def is_valid_date(row):
        try:
            pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
            return True
        except ValueError:
            return False
    valid_mask = ymd.apply(is_valid_date, axis=1)
    ymd_valid = ymd[valid_mask].reset_index(drop=True)
    time_index = pd.to_datetime(ymd_valid)
    
    # Stack and assign new time coordinate
    ds_stack = ds_all.stack(time=('Year', 'Month', 'Day'))
    ds_stack = ds_stack.isel(time=valid_mask.values)
    ds_stack = ds_stack.assign_coords(time=time_index)
    return ds_stack

# ==== Load meteorological datasets ====
print("🌦 Loading meteorological datasets...")
ds_precip = load_meteorological_dataset(precip_path, "precip", ["Total_Precipitation", "Max", "Mean"])
ds_temp = load_meteorological_dataset(temp_path, "temp", ["Mean", "Max"])
ds_wind = load_meteorological_dataset(wind_path, "wind", ["Mean", "Max"])

# ==== Flatten pollutant dataset ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

ymd = pd.MultiIndex.from_product([years, months, days],
                                 names=['year', 'month', 'day']).to_frame(index=False)

def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)
time_index = pd.to_datetime(ymd_valid)

ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Align time ranges ====
start_time = max(
    ds_pollutant_stack.time.values[0],
    ds_fire.time.values[0],
    ds_precip.time.values[0],
    ds_temp.time.values[0],
    ds_wind.time.values[0]
)
end_time = min(
    ds_pollutant_stack.time.values[-1],
    ds_fire.time.values[-1],
    ds_precip.time.values[-1],
    ds_temp.time.values[-1],
    ds_wind.time.values[-1]
)

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))
ds_precip_sel = ds_precip.sel(time=slice(start_time, end_time))
ds_temp_sel = ds_temp.sel(time=slice(start_time, end_time))
ds_wind_sel = ds_wind.sel(time=slice(start_time, end_time))

print(f"✅ Common time range: {pd.to_datetime(start_time).date()} → {pd.to_datetime(end_time).date()}")
print(f"   Total valid days: {len(ds_pollutant_sel.time)}")

# ==== Spatial alignment checks ====
print("📏 Checking spatial consistency...")
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_precip_sel.latitude.values), "Latitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_precip_sel.longitude.values), "Longitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_temp_sel.latitude.values), "Latitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_temp_sel.longitude.values), "Longitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_wind_sel.latitude.values), "Latitude mismatch (wind)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_wind_sel.longitude.values), "Longitude mismatch (wind)"
print("✅ All datasets share identical lat/lon grids.")

# ==== Apply mask ====
print("🎭 Applying land/region mask...")
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)
ds_precip_masked = ds_precip_sel.where(mask == 1)
ds_temp_masked = ds_temp_sel.where(mask == 1)
ds_wind_masked = ds_wind_sel.where(mask == 1)

# ==== Merge everything ====
print("🔗 Merging all datasets...")
ds_merged = xr.merge([
    ds_pollutant_masked,
    ds_fire_masked,
    ds_precip_masked,
    ds_temp_masked,
    ds_wind_masked
])

# ==== Save final dataset ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset successfully saved to:\n   {output_path}")


📂 Loading pollutant, fire, and mask datasets...
🌦 Loading meteorological datasets...


  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Common time range: 2003-01-01 → 2024-12-25
   Total valid days: 8030
📏 Checking spatial consistency...
✅ All datasets share identical lat/lon grids.
🎭 Applying land/region mask...
🔗 Merging all datasets...

🎉 Merged dataset successfully saved to:
   D:\IPMA\Results\pm10_fire_meteo_Greece.nc


In [23]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "co"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Greece"        # <-- change this to your region/country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_meteo_{country}.nc"

# Meteorological folders
precip_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
temp_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
wind_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"

# ==== Load datasets ====
print("📂 Loading pollutant, fire, and mask datasets...")
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)
mask = mask_ds["mask"]  # adjust if mask variable has a different name

# ==== Helper function to load yearly meteorological data ====
def load_meteorological_dataset(folder_path, prefix, variables_to_keep):
    """Load all yearly ERA5 NetCDF files, keep only selected variables, and flatten into a time dimension."""
    files = sorted([os.path.join(folder_path, f)
                    for f in os.listdir(folder_path)
                    if f.endswith(".nc")])
    if not files:
        raise FileNotFoundError(f"No NetCDF files found in {folder_path}")
    
    ds_list = []
    for f in files:
        ds = xr.open_dataset(f)
        # Keep only desired variables
        keep_vars = [v for v in ds.data_vars if v in variables_to_keep]
        ds = ds[keep_vars]
        # Rename to avoid variable name collisions (add prefix)
        rename_dict = {v: f"{prefix}_{v}" for v in keep_vars}
        ds = ds.rename(rename_dict)
        ds_list.append(ds)
    
    # Concatenate yearly datasets
    ds_all = xr.concat(ds_list, dim="Year")
    
    # Build valid date list
    years = ds_all['Year'].values
    months = ds_all['Month'].values
    days = ds_all['Day'].values
    ymd = pd.MultiIndex.from_product([years, months, days],
                                     names=['year', 'month', 'day']).to_frame(index=False)
    def is_valid_date(row):
        try:
            pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
            return True
        except ValueError:
            return False
    valid_mask = ymd.apply(is_valid_date, axis=1)
    ymd_valid = ymd[valid_mask].reset_index(drop=True)
    time_index = pd.to_datetime(ymd_valid)
    
    # Stack and assign new time coordinate
    ds_stack = ds_all.stack(time=('Year', 'Month', 'Day'))
    ds_stack = ds_stack.isel(time=valid_mask.values)
    ds_stack = ds_stack.assign_coords(time=time_index)
    return ds_stack

# ==== Load meteorological datasets ====
print("🌦 Loading meteorological datasets...")
ds_precip = load_meteorological_dataset(precip_path, "precip", ["Total_Precipitation", "Max", "Mean"])
ds_temp = load_meteorological_dataset(temp_path, "temp", ["Mean", "Max"])
ds_wind = load_meteorological_dataset(wind_path, "wind", ["Mean", "Max"])

# ==== Flatten pollutant dataset ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

ymd = pd.MultiIndex.from_product([years, months, days],
                                 names=['year', 'month', 'day']).to_frame(index=False)

def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)
time_index = pd.to_datetime(ymd_valid)

ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Align time ranges ====
start_time = max(
    ds_pollutant_stack.time.values[0],
    ds_fire.time.values[0],
    ds_precip.time.values[0],
    ds_temp.time.values[0],
    ds_wind.time.values[0]
)
end_time = min(
    ds_pollutant_stack.time.values[-1],
    ds_fire.time.values[-1],
    ds_precip.time.values[-1],
    ds_temp.time.values[-1],
    ds_wind.time.values[-1]
)

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))
ds_precip_sel = ds_precip.sel(time=slice(start_time, end_time))
ds_temp_sel = ds_temp.sel(time=slice(start_time, end_time))
ds_wind_sel = ds_wind.sel(time=slice(start_time, end_time))

print(f"✅ Common time range: {pd.to_datetime(start_time).date()} → {pd.to_datetime(end_time).date()}")
print(f"   Total valid days: {len(ds_pollutant_sel.time)}")

# ==== Spatial alignment checks ====
print("📏 Checking spatial consistency...")
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_precip_sel.latitude.values), "Latitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_precip_sel.longitude.values), "Longitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_temp_sel.latitude.values), "Latitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_temp_sel.longitude.values), "Longitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_wind_sel.latitude.values), "Latitude mismatch (wind)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_wind_sel.longitude.values), "Longitude mismatch (wind)"
print("✅ All datasets share identical lat/lon grids.")

# ==== Apply mask ====
print("🎭 Applying land/region mask...")
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)
ds_precip_masked = ds_precip_sel.where(mask == 1)
ds_temp_masked = ds_temp_sel.where(mask == 1)
ds_wind_masked = ds_wind_sel.where(mask == 1)

# ==== Merge everything ====
print("🔗 Merging all datasets...")
ds_merged = xr.merge([
    ds_pollutant_masked,
    ds_fire_masked,
    ds_precip_masked,
    ds_temp_masked,
    ds_wind_masked
])

# ==== Save final dataset ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset successfully saved to:\n   {output_path}")


📂 Loading pollutant, fire, and mask datasets...
🌦 Loading meteorological datasets...


  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Common time range: 2003-01-01 → 2024-12-25
   Total valid days: 8030
📏 Checking spatial consistency...
✅ All datasets share identical lat/lon grids.
🎭 Applying land/region mask...
🔗 Merging all datasets...

🎉 Merged dataset successfully saved to:
   D:\IPMA\Results\co_fire_meteo_Greece.nc


In [24]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "no"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Greece"        # <-- change this to your region/country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_meteo_{country}.nc"

# Meteorological folders
precip_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
temp_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
wind_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"

# ==== Load datasets ====
print("📂 Loading pollutant, fire, and mask datasets...")
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)
mask = mask_ds["mask"]  # adjust if mask variable has a different name

# ==== Helper function to load yearly meteorological data ====
def load_meteorological_dataset(folder_path, prefix, variables_to_keep):
    """Load all yearly ERA5 NetCDF files, keep only selected variables, and flatten into a time dimension."""
    files = sorted([os.path.join(folder_path, f)
                    for f in os.listdir(folder_path)
                    if f.endswith(".nc")])
    if not files:
        raise FileNotFoundError(f"No NetCDF files found in {folder_path}")
    
    ds_list = []
    for f in files:
        ds = xr.open_dataset(f)
        # Keep only desired variables
        keep_vars = [v for v in ds.data_vars if v in variables_to_keep]
        ds = ds[keep_vars]
        # Rename to avoid variable name collisions (add prefix)
        rename_dict = {v: f"{prefix}_{v}" for v in keep_vars}
        ds = ds.rename(rename_dict)
        ds_list.append(ds)
    
    # Concatenate yearly datasets
    ds_all = xr.concat(ds_list, dim="Year")
    
    # Build valid date list
    years = ds_all['Year'].values
    months = ds_all['Month'].values
    days = ds_all['Day'].values
    ymd = pd.MultiIndex.from_product([years, months, days],
                                     names=['year', 'month', 'day']).to_frame(index=False)
    def is_valid_date(row):
        try:
            pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
            return True
        except ValueError:
            return False
    valid_mask = ymd.apply(is_valid_date, axis=1)
    ymd_valid = ymd[valid_mask].reset_index(drop=True)
    time_index = pd.to_datetime(ymd_valid)
    
    # Stack and assign new time coordinate
    ds_stack = ds_all.stack(time=('Year', 'Month', 'Day'))
    ds_stack = ds_stack.isel(time=valid_mask.values)
    ds_stack = ds_stack.assign_coords(time=time_index)
    return ds_stack

# ==== Load meteorological datasets ====
print("🌦 Loading meteorological datasets...")
ds_precip = load_meteorological_dataset(precip_path, "precip", ["Total_Precipitation", "Max", "Mean"])
ds_temp = load_meteorological_dataset(temp_path, "temp", ["Mean", "Max"])
ds_wind = load_meteorological_dataset(wind_path, "wind", ["Mean", "Max"])

# ==== Flatten pollutant dataset ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

ymd = pd.MultiIndex.from_product([years, months, days],
                                 names=['year', 'month', 'day']).to_frame(index=False)

def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)
time_index = pd.to_datetime(ymd_valid)

ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Align time ranges ====
start_time = max(
    ds_pollutant_stack.time.values[0],
    ds_fire.time.values[0],
    ds_precip.time.values[0],
    ds_temp.time.values[0],
    ds_wind.time.values[0]
)
end_time = min(
    ds_pollutant_stack.time.values[-1],
    ds_fire.time.values[-1],
    ds_precip.time.values[-1],
    ds_temp.time.values[-1],
    ds_wind.time.values[-1]
)

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))
ds_precip_sel = ds_precip.sel(time=slice(start_time, end_time))
ds_temp_sel = ds_temp.sel(time=slice(start_time, end_time))
ds_wind_sel = ds_wind.sel(time=slice(start_time, end_time))

print(f"✅ Common time range: {pd.to_datetime(start_time).date()} → {pd.to_datetime(end_time).date()}")
print(f"   Total valid days: {len(ds_pollutant_sel.time)}")

# ==== Spatial alignment checks ====
print("📏 Checking spatial consistency...")
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_precip_sel.latitude.values), "Latitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_precip_sel.longitude.values), "Longitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_temp_sel.latitude.values), "Latitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_temp_sel.longitude.values), "Longitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_wind_sel.latitude.values), "Latitude mismatch (wind)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_wind_sel.longitude.values), "Longitude mismatch (wind)"
print("✅ All datasets share identical lat/lon grids.")

# ==== Apply mask ====
print("🎭 Applying land/region mask...")
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)
ds_precip_masked = ds_precip_sel.where(mask == 1)
ds_temp_masked = ds_temp_sel.where(mask == 1)
ds_wind_masked = ds_wind_sel.where(mask == 1)

# ==== Merge everything ====
print("🔗 Merging all datasets...")
ds_merged = xr.merge([
    ds_pollutant_masked,
    ds_fire_masked,
    ds_precip_masked,
    ds_temp_masked,
    ds_wind_masked
])

# ==== Save final dataset ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset successfully saved to:\n   {output_path}")


📂 Loading pollutant, fire, and mask datasets...
🌦 Loading meteorological datasets...


  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Common time range: 2003-01-01 → 2024-12-25
   Total valid days: 8030
📏 Checking spatial consistency...
✅ All datasets share identical lat/lon grids.
🎭 Applying land/region mask...
🔗 Merging all datasets...

🎉 Merged dataset successfully saved to:
   D:\IPMA\Results\no_fire_meteo_Greece.nc


In [25]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "no2"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Greece"        # <-- change this to your region/country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_meteo_{country}.nc"

# Meteorological folders
precip_path = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_yearly_regridded"
temp_path = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_yearly_regridded"
wind_path = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_yearly_regridded"

# ==== Load datasets ====
print("📂 Loading pollutant, fire, and mask datasets...")
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)
mask = mask_ds["mask"]  # adjust if mask variable has a different name

# ==== Helper function to load yearly meteorological data ====
def load_meteorological_dataset(folder_path, prefix, variables_to_keep):
    """Load all yearly ERA5 NetCDF files, keep only selected variables, and flatten into a time dimension."""
    files = sorted([os.path.join(folder_path, f)
                    for f in os.listdir(folder_path)
                    if f.endswith(".nc")])
    if not files:
        raise FileNotFoundError(f"No NetCDF files found in {folder_path}")
    
    ds_list = []
    for f in files:
        ds = xr.open_dataset(f)
        # Keep only desired variables
        keep_vars = [v for v in ds.data_vars if v in variables_to_keep]
        ds = ds[keep_vars]
        # Rename to avoid variable name collisions (add prefix)
        rename_dict = {v: f"{prefix}_{v}" for v in keep_vars}
        ds = ds.rename(rename_dict)
        ds_list.append(ds)
    
    # Concatenate yearly datasets
    ds_all = xr.concat(ds_list, dim="Year")
    
    # Build valid date list
    years = ds_all['Year'].values
    months = ds_all['Month'].values
    days = ds_all['Day'].values
    ymd = pd.MultiIndex.from_product([years, months, days],
                                     names=['year', 'month', 'day']).to_frame(index=False)
    def is_valid_date(row):
        try:
            pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
            return True
        except ValueError:
            return False
    valid_mask = ymd.apply(is_valid_date, axis=1)
    ymd_valid = ymd[valid_mask].reset_index(drop=True)
    time_index = pd.to_datetime(ymd_valid)
    
    # Stack and assign new time coordinate
    ds_stack = ds_all.stack(time=('Year', 'Month', 'Day'))
    ds_stack = ds_stack.isel(time=valid_mask.values)
    ds_stack = ds_stack.assign_coords(time=time_index)
    return ds_stack

# ==== Load meteorological datasets ====
print("🌦 Loading meteorological datasets...")
ds_precip = load_meteorological_dataset(precip_path, "precip", ["Total_Precipitation", "Max", "Mean"])
ds_temp = load_meteorological_dataset(temp_path, "temp", ["Mean", "Max"])
ds_wind = load_meteorological_dataset(wind_path, "wind", ["Mean", "Max"])

# ==== Flatten pollutant dataset ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

ymd = pd.MultiIndex.from_product([years, months, days],
                                 names=['year', 'month', 'day']).to_frame(index=False)

def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)
time_index = pd.to_datetime(ymd_valid)

ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Align time ranges ====
start_time = max(
    ds_pollutant_stack.time.values[0],
    ds_fire.time.values[0],
    ds_precip.time.values[0],
    ds_temp.time.values[0],
    ds_wind.time.values[0]
)
end_time = min(
    ds_pollutant_stack.time.values[-1],
    ds_fire.time.values[-1],
    ds_precip.time.values[-1],
    ds_temp.time.values[-1],
    ds_wind.time.values[-1]
)

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))
ds_precip_sel = ds_precip.sel(time=slice(start_time, end_time))
ds_temp_sel = ds_temp.sel(time=slice(start_time, end_time))
ds_wind_sel = ds_wind.sel(time=slice(start_time, end_time))

print(f"✅ Common time range: {pd.to_datetime(start_time).date()} → {pd.to_datetime(end_time).date()}")
print(f"   Total valid days: {len(ds_pollutant_sel.time)}")

# ==== Spatial alignment checks ====
print("📏 Checking spatial consistency...")
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch (fire)"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch (mask)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_precip_sel.latitude.values), "Latitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_precip_sel.longitude.values), "Longitude mismatch (precip)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_temp_sel.latitude.values), "Latitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_temp_sel.longitude.values), "Longitude mismatch (temp)"
assert np.allclose(ds_pollutant_sel.latitude.values, ds_wind_sel.latitude.values), "Latitude mismatch (wind)"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_wind_sel.longitude.values), "Longitude mismatch (wind)"
print("✅ All datasets share identical lat/lon grids.")

# ==== Apply mask ====
print("🎭 Applying land/region mask...")
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)
ds_precip_masked = ds_precip_sel.where(mask == 1)
ds_temp_masked = ds_temp_sel.where(mask == 1)
ds_wind_masked = ds_wind_sel.where(mask == 1)

# ==== Merge everything ====
print("🔗 Merging all datasets...")
ds_merged = xr.merge([
    ds_pollutant_masked,
    ds_fire_masked,
    ds_precip_masked,
    ds_temp_masked,
    ds_wind_masked
])

# ==== Save final dataset ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset successfully saved to:\n   {output_path}")


📂 Loading pollutant, fire, and mask datasets...
🌦 Loading meteorological datasets...


  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_stack = ds_stack.assign_coords(time=time_index)
  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Common time range: 2003-01-01 → 2024-12-25
   Total valid days: 8030
📏 Checking spatial consistency...
✅ All datasets share identical lat/lon grids.
🎭 Applying land/region mask...
🔗 Merging all datasets...

🎉 Merged dataset successfully saved to:
   D:\IPMA\Results\no2_fire_meteo_Greece.nc


#### Not In Use

In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "pm2p5"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Iberia"         # <-- change this to the country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_singlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_{country}.nc"

# ==== Load datasets ====
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)

# ==== Load mask variable ====
mask = mask_ds["mask"]  # change name here if different

# ==== Extract Year, Month, Day ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

# ==== Create all combinations ====
ymd = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day']
).to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")

# ==== Check lat/lon match ====
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch with mask"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch with mask"

# ==== Apply mask to both datasets ====
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid PM2P5 days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (PM2P5 + fire): 8030
   Overlap span: 2003-01-01 → 2024-12-25

🎉 Merged dataset saved to: D:\IPMA\CAMS\pm2p5_fire_Iberia.nc


In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "pm10"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Iberia"         # <-- change this to the country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_singlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_{country}.nc"

# ==== Load datasets ====
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)

# ==== Load mask variable ====
mask = mask_ds["mask"]  # change name here if different

# ==== Extract Year, Month, Day ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

# ==== Create all combinations ====
ymd = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day']
).to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")

# ==== Check lat/lon match ====
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch with mask"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch with mask"

# ==== Apply mask to both datasets ====
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid PM10 days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (PM10 + fire): 8030
   Overlap span: 2003-01-01 → 2024-12-25

🎉 Merged dataset saved to: D:\IPMA\CAMS\pm10_fire_Iberia.nc


In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "no2"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Iberia"         # <-- change this to the country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_{country}.nc"

# ==== Load datasets ====
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)

# ==== Load mask variable ====
mask = mask_ds["mask"]  # change name here if different

# ==== Extract Year, Month, Day ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

# ==== Create all combinations ====
ymd = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day']
).to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")

# ==== Check lat/lon match ====
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch with mask"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch with mask"

# ==== Apply mask to both datasets ====
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid NO2 days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (NO2 + fire): 8030
   Overlap span: 2003-01-01 → 2024-12-25

🎉 Merged dataset saved to: D:\IPMA\CAMS\no2_fire_Iberia.nc


In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "no"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Iberia"         # <-- change this to the country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_{country}.nc"

# ==== Load datasets ====
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)

# ==== Load mask variable ====
mask = mask_ds["mask"]  # change name here if different

# ==== Extract Year, Month, Day ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

# ==== Create all combinations ====
ymd = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day']
).to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")

# ==== Check lat/lon match ====
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch with mask"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch with mask"

# ==== Apply mask to both datasets ====
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid NO days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (NO + fire): 8030
   Overlap span: 2003-01-01 → 2024-12-25

🎉 Merged dataset saved to: D:\IPMA\CAMS\no_fire_Iberia.nc


In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== User settings ====
pollutant_name = "co"  # <-- change this to your pollutant (e.g., "ozone", "no2")
country = "Iberia"         # <-- change this to the country

# ==== File paths ====
pollutant_path = fr"D:\IPMA\CAMS\chem_multlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\Results\{pollutant_name}_fire_{country}.nc"

# ==== Load datasets ====
ds_pollutant = xr.open_dataset(pollutant_path)
ds_fire = xr.open_dataset(fire_path)
mask_ds = xr.open_dataset(mask_path)

# ==== Load mask variable ====
mask = mask_ds["mask"]  # change name here if different

# ==== Extract Year, Month, Day ====
years = ds_pollutant['Year'].values
months = ds_pollutant['Month'].values
days = ds_pollutant['Day'].values

# ==== Create all combinations ====
ymd = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day']
).to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    try:
        pd.Timestamp(year=int(row['year']), month=int(row['month']), day=int(row['day']))
        return True
    except ValueError:
        return False

valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")

# ==== Check lat/lon match ====
assert np.allclose(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values), "Latitude mismatch"
assert np.allclose(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values), "Longitude mismatch"
assert np.allclose(ds_pollutant_sel.latitude.values, mask.latitude.values), "Latitude mismatch with mask"
assert np.allclose(ds_pollutant_sel.longitude.values, mask.longitude.values), "Longitude mismatch with mask"

# ==== Apply mask to both datasets ====
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid CO days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (CO + fire): 8030
   Overlap span: 2003-01-01 → 2024-12-25

🎉 Merged dataset saved to: D:\IPMA\CAMS\co_fire_Iberia.nc
