BI Goals:

(i) investigar as ligações entre a atividade do fogo, conforme medido pelo FRP (Fire Radiative Power), e as concentrações de poluentes e avaliar a zona espacial e temporal de influência da atividade dos incêndios florestais.

(ii) investigar a utilização de FRP como ferramenta para filtrar a contribuição do fumo de biomassa para os registos de poluição atmosférica em bacias atmosféricas urbanas, nomeadamente as emissões de carbono resultantes de incêndios florestais graves.

(iii) desenvolver abordagens multirriscos para caracterizar o comportamento conjunto de múltiplos perigos e riscos consequentes e avaliar o papel desempenhado por condições anteriores e simultâneas de seca e/ou calor na exacerbação de incêndios rurais e consequentes ondas de fumo.

Combine the datasets by day and pixel: the labeled fire dataset (fire labels and FRP statistics) and the daily pollutant statistics.

In [21]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# ==== Run configuration ====
pollutant_name = "pm2p5"  # pollutant key embedded in the file names (e.g., "ozone", "no2")
country = "Spain"         # country whose fire file and mask are loaded

# ==== Input / output locations (derived from the configuration above) ====
pollutant_path = fr"D:\IPMA\CAMS\chem_singlvl\daily_{pollutant_name}_stats.nc"
fire_path = fr"D:\IPMA\FRP\fire_labels_by_region\fire_data_{country}.nc"
mask_path = fr"D:\IPMA\Countries\{country}_mask.nc"
output_path = fr"D:\IPMA\CAMS\{pollutant_name}_fire_{country}.nc"

# ==== Open the three inputs: pollutant statistics, fire data, country mask ====
ds_pollutant, ds_fire, mask_ds = (
    xr.open_dataset(nc_path)
    for nc_path in (pollutant_path, fire_path, mask_path)
)

# Country mask variable; adjust the key if the mask file names it differently.
mask = mask_ds["mask"]

# ==== Read the separate Year / Month / Day axes of the pollutant file ====
years, months, days = (
    ds_pollutant[axis_name].values for axis_name in ('Year', 'Month', 'Day')
)

# ==== Cartesian product of the three axes, one row per (year, month, day) ====
# The product also contains impossible dates (e.g. 30 February); they are
# filtered out further down.
date_levels = pd.MultiIndex.from_product(
    [years, months, days],
    names=['year', 'month', 'day'],
)
ymd = date_levels.to_frame(index=False)

# ==== Remove invalid dates ====
def is_valid_date(row):
    """Tell whether a row's year/month/day fields form a real calendar date.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            'year', 'month' and 'day'; each value is converted with ``int``.

    Returns:
        True when ``pd.Timestamp`` accepts the combination, False when the
        conversion or the construction raises ``ValueError``.
    """
    try:
        parts = {field: int(row[field]) for field in ('year', 'month', 'day')}
        pd.Timestamp(**parts)
    except ValueError:
        return False
    return True

# Flag the rows of the (year, month, day) product that are real calendar dates.
valid_mask = ymd.apply(is_valid_date, axis=1)
ymd_valid = ymd[valid_mask].reset_index(drop=True)

# ==== Convert valid dates to datetime ====
time_index = pd.to_datetime(ymd_valid)

# ==== Print number of valid pollutant days ====
print(f"✅ Total valid {pollutant_name.upper()} days: {len(time_index)}")

# ==== Flatten pollutant data ====
# Collapse the three calendar axes into one 'time' dimension.
ds_pollutant_stack = ds_pollutant.stack(time=('Year', 'Month', 'Day'))

# Keep only valid dates. The selection is positional, so it relies on the
# stacked order matching the row order of `ymd` (both iterate Year, then
# Month, then Day).
ds_pollutant_stack = ds_pollutant_stack.isel(time=valid_mask.values)

# The stacked 'time' coordinate is a MultiIndex with 'Year'/'Month'/'Day'
# level coordinates. Assigning over it directly triggers xarray's
# "would corrupt indices" warning, so drop the index and its levels first.
# errors='ignore' tolerates xarray versions where the levels are not
# separate variables.
ds_pollutant_stack = ds_pollutant_stack.drop_vars(
    ['time', 'Year', 'Month', 'Day'], errors='ignore'
)

# Assign new datetime coordinate
ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)

# ==== Select overlapping time ====
# Uses the first/last time values of each dataset as its bounds.
start_time = max(ds_pollutant_stack.time.values[0], ds_fire.time.values[0])
end_time = min(ds_pollutant_stack.time.values[-1], ds_fire.time.values[-1])

ds_pollutant_sel = ds_pollutant_stack.sel(time=slice(start_time, end_time))
ds_fire_sel = ds_fire.sel(time=slice(start_time, end_time))

# Fail with a clear message instead of an IndexError in the prints below.
if ds_pollutant_sel.sizes['time'] == 0:
    raise ValueError(
        f"No overlapping days between {pollutant_name} and fire data "
        f"({start_time} → {end_time})"
    )

# ==== Print number of overlapping valid days ====
print(f"✅ Overlapping valid days ({pollutant_name.upper()} + fire): {len(ds_pollutant_sel.time)}")
print(f"   Overlap span: {pd.to_datetime(ds_pollutant_sel.time.values[0]).date()} → {pd.to_datetime(ds_pollutant_sel.time.values[-1]).date()}")


# ==== Check lat/lon match ====
def _require_matching_axis(reference, candidate, message):
    """Raise ValueError(message) unless both coordinate arrays have the same
    shape and agree within np.allclose tolerance."""
    if reference.shape != candidate.shape or not np.allclose(reference, candidate):
        raise ValueError(message)


# Explicit raises instead of `assert`, so the checks survive `python -O`.
_require_matching_axis(ds_pollutant_sel.latitude.values, ds_fire_sel.latitude.values, "Latitude mismatch")
_require_matching_axis(ds_pollutant_sel.longitude.values, ds_fire_sel.longitude.values, "Longitude mismatch")
_require_matching_axis(ds_pollutant_sel.latitude.values, mask.latitude.values, "Latitude mismatch with mask")
_require_matching_axis(ds_pollutant_sel.longitude.values, mask.longitude.values, "Longitude mismatch with mask")

# ==== Apply mask to both datasets ====
# Cells where the mask is not 1 become missing values.
# NOTE(review): the check above is tolerance-based, while xarray presumably
# aligns `where`/`merge` operands on exact coordinate labels — confirm the
# three grids are bit-identical.
ds_pollutant_masked = ds_pollutant_sel.where(mask == 1)
ds_fire_masked = ds_fire_sel.where(mask == 1)

# ==== Merge datasets ====
ds_merged = xr.merge([ds_pollutant_masked, ds_fire_masked])

# ==== Save ====
os.makedirs(os.path.dirname(output_path), exist_ok=True)
ds_merged.to_netcdf(output_path)

print(f"\n🎉 Merged dataset saved to: {output_path}")


✅ Total valid PM2P5 days: 8036


  ds_pollutant_stack = ds_pollutant_stack.assign_coords(time=time_index)


✅ Overlapping valid days (PM2P5 + fire): 8036
   Overlap span: 2003-01-01 → 2024-12-31

🎉 Merged dataset saved to: D:\IPMA\CAMS\pm2p5_fire_Spain.nc
