mean +- standard error of mean of pollutants on day without fire, with fire and up to 5 days after fire outbreak


In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import warnings

# Suppress RuntimeWarnings for invalid SEM calculations
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dictionary of pollutants and their NetCDF file paths
pollutant_files = {
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Portugal.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Portugal.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Portugal.nc",
    "CO": r"D:\IPMA\CAMS\co_fire_Portugal.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Portugal.nc"
}

# Human-readable labels
label_names = {
    0: "No fire",
    1: "Day 0 (Fire outbreak)",
    2: "Day 1",
    3: "Day 2",
    4: "Day 3",
    5: "Day 4",
    6: "Day 5",
}

# Initialize results dictionary
results = { "Label": list(label_names.values()) }

# Loop over pollutants
for pollutant, filepath in pollutant_files.items():
    ds = xr.open_dataset(filepath)

    data = ds['Mean']  # pollutant values
    labels = ds['fire_label_Portugal'].transpose('latitude', 'longitude', 'time')

    mean_list, sem_list = [], []

    # Loop over fire labels
    for label in label_names.keys():
        mask = labels == label
        masked_data = data.where(mask)

        # Stats across time per grid cell
        mean = masked_data.mean(dim='time', skipna=True)
        std = masked_data.std(dim='time', skipna=True)
        count = masked_data.count(dim='time')
        sem = std / np.sqrt(count)

        # Spatial average
        mean_val = mean.mean(skipna=True).item()
        sem_val = sem.mean(skipna=True).item()

        # Convert CO mg/m³ → µg/m³
        if pollutant == "CO":
            mean_val *= 1000
            sem_val *= 1000    

        mean_list.append(mean_val)
        sem_list.append(sem_val)

    # Add columns for this pollutant
    results[f"{pollutant} Mean (µg/m³)"] = mean_list
    results[f"{pollutant} SEM (µg/m³)"] = sem_list

# Convert to DataFrame for pretty table
df = pd.DataFrame(results)

# Print the table
print("\nPollutant concentrations by fire label (spatial average across Portugal):")
print(df.to_string(index=False))


In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import warnings

# Suppress RuntimeWarnings for invalid SEM calculations
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dictionary of pollutants and their NetCDF file paths
pollutant_files = {
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Italy.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Italy.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Italy.nc",
    "CO": r"D:\IPMA\CAMS\co_fire_Italy.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Italy.nc"
}

# Human-readable labels
label_names = {
    0: "No fire",
    1: "Day 0 (Fire outbreak)",
    2: "Day 1",
    3: "Day 2",
    4: "Day 3",
    5: "Day 4",
    6: "Day 5",
}

# Initialize results dictionary
results = { "Label": list(label_names.values()) }

# Loop over pollutants
for pollutant, filepath in pollutant_files.items():
    ds = xr.open_dataset(filepath)

    data = ds['Mean']  # pollutant values
    labels = ds['fire_label_Italy'].transpose('latitude', 'longitude', 'time')

    mean_list, sem_list = [], []

    # Loop over fire labels
    for label in label_names.keys():
        mask = labels == label
        masked_data = data.where(mask)

        # Stats across time per grid cell
        mean = masked_data.mean(dim='time', skipna=True)
        std = masked_data.std(dim='time', skipna=True)
        count = masked_data.count(dim='time')
        sem = std / np.sqrt(count)

        # Spatial average
        mean_val = mean.mean(skipna=True).item()
        sem_val = sem.mean(skipna=True).item()

        # Convert CO mg/m³ → µg/m³
        if pollutant == "CO":
            mean_val *= 1000
            sem_val *= 1000

        mean_list.append(mean_val)
        sem_list.append(sem_val)

    # Add columns for this pollutant
    results[f"{pollutant} Mean (µg/m³)"] = mean_list
    results[f"{pollutant} SEM (µg/m³)"] = sem_list

# Convert to DataFrame for pretty table
df = pd.DataFrame(results)

# Print the table
print("\nPollutant concentrations by fire label (spatial average across Italy):")
print(df.to_string(index=False))


In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import warnings

# Suppress RuntimeWarnings for invalid SEM calculations
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dictionary of pollutants and their NetCDF file paths
pollutant_files = {
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Spain.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Spain.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Spain.nc",
    "CO": r"D:\IPMA\CAMS\co_fire_Spain.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Spain.nc"
}

# Human-readable labels
label_names = {
    0: "No fire",
    1: "Day 0 (Fire outbreak)",
    2: "Day 1",
    3: "Day 2",
    4: "Day 3",
    5: "Day 4",
    6: "Day 5",
}

# Initialize results dictionary
results = { "Label": list(label_names.values()) }

# Loop over pollutants
for pollutant, filepath in pollutant_files.items():
    ds = xr.open_dataset(filepath)

    data = ds['Mean']  # pollutant values
    labels = ds['fire_label_Spain'].transpose('latitude', 'longitude', 'time')

    mean_list, sem_list = [], []

    # Loop over fire labels
    for label in label_names.keys():
        mask = labels == label
        masked_data = data.where(mask)

        # Stats across time per grid cell
        mean = masked_data.mean(dim='time', skipna=True)
        std = masked_data.std(dim='time', skipna=True)
        count = masked_data.count(dim='time')
        sem = std / np.sqrt(count)

        # Spatial average
        mean_val = mean.mean(skipna=True).item()
        sem_val = sem.mean(skipna=True).item()

        # Convert CO mg/m³ → µg/m³
        if pollutant == "CO":
            mean_val *= 1000
            sem_val *= 1000

        mean_list.append(mean_val)
        sem_list.append(sem_val)

    # Add columns for this pollutant
    results[f"{pollutant} Mean (µg/m³)"] = mean_list
    results[f"{pollutant} SEM (µg/m³)"] = sem_list

# Convert to DataFrame for pretty table
df = pd.DataFrame(results)

# Print the table
print("\nPollutant concentrations by fire label (spatial average across Spain):")
print(df.to_string(index=False))


In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import warnings

# Suppress RuntimeWarnings for invalid SEM calculations
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dictionary of pollutants and their NetCDF file paths
pollutant_files = {
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Greece.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Greece.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Greece.nc",
    "CO": r"D:\IPMA\CAMS\co_fire_Greece.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Greece.nc"
}

# Human-readable labels
label_names = {
    0: "No fire",
    1: "Day 0 (Fire outbreak)",
    2: "Day 1",
    3: "Day 2",
    4: "Day 3",
    5: "Day 4",
    6: "Day 5",
}

# Initialize results dictionary
results = { "Label": list(label_names.values()) }

# Loop over pollutants
for pollutant, filepath in pollutant_files.items():
    ds = xr.open_dataset(filepath)

    data = ds['Mean']  # pollutant values
    labels = ds['fire_label_Greece'].transpose('latitude', 'longitude', 'time')

    mean_list, sem_list = [], []

    # Loop over fire labels
    for label in label_names.keys():
        mask = labels == label
        masked_data = data.where(mask)

        # Stats across time per grid cell
        mean = masked_data.mean(dim='time', skipna=True)
        std = masked_data.std(dim='time', skipna=True)
        count = masked_data.count(dim='time')
        sem = std / np.sqrt(count)

        # Spatial average
        mean_val = mean.mean(skipna=True).item()
        sem_val = sem.mean(skipna=True).item()

        # Convert CO mg/m³ → µg/m³
        if pollutant == "CO":
            mean_val *= 1000
            sem_val *= 1000

        mean_list.append(mean_val)
        sem_list.append(sem_val)

    # Add columns for this pollutant
    results[f"{pollutant} Mean (µg/m³)"] = mean_list
    results[f"{pollutant} SEM (µg/m³)"] = sem_list

# Convert to DataFrame for pretty table
df = pd.DataFrame(results)

# Print the table
print("\nPollutant concentrations by fire label (spatial average across Greece):")
print(df.to_string(index=False))


Divide daily concentrations of pollutants into quartiles Q1 (lowest), Q2, Q3 and Q4 (highest) for days when fires occurred and days they did not - Assess the impact of fire events on the concentration of air pollutants. Calculation of percentage of days (non-wildfire and wildfire) in each of the four qaurtiles

For fire days and non-fire days separately, it takes the distribution of pollutant concentrations.

Then it splits that distribution into 4 equal-sized groups (quartiles):

Q1: values ≤ 25th percentile

Q2: between 25th–50th percentile

Q3: between 50th–75th percentile

Q4: ≥ 75th percentile

In [None]:
import xarray as xr
import pandas as pd

# --- List of pollutant files (example with CO and PM2.5) ---
files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Portugal.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Portugal.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Portugal.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Portugal.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Portugal.nc"
}

all_results = []

for pol_name, file_path in files.items():
    ds = xr.open_dataset(file_path)

    pollutant = ds["Mean"]  # adjust if different variable name
    fire_flag = ds["fire_binary_Portugal"]

    # Convert to DataFrame
    df = pollutant.to_dataframe(name="pollutant").reset_index()
    df["fire"] = fire_flag.to_dataframe(name="fire").reset_index(drop=True)

    # Drop NaNs before analysis
    df = df.dropna(subset=["pollutant", "fire"])

    # Split into fire / no-fire
    df_fire = df[df["fire"] == 1].copy()
    df_nofire = df[df["fire"] == 0].copy()

    # Quartiles within each group
    df_fire["quartile"] = pd.qcut(df_fire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])
    df_nofire["quartile"] = pd.qcut(df_nofire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])

    df_quartiles = pd.concat([df_fire, df_nofire])

    # Summary: counts
    summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)

    # Summary: percentages
    summary_pct = summary_counts.div(summary_counts.sum(axis=1), axis=0) * 100

    # Add pollutant name for identification
    summary_counts["pollutant"] = pol_name
    summary_pct["pollutant"] = pol_name

    all_results.append((summary_counts, summary_pct))

# --- Combine all pollutants ---
counts_table = pd.concat([c for c, p in all_results], axis=0).reset_index()
percentages_table = pd.concat([p for c, p in all_results], axis=0).reset_index()

print("Counts table:")
print(counts_table)

print("\nPercentages table:")
print(percentages_table)

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Counts table:
quartile  fire   Q1   Q2   Q3   Q4 pollutant
0          0.0  385  384  384  384        CO
1          1.0   36   36   36   36        CO
2          0.0  385  384  384  384     PM2.5
3          1.0   36   36   36   36     PM2.5
4          0.0  385  384  384  384      PM10
5          1.0   36   36   36   36      PM10
6          0.0  385  384  384  384       NO2
7          1.0   36   36   36   36       NO2
8          0.0  385  384  384  384        NO
9          1.0   36   36   36   36        NO

Percentages table:
quartile  fire         Q1         Q2         Q3         Q4 pollutant
0          0.0  25.048796  24.983735  24.983735  24.983735        CO
1          1.0  25.000000  25.000000  25.000000  25.000000        CO
2          0.0  25.048796  24.983735  24.983735  24.983735     PM2.5
3          1.0  25.000000  25.000000  25.000000  25.000000     PM2.5
4          0.0  25.048796  24.983735  24.983735  24.983735      PM10
5          1.0  25.000000  25.000000  25.000000  25.00000

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


In [11]:
import xarray as xr
import pandas as pd

# --- List of pollutant files (example with CO and PM2.5) ---
files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Italy.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Italy.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Italy.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Italy.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Italy.nc"
}

all_results = []

for pol_name, file_path in files.items():
    ds = xr.open_dataset(file_path)

    pollutant = ds["Mean"]  # adjust if different variable name
    fire_flag = ds["fire_binary_Italy"]

    # Convert to DataFrame
    df = pollutant.to_dataframe(name="pollutant").reset_index()
    df["fire"] = fire_flag.to_dataframe(name="fire").reset_index(drop=True)

    # Drop NaNs before analysis
    df = df.dropna(subset=["pollutant", "fire"])

    # Split into fire / no-fire
    df_fire = df[df["fire"] == 1].copy()
    df_nofire = df[df["fire"] == 0].copy()

    # Quartiles within each group
    df_fire["quartile"] = pd.qcut(df_fire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])
    df_nofire["quartile"] = pd.qcut(df_nofire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])

    df_quartiles = pd.concat([df_fire, df_nofire])

    # Summary: counts
    summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)

    # Summary: percentages
    summary_pct = summary_counts.div(summary_counts.sum(axis=1), axis=0) * 100

    # Add pollutant name for identification
    summary_counts["pollutant"] = pol_name
    summary_pct["pollutant"] = pol_name

    all_results.append((summary_counts, summary_pct))

# --- Combine all pollutants ---
counts_table = pd.concat([c for c, p in all_results], axis=0).reset_index()
percentages_table = pd.concat([p for c, p in all_results], axis=0).reset_index()

print("Counts table:")
print(counts_table)

print("\nPercentages table:")
print(percentages_table)

df_quartiles

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Counts table:
quartile  fire     Q1     Q2     Q3     Q4 pollutant
0          0.0  10282  10282  10281  10282        CO
1          1.0      6      6      6      6        CO
2          0.0  10282  10282  10281  10282     PM2.5
3          1.0      6      6      6      6     PM2.5
4          0.0  10282  10281  10281  10282      PM10
5          1.0      6      6      6      6      PM10
6          0.0  10282  10282  10281  10282       NO2
7          1.0      6      6      6      6       NO2
8          0.0  10277  10277  10277  10277        NO
9          1.0      6      6      6      6        NO

Percentages table:
quartile  fire         Q1         Q2         Q3         Q4 pollutant
0          0.0  25.000608  25.000608  24.998176  25.000608        CO
1          1.0  25.000000  25.000000  25.000000  25.000000        CO
2          0.0  25.000608  25.000608  24.998176  25.000608     PM2.5
3          1.0  25.000000  25.000000  25.000000  25.000000     PM2.5
4          0.0  25.001216  24.998784  

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Unnamed: 0,latitude,longitude,time,pollutant,fire,quartile
783147,35.25,12.00,2013-01-03,0.011900,1.0,Q1
791598,35.25,12.75,2014-02-22,0.032564,1.0,Q1
2884858,38.25,12.75,2024-10-27,0.072328,1.0,Q1
3360466,39.00,9.00,2006-11-19,0.086161,1.0,Q1
3362877,39.00,9.00,2013-06-26,0.278755,1.0,Q2
...,...,...,...,...,...,...
9143518,47.25,12.00,2021-01-12,3.720558,0.0,Q4
9143519,47.25,12.00,2021-01-13,1.138191,0.0,Q3
9143580,47.25,12.00,2021-03-15,0.275044,0.0,Q2
9143581,47.25,12.00,2021-03-16,0.412056,0.0,Q3


In [12]:
import xarray as xr
import pandas as pd

# --- List of pollutant files (example with CO and PM2.5) ---
files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Spain.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Spain.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Spain.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Spain.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Spain.nc"
}

all_results = []

for pol_name, file_path in files.items():
    ds = xr.open_dataset(file_path)

    pollutant = ds["Mean"]  # adjust if different variable name
    fire_flag = ds["fire_binary_Spain"]

    # Convert to DataFrame
    df = pollutant.to_dataframe(name="pollutant").reset_index()
    df["fire"] = fire_flag.to_dataframe(name="fire").reset_index(drop=True)

    # Drop NaNs before analysis
    df = df.dropna(subset=["pollutant", "fire"])

    # Split into fire / no-fire
    df_fire = df[df["fire"] == 1].copy()
    df_nofire = df[df["fire"] == 0].copy()

    # Quartiles within each group
    df_fire["quartile"] = pd.qcut(df_fire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])
    df_nofire["quartile"] = pd.qcut(df_nofire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])

    df_quartiles = pd.concat([df_fire, df_nofire])

    # Summary: counts
    summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)

    # Summary: percentages
    summary_pct = summary_counts.div(summary_counts.sum(axis=1), axis=0) * 100

    # Add pollutant name for identification
    summary_counts["pollutant"] = pol_name
    summary_pct["pollutant"] = pol_name

    all_results.append((summary_counts, summary_pct))

# --- Combine all pollutants ---
counts_table = pd.concat([c for c, p in all_results], axis=0).reset_index()
percentages_table = pd.concat([p for c, p in all_results], axis=0).reset_index()

print("Counts table:")
print(counts_table)

print("\nPercentages table:")
print(percentages_table)


  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Counts table:
quartile  fire     Q1     Q2     Q3     Q4 pollutant
0          0.0  11124  11123  11123  11123        CO
1          1.0     88     87     87     87        CO
2          0.0  11124  11123  11123  11123     PM2.5
3          1.0     88     87     87     87     PM2.5
4          0.0  11124  11123  11123  11123      PM10
5          1.0     88     87     87     87      PM10
6          0.0  11124  11123  11123  11123       NO2
7          1.0     88     87     87     87       NO2
8          0.0  11123  11122  11122  11123        NO
9          1.0     88     87     87     87        NO

Percentages table:
quartile  fire         Q1         Q2         Q3         Q4 pollutant
0          0.0  25.001686  24.999438  24.999438  24.999438        CO
1          1.0  25.214900  24.928367  24.928367  24.928367        CO
2          0.0  25.001686  24.999438  24.999438  24.999438     PM2.5
3          1.0  25.214900  24.928367  24.928367  24.928367     PM2.5
4          0.0  25.001686  24.999438  

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


In [13]:
import xarray as xr
import pandas as pd

# --- List of pollutant files (example with CO and PM2.5) ---
files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Greece.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Greece.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Greece.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Greece.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Greece.nc"
}

all_results = []

for pol_name, file_path in files.items():
    ds = xr.open_dataset(file_path)

    pollutant = ds["Mean"]  # adjust if different variable name
    fire_flag = ds["fire_binary_Greece"]

    # Convert to DataFrame
    df = pollutant.to_dataframe(name="pollutant").reset_index()
    df["fire"] = fire_flag.to_dataframe(name="fire").reset_index(drop=True)

    # Drop NaNs before analysis
    df = df.dropna(subset=["pollutant", "fire"])

    # Split into fire / no-fire
    df_fire = df[df["fire"] == 1].copy()
    df_nofire = df[df["fire"] == 0].copy()

    # Quartiles within each group
    df_fire["quartile"] = pd.qcut(df_fire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])
    df_nofire["quartile"] = pd.qcut(df_nofire["pollutant"], q=4, labels=["Q1","Q2","Q3","Q4"])

    df_quartiles = pd.concat([df_fire, df_nofire])

    # Summary: counts
    summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)

    # Summary: percentages
    summary_pct = summary_counts.div(summary_counts.sum(axis=1), axis=0) * 100

    # Add pollutant name for identification
    summary_counts["pollutant"] = pol_name
    summary_pct["pollutant"] = pol_name

    all_results.append((summary_counts, summary_pct))

# --- Combine all pollutants ---
counts_table = pd.concat([c for c, p in all_results], axis=0).reset_index()
percentages_table = pd.concat([p for c, p in all_results], axis=0).reset_index()

print("Counts table:")
print(counts_table)

print("\nPercentages table:")
print(percentages_table)


  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)
  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Counts table:
quartile  fire    Q1    Q2    Q3    Q4 pollutant
0          0.0  4955  4954  4954  4954        CO
1          1.0    15    14    14    15        CO
2          0.0  4955  4954  4954  4954     PM2.5
3          1.0    15    14    14    15     PM2.5
4          0.0  4953  4952  4952  4952      PM10
5          1.0    15    14    14    15      PM10
6          0.0  4955  4954  4954  4954       NO2
7          1.0    15    14    14    15       NO2
8          0.0  4953  4953  4953  4953        NO
9          1.0    15    14    14    15        NO

Percentages table:
quartile  fire         Q1         Q2         Q3         Q4 pollutant
0          0.0  25.003785  24.998738  24.998738  24.998738        CO
1          1.0  25.862069  24.137931  24.137931  25.862069        CO
2          0.0  25.003785  24.998738  24.998738  24.998738     PM2.5
3          1.0  25.862069  24.137931  24.137931  25.862069     PM2.5
4          0.0  25.003786  24.998738  24.998738  24.998738      PM10
5          1.

  summary_counts = df_quartiles.groupby(["fire", "quartile"]).size().unstack(fill_value=0)


Pearson correlation performed between meteorological variables and air pollutants, for all days and every label established

Wind speed

In [12]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Portugal.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Portugal.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Portugal.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Portugal.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Portugal.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Wind dataset
# --------------------
wind_file = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc"
ds_wind = xr.open_dataset(wind_file)

# Build datetime index
years = ds_wind["Year"].values
months = ds_wind["Month"].values
days = ds_wind["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_wind = ds_wind.stack(date=("Year","Month","Day"))
ds_wind = ds_wind.assign_coords(time=("date", time_index))
ds_wind = ds_wind.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean wind speed
wind_ts = ds_wind["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
wind_ts.name = "WindSpeed"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Portugal"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["WindSpeed"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["WindSpeed"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + wind)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [wind_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["WindSpeed"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_wind_correlations_by_fire_label_Portugal.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.001334   0.972256   -0.011834    0.757715
      All     PM2.5   0.018980   0.620735   -0.001947    0.959528
      All      PM10   0.017186   0.654134   -0.002630    0.945339
      All       NO2  -0.055044   0.151022   -0.046456    0.225656
      All        NO   0.040753   0.287889    0.005239    0.891370
        0        CO  -0.001334   0.972256   -0.011834    0.757715
        0     PM2.5   0.018980   0.620735   -0.001947    0.959528
        0      PM10   0.017186   0.654134   -0.002630    0.945339
        0       NO2  -0.055044   0.151022   -0.046456    0.225656
        0        NO   0.040753   0.287889    0.005239    0.891370
        1        CO   0.049354   0.765419    0.107692    0.514041
        1     PM2.5  -0.064601   0.696006   -0.073684    0.655747
        1      PM10  -0.046602   0.778163   -0.024089    0.884267
        1       NO2   0.074260   0.653223 

In [15]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Italy.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Italy.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Italy.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Italy.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Italy.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Wind dataset
# --------------------
wind_file = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc"
ds_wind = xr.open_dataset(wind_file)

# Build datetime index
years = ds_wind["Year"].values
months = ds_wind["Month"].values
days = ds_wind["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_wind = ds_wind.stack(date=("Year","Month","Day"))
ds_wind = ds_wind.assign_coords(time=("date", time_index))
ds_wind = ds_wind.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean wind speed
wind_ts = ds_wind["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
wind_ts.name = "WindSpeed"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Italy"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["WindSpeed"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["WindSpeed"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + wind)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [wind_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["WindSpeed"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_wind_correlations_by_fire_label_Italy.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.098295   0.010214   -0.065375    0.088014
      All     PM2.5  -0.092817   0.015321   -0.065588    0.086980
      All      PM10  -0.091373   0.016994   -0.064666    0.091520
      All       NO2  -0.064281   0.093474   -0.038677    0.313176
      All        NO  -0.053713   0.161162   -0.036090    0.346673
        0        CO  -0.098295   0.010214   -0.065375    0.088014
        0     PM2.5  -0.092817   0.015321   -0.065588    0.086980
        0      PM10  -0.091373   0.016994   -0.064666    0.091520
        0       NO2  -0.064281   0.093474   -0.038677    0.313176
        0        NO  -0.053713   0.161162   -0.036090    0.346673
        1        CO  -0.272821   0.160133   -0.220580    0.259342
        1     PM2.5  -0.154495   0.432473   -0.135194    0.492761
        1      PM10  -0.137753   0.484533   -0.109469    0.579225
        1       NO2  -0.079261   0.688479 

In [14]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Spain.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Spain.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Spain.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Spain.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Spain.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Wind dataset
# --------------------
wind_file = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc"
ds_wind = xr.open_dataset(wind_file)

# Build datetime index
years = ds_wind["Year"].values
months = ds_wind["Month"].values
days = ds_wind["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_wind = ds_wind.stack(date=("Year","Month","Day"))
ds_wind = ds_wind.assign_coords(time=("date", time_index))
ds_wind = ds_wind.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean wind speed
wind_ts = ds_wind["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
wind_ts.name = "WindSpeed"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Spain"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["WindSpeed"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["WindSpeed"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + wind)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [wind_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["WindSpeed"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_wind_correlations_by_fire_label_Spain.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.009730   0.799782   -0.013413    0.726593
      All     PM2.5   0.031709   0.408362   -0.005673    0.882430
      All      PM10   0.028359   0.459670   -0.005176    0.892681
      All       NO2   0.006745   0.860432    0.008857    0.817409
      All        NO   0.059052   0.123395    0.027330    0.476117
        0        CO  -0.009730   0.799782   -0.013413    0.726593
        0     PM2.5   0.031709   0.408362   -0.005673    0.882430
        0      PM10   0.028359   0.459670   -0.005176    0.892681
        0       NO2   0.006745   0.860432    0.008857    0.817409
        0        NO   0.059052   0.123395    0.027330    0.476117
        1        CO   0.024180   0.803826    0.021569    0.824652
        1     PM2.5  -0.151559   0.117401   -0.150333    0.120431
        1      PM10  -0.171136   0.076579   -0.179161    0.063557
        1       NO2   0.060803   0.531898 

In [16]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Greece.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Greece.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Greece.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Greece.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Greece.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Wind dataset
# --------------------
wind_file = r"D:\IPMA\ERA5\UV_wind\daily_wind_speed_stats_regrid.nc"
ds_wind = xr.open_dataset(wind_file)

# Build datetime index
years = ds_wind["Year"].values
months = ds_wind["Month"].values
days = ds_wind["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_wind = ds_wind.stack(date=("Year","Month","Day"))
ds_wind = ds_wind.assign_coords(time=("date", time_index))
ds_wind = ds_wind.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean wind speed
wind_ts = ds_wind["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
wind_ts.name = "WindSpeed"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Greece"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["WindSpeed"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["WindSpeed"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + wind)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [wind_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["WindSpeed"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_wind_correlations_by_fire_label_Greece.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.041964   0.273789   -0.032589    0.395476
      All     PM2.5  -0.021325   0.578251   -0.049226    0.199159
      All      PM10  -0.017987   0.639132   -0.048786    0.203209
      All       NO2  -0.022947   0.549685   -0.034696    0.365618
      All        NO  -0.023791   0.535093   -0.058282    0.128373
        0        CO  -0.041964   0.273789   -0.032589    0.395476
        0     PM2.5  -0.021325   0.578251   -0.049226    0.199159
        0      PM10  -0.017987   0.639132   -0.048786    0.203209
        0       NO2  -0.022947   0.549685   -0.034696    0.365618
        0        NO  -0.023791   0.535093   -0.058282    0.128373
        1        CO  -0.038427   0.778572   -0.031237    0.819226
        1     PM2.5  -0.054892   0.687820   -0.002734    0.984044
        1      PM10  -0.056405   0.679677    0.004853    0.971683
        1       NO2   0.136106   0.317206 

Total Precipitation

In [17]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Portugal.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Portugal.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Portugal.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Portugal.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Portugal.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Precipitation dataset
# --------------------
precip_file = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc"
ds_precip = xr.open_dataset(precip_file)

# Create datetime index
years = ds_precip["Year"].values
months = ds_precip["Month"].values
days = ds_precip["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into single dimension and assign time
ds_precip = ds_precip.stack(date=("Year","Month","Day"))
ds_precip = ds_precip.assign_coords(time=("date", time_index))
ds_precip = ds_precip.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean of total precipitation
precip_ts = ds_precip["Total_Precipitation"].mean(dim=["latitude","longitude"], skipna=True).to_series()
precip_ts.name = "Precipitation"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Portugal"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Precipitation"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Precipitation"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + precipitation)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [precip_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Precipitation"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_precipitation_correlations_by_fire_label_Portugal.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.168645   0.000010   -0.206859 5.000222e-08
      All     PM2.5  -0.108194   0.004675   -0.170226 7.822294e-06
      All      PM10  -0.092997   0.015122   -0.152452 6.404751e-05
      All       NO2  -0.038436   0.316201   -0.084786 2.682008e-02
      All        NO  -0.093974   0.014086   -0.119761 1.729626e-03
        0        CO  -0.168645   0.000010   -0.206859 5.000222e-08
        0     PM2.5  -0.108194   0.004675   -0.170226 7.822294e-06
        0      PM10  -0.092997   0.015122   -0.152452 6.404751e-05
        0       NO2  -0.038436   0.316201   -0.084786 2.682008e-02
        0        NO  -0.093974   0.014086   -0.119761 1.729626e-03
        1        CO  -0.032914   0.842329   -0.029757 8.572888e-01
        1     PM2.5  -0.269779   0.096731   -0.327733 4.167644e-02
        1      PM10  -0.260060   0.109847   -0.283198 8.063808e-02
        1       NO2   0.1638

In [18]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Italy.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Italy.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Italy.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Italy.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Italy.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Precipitation dataset
# --------------------
precip_file = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc"
ds_precip = xr.open_dataset(precip_file)

# Create datetime index
years = ds_precip["Year"].values
months = ds_precip["Month"].values
days = ds_precip["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into single dimension and assign time
ds_precip = ds_precip.stack(date=("Year","Month","Day"))
ds_precip = ds_precip.assign_coords(time=("date", time_index))
ds_precip = ds_precip.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean of total precipitation
precip_ts = ds_precip["Total_Precipitation"].mean(dim=["latitude","longitude"], skipna=True).to_series()
precip_ts.name = "Precipitation"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Italy"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Precipitation"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Precipitation"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + precipitation)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [precip_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Precipitation"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_precipitation_correlations_by_fire_label_Italy.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.116311   0.002348   -0.119748    0.001732
      All     PM2.5  -0.112944   0.003141   -0.103675    0.006732
      All      PM10  -0.114785   0.002682   -0.103860    0.006634
      All       NO2  -0.086244   0.024300   -0.096616    0.011589
      All        NO  -0.039320   0.305198   -0.058572    0.126480
        0        CO  -0.116311   0.002348   -0.119748    0.001732
        0     PM2.5  -0.112944   0.003141   -0.103675    0.006732
        0      PM10  -0.114785   0.002682   -0.103860    0.006634
        0       NO2  -0.086244   0.024300   -0.096616    0.011589
        0        NO  -0.039320   0.305198   -0.058572    0.126480
        1        CO  -0.284711   0.141982   -0.350848    0.067167
        1     PM2.5  -0.310098   0.108286   -0.380405    0.045832
        1      PM10  -0.323218   0.093404   -0.395183    0.037401
        1       NO2  -0.137669   0.484800 

In [19]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Spain.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Spain.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Spain.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Spain.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Spain.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Precipitation dataset
# --------------------
precip_file = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc"
ds_precip = xr.open_dataset(precip_file)

# Create datetime index
years = ds_precip["Year"].values
months = ds_precip["Month"].values
days = ds_precip["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into single dimension and assign time
ds_precip = ds_precip.stack(date=("Year","Month","Day"))
ds_precip = ds_precip.assign_coords(time=("date", time_index))
ds_precip = ds_precip.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean of total precipitation
precip_ts = ds_precip["Total_Precipitation"].mean(dim=["latitude","longitude"], skipna=True).to_series()
precip_ts.name = "Precipitation"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Spain"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Precipitation"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Precipitation"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + precipitation)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [precip_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Precipitation"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_precipitation_correlations_by_fire_label_Spain.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.151702   0.000070   -0.190093 5.707578e-07
      All     PM2.5  -0.108822   0.004439   -0.144475 1.531337e-04
      All      PM10  -0.114336   0.002788   -0.141781 2.035151e-04
      All       NO2  -0.053782   0.160626   -0.099776 9.123294e-03
      All        NO  -0.097717   0.010670   -0.106399 5.412236e-03
        0        CO  -0.151702   0.000070   -0.190093 5.707578e-07
        0     PM2.5  -0.108822   0.004439   -0.144475 1.531337e-04
        0      PM10  -0.114336   0.002788   -0.141781 2.035151e-04
        0       NO2  -0.053782   0.160626   -0.099776 9.123294e-03
        0        NO  -0.097717   0.010670   -0.106399 5.412236e-03
        1        CO  -0.086023   0.376039   -0.152972 1.139811e-01
        1     PM2.5  -0.222280   0.020771   -0.283680 2.928422e-03
        1      PM10  -0.231396   0.015972   -0.275525 3.900476e-03
        1       NO2   0.0044

In [20]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Greece.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Greece.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Greece.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Greece.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Greece.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Precipitation dataset
# --------------------
precip_file = r"D:\IPMA\ERA5\Precipitation\daily_precipitation_stats_1999_2024_regrid.nc"
ds_precip = xr.open_dataset(precip_file)

# Create datetime index
years = ds_precip["Year"].values
months = ds_precip["Month"].values
days = ds_precip["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into single dimension and assign time
ds_precip = ds_precip.stack(date=("Year","Month","Day"))
ds_precip = ds_precip.assign_coords(time=("date", time_index))
ds_precip = ds_precip.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean of total precipitation
precip_ts = ds_precip["Total_Precipitation"].mean(dim=["latitude","longitude"], skipna=True).to_series()
precip_ts.name = "Precipitation"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Greece"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Precipitation"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Precipitation"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build full DataFrame (pollutants + precipitation)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [precip_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Precipitation"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_precipitation_correlations_by_fire_label_Greece.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r  Pearson_p  Spearman_r  Spearman_p
      All        CO  -0.146992   0.000117   -0.162598    0.000020
      All     PM2.5   0.017005   0.657540    0.068888    0.072199
      All      PM10   0.009579   0.802820    0.067591    0.077746
      All       NO2  -0.036521   0.340931   -0.050493    0.187823
      All        NO  -0.019001   0.620350   -0.068980    0.071820
        0        CO  -0.146992   0.000117   -0.162598    0.000020
        0     PM2.5   0.017005   0.657540    0.068888    0.072199
        0      PM10   0.009579   0.802820    0.067591    0.077746
        0       NO2  -0.036521   0.340931   -0.050493    0.187823
        0        NO  -0.019001   0.620350   -0.068980    0.071820
        1        CO  -0.134077   0.324542   -0.169105    0.212795
        1     PM2.5   0.163228   0.229357    0.060834    0.656042
        1      PM10   0.171747   0.205629    0.073001    0.592871
        1       NO2   0.056283   0.680331 

Temperature

In [21]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Portugal.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Portugal.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Portugal.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Portugal.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Portugal.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Temperature dataset
# --------------------
temp_file = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc"
ds_temp = xr.open_dataset(temp_file)

# Build datetime index
years = ds_temp["Year"].values
months = ds_temp["Month"].values
days = ds_temp["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_temp = ds_temp.stack(date=("Year","Month","Day"))
ds_temp = ds_temp.assign_coords(time=("date", time_index))
ds_temp = ds_temp.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean temperature (°C, mean across grid)
temp_ts = ds_temp["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
temp_ts.name = "Temperature"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Portugal"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Temperature"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Temperature"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build combined DataFrame (pollutants + temperature)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [temp_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Temperature"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_temperature_correlations_by_fire_label_Portugal.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r    Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.229190 1.402963e-09   -0.255818 1.188251e-11
      All     PM2.5  -0.088826 2.033879e-02   -0.125118 1.059087e-03
      All      PM10  -0.080395 3.581156e-02   -0.107063 5.128076e-03
      All       NO2  -0.205369 6.261099e-08   -0.213873 1.695910e-08
      All        NO  -0.117886 2.044256e-03   -0.125068 1.063978e-03
        0        CO  -0.229190 1.402963e-09   -0.255818 1.188251e-11
        0     PM2.5  -0.088826 2.033879e-02   -0.125118 1.059087e-03
        0      PM10  -0.080395 3.581156e-02   -0.107063 5.128076e-03
        0       NO2  -0.205369 6.261099e-08   -0.213873 1.695910e-08
        0        NO  -0.117886 2.044256e-03   -0.125068 1.063978e-03
        1        CO  -0.351791 2.807842e-02   -0.322065 4.555577e-02
        1     PM2.5  -0.065218 6.932451e-01    0.005668 9.726815e-01
        1      PM10  -0.052373 7.515133e-01    0.000405 9.980483e-01


In [22]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Italy.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Italy.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Italy.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Italy.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Italy.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Temperature dataset
# --------------------
temp_file = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc"
ds_temp = xr.open_dataset(temp_file)

# Build datetime index
years = ds_temp["Year"].values
months = ds_temp["Month"].values
days = ds_temp["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_temp = ds_temp.stack(date=("Year","Month","Day"))
ds_temp = ds_temp.assign_coords(time=("date", time_index))
ds_temp = ds_temp.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean temperature (°C, mean across grid)
temp_ts = ds_temp["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
temp_ts.name = "Temperature"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Italy"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Temperature"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Temperature"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build combined DataFrame (pollutants + temperature)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [temp_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Temperature"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_temperature_correlations_by_fire_label_Italy.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r    Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.406318 1.698899e-28   -0.440655 9.127474e-34
      All     PM2.5  -0.348999 5.753326e-21   -0.393604 1.075357e-26
      All      PM10  -0.361059 1.989205e-22   -0.404969 2.661344e-28
      All       NO2  -0.242623 1.356647e-10   -0.260010 5.322642e-12
      All        NO  -0.268901 9.236165e-13   -0.297314 2.188614e-15
        0        CO  -0.406318 1.698899e-28   -0.440655 9.127474e-34
        0     PM2.5  -0.348999 5.753326e-21   -0.393604 1.075357e-26
        0      PM10  -0.361059 1.989205e-22   -0.404969 2.661344e-28
        0       NO2  -0.242623 1.356647e-10   -0.260010 5.322642e-12
        0        NO  -0.268901 9.236165e-13   -0.297314 2.188614e-15
        1        CO  -0.398668 3.560717e-02   -0.449371 1.644087e-02
        1     PM2.5  -0.445756 1.743718e-02   -0.429666 2.249497e-02
        1      PM10  -0.439269 1.934977e-02   -0.412151 2.930309e-02


In [23]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Spain.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Spain.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Spain.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Spain.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Spain.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Temperature dataset
# --------------------
temp_file = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc"
ds_temp = xr.open_dataset(temp_file)

# Build datetime index
years = ds_temp["Year"].values
months = ds_temp["Month"].values
days = ds_temp["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_temp = ds_temp.stack(date=("Year","Month","Day"))
ds_temp = ds_temp.assign_coords(time=("date", time_index))
ds_temp = ds_temp.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean temperature (°C, mean across grid)
temp_ts = ds_temp["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
temp_ts.name = "Temperature"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Spain"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Temperature"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Temperature"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build combined DataFrame (pollutants + temperature)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [temp_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Temperature"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_temperature_correlations_by_fire_label_Spain.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r    Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.250556 3.191222e-11   -0.260221 5.109791e-12
      All     PM2.5  -0.101706 7.858730e-03   -0.160923 2.417646e-05
      All      PM10  -0.108378 4.604694e-03   -0.153397 5.759512e-05
      All       NO2  -0.117404 2.133184e-03   -0.125761 9.972067e-04
      All        NO  -0.085207 2.607055e-02   -0.091319 1.705963e-02
        0        CO  -0.250556 3.191222e-11   -0.260221 5.109791e-12
        0     PM2.5  -0.101706 7.858730e-03   -0.160923 2.417646e-05
        0      PM10  -0.108378 4.604694e-03   -0.153397 5.759512e-05
        0       NO2  -0.117404 2.133184e-03   -0.125761 9.972067e-04
        0        NO  -0.085207 2.607055e-02   -0.091319 1.705963e-02
        1        CO  -0.255383 7.641240e-03   -0.243658 1.104982e-02
        1     PM2.5  -0.097726 3.143224e-01   -0.091953 3.438996e-01
        1      PM10  -0.089229 3.584410e-01   -0.087437 3.682142e-01


In [24]:
import xarray as xr
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# --------------------
# 1. Pollutant files
# --------------------
pollutant_files = {
    "CO": r"D:\IPMA\CAMS\co_fire_Greece.nc",
    "PM2.5": r"D:\IPMA\CAMS\pm2p5_fire_Greece.nc",
    "PM10": r"D:\IPMA\CAMS\pm10_fire_Greece.nc",
    "NO2": r"D:\IPMA\CAMS\no2_fire_Greece.nc",
    "NO": r"D:\IPMA\CAMS\no_fire_Greece.nc"
}

pollutant_series = {}
fire_ds = None  # store the first dataset for fire labels

for name, file in pollutant_files.items():
    ds = xr.open_dataset(file)
    
    # Daily spatial mean (skip NaNs)
    ts = ds["Mean"].mean(dim=["latitude", "longitude"], skipna=True).to_series()
    pollutant_series[name] = ts
    
    if fire_ds is None:
        fire_ds = ds  # keep dataset with fire labels

# --------------------
# 2. Temperature dataset
# --------------------
temp_file = r"D:\IPMA\ERA5\Temperature\daily_temperature_stats_regrid.nc"
ds_temp = xr.open_dataset(temp_file)

# Build datetime index
years = ds_temp["Year"].values
months = ds_temp["Month"].values
days = ds_temp["Day"].values

time_index = pd.to_datetime(
    [f"{y}-{m:02d}-{d:02d}" for y in years for m in months for d in days],
    errors="coerce"
).dropna()

# Stack Year, Month, Day into a single dimension
ds_temp = ds_temp.stack(date=("Year","Month","Day"))
ds_temp = ds_temp.assign_coords(time=("date", time_index))
ds_temp = ds_temp.swap_dims({"date":"time"}).drop_vars("date")

# Daily spatial mean temperature (°C, mean across grid)
temp_ts = ds_temp["Mean"].mean(dim=["latitude","longitude"], skipna=True).to_series()
temp_ts.name = "Temperature"

# --------------------
# 3. Prepare FireLabel masks for each label (0-6)
# --------------------
fire_labels = {}
for label in range(0,7):
    mask = fire_ds["fire_label_Greece"] == label
    # True if any grid cell has this label
    daily_label_present = mask.any(dim=["latitude","longitude"])
    fire_labels[label] = daily_label_present.to_series()

# --------------------
# 4. Correlation function
# --------------------
def correlation_with_pvalues(df, pollutants):
    results = []
    for pol in pollutants:
        x = df[pol]
        y = df["Temperature"]
        if len(df) < 2 or x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": "All",
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

def correlation_by_label(df, label_name, mask, pollutants):
    results = []
    subset = df[mask]
    if len(subset) < 2:
        return results
    for pol in pollutants:
        x = subset[pol]
        y = subset["Temperature"]
        if x.nunique() < 2 or y.nunique() < 2:
            continue
        pearson_r, pearson_p = pearsonr(x, y)
        spearman_r, spearman_p = spearmanr(x, y)
        results.append({
            "FireLabel": label_name,
            "Pollutant": pol,
            "Pearson_r": pearson_r,
            "Pearson_p": pearson_p,
            "Spearman_r": spearman_r,
            "Spearman_p": spearman_p
        })
    return results

# --------------------
# 5. Build combined DataFrame (pollutants + temperature)
# --------------------
df_all = pd.concat(list(pollutant_series.values()) + [temp_ts], axis=1,
                   keys=list(pollutant_series.keys()) + ["Temperature"])
df_all = df_all.dropna()

pollutant_names = list(pollutant_series.keys())
all_results = []

# Overall correlation
all_results.extend(correlation_with_pvalues(df_all, pollutant_names))

# Correlation by fire label
for label, mask in fire_labels.items():
    # Align mask with df_all
    mask_aligned = mask.reindex(df_all.index, fill_value=False)
    all_results.extend(correlation_by_label(df_all, label, mask_aligned, pollutant_names))

# --------------------
# 6. Print results
# --------------------
results_df = pd.DataFrame(all_results)
results_df.to_csv(r"D:\IPMA\CAMS\pollutant_temperature_correlations_by_fire_label_Greece.csv", index=False)
print("Correlation results saved to CSV.")
print(results_df.to_string(index=False))


Correlation results saved to CSV.
FireLabel Pollutant  Pearson_r    Pearson_p  Spearman_r   Spearman_p
      All        CO  -0.380754 5.946410e-25   -0.416848 4.767899e-30
      All     PM2.5  -0.055698 1.462158e-01   -0.132189 5.381690e-04
      All      PM10  -0.043077 2.612571e-01   -0.127948 8.109598e-04
      All       NO2  -0.071942 6.041447e-02   -0.086195 2.438264e-02
      All        NO  -0.062569 1.025544e-01   -0.076324 4.631982e-02
        0        CO  -0.380754 5.946410e-25   -0.416848 4.767899e-30
        0     PM2.5  -0.055698 1.462158e-01   -0.132189 5.381690e-04
        0      PM10  -0.043077 2.612571e-01   -0.127948 8.109598e-04
        0       NO2  -0.071942 6.041447e-02   -0.086195 2.438264e-02
        0        NO  -0.062569 1.025544e-01   -0.076324 4.631982e-02
        1        CO  -0.511357 5.635961e-05   -0.534655 2.188990e-05
        1     PM2.5  -0.076618 5.746250e-01   -0.224402 9.637754e-02
        1      PM10  -0.057104 6.759231e-01   -0.200684 1.380637e-01
