Mean ± standard error of the mean (SEM) of pollutants on days without fire, on days with fire, and up to 5 days after a fire outbreak – PORTUGAL


In [None]:
import xarray as xr
import numpy as np
import warnings

# Print, for every fire label 0-6, the spatially averaged PM10 mean and the
# spatially averaged standard error of the mean (SEM).
# NOTE(review): judging by the notebook title, label 0 is presumably "no fire",
# 1 "fire day" and 2-6 "1-5 days after the outbreak" -- confirm against the
# code that produced `fire_label_Portugal`.

# Suppress RuntimeWarnings emitted by reductions over slices that have too few
# valid samples (e.g. count = 0 or 1 at a grid point).
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Load dataset
ds = xr.open_dataset(r"D:\IPMA\CAMS\pm10_fire_Portugal.nc")

# Extract PM10 and fire labels. xarray matches operands by dimension *name*,
# so the transpose only fixes the axis order of `labels`; it does not change
# how `labels` lines up with `pm10` in the `.where()` call below.
pm10 = ds['Mean']
labels = ds['fire_label_Portugal'].transpose('latitude', 'longitude', 'time')

print("\nPM₁₀ Mean ± SEM by Fire Label (spatial average across Portugal):")
print("-" * 60)
print(f"{'Label':<10}{'Mean (µg/m³)':<20}{'SEM (µg/m³)':<20}")
print("-" * 60)

# Loop over labels 0–6
for label in range(7):
    # Keep PM10 only where the fire label matches; everything else becomes NaN
    mask = labels == label
    masked_pm10 = pm10.where(mask)

    # Time-based stats per grid point (NaNs, i.e. non-matching days, skipped)
    mean = masked_pm10.mean(dim='time', skipna=True)
    count = masked_pm10.count(dim='time')
    # SEM = s / sqrt(n) uses the *sample* standard deviation, hence ddof=1
    # (the xarray/numpy default ddof=0 is the population estimate).
    std = masked_pm10.std(dim='time', skipna=True, ddof=1)
    # With fewer than two samples the SEM is undefined. Mask those grid points
    # explicitly so they are NaN (and skipped by the spatial mean) instead of
    # contributing a misleading 0.0.
    sem = (std / np.sqrt(count)).where(count > 1)

    # Spatial mean over all grid points that have a valid value
    mean_val = mean.mean(skipna=True).item()
    sem_val = sem.mean(skipna=True).item()

    # Print formatted result
    print(f"{label:<10}{mean_val:<20.2f}{sem_val:<20.2f}")


In [None]:
import xarray as xr
import numpy as np
from scipy.stats import shapiro, levene
import warnings
from scipy import stats

# Ignore every RuntimeWarning raised from this point on.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)

# Open the NetCDF file holding the PM10 field and the fire labels.
ds = xr.open_dataset(r"D:\IPMA\CAMS\pm10_fire_Portugal.nc")

# Names of the two spatial dimensions, reused below.
spatial_dims = ["latitude", "longitude"]

# PM10 variable as stored in the file, and the fire labels reordered to
# (time, latitude, longitude).
pm10 = ds["Mean"]
labels = ds["fire_label_Portugal"].transpose("time", *spatial_dims)

# Collapse both spatial dimensions, skipping NaNs: one mean value per time step.
pm10_daily_avg = pm10.mean(dim=spatial_dims, skipna=True)

# Compute daily mode of fire labels (across lat/lon)
def compute_mode_2d(x):
    """Return the most frequent non-NaN value in array ``x``.

    NaN entries are discarded before counting. If nothing remains, NaN is
    returned. When several values share the highest count, the smallest of
    them wins, because ``np.unique`` yields sorted values and ``argmax``
    picks the first maximum.
    """
    valid = x[~np.isnan(x)]
    if valid.size == 0:
        return np.nan

    uniques, frequencies = np.unique(valid, return_counts=True)
    return uniques[frequencies.argmax()]

# Reduce the latitude/longitude slice of `labels` at each remaining index
# (i.e. each time step) to a single modal label via compute_mode_2d.
spatial_core_dims = ['latitude', 'longitude']
labels_daily_mode = xr.apply_ufunc(
    compute_mode_2d,
    labels,
    # compute_mode_2d handles one 2-D slice at a time, so let xarray loop it.
    vectorize=True,
    dask='parallelized',
    # The function consumes both spatial dims and returns a scalar per slice.
    input_core_dims=[spatial_core_dims],
    output_core_dims=[[]],
    output_dtypes=[float],
)

# Map each fire label to the array of daily spatial-mean PM10 values observed
# on the days whose modal label equals that label.
label_pm10_values = {}

print("Collecting daily PM₁₀ averages for each fire label...\n")
for label in range(7):
    # Days carrying this label; drop=True removes the non-matching days.
    on_label_days = pm10_daily_avg.where(labels_daily_mode == label, drop=True)
    raw_vals = on_label_days.values
    daily_vals = raw_vals[~np.isnan(raw_vals)]
    n_days = len(daily_vals)

    # Labels with three or fewer usable days are left out of the dictionary.
    if n_days <= 3:
        continue

    label_pm10_values[label] = daily_vals
    print(f"Label {label}: {n_days} daily values collected.")

# -- Shapiro-Wilk Test (normality check) --
# Runs scipy's Shapiro–Wilk test on the daily PM10 values of every collected
# label and prints W, the p-value and a verdict at the 0.05 level.
#
# Groups larger than SHAPIRO_MAX_N are subsampled without replacement, because
# scipy documents that the p-value may be inaccurate for N > 5000. The
# subsample comes from a dedicated, seeded generator so that W and p are
# identical on every run (the global, unseeded np.random state would make the
# reported statistics change between executions).
SHAPIRO_MAX_N = 5000
shapiro_rng = np.random.default_rng(0)

print("\n📊 Shapiro–Wilk Normality Test:")
for label, values in label_pm10_values.items():
    if len(values) > SHAPIRO_MAX_N:
        sample = shapiro_rng.choice(values, size=SHAPIRO_MAX_N, replace=False)
    else:
        sample = values
    stat, p = shapiro(sample)
    # p > 0.05: normality is not rejected at the 5 % level.
    result = "Normal" if p > 0.05 else "Not normal"
    print(f"Label {label}: W={stat:.3f}, p={p:.4f} → {result}")

# -- Levene’s Test (equal variances) --
# Compares the variances of the per-label PM10 samples. Only labels with more
# than 10 values take part, and at least two such groups are required.
print("\n📊 Levene's Test for Equal Variances (homoscedasticity):")
group_labels = list(label_pm10_values)
grouped_values = [
    samples for samples in label_pm10_values.values() if len(samples) > 10
]

if len(grouped_values) < 2:
    print("Not enough groups with sufficient data for Levene’s test.")
else:
    stat, p = levene(*grouped_values)
    # p > 0.05: equality of variances is not rejected at the 5 % level.
    if p > 0.05:
        result = "Equal variances"
    else:
        result = "Unequal variances"
    print(f"Levene’s statistic={stat:.3f}, p={p:.4f} → {result}")
