In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.dates as mdates
import pandas as pd
from scipy import stats as ss
import pymannkendall as mk
# Notebook-wide display defaults: cap the number of DataFrame rows shown and
# set the matplotlib resolution, marker size, font size and colour cycle.
pd.set_option('display.max_rows', 20)
plt.rcParams.update({
    'figure.dpi': 100,
    'lines.markersize': 4,
    'font.size': 14,
    'axes.prop_cycle': plt.cycler('color', 'brgmyk'),
})


In [None]:
from pathlib import Path
# Folder that the CSV-reading cells below load their files from. The path is
# relative, so it is resolved against the kernel's current working directory.
DATA_DIR = Path("./monthly_averages")

In [None]:
# Load the N2O monthly-average tables, one CSV file per level (68, 46, 32, 22).
data_68, data_46, data_32, data_22 = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in (
        'monthly_averages_68_n2o.csv',
        'monthly_averages_46_n2o.csv',
        'monthly_averages_32_n2o.csv',
        'monthly_averages_22_n2o.csv',
    )
)

In [None]:
# Convert the 'date' column of every N2O table to pandas datetime values.
data_68, data_46, data_32, data_22 = (
    table.assign(date=pd.to_datetime(table['date']))
    for table in (data_68, data_46, data_32, data_22)
)

In [None]:
# Load the O3 monthly-average tables, one CSV file per level (68, 46, 32, 22).
o3_68, o3_46, o3_32, o3_22 = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in (
        'monthly_averages_68_o3.csv',
        'monthly_averages_46_o3.csv',
        'monthly_averages_32_o3.csv',
        'monthly_averages_22_o3.csv',
    )
)

In [None]:
# Convert the 'date' column of every O3 table to pandas datetime values.
o3_68, o3_46, o3_32, o3_22 = (
    table.assign(date=pd.to_datetime(table['date']))
    for table in (o3_68, o3_46, o3_32, o3_22)
)

In [None]:
# Keep only the N2O rows whose 'count' value is at least 5.
data_68, data_46, data_32, data_22 = (
    table.loc[table['count'] >= 5]
    for table in (data_68, data_46, data_32, data_22)
)

In [None]:
# Keep only the O3 rows whose 'count' value is at least 5.
o3_68, o3_46, o3_32, o3_22 = (
    table.loc[table['count'] >= 5]
    for table in (o3_68, o3_46, o3_32, o3_22)
)

In [None]:

def plot_monthly_zone_means_side_by_side(
    dfs_n2o: list,
    dfs_o3: list,
    altitudes: list,
    value_col: str = 'mean_concentration',
    date_col: str = 'date',
    zone_col: str = 'latitude_zone',
    confidence: float = 0.95,
    palette: list = None,
    zone_labels: dict = None,
    zone_ci_alpha: dict = None,
    save_path=None,
    save_dpi=300,
    ylabels: dict = None,
    ylims: dict = None
) -> None:
    """
    Plot monthly per-zone means with a confidence band, N2O next to O3.

    The figure has one row per entry of ``altitudes`` and two columns
    (N2O on the left, O3 on the right). Row ``i`` shows ``dfs_n2o[i]`` and
    ``dfs_o3[i]``. Each table is grouped by ``zone_col`` and calendar month;
    the monthly mean is drawn as a line and the two-sided Student-t
    confidence interval of that mean as a shaded band.

    Parameters
    ----------
    dfs_n2o, dfs_o3 : list of DataFrame
        One table per row; each needs ``date_col``, ``zone_col`` and
        ``value_col``. The tables are copied, never modified.
    altitudes : list
        Row titles (one per row); its length sets the number of rows.
    value_col, date_col, zone_col : str
        Column names of the value, the date and the zone identifier.
    confidence : float
        Confidence level of the shaded interval (0.95 -> 95 %).
    palette : list, optional
        Colours assigned to the zones in descending zone-id order; cycled
        when there are more zones than colours.
    zone_labels : dict, optional
        Legend label per zone id; zones without an entry are labelled
        ``'Zone <id>'``.
    zone_ci_alpha : dict, optional
        Band opacity per zone id; zones without an entry use 0.3.
    save_path : optional
        If truthy, the figure is written there with ``save_dpi``.
    ylabels, ylims : dict, optional
        Per-species (keys ``'N2O'`` / ``'O3'``) y-axis label and y-limits.
        Entries that are not supplied fall back to the built-in defaults.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Defaults
    palette = palette or ['#4daf4a', '#ff7f00', '#377eb8']
    zone_labels = zone_labels or {1: 'NH', 2: 'Tropics', 3: 'SH'}
    zone_ci_alpha = zone_ci_alpha or {}
    default_alpha = 0.3
    # Merge caller overrides over the defaults so that a partial dict
    # (e.g. only 'N2O') no longer raises KeyError for the other species.
    ylabels = {
        'N2O': 'Mean N₂O concentration (ppbv)',
        'O3':  'Mean O₃ concentration (ppmv)',
        **(ylabels or {}),
    }
    ylims = {
        'N2O': (0, 325),
        'O3':  (0, 8),
        **(ylims or {}),
    }

    def prepare(df):
        # Collapse one table into per-zone, per-month mean, std, count and CI.
        df = df.copy()
        df[date_col] = pd.to_datetime(df[date_col])
        df['year_month'] = df[date_col].dt.to_period('M').dt.to_timestamp()
        stats_df = (
            df.groupby([zone_col, 'year_month'])[value_col]
              .agg(['mean', 'std', 'count'])
              .reset_index()
        )
        alpha = 1 - confidence
        n_obs = stats_df['count']
        # Vectorised t-interval half-width. Degrees of freedom are clipped to
        # 1 only to keep the quantile call well defined; groups with fewer
        # than two observations get a half-width of 0 below.
        dof = (n_obs - 1).clip(lower=1)
        t_crit = ss.t.ppf(1 - alpha/2, df=dof.to_numpy())
        stats_df['ci_half'] = (
            stats_df['std'] * t_crit / n_obs ** 0.5
        ).where(n_obs > 1, 0.0)
        stats_df['ci_lower'] = stats_df['mean'] - stats_df['ci_half']
        stats_df['ci_upper'] = stats_df['mean'] + stats_df['ci_half']
        return stats_df

    stats_n2o = [prepare(df) for df in dfs_n2o]
    stats_o3  = [prepare(df) for df in dfs_o3]

    # Every zone id seen in any table, highest id first; this order also
    # fixes which palette colour each zone receives.
    zones = sorted(
        {z for stats in stats_n2o + stats_o3 for z in stats[zone_col].unique()},
        reverse=True
    )

    n_alt = len(altitudes)
    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # the [row, col] indexing below also works when only one level is given.
    fig, axes = plt.subplots(n_alt, 2,
                             figsize=(14, 4 * n_alt),
                             sharex=True,
                             squeeze=False)

    for row, alt in enumerate(altitudes):
        for col, (stats_list, species) in enumerate(zip((stats_n2o, stats_o3), ('N2O', 'O3'))):
            ax = axes[row, col]
            stats_df = stats_list[row]

            # plot each zone: CI band underneath, mean line on top
            for idx, zone in enumerate(zones):
                grp = stats_df[stats_df[zone_col] == zone]
                color = palette[idx % len(palette)]
                alpha_ci = zone_ci_alpha.get(zone, default_alpha)
                ax.fill_between(
                    grp['year_month'], grp['ci_lower'], grp['ci_upper'],
                    color=color, alpha=alpha_ci, zorder=1
                )
                ax.plot(
                    grp['year_month'], grp['mean'],
                    color=color,
                    label=zone_labels.get(zone, f'Zone {zone}'),
                    zorder=2
                )

            # panel title = the caller-supplied level label
            ax.set_title(alt, fontsize=16)

            # y-label and y-limits
            ax.set_ylabel(ylabels[species])
            ax.set_ylim(ylims[species])

            # grid & x-axis formatting: one tick every second year
            ax.grid(True)
            ax.xaxis.set_major_locator(mdates.YearLocator(2))
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
            if row != n_alt - 1:
                ax.tick_params(labelbottom=False)
            else:
                for lbl in ax.get_xticklabels():
                    lbl.set_rotation(45)
                    lbl.set_ha('right')

    # shared legend at bottom; every panel draws every zone, so the first
    # panel's handles are representative
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels,
               loc='lower center', ncol=len(zones),
               fontsize=16, bbox_to_anchor=(0.5, 0.01))

    # tighten up spacing, leaving the bottom 5 % for the legend
    fig.tight_layout(rect=[0, 0.05, 1, 1])
    if save_path:
        fig.savefig(save_path, dpi=save_dpi)
    plt.show()


In [None]:
# One figure row per level, from 22 hPa down to 68 hPa. Column names, the
# confidence level, the palette and save_path are left at the function's
# defaults (the same values that were previously passed explicitly).
plot_monthly_zone_means_side_by_side(
    [data_22, data_32, data_46, data_68],
    [o3_22, o3_32, o3_46, o3_68],
    ['22 hPa', '32 hPa', '46 hPa', '68 hPa'],
    zone_labels={
        1: 'Northern Hemisphere',
        2: 'Tropics',
        3: 'Southern Hemisphere',
    },
    zone_ci_alpha={3: 0.15},
    ylabels={'N2O': 'N₂O (ppbv)', 'O3': 'O₃ (ppmv)'},
)




In [None]:
def prepare_monthly_stats(
    df: pd.DataFrame,
    date_col: str = 'date',
    zone_col: str = 'latitude_zone',
    value_col: str = 'mean_concentration',
    confidence: float = 0.95
) -> pd.DataFrame:
    """
    Turn a raw DataFrame into monthly, per-zone means ± CI.

    Rows are grouped by ``zone_col`` and by the calendar month of
    ``date_col``. For every group the mean, sample standard deviation and
    number of non-missing values of ``value_col`` are computed, together
    with the half-width of the two-sided Student-t confidence interval of
    the mean.

    Parameters
    ----------
    df : pd.DataFrame
        Input table; it is copied and never modified.
    date_col, zone_col, value_col : str
        Column names of the date, the zone identifier and the value.
    confidence : float
        Confidence level of the interval (0.95 -> 95 %).

    Returns
    -------
    pd.DataFrame
        One row per (zone, month) with columns ``zone_col``,
        ``'year_month'`` (timestamp of the first day of the month),
        ``'mean'``, ``'std'``, ``'count'``, ``'ci_half'``, ``'ci_lower'``
        and ``'ci_upper'``. Groups with fewer than two values have
        ``ci_half == 0`` because their standard deviation is undefined.
    """
    monthly = df.copy()
    monthly[date_col] = pd.to_datetime(monthly[date_col])
    monthly['year_month'] = monthly[date_col].dt.to_period('M').dt.to_timestamp()
    stats = (
        monthly.groupby([zone_col, 'year_month'])[value_col]
               .agg(['mean', 'std', 'count'])
               .reset_index()
    )

    alpha = 1 - confidence
    n_obs = stats['count']
    # Vectorised replacement for the former row-wise apply: a single quantile
    # call for all groups, which also works when `stats` has no rows.
    # Degrees of freedom are clipped to 1 only to keep that call well
    # defined; such groups are overwritten with 0 by the `where` below.
    dof = (n_obs - 1).clip(lower=1)
    t_crit = ss.t.ppf(1 - alpha/2, df=dof.to_numpy())
    stats['ci_half'] = (stats['std'] * t_crit / n_obs ** 0.5).where(n_obs > 1, 0.0)
    stats['ci_lower'] = stats['mean'] - stats['ci_half']
    stats['ci_upper'] = stats['mean'] + stats['ci_half']

    return stats

In [None]:
# Monthly per-zone statistics for each N2O table (68, 46, 32, 22).
stats68_n2o, stats46_n2o, stats32_n2o, stats22_n2o = (
    prepare_monthly_stats(df=table)
    for table in (data_68, data_46, data_32, data_22)
)

In [None]:
# Show the monthly per-zone statistics that were computed from data_68.
stats68_n2o

In [None]:
# Monthly per-zone statistics for each O3 table (68, 46, 32, 22).
stats68_o3, stats46_o3, stats32_o3, stats22_o3 = (
    prepare_monthly_stats(df=table)
    for table in (o3_68, o3_46, o3_32, o3_22)
)

In [None]:
def report_mann_kendall(
    stats_df,
    zone_col: str = 'latitude_zone',
    date_col: str = 'year_month',
    value_col: str = 'mean'
):
    """
    Run ``mk.original_test`` on each zone's series and print a report.

    The distinct values of ``stats_df[zone_col]`` are visited in ascending
    order. For each one, the matching rows are sorted by ``date_col`` and
    the ``value_col`` column (indexed by ``date_col``) is handed to the
    test. The trend label, p-value, S statistic, Z score, Tau and slope of
    the result are printed, followed by a blank line.

    Returns
    -------
    dict
        ``{zone: test_result}`` with one entry per zone.
    """
    outcomes = {}
    for zone in sorted(stats_df[zone_col].unique()):
        zone_rows = stats_df.loc[stats_df[zone_col] == zone]
        ordered_values = zone_rows.sort_values(date_col).set_index(date_col)[value_col]

        test = mk.original_test(ordered_values)
        outcomes[zone] = test

        # The trailing empty entry reproduces the blank separator line.
        report_lines = [
            f"--- Zone {zone} ({value_col!r}) ---",
            f"Trend:      {test.trend}",
            f"P-value:    {test.p:.3e}",
            f"S (stat):   {test.s}",
            f"Z:          {test.z:.3f}",
            f"Tau:        {test.Tau}",
            f"Sen slope:  {test.slope}",
            "",
        ]
        print("\n".join(report_lines))

    return outcomes


# N2O Mann-Kendall test results (also shown in Table S2)

In [None]:
# N2O, 68 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats68_n2o)

In [None]:
# N2O, 46 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats46_n2o)

In [None]:
# N2O, 32 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats32_n2o)

In [None]:
# N2O, 22 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats22_n2o)

# O3 Mann-Kendall test results (also shown in Table S2)

In [None]:
# O3, 68 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats68_o3)

In [None]:
# O3, 46 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats46_o3)

In [None]:
# O3, 32 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats32_o3)

In [None]:
# O3, 22 hPa: per-zone Mann–Kendall report (the returned dict is also displayed).
report_mann_kendall(stats22_o3)

# Confidence interval statistics (also shown in Tables S3 and S4)

In [None]:
# Per-zone summary of the 68 hPa N2O monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary68 = (
    stats68_n2o
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary68)

In [None]:
# Per-zone summary of the 46 hPa N2O monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary46 = (
    stats46_n2o
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary46)

In [None]:
# Per-zone summary of the 32 hPa N2O monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary32 = (
    stats32_n2o
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary32)

In [None]:
# Per-zone summary of the 22 hPa N2O monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary22 = (
    stats22_n2o
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary22)

In [None]:
# Per-zone summary of the 68 hPa O3 monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary68_o3 = (
    stats68_o3
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary68_o3)

In [None]:
# Per-zone summary of the 46 hPa O3 monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary46_o3 = (
    stats46_o3
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary46_o3)

In [None]:
# Per-zone summary of the 32 hPa O3 monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary32_o3 = (
    stats32_o3
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary32_o3)

In [None]:
# Per-zone summary of the 22 hPa O3 monthly statistics: average of the
# monthly means, mean / median / maximum CI half-width, and the mean
# half-width expressed as a percentage of the average concentration.
summary22_o3 = (
    stats22_o3
    .groupby('latitude_zone')
    .agg(
        conc_mean=('mean', 'mean'),
        ci_half_mean=('ci_half', 'mean'),
        ci_half_median=('ci_half', 'median'),
        ci_half_max=('ci_half', 'max'),
    )
    .assign(ci_norm=lambda tbl: tbl['ci_half_mean'] / tbl['conc_mean'] * 100)  # in %
)
print(summary22_o3)