In [None]:
#This notebook produces seasonal cycles (monthly mean detrended concentrations ± 95% CI) for all individual locations in NH, tropical, and SH regions (Supplementary Figs. S2-S4).

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import calendar
from scipy import stats as ss
# Notebook-wide matplotlib defaults, applied in a single call.
plt.rcParams.update({
    'figure.dpi': 100,        # figure resolution in dots per inch
    'lines.markersize': 4,
    'font.size': 14,
    # Default line colour order: blue, red, green, magenta, yellow, black.
    'axes.prop_cycle': plt.cycler('color', 'brgmyk'),
})


In [None]:
from pathlib import Path
# Directory, relative to the current working directory, that the monthly-average
# CSV files are read from in the loading cells below.
DATA_DIR = Path("./trends/monthly_averages")

In [None]:
# Load the monthly-average N2O tables, one CSV per level tag (68, 46, 32, 22)
# that appears in the file name.
data_68, data_46, data_32, data_22 = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in (
        'monthly_averages_68_n2o.csv',
        'monthly_averages_46_n2o.csv',
        'monthly_averages_32_n2o.csv',
        'monthly_averages_22_n2o.csv',
    )
)

In [None]:
# Parse the 'date' column of every N2O table into pandas datetimes (in place).
for _n2o_frame in (data_68, data_46, data_32, data_22):
    _n2o_frame['date'] = pd.to_datetime(_n2o_frame['date'])

In [None]:
# Load the monthly-average O3 tables, one CSV per level tag (68, 46, 32, 22)
# that appears in the file name.
o3_68, o3_46, o3_32, o3_22 = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in (
        'monthly_averages_68_o3.csv',
        'monthly_averages_46_o3.csv',
        'monthly_averages_32_o3.csv',
        'monthly_averages_22_o3.csv',
    )
)

In [None]:
# Parse the 'date' column of every O3 table into pandas datetimes (in place).
for _o3_frame in (o3_68, o3_46, o3_32, o3_22):
    _o3_frame['date'] = pd.to_datetime(_o3_frame['date'])

In [None]:
# Keep only the N2O rows whose 'count' value is at least 5.
data_68, data_46, data_32, data_22 = (
    frame.loc[frame['count'] >= 5]
    for frame in (data_68, data_46, data_32, data_22)
)

In [None]:
# Keep only the O3 rows whose 'count' value is at least 5.
o3_68, o3_46, o3_32, o3_22 = (
    frame.loc[frame['count'] >= 5]
    for frame in (o3_68, o3_46, o3_32, o3_22)
)

In [None]:
# Per-species tables ordered by level tag: 22, 32, 46, 68.
# The plotting function pairs these lists by position with its `altitudes`
# labels (default '22 hPa', '32 hPa', '46 hPa', '68 hPa'), so keep this order.
n2o_dataframes = [
    data_22,
    data_32,
    data_46,
    data_68,
]
o3_dataframes = [
    o3_22,
    o3_32,
    o3_46,
    o3_68,
]


In [None]:
def add_detrended(df, date_column='date', group_col='location',
                  value_col='mean_concentration', detrend_col='detrended_concentration'):
    """
    Add a column holding ``value_col`` with its per-group linear trend removed.

    For every group in ``group_col`` a straight line is least-squares fitted to the
    values against time (calendar-day ordinals). Only the slope is removed: the
    detrended value is ``value - slope * (t - t_center)``, where ``t_center`` is the
    mean time of the rows used in the fit, so the series keeps its absolute level
    at the centre of the fitted record.

    Rows whose value or date is missing are excluded from the fit and receive NaN
    in ``detrend_col``. Groups with fewer than two usable rows are left as NaN.
    If all usable rows of a group share a single date, no slope can be estimated
    and the values are copied unchanged. Rows whose group key is missing are not
    part of any group and stay NaN.

    Parameters:
      - df (DataFrame): Input DataFrame containing at least the date, group, and value columns.
        It is not modified.
      - date_column (str): Column name for the date. (Converted with ``pd.to_datetime``.)
      - group_col (str): Column name on which to group the data (e.g. 'location').
      - value_col (str): Column name of the numeric values to detrend (e.g. 'mean_concentration').
      - detrend_col (str): Name of the column that will store the detrended values
        (created, or overwritten if it already exists).

    Returns:
      - DataFrame: A copy of ``df`` sorted by ``group_col`` then ``date_column``
        (original index labels are kept), with ``date_column`` converted to datetime
        and ``detrend_col`` added.
    """
    # Work on a sorted copy so the caller's frame is left untouched.
    out = df.copy()
    out[date_column] = pd.to_datetime(out[date_column])
    out = out.sort_values([group_col, date_column])

    # Everything below is positional (0..n-1) so that duplicate index labels,
    # e.g. from concatenated frames, cannot make results land on the wrong rows.
    dates = out[date_column].reset_index(drop=True)
    group_keys = out[group_col].reset_index(drop=True)
    raw_values = out[value_col].to_numpy(dtype=float, na_value=np.nan)

    # A row can take part in the fit only if both its value and its date exist;
    # a missing date (NaT) has no ordinal and would otherwise raise.
    usable = ~np.isnan(raw_values) & dates.notna().to_numpy()

    detrended = np.full(len(out), np.nan)
    for _, members in group_keys.groupby(group_keys):
        positions = members.index.to_numpy()
        positions = positions[usable[positions]]
        if positions.size < 2:
            # Fewer than 2 usable points: no trend can be fitted; leave NaN.
            continue

        days = dates.iloc[positions].apply(lambda ts: ts.toordinal()).to_numpy(dtype=float)
        values = raw_values[positions]

        # Centre time before fitting: the slope is unchanged, but the fit is
        # better conditioned than with raw ordinals of order 7e5.
        days_from_center = days - days.mean()
        if not days_from_center.any():
            # All usable rows share one date: the slope is undefined, so the
            # trend correction is zero and the values pass through unchanged.
            detrended[positions] = values
            continue

        slope = np.polyfit(days_from_center, values, 1)[0]

        # Remove only the sloped part; the fitted level at t_center is retained.
        detrended[positions] = values - slope * days_from_center

    out[detrend_col] = detrended
    return out


In [None]:
# Lookup from the internal location key to the reader-friendly label that the
# plotting function puts in the figure legend. Append further pairs as needed.
display_names = dict((
    ('bashkortostan', 'Bashkortostan'),
    ('bozeman', 'Montana'),
    ('brunei', 'Borneo'),
    ('california', 'California'),
    ('catalonia', 'Catalonia'),
    ('colombia', 'Colombia'),
    ('estonia', 'Estonia'),
    ('finland', 'Finland'),
    ('florianopolis', 'Santa Catarina'),
    ('florida', 'Florida'),
    ('france', 'France'),
    ('french_guiana', 'French Guiana'),
    ('huntingdon', 'Southern Québec'),
    ('iceland_e', 'Iceland (E)'),
    ('iceland_w', 'Iceland (W)'),
    ('khabarovsk', 'Khabarovsk'),
    ('kongo', 'Congo'),
    ('kyrgyzstan', 'Kyrgyzstan'),
    ('mexico', 'Xochimilco, Mexico City'),
    ('morocco', 'Morocco'),
    ('mukhrino', 'Mukhrino'),
    ('myanmar', 'Myanmar'),
    ('nz_n', 'New Zealand (N)'),
    ('nz_s', 'New Zealand (S)'),
    ('pantanal', 'Pantanal'),
    ('quistococha', 'Peruvian Amazon'),
    ('romania', 'Romania'),
    ('taiwan', 'Taiwan'),
    ('tarapoto', 'Tarapoto'),
    ('tasmania', 'Tasmania'),
    ('tierra_del_fuego', 'Tierra del Fuego'),
    ('uganda_e', 'Uganda (E)'),
    ('uganda_n', 'Uganda (N)'),
    ('uganda_s', 'Uganda (S)'),
    ('wales', 'Wales'),
))

In [None]:
# Detrend every N2O table per location; the list keeps the order of n2o_dataframes.
n2o_detrended = [
    add_detrended(
        frame,
        date_column='date',
        group_col='location',
        value_col='mean_concentration',
        detrend_col='detrended_concentration',
    )
    for frame in n2o_dataframes
]

In [None]:
# Detrend every O3 table per location; the list keeps the order of o3_dataframes.
o3_detrended = [
    add_detrended(
        frame,
        date_column='date',
        group_col='location',
        value_col='mean_concentration',
        detrend_col='detrended_concentration',
    )
    for frame in o3_dataframes
]

In [None]:


def _monthly_mean_ci(df, value_col, date_col, location_col, confidence):
    """
    Summarise ``value_col`` per location and calendar month.

    Returns one row per (location, month) with the columns 'mean', 'std',
    'count', 'ci_half', 'ci_lower' and 'ci_upper'. 'ci_half' is the half-width
    of the two-sided Student-t confidence interval of the mean,
    ``t(1 - alpha/2, count - 1) * std / sqrt(count)``; it is set to 0 when
    fewer than two values are available, because no interval can be computed.
    """
    monthly = df.copy()
    monthly[date_col] = pd.to_datetime(monthly[date_col])
    monthly['month'] = monthly[date_col].dt.month
    stats_df = (
        monthly.groupby([location_col, 'month'])[value_col]
               .agg(['mean', 'std', 'count'])
               .reset_index()
    )

    # Vectorised CI (also works when the table has no rows).
    n_obs = stats_df['count'].to_numpy(dtype=float)
    spread = stats_df['std'].to_numpy(dtype=float)
    has_ci = n_obs > 1
    # Placeholder sample size for rows without a CI keeps the t quantile and the
    # division well defined; those rows are overwritten with 0 below.
    safe_n = np.where(has_ci, n_obs, 2.0)
    alpha = 1 - confidence
    t_crit = ss.t.ppf(1 - alpha / 2, df=safe_n - 1)
    stats_df['ci_half'] = np.where(has_ci, t_crit * spread / np.sqrt(safe_n), 0.0)

    stats_df['ci_lower'] = stats_df['mean'] - stats_df['ci_half']
    stats_df['ci_upper'] = stats_df['mean'] + stats_df['ci_half']
    return stats_df


def plot_monthly_means_by_location_across_period(
    n2o_dfs: list,
    o3_dfs: list,
    location_inds: list,
    display_names: dict,
    altitudes: list = ('22 hPa', '32 hPa', '46 hPa', '68 hPa'),
    value_col: str = 'detrended_concentration',
    date_col: str = 'date',
    location_col: str = 'location',
    confidence: float = 0.95,
    palette: list = None,
    loc_ci_alpha: dict = None,
    show_errorbar: bool = False,
    save_path=None,
    save_dpi=300,
    linestyles: list = None
) -> None:
    """
    Plot the mean seasonal cycle (by month of year) with its confidence band for
    the chosen locations, as a grid with one row per altitude label and two
    columns (N2O on the left, O3 on the right).

    Parameters:
      - n2o_dfs, o3_dfs (list of DataFrame): One table per altitude, in the same
        order as ``altitudes``. Each needs ``date_col``, ``location_col`` and
        ``value_col``.
      - location_inds (list of str): Location keys to draw, matched against
        ``location_col``. Keys absent from a table simply produce no curve.
      - display_names (dict): Maps a location key to its legend label; keys that
        are not in the dict are shown as-is.
      - altitudes (sequence of str): Row titles; its length sets the number of rows.
      - value_col, date_col, location_col (str): Column names in the tables.
      - confidence (float): Confidence level of the Student-t interval.
      - palette (list): Colours cycled over the locations. Defaults to a
        nine-colour, colour-blind-friendly set.
      - loc_ci_alpha (dict): Optional per-location opacity of the band/error bars
        (default 0.3 for locations not listed).
      - show_errorbar (bool): Additionally draw the interval as error bars.
      - save_path: If truthy, the figure is written there before being shown.
      - save_dpi: Resolution used when saving.
      - linestyles (list): Line styles for the mean curves. The style only changes
        once the palette has been used up, so locations that would otherwise
        share a colour remain distinguishable. Defaults to '-', '--', ':', '-.'.

    Raises:
      - ValueError: If ``altitudes`` is empty or a table list is shorter than it.
    """
    # Default colour-blind-friendly palette.
    default_palette = ['#377eb8', '#ff7f00', '#4daf4a', '#f781bf',
                       '#a65628', '#984ea3', '#999999', '#e41a1c', '#dede00']
    palette = list(palette) if palette is not None and len(palette) > 0 else default_palette
    if linestyles is not None and len(linestyles) > 0:
        linestyles = list(linestyles)
    else:
        linestyles = ['-', '--', ':', '-.']
    loc_ci_alpha = loc_ci_alpha or {}
    default_alpha = 0.3

    # One subplot row per altitude label; fail early on inconsistent input
    # instead of part-way through plotting.
    altitudes = list(altitudes)
    n_rows = len(altitudes)
    if n_rows == 0:
        raise ValueError("altitudes must contain at least one label")
    if len(n2o_dfs) < n_rows or len(o3_dfs) < n_rows:
        raise ValueError(
            f"need one N2O and one O3 table per altitude ({n_rows}); "
            f"got {len(n2o_dfs)} and {len(o3_dfs)}"
        )

    # Compute stats
    stats_n2o = [_monthly_mean_ci(df, value_col, date_col, location_col, confidence)
                 for df in n2o_dfs]
    stats_o3 = [_monthly_mean_ci(df, value_col, date_col, location_col, confidence)
                for df in o3_dfs]

    # Month labels ('Jan' ... 'Dec'); element 0 of month_abbr is an empty string.
    month_labels = list(calendar.month_abbr)[1:]

    # Setup figure: 4 inches of height per row (14 x 16 for the default 4 rows).
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 4 * n_rows),
                             sharex=True, squeeze=False)

    species = (
        (stats_n2o, "N2O Concentration (ppbv)"),
        (stats_o3, "O3 Concentration (ppmv)"),
    )

    # Plot per altitude and species
    for i, alt in enumerate(altitudes):
        for j, (stats_list, ylabel) in enumerate(species):
            ax = axes[i, j]
            df_stats = stats_list[i]
            for idx, loc in enumerate(location_inds):
                display = display_names.get(loc, loc)
                grp = df_stats[df_stats[location_col] == loc].sort_values('month')
                color = palette[idx % len(palette)]
                # Move to the next line style each time the palette wraps around.
                linestyle = linestyles[(idx // len(palette)) % len(linestyles)]
                a_ci = loc_ci_alpha.get(loc, default_alpha)

                ax.fill_between(grp['month'], grp['ci_lower'], grp['ci_upper'],
                                color=color, alpha=a_ci, zorder=1)
                if show_errorbar:
                    ax.errorbar(grp['month'], grp['mean'], yerr=grp['ci_half'],
                                fmt='none', ecolor=color, alpha=a_ci, capsize=3, zorder=2)
                ax.plot(grp['month'], grp['mean'], color=color, linestyle=linestyle,
                        label=display, zorder=3)

            # Title and labels
            ax.set_title(f"{alt}")
            ax.grid(True)
            ax.set_ylabel(ylabel)
            if i < n_rows - 1:
                ax.tick_params(labelbottom=False)

    # X-axis labels on bottom row
    for j in range(2):
        axes[n_rows - 1, j].set_xticks(range(1, 13))
        axes[n_rows - 1, j].set_xticklabels(month_labels, rotation=45)

    # Shared legend, built from the first panel (every panel draws the same locations).
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.subplots_adjust(bottom=0.12)
    fig.legend(handles, labels, loc='lower center',
               ncol=5, fontsize=10, bbox_to_anchor=(0.5, 0.02))
    if save_path:
        fig.savefig(save_path, dpi=save_dpi)
    plt.show()


In [None]:
# Location keys (values of the 'location' column) for each of the three figures:
# `sh`, `eq` and `nh` groups respectively.
locations_sh = [
    'tierra_del_fuego',
    'tasmania',
    'nz_n',
    'nz_s',
]
locations_eq = [
    'brunei',
    'colombia',
    'french_guiana',
    'kongo',
    'mexico',
    'myanmar',
    'pantanal',
    'quistococha',
    'taiwan',
    'tarapoto',
    'uganda_e',
    'uganda_n',
    'uganda_s',
]
locations_nh = [
    'bashkortostan',
    'bozeman',
    'california',
    'catalonia',
    'estonia',
    'finland',
    'florida',
    'france',
    'huntingdon',
    'iceland_e',
    'iceland_w',
    'khabarovsk',
    'kyrgyzstan',
    'morocco',
    'mukhrino',
    'romania',
    'wales',
]

In [None]:
# Seasonal cycles for the `locations_sh` group; the figure is shown but not saved.
plot_monthly_means_by_location_across_period(
    n2o_dfs=n2o_detrended,
    o3_dfs=o3_detrended,
    location_inds=locations_sh,
    display_names=display_names,
    show_errorbar=True,
    save_path=None,
)


In [None]:
# Seasonal cycles for the `locations_eq` group; the figure is shown but not saved.
plot_monthly_means_by_location_across_period(
    n2o_dfs=n2o_detrended,
    o3_dfs=o3_detrended,
    location_inds=locations_eq,
    display_names=display_names,
    show_errorbar=True,
    save_path=None,
)


In [None]:
# Seasonal cycles for the `locations_nh` group; the figure is shown but not saved.
plot_monthly_means_by_location_across_period(
    n2o_dfs=n2o_detrended,
    o3_dfs=o3_detrended,
    location_inds=locations_nh,
    display_names=display_names,
    show_errorbar=True,
    save_path=None,
)
