In [1]:
# File: preprocess_tnlwrf_monsoon.py

import xarray as xr

def preprocess_tnlwrf_monsoon(filepath):
    ds  = xr.open_dataset(filepath)
    var = ds['avg_tnlwrf']
    
    # rename if needed
    if 'valid_time' in var.dims:
        var = var.rename({'valid_time': 'time'})
    
    # compute daily mean
    var_daily   = var.resample(time='1D').mean()
    climatology = var_daily.mean(dim='time')
    anomalies   = var_daily - climatology
    anomalies.name = 'avg_tnlwrf'
    
    # select only monsoon months (June–September)
    monsoon = anomalies.sel(time=anomalies['time.month'].isin([6,7,8,9]))
    
    # write monsoon-only anomalies
    monsoon.to_netcdf('tnlwrf_anomalies_monsoon_2001_2015.nc')
    print("✅ Saved monsoon anomalies to 'tnlwrf_anomalies_monsoon_2001_2015.nc'")

if __name__ == "__main__":
    preprocess_tnlwrf_monsoon('OLR_data2.nc')


✅ Saved monsoon anomalies to 'tnlwrf_anomalies_monsoon_2001_2015.nc'


In [2]:
# File: eof_clustering_monsoon.py

import numpy as np
import xarray as xr
from eofs.xarray import Eof
from sklearn.cluster import KMeans

def run_eof_clustering_monsoon():
    # read monsoon anomalies
    data = xr.open_dataset('tnlwrf_anomalies_monsoon_2001_2015.nc')['avg_tnlwrf']
    if 'valid_time' in data.dims:
        data = data.rename({'valid_time': 'time'})
    
    # drop any day that's all-NaN
    data = data.dropna(dim='time', how='all')
    print("✅ Cleaned monsoon data shape:", data.shape)
    
    # EOF decomposition
    solver   = Eof(data)
    pcs      = solver.pcs(npcs=7, pcscaling=1)
    variance = solver.varianceFraction().values[:7]
    
    # save PCs & explained variance
    np.save('pcs_tnlwrf_monsoon.npy', pcs.values)
    np.save('explained_variance_tnlwrf_monsoon.npy', variance)
    
    # cluster in PC space
    pcs_norm = (pcs.values - pcs.values.mean(axis=0)) / pcs.values.std(axis=0)
    labels   = KMeans(n_clusters=4, n_init=10, random_state=42).fit(pcs_norm).labels_
    
    # write regimes
    regimes_ds = xr.Dataset(
        {'regime': (['time'], labels)},
        coords={'time': data['time']}
    )
    regimes_ds.to_netcdf('eof_weather_regimes_tnlwrf_monsoon.nc')
    print("✅ EOF clustering on monsoon data saved to 'eof_weather_regimes_tnlwrf_monsoon.nc'")

if __name__ == "__main__":
    run_eof_clustering_monsoon()


✅ Cleaned monsoon data shape: (1220, 129, 121)
✅ EOF clustering on monsoon data saved to 'eof_weather_regimes_tnlwrf_monsoon.nc'


In [9]:
# File: plot_monsoon_tnlwrf.py

import os
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cartopy.crs as ccrs
from scipy.interpolate import interp1d

def ensure_time(ds):
    if 'valid_time' in ds.dims:
        return ds.rename({'valid_time': 'time'})
    return ds

def plot_regime_frequency(ds):
    labels, counts = np.unique(ds['regime'].values, return_counts=True)
    plt.figure(figsize=(7, 5))
    sns.barplot(x=labels, y=counts, palette="deep")
    plt.title("Monsoon Regime Frequency (JJAS 2001–2015)", fontsize=14)
    plt.xlabel("Regime", fontsize=12)
    plt.ylabel("Days", fontsize=12)
    plt.tight_layout()
    plt.savefig('plots_tnlwrf_monsoon/regime_frequency_monsoon.png', dpi=300)
    plt.close()

def plot_spatial_composites(ds):
    for r in np.unique(ds['regime'].values):
        with xr.open_dataset('tnlwrf_anomalies_monsoon_2001_2015.nc') as anomalies:
            anom = ensure_time(anomalies)['avg_tnlwrf'].dropna(dim='time', how='all')
            comp = anom.where(ds['regime'] == r).mean(dim='time')

            plt.figure(figsize=(8, 6))
            ax = plt.axes(projection=ccrs.PlateCarree())
            comp.plot.contourf(
                ax=ax, transform=ccrs.PlateCarree(),
                levels=np.arange(-20, 20.1, 2),
                cmap='RdBu_r', extend='both', add_colorbar=True
            )
            ax.coastlines()
            ax.set_title(f"Regime {r}: Mean Monsoon Anomaly", fontsize=14)
            plt.tight_layout()
            plt.savefig(f'plots_tnlwrf_monsoon/regime_{r}_composite_monsoon.png', dpi=300)
            plt.close()

def plot_pc_timeseries(pcs, nao_index, enso_df):
    with xr.open_dataset('tnlwrf_anomalies_monsoon_2001_2015.nc') as anomalies:
        times = ensure_time(anomalies).dropna(dim='time', how='all')['time'].values

    explained_variance = np.load('explained_variance_tnlwrf_monsoon.npy')
    pc1_var = explained_variance[0] * 100

    # prepare ENSO for full-year (we’ll re‐slice later)
    enso_df['date'] = pd.to_datetime(enso_df[['Year', 'Month']].assign(day=1)) + pd.offsets.MonthEnd(0)
    enso = enso_df.set_index('date')['Anomaly']

    plt.figure(figsize=(10, 5))
    plt.plot(times, pcs[:, 0], label='PC1 (EOF1)', linewidth=1.5)
    plt.plot(nao_index['time'].values, nao_index['nao'].values, label='NAO Index', linewidth=1.5)
    # plot full-year ENSO here if you want; but for JJAS you’ll do correlation separately
    plt.legend()
    plt.title(f"PC1 vs NAO & ENSO (JJAS)\nPC1 Var = {pc1_var:.2f}%", fontsize=14)
    plt.xlabel("Time", fontsize=12)
    plt.ylabel("Index Value", fontsize=12)
    plt.tight_layout()
    plt.savefig('plots_tnlwrf_monsoon/pc1_vs_enso_nao_monsoon.png', dpi=300)
    plt.close()

def plot_seasonal_cycle(ds):
    ds = ensure_time(ds)
    months  = ds['time'].dt.month.values
    regimes = ds['regime'].values
    plt.figure(figsize=(10, 5))
    for r in np.unique(regimes):
        monthly_counts = [( (months==m) & (regimes==r) ).sum() for m in range(1, 13)]
        plt.plot(range(1, 13), monthly_counts, label=f'Regime {r}')
    plt.xticks(range(1, 13), ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
    plt.legend()
    plt.title("Seasonal Cycle of Monsoon Regimes", fontsize=14)
    plt.xlabel("Month", fontsize=12)
    plt.ylabel("Days", fontsize=12)
    plt.tight_layout()
    plt.savefig('plots_tnlwrf_monsoon/seasonal_cycle_monsoon.png', dpi=300)
    plt.close()

def plot_pc_index_correlation(pcs, index_array, label, file_name,
                              time_values=None, index_time=None):
    # if monthly alignment is requested...
    if time_values is not None and index_time is not None \
       and index_array.shape[0] != pcs.shape[0]:

        # 1) build daily‐PC DataFrame
        df = pd.DataFrame(pcs, columns=[f"PC{i+1}" for i in range(pcs.shape[1])])
        df['date'] = pd.to_datetime(time_values)
        df = df.set_index('date')

        # 2) resample to end‐of‐month, THEN keep only JJAS
        pcs_monthly = df.resample('M').mean()
        pcs_monsoon = pcs_monthly[pcs_monthly.index.month.isin([6,7,8,9])]

        # 3) build ENSO/NAO series, indexed by those same JJAS months
        index_ser = pd.Series(index_array, index=index_time)
        index_ser = index_ser.loc[pcs_monsoon.index]

        # 4) compute correlations
        corrs = [
            np.corrcoef(pcs_monsoon.iloc[:, i], index_ser)[0, 1]
            for i in range(pcs.shape[1])
        ]

    else:
        # daily‐to‐daily correlation
        corrs = [
            np.corrcoef(pcs[:, i], index_array)[0, 1]
            for i in range(pcs.shape[1])
        ]

    # plotting unchanged
    plt.figure(figsize=(8, 5))
    sns.barplot(x=[f'PC{i+1}' for i in range(len(corrs))],
                y=corrs, palette="deep")
    plt.ylim(-1, 1)
    plt.title(f"Correlation between PCs and {label} (JJAS)", fontsize=14)
    plt.xlabel("Principal Components", fontsize=12)
    plt.ylabel("Pearson Correlation", fontsize=12)
    plt.tight_layout()
    plt.savefig(f'plots_tnlwrf_monsoon/{file_name}_monsoon.png', dpi=300)
    plt.close()


def plot_spatial_composites_new(ds):
    pcs = np.load('pcs_tnlwrf_monsoon.npy')
    var = np.load('explained_variance_tnlwrf_monsoon.npy')
    var = var / var.sum()
    labels = ds['regime'].values

    regime_map = {}
    for r in np.unique(labels):
        arr = pcs[labels == r]
        pc_var = np.var(arr, axis=0)
        weights = pc_var / pc_var.sum()
        regime_map[r] = np.sum(weights * var)

    for r in np.unique(labels):
        with xr.open_dataset('tnlwrf_anomalies_monsoon_2001_2015.nc') as anomalies:
            anom = ensure_time(anomalies)['avg_tnlwrf'].dropna(dim='time', how='all')
            comp = anom.where(ds['regime'] == r).mean(dim='time')

            plt.figure(figsize=(8, 6))
            ax = plt.axes(projection=ccrs.PlateCarree())
            comp.plot.contourf(
                ax=ax, transform=ccrs.PlateCarree(),
                levels=np.arange(-20,20.1,2),
                cmap='RdBu_r', extend='both', add_colorbar=True
            )
            ax.coastlines()
            pct = regime_map[r] * 100
            ax.set_title(f"Regime {r}: Mean Monsoon Anomaly\n(Weighted Var ≈ {pct:.2f}%)", fontsize=13)
            plt.tight_layout()
            plt.savefig(f'plots_tnlwrf_rand_monsoon/regime_{r}_composite_monsoon.png', dpi=300)
            plt.close()

def main():
    os.makedirs("plots_tnlwrf_monsoon", exist_ok=True)
    os.makedirs("plots_tnlwrf_rand_monsoon", exist_ok=True)

    # load your regimes + pcs
    with xr.open_dataset('eof_weather_regimes_tnlwrf_monsoon.nc') as ds:
        ds = ensure_time(ds)
        pcs = np.load('pcs_tnlwrf_monsoon.npy')

    # get the daily monsoon time‐axis
    with xr.open_dataset('tnlwrf_anomalies_monsoon_2001_2015.nc') as anom:
        anom = ensure_time(anom).dropna(dim='time', how='all')
        time_vals = anom['time'].values

    # plot frequency & composites
    plot_regime_frequency(ds)
    plot_spatial_composites(ds)

    # load NAO; interpolate to daily and then do daily correlation
    with xr.open_dataset('nao_index.nc') as nao_index:
        f_nao     = interp1d(
            nao_index['time'].values.astype(np.int64),
            nao_index['nao'].values,
            kind='linear', fill_value="extrapolate"
        )
        nao_daily = f_nao(time_vals.astype(np.int64))
        plot_pc_index_correlation(
            pcs, nao_daily,
            "NAO Index", "pc_nao_correlation"
        )

    # load ENSO, then filter to JJAS‐months only
    enso_df = pd.read_csv('Enso_Monthwise_Index.csv')
    enso_df['date'] = pd.to_datetime(enso_df[['Year','Month']].assign(day=1)) + pd.offsets.MonthEnd(0)
    enso = enso_df.set_index('date')['Anomaly']
    enso_monsoon = enso[enso.index.month.isin([6,7,8,9])]

    plot_pc_index_correlation(
        pcs,
        enso_monsoon.values,
        "ENSO Index", "pc_enso_correlation",
        time_values=time_vals,
        index_time=enso_monsoon.index.values
    )

    # seasonal cycle & weighted EOF composites
    plot_seasonal_cycle(ds)
    plot_spatial_composites_new(ds)

if __name__ == "__main__":
    main()



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=labels, y=counts, palette="deep")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[f'PC{i+1}' for i in range(len(corrs))],
  pcs_monthly = df.resample('M').mean()

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[f'PC{i+1}' for i in range(len(corrs))],
