This code loads the combined ground-observation parquet file saved by the previous notebook, then loads the TIE-GCM model output directly from its folder, and finally compares the two outputs.

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.animation as animation
from scipy.interpolate import RegularGridInterpolator, interp1d

# --- LOAD OBSERVATIONAL DATA ---
df = pd.read_parquet("combined_ionex_vtec.parquet")
# NOTE(review): the 0.04 scale factor is not explained anywhere in this file —
# confirm it matches the units/exponent of the values stored in the parquet.
df = df * 0.04

# --- LOAD MODEL DATA ---
model_directory = './ionosphere_central/CCMC/model_data/TIE-GCM/129/Akshay_Ramesh_042125_IT_5/'
# Everything in the directory except the JSON sidecar files is tried as a dataset.
model_files = sorted(f for f in os.listdir(model_directory) if not f.endswith('.json'))

model_grids, model_times = [], []
# The coordinate axes are taken from the first file that actually contributes
# TEC, rather than from whichever dataset happened to be opened last (which may
# have been skipped for lacking 'TEC').
model_lats = model_lons = None
for fname in model_files:
    fpath = os.path.join(model_directory, fname)
    try:
        # The context manager releases the file handle once the arrays have
        # been read into memory.
        with xr.open_dataset(fpath) as ds:
            if 'TEC' not in ds.variables:
                print(f"Skipping {fname} (no 'TEC' variable)")
                continue
            tec = ds['TEC'].values
            times = pd.to_datetime(ds['time'].values) if 'time' in ds else None
            file_lats = ds['lat'].values
            file_lons = ds['lon'].values
    except Exception as e:
        # Best-effort loading: report the unreadable file and keep going.
        print(f"Failed to load {fname}: {e}")
        continue

    if model_lats is None:
        model_lats, model_lons = file_lats, file_lons

    # 1 TECU = 1e16 el/m^2 = 1e12 el/cm^2, so dividing by 1e12 yields TECU only
    # if the file stores TEC in el/cm^2 — TODO confirm against the variable's
    # units attribute.
    if times is not None:
        # One (lat, lon) grid per time step along the leading axis.
        for grid, stamp in zip(tec / 1e12, times):
            model_grids.append(grid)
            model_times.append(stamp)
    else:
        model_grids.append(tec / 1e12)
        model_times.append(None)

if not model_grids:
    raise RuntimeError("No valid model grids found! Please check files and data.")

model_grids = np.stack(model_grids)     # (n_times, lat, lon)
# Grids that came without a time coordinate are marked with NaT.
model_times = pd.to_datetime([t if t is not None else pd.NaT for t in model_times])

# --- PREP OBSERVATIONAL DATA ---
# Column labels are unpacked as (lat, lon) pairs; the unique sorted values of
# each form the observation grid axes.
lats = sorted({lat for lat, _ in df.columns})
lons = sorted({lon for _, lon in df.columns})
obs_epochs = pd.to_datetime(df.index)
# Sorting by (lat, lon) puts the columns in row-major grid order, so each row
# reshapes directly to a (lat, lon) map.
df = df.sort_index(axis=1, level=['lat', 'lon'])
if df.shape[1] != len(lats) * len(lons):
    raise ValueError(
        f"Observation columns do not form a complete lat/lon grid: "
        f"{df.shape[1]} columns for {len(lats)} lats x {len(lons)} lons"
    )
# Reshape all epochs at once; unlike a per-epoch df.loc lookup this also works
# when the index contains repeated epochs.
obs_grids = df.to_numpy().reshape(len(df), len(lats), len(lons))

# --- UNIT CONVERSION FOR OBS (if needed) ---
# Heuristic: values above 200 are treated as raw el/m^2 rather than TECU
# (1 TECU = 1e16 el/m^2). NOTE(review): confirm the threshold suits this data.
if np.nanmax(obs_grids) > 200:  # Likely in raw units, not TECU
    print("Converting obs_grids from VTEC units to TECU")
    obs_grids = obs_grids / 1e16  # Convert VTEC to TECU

# --- INTERPOLATE OBSERVATIONS ONTO MODEL GRID ---
# The target points depend only on the model grid, so they are built once
# instead of once per epoch.
mesh = np.meshgrid(model_lats, model_lons, indexing='ij')
pts = np.column_stack([m.ravel() for m in mesh])
model_shape = (len(model_lats), len(model_lons))

obs_on_model = []
for g in obs_grids:
    # Points of the model grid that fall outside the observation grid get NaN.
    interp_func = RegularGridInterpolator((lats, lons), g, bounds_error=False, fill_value=np.nan)
    obs_on_model.append(interp_func(pts).reshape(model_shape))
obs_on_model = np.stack(obs_on_model)   # (n_obs_times, lat, lon)

# --- SYNC TIME AXES: Interpolate both datasets to the same time axis ---
# Only timestamps present in BOTH the model output (NaT entries excluded) and
# the observations are kept.
shared_stamps = set(model_times.dropna()) & set(obs_epochs)
if not shared_stamps:
    raise RuntimeError("No overlapping times found between obs and model!")
common_times = pd.to_datetime(sorted(shared_stamps))

def interp_time(series_times, series_grids, new_times):
    """Linearly interpolate a stack of grids along time onto ``new_times``.

    Parameters
    ----------
    series_times : sequence of datetime-like
        Timestamp of each grid in ``series_grids``. NaT entries and repeated
        timestamps (all but the first occurrence) are dropped together with
        their grids before interpolating.
    series_grids : array-like
        Grids stacked along axis 0, one per entry of ``series_times``.
    new_times : sequence of datetime-like
        Timestamps to evaluate at.

    Returns
    -------
    numpy.ndarray
        Array with ``len(new_times)`` entries along axis 0 and the remaining
        axes of ``series_grids``. Times outside the input range are linearly
        extrapolated. If only one valid grid remains, it is repeated for every
        requested time.

    Raises
    ------
    ValueError
        If no grid has a valid (non-NaT) timestamp.
    """
    series_times = pd.DatetimeIndex(pd.to_datetime(series_times))
    series_grids = np.asarray(series_grids)

    # Repeated timestamps give interp1d a zero-width interval (division by
    # zero -> NaN), so only the first grid for each timestamp is kept.
    keep = ~(series_times.isna() | series_times.duplicated(keep="first"))
    series_times = series_times[keep]
    series_grids = series_grids[keep]

    if len(series_grids) == 0:
        raise ValueError("interp_time: no grids with a valid timestamp to interpolate from")
    if len(series_grids) == 1:
        # Nothing to interpolate between: hold the single grid constant.
        return np.repeat(series_grids, len(new_times), axis=0)

    # interp1d needs numeric abscissae, so timestamps become POSIX seconds.
    known_seconds = [t.timestamp() for t in series_times]
    wanted_seconds = [pd.Timestamp(t).timestamp() for t in new_times]
    interp = interp1d(
        known_seconds,
        series_grids, axis=0, kind='linear', bounds_error=False, fill_value="extrapolate"
    )
    return interp(wanted_seconds)

# Put observations and model on the shared time axis.
obs_interp = interp_time(obs_epochs, obs_on_model, common_times)
model_interp = interp_time(model_times, model_grids, common_times)

# --- DIAGNOSTICS ---
# NaN-aware summary statistics of both time-aligned stacks, plus their shapes.
obs_lo, obs_hi, obs_avg = np.nanmin(obs_interp), np.nanmax(obs_interp), np.nanmean(obs_interp)
mod_lo, mod_hi, mod_avg = np.nanmin(model_interp), np.nanmax(model_interp), np.nanmean(model_interp)
print("\nDIAGNOSTICS:")
print("Obs Interp Stats:   min", obs_lo, "max", obs_hi, "mean", obs_avg)
print("Model Interp Stats: min", mod_lo, "max", mod_hi, "mean", mod_avg)
print("Obs shape:", obs_interp.shape, "Model shape:", model_interp.shape)

# --- ANIMATION OF OBSERVATION vs. MODEL ---
import matplotlib
matplotlib.use('Agg')  # non-interactive backend: frames are rendered to file only

# One colour range spanning the global min/max of both stacks, so the two
# panels share a colour scale.
vmin = min(np.nanmin(stack) for stack in (obs_interp, model_interp))
vmax = max(np.nanmax(stack) for stack in (obs_interp, model_interp))

fig, axes = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(14, 6),
    subplot_kw={'projection': ccrs.PlateCarree()},
)
titles = ["Observations", "TIE-GCM Model"]
for panel_idx, heading in enumerate(titles):
    panel = axes[panel_idx]
    panel.coastlines()
    panel.set_global()
    panel.set_title(heading)

# Draw the first frame of each dataset; the animation callback only swaps the
# data of these two meshes afterwards.
mesh_style = {'shading': 'auto', 'vmin': vmin, 'vmax': vmax, 'cmap': 'viridis'}
im_obs = axes[0].pcolormesh(
    model_lons, model_lats, obs_interp[0], transform=ccrs.PlateCarree(), **mesh_style
)
im_mod = axes[1].pcolormesh(
    model_lons, model_lats, model_interp[0], transform=ccrs.PlateCarree(), **mesh_style
)

# A single horizontal colour bar shared by both panels.
cb = fig.colorbar(
    im_obs,
    ax=axes,
    orientation='horizontal',
    fraction=0.04,
    pad=0.09,
    label='TEC (TECU)',
)

def update(idx):
    """Redraw both panels for animation frame ``idx``.

    Pushes the ``idx``-th observation and model grids (flattened with
    ``ravel()``) into their existing meshes and stamps both panel titles with
    the matching entry of ``common_times``.

    Returns the two mesh artists as a list.
    """
    stamp = common_times[idx]
    for mesh_artist, frames in ((im_obs, obs_interp), (im_mod, model_interp)):
        # The mesh data is replaced with a flattened copy of the 2-D grid.
        mesh_artist.set_array(frames[idx].ravel())
    axes[0].set_title(f"Observations\n{stamp}")
    axes[1].set_title(f"TIE-GCM Model\n{stamp}")
    return [im_obs, im_mod]

# One frame per shared timestamp; the result is written as a GIF via Pillow and
# the figure is closed afterwards.
n_frames = len(common_times)
ani = animation.FuncAnimation(fig, update, frames=n_frames, interval=150, blit=False)
ani.save('obs_vs_model.gif', writer='pillow', fps=8)
plt.close(fig)
print(f"\nSaved synchronized animation: obs_vs_model.gif")
