# Radiative heating regressed onto precipitation

## Import packages

In [17]:
import h5py
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import product

## Load data

In [18]:
# Load CloudSat data
CLOUDSAT_PATH = "/work/b11209013/2025_Research/CloudSat/CloudSat_sub/"


def _load_recentered(path, var_name, central_lons):
    """Read one variable and return a copy of it re-centred on each longitude.

    Parameters
    ----------
    path : str
        NetCDF file to open.
    var_name : str
        Name of the data variable to extract.
    central_lons : iterable of int
        Longitudes to centre the field on.

    Returns
    -------
    dict
        Maps ``str(central_lon)`` to the variable's values after the ``lon``
        coordinate has been shifted to ``[-180, 180)`` relative to that
        longitude and the dataset sorted along ``lon``.
    """
    fields = {}

    # Open the file once for all central longitudes instead of once per
    # longitude; the dataset handle is never rebound, so the context manager
    # closes exactly the object it opened.
    with xr.open_dataset(path, chunks={}) as ds:
        for central_lon in central_lons:
            lon_centered = (ds["lon"] - central_lon + 180) % 360 - 180
            shifted = ds.assign_coords(lon=lon_centered).sortby("lon")

            fields[str(central_lon)] = shifted[var_name].values

    return fields


central_lons = range(0, 341, 20)

lw = _load_recentered(f"{CLOUDSAT_PATH}qlw.nc", "qlw", central_lons)
sw = _load_recentered(f"{CLOUDSAT_PATH}qsw.nc", "qsw", central_lons)

# Load IMERG time series data
IMERG_PATH = "/home/b11209013/2025_Research/Obs/Files/IMERG/Hovmoller.h5"

with h5py.File(IMERG_PATH, "r") as f:
    # Index with [] rather than .get(): a missing key then raises KeyError
    # here instead of silently producing an array that wraps None.
    lon = np.asarray(f["lon"][...])

    hov_grp = f["precip"]

    # One array per dataset stored in the "precip" group, keyed by its name.
    Hov = {key: np.asarray(hov_grp[key][...]) for key in hov_grp}

In [19]:
# Show the names of the datasets read from the "precip" group.
print(Hov.keys())

dict_keys(['kw_11_13', 'kw_1_3', 'kw_3_5', 'kw_5_7', 'kw_7_9', 'kw_9_11', 'mjo_1_4'])


## Compute regression

### Define functions

In [20]:
import numpy as np

def regression_slope(x, y):
    """Compute the least-squares slope of y regressed onto 1D x at every grid point.

    Samples where either ``x`` or ``y`` is NaN are excluded independently for
    each grid point. The slope is evaluated from deviations about the
    per-point means (two-pass form), which avoids the cancellation error of
    ``n * sum(x*x) - sum(x)**2`` when ``x`` has a large mean compared with its
    spread.

    Parameters
    ----------
    x : array_like, shape (nt,) or (nt, 1, ..., 1)
        Predictor time series. Trailing singleton dimensions are collapsed.
    y : array_like, shape (nt, ...)
        Predictand field with the same time dimension as x. Can be 1D, 2D,
        3D, etc.

    Returns
    -------
    slope : ndarray, shape y.shape[1:]
        Regression slope at each grid point. NaN where fewer than two valid
        samples exist or where the valid ``x`` samples have zero variance.

    Raises
    ------
    ValueError
        If ``x`` is not effectively one-dimensional, or if the leading
        dimensions of ``x`` and ``y`` differ.
    """
    # Convert to arrays
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Basic checks
    if x.ndim != 1:
        # Allow x to be (nt, 1, ..., 1) but collapse it
        x = x.reshape(x.shape[0], -1)
        if x.shape[1] != 1:
            raise ValueError("This implementation assumes x is a 1D time series (nt,).")
        x = x[:, 0]

    if y.shape[0] != x.shape[0]:
        raise ValueError("Time dimension of x and y must match (x.shape[0] == y.shape[0]).")

    nt = x.shape[0]
    spatial_shape = y.shape[1:]

    # Number of grid points, computed explicitly so that an empty time axis
    # (nt == 0) can still be reshaped; reshape(nt, -1) cannot infer it then.
    M = int(np.prod(spatial_shape))
    y_flat = y.reshape(nt, M)                       # (nt, M)

    x2d = x[:, None]                                # (nt, 1), broadcasts to (nt, M)

    # Valid (non-NaN) mask per time & point
    mask = ~np.isnan(y_flat) & ~np.isnan(x2d)       # (nt, M)

    # Number of valid samples per point
    n = np.sum(mask, axis=0)                        # (M,)

    # Per-point means over valid samples only. Points with n == 0 divide by 1
    # to avoid a warning; they are set to NaN below regardless.
    n_safe = np.maximum(n, 1)
    mean_x = np.sum(np.where(mask, x2d, 0.0), axis=0) / n_safe      # (M,)
    mean_y = np.sum(np.where(mask, y_flat, 0.0), axis=0) / n_safe   # (M,)

    # Deviations from the means, zeroed where the sample is invalid so that
    # it contributes nothing to the sums.
    x_dev = np.where(mask, x2d - mean_x, 0.0)       # (nt, M)
    y_dev = np.where(mask, y_flat - mean_y, 0.0)    # (nt, M)

    # slope = sum(dx * dy) / sum(dx * dx)
    denom = np.sum(x_dev * x_dev, axis=0)           # (M,)
    numer = np.sum(x_dev * y_dev, axis=0)           # (M,)

    slope_flat = np.full(M, np.nan, dtype=float)
    valid_reg = (n > 1) & (denom != 0.0)
    slope_flat[valid_reg] = numer[valid_reg] / denom[valid_reg]

    # Back to original spatial shape
    return slope_flat.reshape(spatial_shape)


### Compute regression

In [21]:
# set cross iters
cross_iters = list(product(lw.keys(), Hov.keys()))

# Compute regression: for every central longitude, regress the re-centred
# LW and SW fields onto each Hovmoller time series taken at that longitude.
lw_reg = {}
sw_reg = {}

for lw_key in lw:
    # preallocate dict
    lw_reg[str(lw_key)] = {}
    sw_reg[str(lw_key)] = {}

    # Find the longitude index nearest to the central longitude. The offset
    # is wrapped to [-180, 180) so the nearest column is found whether `lon`
    # is stored as 0..360 or -180..180.
    lon_offset = (np.asarray(lon, dtype=float) - int(lw_key) + 180.0) % 360.0 - 180.0
    lon_idx = int(np.argmin(np.abs(lon_offset)))

    for hov_key in tqdm(Hov.keys(), desc=f"Processing LW key {lw_key}"):
        # Time series at the selected longitude column.
        Hov_ts = Hov[hov_key][:, lon_idx]

        # sw shares its keys with lw (both are built from the same set of
        # central longitudes), so lw_key indexes both dictionaries.
        lw_reg[str(lw_key)][str(hov_key)] = regression_slope(Hov_ts, lw[lw_key])
        sw_reg[str(lw_key)][str(hov_key)] = regression_slope(Hov_ts, sw[lw_key])

Processing LW key 0: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 20: 100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
Processing LW key 40: 100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
Processing LW key 60: 100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
Processing LW key 80: 100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
Processing LW key 100: 100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
Processing LW key 120: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 140: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 160: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 180: 100%|██████████| 7/7 [00:10<00:00,  1.46s/it]
Processing LW key 200: 100%|██████████| 7/7 [00:10<00:00,  1.46s/it]
Processing LW key 220: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 240: 100%|██████████| 7/7 [00:10<00:00,  1.47s/it]
Processing LW key 260: 100%|██████████| 7/7 [00:10<00:00,  1.45s/it]
Processing LW key 280: 100%|██████████| 

### Compute average values

In [22]:
# Composite: NaN-ignoring mean of the regression maps across all central
# longitudes, computed separately for each Hovmoller key.
lw_reg_composite = {}
sw_reg_composite = {}

for hov_key in Hov:
    # Stack the per-longitude maps along a new leading axis.
    lw_stack = np.array([lw_reg[center][hov_key] for center in lw])
    sw_stack = np.array([sw_reg[center][hov_key] for center in lw])

    lw_reg_composite[hov_key] = np.nanmean(lw_stack, axis=0)
    sw_reg_composite[hov_key] = np.nanmean(sw_stack, axis=0)

## Save File

In [None]:
# Write the per-longitude regression maps and their composites to one HDF5
# file. Layout: /lw/<hov_key>/<central_lon>, /sw/<hov_key>/<central_lon>,
# /lw_composite/<hov_key>, /sw_composite/<hov_key>.
with h5py.File("/work/b11209013/2025_Research/regression/IMERG_CLOUDSAT.h5", "w") as f:
    per_center = (
        ("lw", lw_reg, lw),
        ("sw", sw_reg, sw),
    )
    for group_name, reg_maps, centers in per_center:
        band_grp = f.create_group(group_name)

        for hov_key in Hov:
            hov_grp_out = band_grp.create_group(str(hov_key))
            for center in centers:
                slope_map = np.array(reg_maps[str(center)][str(hov_key)])
                hov_grp_out.create_dataset(str(center), data=slope_map)

    composites = (
        ("lw_composite", lw_reg_composite),
        ("sw_composite", sw_reg_composite),
    )
    for group_name, comp_maps in composites:
        comp_grp = f.create_group(group_name)

        for hov_key in Hov:
            comp_grp.create_dataset(str(hov_key), data=np.array(comp_maps[str(hov_key)]))

[[-0.00488785 -0.00319218 -0.0218375  ... -0.0135173  -0.02385384
  -0.02401111]
 [-0.0059989   0.00460006 -0.0165255  ...  0.02055635  0.01549107
   0.00964484]
 [-0.00544915  0.00278226 -0.01813823 ...  0.00576117  0.00980284
  -0.00925189]
 ...
 [ 0.01959025  0.01597927  0.03679337 ...  0.01454462  0.01751469
   0.00748313]
 [ 0.01065093  0.0064921   0.01521761 ...  0.01624153  0.01237952
   0.00653634]
 [ 0.00107868  0.00076695 -0.00044444 ...  0.00499815  0.00224997
   0.00394667]]
[[-0.02083491 -0.01164966  0.01877236 ...  0.00966246 -0.00228353
  -0.02459476]
 [ 0.0058368   0.01009616  0.01816164 ...  0.01613493 -0.00321765
   0.00671684]
 [-0.00289956  0.00978028  0.02039129 ...  0.01392519 -0.01058076
   0.00312892]
 ...
 [-0.00329654 -0.00157473 -0.0038394  ... -0.0214445  -0.00028398
  -0.00620511]
 [-0.00380835 -0.00604689 -0.00174176 ... -0.01118916 -0.00712596
  -0.01113752]
 [-0.00399349 -0.00116051  0.0017137  ... -0.00526984 -0.00505238
  -0.00755261]]
[[-0.02884815 -0