In [1]:
# Cell 1: Imports, Mixed Precision, GPU & DLL Setup, Configuration

import os, glob, ctypes
import h5py, numpy as np, pandas as pd, matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, Model, mixed_precision
from tensorflow.keras.callbacks import ModelCheckpoint
from tqdm import tqdm
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score,
    f1_score, accuracy_score, mean_squared_error, mean_absolute_error
)

# Enable mixed precision for speed on GPU
mixed_precision.set_global_policy('mixed_float16')

# DLL fix for Windows HDF5/zlib
try:
    dll = os.path.join(os.environ['CONDA_PREFIX'], 'Library', 'bin', 'zlibwapi.dll')
    ctypes.CDLL(dll)
    print("Loaded zlibwapi.dll")
except Exception:
    print("Could not load zlibwapi.dll")

# GPU config
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True)
    print(f"Using {len(gpus)} GPU(s)")
else:
    print("No GPU detected")

# Config — tuned for speed
DATA_DIR      = r"C:\college\CV\COSMOS\6C_full"
SEQ_LEN       = 4         # use 4 time steps instead of 6
PATCH_SIZE    = 32        # use 32×32 patches instead of 64×64
BATCH_SIZE    = 16        # larger batch if GPU memory allows
EPOCHS        = 20        # max epochs
THRESHOLD     = 265.0
CV_THRESHOLD  = 260.0
FOG_THRESHOLD = 270.0
MODEL_PATH    = r"C:\college\CV\COSMOS\multitask_nowcast_fast.h5"

# list files & build sliding windows
all_files = sorted(glob.glob(os.path.join(DATA_DIR, "*.h5")))
sequences = [all_files[i:i+SEQ_LEN+1] for i in range(len(all_files)-SEQ_LEN)]


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3060 Laptop GPU, compute capability 8.6
Loaded zlibwapi.dll
Using 1 GPU(s)


Data-Loading & Generator Functions

In [2]:
# Cell 2: Data Loader & Generator

def load_multi(fp_seq):
    frames = []
    for fp in fp_seq[:SEQ_LEN]:
        with h5py.File(fp,'r') as f:
            cnt1, cnt2   = f['IMG_TIR1'][0][...], f['IMG_TIR2'][0][...]
            cnt_wv, cnt_mir = f['IMG_WV'][0][...], f['IMG_MIR'][0][...]
            cnt_vis      = f['IMG_VIS'][0][...]
            lut1, lut2   = f['IMG_TIR1_TEMP'][:],  f['IMG_TIR2_TEMP'][:]
            lut_wv, lut_mir = f['IMG_WV_TEMP'][:], f['IMG_MIR_TEMP'][:]
            lut_vis      = f['IMG_VIS_ALBEDO'][:]
        bt1 = lut1[cnt1]; bt2 = lut2[cnt2]
        wv  = lut_wv[cnt_wv]; mir = lut_mir[cnt_mir]
        vis = lut_vis[cnt_vis]
        frames.append(np.stack([bt1,bt2,wv,mir,vis],axis=-1)/300.0)
    X = np.stack(frames,axis=0).astype(np.float32)

    with h5py.File(fp_seq[-1],'r') as f:
        cnt1, cnt2   = f['IMG_TIR1'][0][...], f['IMG_TIR2'][0][...]
        cnt_wv, cnt_mir = f['IMG_WV'][0][...], f['IMG_MIR'][0][...]
        lut1, lut2   = f['IMG_TIR1_TEMP'][:],  f['IMG_TIR2_TEMP'][:]
        lut_wv, lut_mir = f['IMG_WV_TEMP'][:], f['IMG_MIR_TEMP'][:]
    bt1_t = lut1[cnt1]; bt2_t = lut2[cnt2]
    wv_t  = lut_wv[cnt_wv]; mir_t = lut_mir[cnt_mir]

    # temperature trend normalized
    last_mean  = bt1_t.mean()/300.0
    first_mean = X[0,...,0].mean()
    temp_trend = np.array([last_mean - first_mean],dtype=np.float32)

    return X, {
        'cloud'          : (bt1_t<THRESHOLD).astype(np.float32)[...,None],
        'convective'     : (bt1_t<CV_THRESHOLD).astype(np.float32)[...,None],
        'fog'            : (mir_t<FOG_THRESHOLD).astype(np.float32)[...,None],
        'moisture'       : (wv_t/300.0).astype(np.float32)[...,None],
        'thermo_contrast': ((bt2_t-bt1_t)/100.0).astype(np.float32)[...,None],
        'temp_trend'     : temp_trend
    }

def random_crop(X,y):
    H,W = X.shape[1], X.shape[2]
    i,j = np.random.randint(0,H-PATCH_SIZE), np.random.randint(0,W-PATCH_SIZE)
    Xc = X[:,i:i+PATCH_SIZE,j:j+PATCH_SIZE,:]
    yc = {}
    for k,v in y.items():
        yc[k] = v[i:i+PATCH_SIZE,j:j+PATCH_SIZE] if v.ndim==3 else v
    return Xc,yc

def generator(seqs):
    while True:
        np.random.shuffle(seqs)
        for seq in seqs:
            X,y = load_multi(seq)
            yield random_crop(X,y)


Dataset Split & steps_per_epoch

In [3]:
# Cell 3: Dataset Split, Caching & Pipelines

# train/val split
split       = int(0.9*len(sequences))
train_seqs  = sequences[:split]
val_seqs    = sequences[split:]

# steps per epoch
train_steps = len(train_seqs)//BATCH_SIZE
val_steps   = len(val_seqs)//BATCH_SIZE

# common output signature
output_signature = (
    tf.TensorSpec((SEQ_LEN,PATCH_SIZE,PATCH_SIZE,5),tf.float32),
    {
      'cloud': tf.TensorSpec((PATCH_SIZE,PATCH_SIZE,1),tf.float32),
      'convective': tf.TensorSpec((PATCH_SIZE,PATCH_SIZE,1),tf.float32),
      'fog': tf.TensorSpec((PATCH_SIZE,PATCH_SIZE,1),tf.float32),
      'moisture': tf.TensorSpec((PATCH_SIZE,PATCH_SIZE,1),tf.float32),
      'thermo_contrast': tf.TensorSpec((PATCH_SIZE,PATCH_SIZE,1),tf.float32),
      'temp_trend': tf.TensorSpec((1,),tf.float32)
    }
)

train_ds = tf.data.Dataset.from_generator(
    lambda: generator(train_seqs), output_signature=output_signature
).cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

val_ds = tf.data.Dataset.from_generator(
    lambda: generator(val_seqs), output_signature=output_signature
).cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


Cell 4: Model Definition & Compilation

In [4]:
# Cell 4: Model Definition & Compilation

inp = layers.Input((SEQ_LEN,PATCH_SIZE,PATCH_SIZE,5))
x = layers.ConvLSTM2D(32,(3,3),padding='same',return_sequences=True,activation='relu')(inp)
x = layers.BatchNormalization()(x)
x = layers.ConvLSTM2D(16,(3,3),padding='same',return_sequences=False,activation='relu')(x)
x = layers.BatchNormalization()(x)

heads = {
  'cloud'          : layers.Conv2D(1,(1,1),activation='sigmoid',   name='cloud')(x),
  'convective'     : layers.Conv2D(1,(1,1),activation='sigmoid',   name='convective')(x),
  'fog'            : layers.Conv2D(1,(1,1),activation='sigmoid',   name='fog')(x),
  'moisture'       : layers.Conv2D(1,(1,1),activation='linear',    name='moisture')(x),
  'thermo_contrast': layers.Conv2D(1,(1,1),activation='linear',    name='thermo_contrast')(x),
}
temp_avg = layers.GlobalAveragePooling2D()(x)
heads['temp_trend'] = layers.Dense(1,activation='linear',name='temp_trend')(temp_avg)

model = Model(inputs=inp, outputs=heads, name='fast_multitask_nowcast')
model.compile(
  optimizer='adam',
  loss={
    'cloud':'binary_crossentropy','convective':'binary_crossentropy','fog':'binary_crossentropy',
    'moisture':'mse','thermo_contrast':'mse','temp_trend':'mse'
  },
  loss_weights={'cloud':1,'convective':1,'fog':1,'moisture':0.5,'thermo_contrast':0.5,'temp_trend':0.1},
  metrics={'cloud':'accuracy','convective':'accuracy','fog':'accuracy'}
)
model.summary()


Model: "fast_multitask_nowcast"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 4, 32, 32,   0           []                               
                                5)]                                                               
                                                                                                  
 conv_lstm2d (ConvLSTM2D)       (None, 4, 32, 32, 3  42752       ['input_1[0][0]']                
                                2)                                                                
                                                                                                  
 batch_normalization (BatchNorm  (None, 4, 32, 32, 3  128        ['conv_lstm2d[0][0]']            
 alization)                     2)                                           

Training with Progress Bars

## Cell 5b — Resume training from the most‑recent `.h5` checkpoint
This cell will  
1. look in the existing `checkpoints/` folder for the latest `model_epoch_XX.h5`,  
2. load the model (weights **and** optimizer state), then  
3. call `model.fit` so training continues right after that epoch.  


In [None]:
# Cell 5b — Resume training from latest .h5 checkpoint
import os, re, glob
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint

# ------------------------------------------------------------------
# 1️⃣  Locate the newest .h5 checkpoint
# ------------------------------------------------------------------
CHECKPOINT_DIR = "checkpoints"           # existing relative folder
ckpt_files = glob.glob(os.path.join(CHECKPOINT_DIR, "*.h5"))

if not ckpt_files:
    raise FileNotFoundError(
        f"No .h5 checkpoints found in '{CHECKPOINT_DIR}'. "
        "Run the training cell once to create them."
    )

def _epoch_num(fname):
    m = re.search(r"(\d+)", os.path.basename(fname))
    return int(m.group(1)) if m else -1

ckpt_files.sort(key=_epoch_num)
latest_ckpt   = ckpt_files[-1]
initial_epoch = _epoch_num(latest_ckpt)

print(f"🔄  Resuming from {latest_ckpt}  (completed epoch {initial_epoch})")

# ------------------------------------------------------------------
# 2️⃣  Load model (weights + optimizer + compile config)
# ------------------------------------------------------------------
model = tf.keras.models.load_model(latest_ckpt)

# If the compile information wasn't saved (rare older TF versions),
# re‑compile exactly as before:
# model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

# ------------------------------------------------------------------
# 3️⃣  Continue training
# ------------------------------------------------------------------
resume_ckpt = ModelCheckpoint(
    filepath=os.path.join(CHECKPOINT_DIR, "model_epoch_{epoch:02d}.h5"),
    save_freq="epoch",
    verbose=1
)

history_resume = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,            # same total target as before
    initial_epoch=initial_epoch,
    callbacks=[resume_ckpt],
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    verbose=1
)


🔄  Resuming from checkpoints\model_epoch_11.h5  (completed epoch 11)
Epoch 12/20


Evaluation & Metrics Display

In [None]:
# Cell 6 — Evaluation (fast, streamed) with total count in tqdm -----------
import tensorflow as tf, numpy as np, pandas as pd, matplotlib.pyplot as plt
from tqdm import tqdm

# ── 1. Load the checkpoint you want to evaluate ─────────────────────────────
SAVED_MODEL = r"C:\college\CV\COSMOS\checkpoints\model_epoch_11.h5"
model = tf.keras.models.load_model(SAVED_MODEL, compile=False)

# ── 2. Define tasks and initialise streaming accumulators ───────────────────
seg_keys   = ["cloud", "convective", "fog"]
reg_keys   = ["moisture", "thermo_contrast", "temp_trend"]
conf       = {k: np.zeros((2,2), dtype=np.int64) for k in seg_keys}
reg_sum_abs= {k: 0.0 for k in reg_keys}
reg_sum_sq = {k: 0.0 for k in reg_keys}
reg_n      = {k: 0    for k in reg_keys}

# ── 2.5 Compute total batches for tqdm ──────────────────────────────────────
card = tf.data.experimental.cardinality(val_ds)
try:
    total_batches = int(card.numpy()) if card.numpy() >= 0 else None
except:
    total_batches = None

# ── 3. Stream through the validation set once ───────────────────────────────
for Xb, yb in tqdm(val_ds, desc="Evaluation", total=total_batches):
    preds = model(Xb, training=False)

    # --- segmentation heads --------------------------------------------------
    for k in seg_keys:
        y_true = tf.reshape(yb[k],   (-1,)).numpy().astype(np.uint8)
        y_pred = (tf.reshape(preds[k], (-1,)) > 0.5).numpy().astype(np.uint8)
        conf[k][0,0] += np.sum((y_true==0)&(y_pred==0))
        conf[k][0,1] += np.sum((y_true==0)&(y_pred==1))
        conf[k][1,0] += np.sum((y_true==1)&(y_pred==0))
        conf[k][1,1] += np.sum((y_true==1)&(y_pred==1))

    # --- regression heads ----------------------------------------------------
    for k in reg_keys:
        y_true = tf.reshape(yb[k],   (-1,)).numpy()
        y_pred = tf.reshape(preds[k], (-1,)).numpy()
        diff   = y_pred - y_true
        reg_sum_abs[k] += np.abs(diff).sum()
        reg_sum_sq[k]  += np.square(diff).sum()
        reg_n[k]       += diff.size

# ── 4. Compute final metrics -------------------------------------------------
seg_rows = []
for k, cm in conf.items():
    TN,FP,FN,TP = cm.ravel()
    acc  = (TP+TN)/(TP+TN+FP+FN)
    prec = TP/(TP+FP) if TP+FP else 0
    rec  = TP/(TP+FN) if TP+FN else 0
    f1   = 2*prec*rec/(prec+rec) if prec+rec else 0
    seg_rows.append(dict(Task=k, Acc=acc, Prec=prec, Rec=rec, F1=f1,
                         TN=TN, FP=FP, FN=FN, TP=TP))
df_seg = pd.DataFrame(seg_rows).set_index("Task")

reg_rows = []
for k in reg_keys:
    n   = reg_n[k]
    mse = reg_sum_sq[k]/n
    mae = reg_sum_abs[k]/n
    reg_rows.append(dict(Task=k, MSE=mse, MAE=mae))
df_reg = pd.DataFrame(reg_rows).set_index("Task")

# ── 5. Display results -------------------------------------------------------
print("### Segmentation"); display(df_seg)
print("### Regression");   display(df_reg)

# ── 6. Confusion-matrix heat-maps --------------------------------------------
for k, cm in conf.items():
    plt.figure(figsize=(4,4))
    plt.title(f"{k.capitalize()} – Confusion matrix")
    plt.imshow(cm, cmap="Blues"); plt.xlabel("Pred"); plt.ylabel("True")
    for (i,j),v in np.ndenumerate(cm):
        plt.text(j, i, str(v), ha="center", va="center")
    plt.colorbar(); plt.show()


Evaluation: 1345it [18:55:11, 58.96s/it]