# ICU-Predictor (cleaned)

This notebook has been cleaned: filler, duplicate, and non-working cells were removed.
Sections kept: concise data-download attempt, preprocessing, sequence creation, and model training examples.

## Dataset
This notebook attempts to download the `salikhussaini49/prediction-of-sepsis` dataset using `kagglehub`. If automatic download fails, set `dataset_path` to a local path containing `Dataset.csv`.

In [8]:
# Setup: imports and kagglehub download
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (roc_auc_score, average_precision_score, roc_curve, 
                             precision_recall_curve, confusion_matrix, 
                             precision_score, recall_score, f1_score)

# Reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Try to download dataset
try:
    import kagglehub
    path = kagglehub.dataset_download("salikhussaini49/prediction-of-sepsis")
    print(f"Dataset downloaded to: {path}")
    dataset_path = path
except Exception as e:
    print(f"Download failed ({e}). Using local path or expecting Dataset.csv in working dir.")
    dataset_path = os.getcwd()

# Try multiple possible paths for Dataset.csv
possible_paths = [
    os.path.join(dataset_path, "Dataset.csv"),
    os.path.join(dataset_path, "dataset.csv"),
    os.path.join(os.path.expanduser("~"), "Downloads", "Dataset.csv"),
    os.path.join(os.path.expanduser("~"), "Downloads", "dataset.csv"),
    "Dataset.csv",
    "dataset.csv",
]

csv_file = None
for path in possible_paths:
    if os.path.exists(path):
        csv_file = path
        print(f"✓ Found dataset at: {csv_file}")
        break

if csv_file is None:
    print(f"✗ Dataset.csv not found in any of these locations:")
    for p in possible_paths:
        print(f"  - {p}")
    print("\nSearching for any .csv files in current dir and home...")
    for root, dirs, files in os.walk(os.getcwd()):
        for file in files:
            if file.endswith('.csv'):
                print(f"  Found: {os.path.join(root, file)}")
        break  # only check top level
    df_full = None
else:
    df_full = pd.read_csv(csv_file)
    print(f"✓ Loaded Dataset: {df_full.shape[0]} rows, {df_full.shape[1]} columns")
    print(f"Columns: {df_full.columns.tolist()[:10]}...")  # Show first 10 column names
    print(f"First 3 rows:\n{df_full.head(3)}")

Dataset downloaded to: /home/tired_atlas/.cache/kagglehub/datasets/salikhussaini49/prediction-of-sepsis/versions/2
✓ Found dataset at: /home/tired_atlas/.cache/kagglehub/datasets/salikhussaini49/prediction-of-sepsis/versions/2/Dataset.csv
✓ Loaded Dataset: 1552210 rows, 44 columns
Columns: ['Unnamed: 0', 'Hour', 'HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp', 'EtCO2']...
First 3 rows:
   Unnamed: 0  Hour    HR  O2Sat  Temp  SBP   MAP  DBP  Resp  EtCO2  ...  \
0           0     0   NaN    NaN   NaN  NaN   NaN  NaN   NaN    NaN  ...   
1           1     1  65.0  100.0   NaN  NaN  72.0  NaN  16.5    NaN  ...   
2           2     2  78.0  100.0   NaN  NaN  42.5  NaN   NaN    NaN  ...   

   Fibrinogen  Platelets    Age  Gender  Unit1  Unit2  HospAdmTime  ICULOS  \
0         NaN        NaN  68.54       0    NaN    NaN        -0.02       1   
1         NaN        NaN  68.54       0    NaN    NaN        -0.02       2   
2         NaN        NaN  68.54       0    NaN    NaN        -0.02   

In [9]:
# Data preprocessing: per-patient imputation and scaling
if df_full is None:
    print("\n⚠ Creating synthetic dataset...")
    n_patients = 100
    max_time_steps = 200
    n_features = 15
    data_list = []
    for p_id in range(n_patients):
        n_steps = np.random.randint(50, max_time_steps)
        for t in range(n_steps):
            features = np.random.randn(n_features) * 0.5
            has_sepsis = np.random.rand() > 0.7
            if has_sepsis:
                features[:3] += t / max_time_steps * 2
            sepsis_label = 1 if (has_sepsis and t > n_steps * 0.6) else 0
            data_list.append({'Patient_ID': p_id, **{f'feature_{i}': features[i] for i in range(n_features)}, 'SepsisLabel': sepsis_label})
    df_full = pd.DataFrame(data_list)
    print(f"✓ Synthetic dataset: {df_full.shape[0]} rows, {df_full.shape[1]} columns")

if df_full is not None:
    # Drop unnamed index and standardize patient ID column
    if 'Unnamed: 0' in df_full.columns:
        df_full = df_full.drop(columns=['Unnamed: 0'])
    if 'Patient_ID' in df_full.columns:
        df_full.rename(columns={'Patient_ID': 'patient_id'}, inplace=True)
    
    print(f"Shape: {df_full.shape}")
    
    # Get numeric feature columns (exclude patient_id and SepsisLabel)
    df_imputed = df_full.copy()
    numeric_cols = df_imputed.select_dtypes(include=[np.number]).columns.tolist()
    if 'patient_id' in numeric_cols:
        numeric_cols.remove('patient_id')
    if 'SepsisLabel' in numeric_cols:
        numeric_cols.remove('SepsisLabel')
    
    print(f"✓ Features: {len(numeric_cols)}, Label: SepsisLabel")
    
    # Per-patient imputation: forward-fill, backward-fill, then patient mean
    print("⏳ Imputing missing values per-patient...")
    for col in numeric_cols:
        # Forward-fill within each patient group
        df_imputed[col] = df_imputed.groupby('patient_id')[col].transform(
            lambda x: x.ffill().bfill()
        )
        # Fill remaining NaNs with patient-specific mean
        df_imputed[col] = df_imputed.groupby('patient_id')[col].transform(
            lambda x: x.fillna(x.mean())
        )
        # Fill any remaining NaNs with global mean
        df_imputed[col] = df_imputed[col].fillna(df_imputed[col].mean())
    
    print(f"✓ Imputation complete. NaNs remaining: {df_imputed[numeric_cols].isna().sum().sum()}")
    
    # Per-patient scaling (StandardScaler on each patient's data)
    print("⏳ Scaling features per-patient...")
    df_scaled = df_imputed.copy()
    scaler_dict = {}
    
    for patient_id in df_scaled['patient_id'].unique():
        mask = df_scaled['patient_id'] == patient_id
        scaler = StandardScaler()
        df_scaled.loc[mask, numeric_cols] = scaler.fit_transform(df_scaled.loc[mask, numeric_cols])
        scaler_dict[patient_id] = scaler
    
    print("✓ Scaling complete.")
else:
    print("ERROR: df_full is None.")

Shape: (1552210, 43)
✓ Features: 41, Label: SepsisLabel
⏳ Imputing missing values per-patient...
✓ Imputation complete. NaNs remaining: 0
⏳ Scaling features per-patient...
✓ Imputation complete. NaNs remaining: 0
⏳ Scaling features per-patient...


 -1.20873445 -1.12815215 -1.04756985 -0.96698756 -0.88640526 -0.80582296
 -0.72524067 -0.64465837 -0.56407607 -0.48349378 -0.40291148 -0.32232919
 -0.24174689 -0.16116459 -0.0805823   0.          0.0805823   0.16116459
  0.24174689  0.32232919  0.40291148  0.48349378  0.56407607  0.64465837
  0.72524067  0.80582296  0.88640526  0.96698756  1.04756985  1.12815215
  1.20873445  1.28931674  1.36989904  1.45048134  1.53106363  1.61164593
  1.69222822]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_scaled.loc[mask, numeric_cols] = scaler.fit_transform(df_scaled.loc[mask, numeric_cols])
 -1.20873445 -1.12815215 -1.04756985 -0.96698756 -0.88640526 -0.80582296
 -0.72524067 -0.64465837 -0.56407607 -0.48349378 -0.40291148 -0.32232919
 -0.24174689 -0.16116459 -0.0805823   0.          0.0805823   0.16116459
  0.24174689  0.32232919  0.40291148  0.48349378  0.56407607  0.64465837
  0.72524067  0.80582296  0.88640526  0.96698756  1.04756985  1.12815215
 

✓ Scaling complete.


In [10]:
# Sequence creation: variable-length sequences per patient, padded post-padding to max_seq_len
if df_full is not None:
    # Extract sequences per patient
    max_seq_len = 256
    
    X_seq_list = []
    y_seq_list = []
    patient_ids = []
    
    for patient_id in sorted(df_scaled['patient_id'].unique()):
        mask = df_scaled['patient_id'] == patient_id
        X_pat = df_scaled.loc[mask, numeric_cols].values  # shape: (time_steps, features)
        y_pat = df_scaled.loc[mask, 'SepsisLabel'].values if 'SepsisLabel' in df_scaled.columns else np.ones(mask.sum())  # shape: (time_steps,)
        
        # Post-pad or truncate to max_seq_len
        if len(X_pat) > max_seq_len:
            X_pat = X_pat[-max_seq_len:, :]
            y_pat = y_pat[-max_seq_len:]
        else:
            pad_len = max_seq_len - len(X_pat)
            X_pat = np.vstack([X_pat, np.zeros((pad_len, X_pat.shape[1]))])
            y_pat = np.concatenate([y_pat, np.zeros(pad_len)])
        
        X_seq_list.append(X_pat)
        y_seq_list.append(y_pat)
        patient_ids.append(patient_id)
    
    X_seq = np.array(X_seq_list)  # shape: (n_patients, max_seq_len, n_features)
    y_seq = np.array(y_seq_list)  # shape: (n_patients, max_seq_len)
    
    print(f"Sequence data created: X_seq {X_seq.shape}, y_seq {y_seq.shape}")
    
    # Train/val/test split (80/10/10)
    n = len(X_seq)
    idx = np.arange(n)
    np.random.shuffle(idx)
    n_train = int(0.8 * n)
    n_val = int(0.1 * n)
    
    X_train_seq = X_seq[idx[:n_train]]
    y_train_seq = y_seq[idx[:n_train]]
    X_val_seq = X_seq[idx[n_train:n_train+n_val]]
    y_val_seq = y_seq[idx[n_train:n_train+n_val]]
    X_test_seq = X_seq[idx[n_train+n_val:]]
    y_test_seq = y_seq[idx[n_train+n_val:]]
    
    # Also keep unpadded sequences for test set (for PhysioNet scoring)
    y_test_list = [y_seq_list[i][:np.sum(y_seq_list[i] != 0) + 1] if np.sum(y_seq_list[i] != 0) > 0 else y_seq_list[i] for i in range(len(patient_ids))]
    
    print(f"Train: {X_train_seq.shape}, Val: {X_val_seq.shape}, Test: {X_test_seq.shape}")
else:
    print("ERROR: Cannot create sequences. df_full is None.")

Sequence data created: X_seq (40336, 256, 41), y_seq (40336, 256)
Train: (32268, 256, 41), Val: (4033, 256, 41), Test: (4035, 256, 41)
Train: (32268, 256, 41), Val: (4033, 256, 41), Test: (4035, 256, 41)


In [11]:
# Evaluation functions (MUST COME BEFORE TUNING CELL)
def plot_confusion_matrix(cm, classes=['Neg','Pos'], title='Confusion matrix', cmap=plt.cm.Blues):
    """Plot confusion matrix."""
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    
    thresh = cm.max() / 2.
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

def evaluate_sequence_model(model, X_seq, y_seq, threshold=0.5, name='model'):
    """Evaluate sequence model on flattened predictions."""
    preds = model.predict(X_seq, verbose=0)
    preds_flat = preds.reshape(-1)
    y_flat = y_seq.reshape(-1)
    
    auc = roc_auc_score(y_flat, preds_flat)
    avg_prec = average_precision_score(y_flat, preds_flat)
    y_pred_bin = (preds_flat >= threshold).astype(int)
    prec = precision_score(y_flat, y_pred_bin, zero_division=0)
    rec = recall_score(y_flat, y_pred_bin, zero_division=0)
    f1 = f1_score(y_flat, y_pred_bin, zero_division=0)
    cm = confusion_matrix(y_flat, y_pred_bin)
    
    print(f"Evaluation for {name}:")
    print(f"  AUC: {auc:.4f}, AP: {avg_prec:.4f}")
    print(f"  Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")
    print('  Confusion matrix:\n', cm)
    
    fpr, tpr, _ = roc_curve(y_flat, preds_flat)
    precision_vals, recall_vals, _ = precision_recall_curve(y_flat, preds_flat)
    
    plt.figure(figsize=(14,4))
    plt.subplot(1,3,1)
    plt.plot(fpr, tpr, label=f'AUC={auc:.3f}')
    plt.plot([0,1],[0,1],'k--')
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title(f'ROC Curve ({name})')
    plt.legend()
    
    plt.subplot(1,3,2)
    plt.plot(recall_vals, precision_vals, label=f'AP={avg_prec:.3f}')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall ({name})')
    plt.legend()
    
    plt.subplot(1,3,3)
    plot_confusion_matrix(cm, classes=['Neg','Pos'], title=f'Confusion Matrix ({name})')
    
    plt.tight_layout()
    plt.show()
    
    return {'auc': auc, 'ap': avg_prec, 'precision': prec, 'recall': rec, 'f1': f1, 'confusion_matrix': cm}

In [2]:
# Model builders for two-stage tuning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Conv1D, TimeDistributed, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall

input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
os.makedirs('/tmp/icu_tune', exist_ok=True)

def get_optimizer(opt_name, lr):
    """Get optimizer by name."""
    if (opt_name or '').lower() == 'adam':
        return Adam(lr)
    return Adam(lr)

def map_output_to_prob(preds, final_act):
    """Map model outputs to probability-like [0,1] range based on activation."""
    preds = np.asarray(preds)
    if final_act == 'tanh':
        return (preds + 1.0) / 2.0
    if final_act == 'relu':
        return np.clip(preds, 0.0, 1.0)
    return preds

def safe_roc_auc(y_true_flat, pred_flat):
    """Safe AUC computation."""
    try:
        return float(roc_auc_score(y_true_flat, pred_flat))
    except Exception:
        return float('nan')

def build_rnn_model(hp):
    """Build RNN model with hyperparams hp."""
    final_act = hp.get('final_activation', 'sigmoid')
    loss_fn = hp.get('loss', 'binary_crossentropy')
    m = Sequential([
        tf.keras.Input(shape=input_shape),
        SimpleRNN(hp['units'], return_sequences=True),
        Dropout(hp['dropout']),
        TimeDistributed(Dense(1, activation=final_act))
    ])
    opt = get_optimizer(hp.get('optimizer','adam'), hp.get('lr',1e-3))
    m.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy', Precision(), Recall(), AUC(name='auc')])
    return m

def build_cnn_model(hp):
    """Build CNN model with hyperparams hp."""
    final_act = hp.get('final_activation', 'sigmoid')
    loss_fn = hp.get('loss', 'binary_crossentropy')
    m = Sequential([
        tf.keras.Input(shape=input_shape),
        Conv1D(hp['f1'], 3, activation='relu', padding='same'),
        Dropout(hp['dropout']),
        Conv1D(hp['f2'], 3, activation='relu', padding='same'),
        Dropout(hp['dropout']),
        TimeDistributed(Dense(1, activation=final_act))
    ])
    opt = get_optimizer(hp.get('optimizer','adam'), hp.get('lr',1e-3))
    m.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy', Precision(), Recall(), AUC(name='auc')])
    return m

def build_lgstm_model(hp):
    """Build stacked LSTM model with hyperparams hp."""
    final_act = hp.get('final_activation', 'sigmoid')
    loss_fn = hp.get('loss', 'binary_crossentropy')
    m = Sequential([
        tf.keras.Input(shape=input_shape),
        LSTM(hp['u1'], return_sequences=True),
        Dropout(hp['dropout']),
        LSTM(hp['u2'], return_sequences=True),
        Dropout(hp['dropout']),
        TimeDistributed(Dense(1, activation=final_act))
    ])
    opt = get_optimizer(hp.get('optimizer','adam'), hp.get('lr',1e-3))
    m.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy', Precision(), Recall(), AUC(name='auc')])
    return m

2025-12-17 05:11:03.886874: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-17 05:11:03.950146: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765944663.961409    5258 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765944663.965820    5258 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765944664.013288    5258 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

NameError: name 'X_train_seq' is not defined

In [1]:
# STAGE 1: Small grids (fast) to pick best model family
print("=== TWO-STAGE HYPERPARAMETER TUNING ===\n")
print("STAGE 1: Fast model-family selection\n")

small_rnn_grid = { 
    'units':[64], 'dropout':[0.2], 'lr':[1e-3], 'batch_size':[64], 
    'final_activation':['relu'], 'loss':['binary_crossentropy'], 'optimizer':['adam'] 
}
small_cnn_grid = { 
    'f1':[32], 'f2':[64], 'dropout':[0.2], 'lr':[1e-3], 'batch_size':[64], 
    'final_activation':['relu'], 'loss':['binary_crossentropy'], 'optimizer':['adam'] 
}
small_lgstm_grid = { 
    'u1':[64], 'u2':[32], 'dropout':[0.2], 'lr':[1e-3], 'batch_size':[64], 
    'final_activation':['relu'], 'loss':['binary_crossentropy'], 'optimizer':['adam'] 
}

# Helper function for scoring
def trial_score_from_model(m, hp, X_val, y_val):
    """Score a model on validation data."""
    preds = m.predict(X_val, verbose=0)[:,:,0]
    probs = map_output_to_prob(preds, hp.get('final_activation','sigmoid'))
    return safe_roc_auc(y_val.reshape(-1), probs)

def grid_search(build_fn, grid, name, epochs=10, X_train=None, y_train=None, X_val=None, y_val=None):
    """Run grid search over hyperparameters."""
    # Use provided data or fall back to globals
    if X_train is None:
        try:
            X_train = globals()['X_train_seq']
        except KeyError:
            print(f'[{name}] ERROR: X_train_seq not defined. Did you run preprocessing cells first?')
            return None, pd.DataFrame()
    if y_train is None:
        y_train = globals()['y_train_seq']
    if X_val is None:
        X_val = globals()['X_val_seq']
    if y_val is None:
        y_val = globals()['y_val_seq']
    
    # Check if data is empty or invalid
    if X_train is None or X_val is None or X_train.shape[0] == 0 or X_val.shape[0] == 0:
        print(f'[{name}] Skipping: No training or validation data available')
        print(f'  X_train shape: {X_train.shape if X_train is not None else "None"}')
        print(f'  X_val shape: {X_val.shape if X_val is not None else "None"}')
        return None, pd.DataFrame()
    
    results = []
    best_hp = None
    best_score = None
    for hp in ParameterGrid(grid):
        print(f'[{name}] trying {hp}')
        m = build_fn(hp)
        if hp.get('loss') == 'hinge':
            # Hinge loss requires {-1, +1} labels
            y_train_fit = (y_train * 2.0) - 1.0
            y_val_fit = (y_val * 2.0) - 1.0
            try:
                m.fit(X_train, y_train_fit, validation_data=(X_val, y_val_fit), 
                      epochs=epochs, batch_size=hp['batch_size'], verbose=0)
            except Exception as e:
                print('  fit failed:', e)
                continue
            score = trial_score_from_model(m, hp, X_val, y_val)
        else:
            try:
                hist = m.fit(X_train, y_train, validation_data=(X_val, y_val), 
                            epochs=epochs, batch_size=hp['batch_size'], verbose=0)
            except Exception as e:
                print('  fit failed:', e)
                continue
            h = hist.history
            if 'val_auc' in h:
                score = max(h['val_auc'])
            else:
                score = trial_score_from_model(m, hp, X_val, y_val)
        results.append({**hp, 'score': score})
        print(f'  score={score:.4f}')
        if best_score is None or (np.nan_to_num(score) > np.nan_to_num(best_score)):
            best_score = score
            best_hp = hp
            try:
                m.save_weights(f'/tmp/icu_tune/{name}_best_weights.h5')
            except Exception:
                pass
    df = pd.DataFrame(results).sort_values('score', ascending=False).reset_index(drop=True)
    return best_hp, df

best_rnn_hp, rnn_df_small = grid_search(build_rnn_model, small_rnn_grid, 'RNN', epochs=1)
best_cnn_hp, cnn_df_small = grid_search(build_cnn_model, small_cnn_grid, 'CNN', epochs=1)
best_lgstm_hp, lgstm_df_small = grid_search(build_lgstm_model, small_lgstm_grid, 'LGSTM', epochs=1)

# Choose winner
stage1_rows = []
if rnn_df_small.shape[0]>0: stage1_rows.append(('RNN', rnn_df_small.iloc[0].to_dict()))
if cnn_df_small.shape[0]>0: stage1_rows.append(('CNN', cnn_df_small.iloc[0].to_dict()))
if lgstm_df_small.shape[0]>0: stage1_rows.append(('LGSTM', lgstm_df_small.iloc[0].to_dict()))
best_stage1 = None
best_stage1_score = None
for name, row in stage1_rows:
    s = row.get('score', np.nan)
    if best_stage1 is None or (np.nan_to_num(s) > np.nan_to_num(best_stage1_score)):
        best_stage1 = name
        best_stage1_score = s

if best_stage1 is not None:
    print(f'\n✓ Stage-1 winner: {best_stage1} (score={best_stage1_score:.4f})\n')
else:
    print('\n✗ No stage-1 winner (all models failed). Skipping stage-2.\n')

# STAGE 2: Large grids (include relu, hinge, adam)
print("STAGE 2: Exhaustive search for winner\n")
big_rnn_grid = { 
    'units':[64,128], 'dropout':[0.2,0.4], 'lr':[1e-3,1e-4], 'batch_size':[32,64], 
    'final_activation':['sigmoid','tanh','relu'], 'loss':['binary_crossentropy','mse','hinge'], 
    'optimizer':['adam'] 
}
big_cnn_grid = { 
    'f1':[32,64], 'f2':[64,128], 'dropout':[0.2,0.4], 'lr':[1e-3,1e-4], 'batch_size':[32,64], 
    'final_activation':['sigmoid','tanh','relu'], 'loss':['binary_crossentropy','mse','hinge'], 
    'optimizer':['adam'] 
}
big_lgstm_grid = { 
    'u1':[64,128], 'u2':[32,64], 'dropout':[0.2,0.4], 'lr':[1e-3,1e-4], 'batch_size':[32,64], 
    'final_activation':['sigmoid','tanh','relu'], 'loss':['binary_crossentropy','mse','hinge'], 
    'optimizer':['adam'] 
}

if best_stage1 == 'RNN':
    best_big_hp, big_df = grid_search(build_rnn_model, big_rnn_grid, 'RNN_big', epochs=10)
    chosen_builder = build_rnn_model
elif best_stage1 == 'CNN':
    best_big_hp, big_df = grid_search(build_cnn_model, big_cnn_grid, 'CNN_big', epochs=10)
    chosen_builder = build_cnn_model
elif best_stage1 == 'LGSTM':
    best_big_hp, big_df = grid_search(build_lgstm_model, big_lgstm_grid, 'LGSTM_big', epochs=10)
    chosen_builder = build_lgstm_model
else:
    best_big_hp, big_df = None, pd.DataFrame()
    chosen_builder = None

# Save stage results
rnn_df_small.to_csv('/tmp/icu_tune/rnn_stage1.csv', index=False)
cnn_df_small.to_csv('/tmp/icu_tune/cnn_stage1.csv', index=False)
lgstm_df_small.to_csv('/tmp/icu_tune/lgstm_stage1.csv', index=False)
big_df.to_csv('/tmp/icu_tune/big_grid_results.csv', index=False)

if not big_df.empty:
    plt.figure(figsize=(10,6))
    sns.barplot(data=big_df.head(8), x='score', y=big_df.head(8).index)
    plt.title(f'{best_stage1} - Top 8 configs (stage 2)')
    plt.xlabel('Score (val ROC AUC)')
    plt.ylabel('Config rank')
    plt.tight_layout()
    plt.show()
else:
    print('No big-grid results to plot.')

print(f'\n✓ Stage-2 best config: {best_big_hp}' if best_big_hp else '\n✗ No stage-2 results')

=== TWO-STAGE HYPERPARAMETER TUNING ===

STAGE 1: Fast model-family selection



NameError: name 'build_rnn_model' is not defined

In [None]:
# Final experiments: train best found for multiple epochs
if best_big_hp is not None:
    print("\n=== FINAL EXPERIMENTS ===\n")
    final_epochs = [50,100,200]
    final_results = {}
    
    def safe_build_and_load(build_fn, hp, name):
        m = build_fn(hp)
        try:
            m.load_weights(f'/tmp/icu_tune/{name}_best_weights.h5')
            print(f'  loaded warm-start weights for {name}')
        except Exception:
            pass
        return m
    
    for e in final_epochs:
        print(f'Training {best_stage1} for {e} epochs')
        m = safe_build_and_load(chosen_builder, best_big_hp, f'{best_stage1}_big')
        if best_big_hp.get('loss') == 'hinge':
            y_train_fit = (y_train_seq * 2.0) - 1.0
            y_val_fit = (y_val_seq * 2.0) - 1.0
            hist = m.fit(X_train_seq, y_train_fit, validation_data=(X_val_seq, y_val_fit), 
                        epochs=e, batch_size=best_big_hp['batch_size'], verbose=1)
        else:
            hist = m.fit(X_train_seq, y_train_seq, validation_data=(X_val_seq, y_val_seq), 
                        epochs=e, batch_size=best_big_hp['batch_size'], verbose=1)
        met = evaluate_sequence_model(m, X_test_seq, y_test_seq, threshold=0.5, 
                                      name=f'{best_stage1}_final_e{e}')
        final_results[e] = {'history': hist.history, 'metrics': met}
        try:
            m.save_weights(f'./{best_stage1}_final_e{e}_weights.h5')
        except Exception:
            pass
    
    # Plot final AUC vs epochs
    plt.figure(figsize=(8,5))
    xs = sorted(final_results.keys())
    ys = [final_results[x]['metrics']['auc'] for x in xs]
    plt.plot(xs, ys, marker='o', linewidth=2, markersize=8)
    plt.xlabel('Epochs')
    plt.ylabel('Test AUC')
    plt.title(f'{best_stage1} - Final Model: Test AUC vs Epochs')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    
    # Save summary
    rows = []
    for e, d in final_results.items():
        rows.append({
            'model': best_stage1, 'epochs': e, 'auc': d['metrics']['auc'], 
            'ap': d['metrics']['ap'], 'precision': d['metrics']['precision'], 
            'recall': d['metrics']['recall'], 'f1': d['metrics']['f1']
        })
    pd.DataFrame(rows).to_csv('/tmp/icu_tune/final_experiments_summary.csv', index=False)
    print('\nTwo-stage tuning + final experiments complete. Results in /tmp/icu_tune/')
else:
    print("No best_big_hp found; skipping final experiments.")

No best_big_hp found; skipping final experiments.
