In [None]:
import joblib
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Multimodal late-fusion / stacking demo
# Saves a meta-classifier that learns from per-modality model probabilities.
# Assumes the maternal models were trained on X_resampled and are evaluated on X_test_scaled
# (both variables must already exist in the notebook).
# If other modality inputs are available and aligned (CTG, ECG LSTM, Ultrasound), pass them through model_proba and
# include their probability outputs when building the train/test feature matrices.


# --- load saved models (skip if not present) ---
# Registry of successfully loaded artifacts, keyed by a short model name.
loaded = {}
def try_joblib(path, key):
    """
    Best-effort load of a joblib artifact.

    On success the object is stored in ``loaded[key]`` and a confirmation is
    printed; on any failure the reason is printed and ``loaded`` is left
    without that key, so callers can test ``key in loaded``.
    """
    try:
        artifact = joblib.load(path)
        loaded[key] = artifact
        print(f"Loaded {path}")
    except Exception as err:
        # Deliberately broad: a missing or incompatible artifact must not stop the cell.
        print(f"Skip loading {path}: {err}")

def try_keras(path, key):
    """
    Best-effort load of a Keras model (without compiling it).

    On success the model is stored in ``loaded[key]`` and a confirmation is
    printed; on any failure the reason is printed and nothing is stored.
    """
    try:
        network = load_model(path, compile=False)
        loaded[key] = network
        print(f"Loaded {path}")
    except Exception as err:
        # Deliberately broad: a missing or incompatible model file must not stop the cell.
        print(f"Skip loading {path}: {err}")

# (loader, artifact path, key under which the object is stored in `loaded`),
# listed in the order the artifacts are loaded: maternal, CTG, then the Keras models.
_ARTIFACTS = [
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_best_rf.joblib", "maternal_best_rf"),
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_xgboost.joblib", "maternal_xgb"),
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_scaler.joblib", "maternal_scaler"),
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_randomforest_model.joblib", "ctg_rf"),
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_xgboost_model.joblib", "ctg_xgb"),
    (try_joblib, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_scaler.joblib", "ctg_scaler"),
    (try_keras, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\fetal_ecg_lstm_model.h5", "ecg_lstm"),
    (try_keras, r"C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\fetal_ultrasound_classifier.h5", "us_cnn"),
]

# Each loader prints its own outcome and never raises, so one failure does not stop the rest.
for _load, _artifact_path, _artifact_key in _ARTIFACTS:
    _load(_artifact_path, _artifact_key)

# --- helper to get probability matrix for any model (n_samples, n_classes) ---
def model_proba(model, X):
    """
    Return a (n_samples, n_classes) probability array for the given model.

    Resolution order:
      1. ``model.predict_proba(X)`` when the model has it (returned unchanged).
      2. Otherwise ``model.predict(X)`` (e.g. Keras models), interpreted as:
         - 2-D output: taken to already be per-class probabilities (no softmax
           is applied, so a model emitting logits must be wrapped by the caller);
           a single column is expanded to ``[1 - p, p]``.
         - 1-D integer/bool output: treated as hard class labels and one-hot
           encoded with ``max(label) + 1`` columns.
         - 1-D floating output: assumed to be positive-class scores from a
           single-output binary model and expanded to ``[1 - p, p]``.
         - anything else is returned as produced by ``predict``.

    Raises:
        ValueError: if ``model`` is None or hard labels contain negative values.
        AttributeError: if the model has neither ``predict_proba`` nor ``predict``.
    """
    if model is None:
        raise ValueError("model is None")
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)
    if not hasattr(model, "predict"):
        raise AttributeError(
            f"{type(model).__name__} has neither predict_proba nor predict"
        )

    p = np.asarray(model.predict(X))

    if p.ndim == 1:
        if p.dtype.kind in "biu":
            # Hard labels: one-hot so downstream code can rely on a 2-D array.
            labels = p.astype(int)
            if labels.size and labels.min() < 0:
                raise ValueError("cannot one-hot encode negative class labels")
            n_classes = int(labels.max()) + 1 if labels.size else 0
            onehot = np.zeros((labels.size, n_classes), dtype=float)
            onehot[np.arange(labels.size), labels] = 1.0
            return onehot
        if p.dtype.kind == "f":
            # Flattened single-output scores: handle like the (n, 1) case below.
            p = p.reshape(-1, 1)

    # Binary model with a single output column -> two-column probabilities.
    if p.ndim == 2 and p.shape[1] == 1:
        p = np.concatenate([1 - p, p], axis=1)
    return p

# --- Build maternal modality probability features (train + test) ---
# Base models are collected in a fixed order (disk RF, disk XGBoost, then the
# in-memory `rf`); that order fixes the column layout of the meta features.
maternal_models = [
    loaded[name] for name in ("maternal_best_rf", "maternal_xgb") if name in loaded
]

# Include `rf` from the notebook namespace when it exists. Identity comparison is
# used so the membership test never invokes a model's `==`, which removes the need
# to swallow exceptions here.
_notebook_rf = globals().get("rf")
if _notebook_rf is not None and not any(m is _notebook_rf for m in maternal_models):
    maternal_models.append(_notebook_rf)

if not maternal_models:
    raise RuntimeError("No maternal models available for fusion. Ensure maternal_best_rf or maternal_xgboost or rf exist.")

# Prefer the scaler loaded from disk, else fall back to the notebook's `scaler` variable.
# NOTE(review): the scaler is only checked for presence in this cell; it is not applied
# to X_resampled / X_test_scaled here (presumably they are already scaled -- confirm).
if "maternal_scaler" in loaded:
    mat_scaler = loaded["maternal_scaler"]
else:
    mat_scaler = globals().get("scaler", None)

if mat_scaler is None:
    raise RuntimeError("Maternal scaler not found in disk or notebook. Needed to transform tabular maternal features.")

# X_resampled and X_test_scaled should be available in the notebook (from previous cells)
X_train_tab = globals().get("X_resampled", None)
X_test_tab = globals().get("X_test_scaled", None)
y_train_tab = globals().get("y_resampled", None)
y_test_tab = globals().get("y_true", None)  # y_true is maternal test labels exposed in notebook

# Report exactly which notebook variables are absent so the earlier cell to re-run is obvious.
_missing_vars = [
    name
    for name, value in (
        ("X_resampled", X_train_tab),
        ("X_test_scaled", X_test_tab),
        ("y_resampled", y_train_tab),
        ("y_true", y_test_tab),
    )
    if value is None
]
if _missing_vars:
    raise RuntimeError(
        "Required variables X_resampled, X_test_scaled, y_resampled, y_true must exist in the notebook."
        f" Missing: {', '.join(_missing_vars)}"
    )

# Compute per-model probabilities; one (train, test) pair of arrays per base model.
train_probas_parts = []
test_probas_parts = []
for m in maternal_models:
    try:
        p_train = model_proba(m, X_train_tab)
        p_test = model_proba(m, X_test_tab)
    except Exception as e:
        # Chain the original exception so the real cause stays in the traceback.
        raise RuntimeError(f"Failed to get probabilities from a maternal model: {e}") from e
    train_probas_parts.append(p_train)
    test_probas_parts.append(p_test)

# Widest class count across all parts; narrower parts are zero-padded up to this width.
max_cols = max(part.shape[1] for part in train_probas_parts + test_probas_parts)
def pad_cols(arr, max_cols):
    """
    Right-pad a 2-D array with zero columns until it has ``max_cols`` columns.

    An array that already has ``max_cols`` or more columns is returned as the
    same object, untouched.
    """
    shortfall = max_cols - arr.shape[1]
    if shortfall <= 0:
        return arr
    filler = np.zeros((arr.shape[0], shortfall))
    return np.hstack([arr, filler])

# Bring every per-model probability block to the same width, then place the blocks
# side by side: one group of `max_cols` columns per base model, in `maternal_models` order.
train_parts = [pad_cols(p, max_cols) for p in train_probas_parts]
test_parts = [pad_cols(p, max_cols) for p in test_probas_parts]

X_meta_train = np.hstack(train_parts)
X_meta_test = np.hstack(test_parts)

print("Meta features shape -- train:", X_meta_train.shape, "test:", X_meta_test.shape)

# --- Train a simple meta-classifier on concatenated probabilities ---
# `multi_class` is intentionally not passed: it is a deprecated LogisticRegression
# parameter, and with the lbfgs solver a multiclass target is fit with the
# multinomial loss by default.
# NOTE(review): the meta features for training come from scoring the base models on
# X_resampled; if that is the data they were fit on, these probabilities are
# optimistic -- consider out-of-fold predictions.
meta = LogisticRegression(max_iter=2000, solver='lbfgs')
meta.fit(X_meta_train, y_train_tab)
y_meta_pred = meta.predict(X_meta_test)

print("Meta fusion results:")
print("Accuracy:", accuracy_score(y_test_tab, y_meta_pred))
print(classification_report(y_test_tab, y_meta_pred, digits=4))

# Record which base models were actually used, in meta-feature column order, instead of
# a fixed list: a model that failed to load must not appear in the saved metadata.
_named_sources = [
    (name, loaded[name]) for name in ("maternal_best_rf", "maternal_xgb") if name in loaded
]
if "rf" in globals():
    _named_sources.append(("rf", globals()["rf"]))
base_model_names = [
    next(
        (name for name, obj in _named_sources if obj is _used),
        type(_used).__name__,  # fallback for a model not traceable to a known source
    )
    for _used in maternal_models
]

# Save meta model and note which base models were used
joblib.dump({"meta": meta, "base_models": base_model_names}, "multimodal_fusion_meta.joblib")
print("Saved multimodal_fusion_meta.joblib")

# --- Notes / extension points ---
# - To incorporate CTG, ECG (LSTM) and Ultrasound models you must provide aligned training samples across modalities.
#   For each sample produce per-modality probability vectors (using model_proba), then concatenate similarly
#   to X_meta_train/X_meta_test and retrain meta.
# - For image model (us_cnn) you can pass arrays of preprocessed images to model_proba(us_cnn, imgs).
# - For sequence (ecg_lstm) pass shape (n, timesteps, channels) arrays to model_proba(ecg_lstm, X_seq).
# - If modalities have different class orders, ensure consistent class ordering when concatenating probabilities.

Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_best_rf.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_xgboost.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\maternal_scaler.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_randomforest_model.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_xgboost_model.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\ctg_scaler.joblib
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\fetal_ecg_lstm_model.h5
Loaded C:\Users\KIIT\Desktop\VS_Code\GitHub\Major_Project\model_artifacts\fetal_ultrasound_classifier.h5


RuntimeError: Required variables X_resampled, X_test_scaled, y_resampled, y_true must exist in the notebook.