In [2]:
import sys

import joblib
import json
import numpy as np
from pathlib import Path
import torch
import xgboost as xgb

from skorch.classifier import NeuralNetClassifier
import torch
import torch.nn as nn
from scipy.special import expit


In [4]:
class FocalLoss(nn.Module):
    """Focal loss for binary targets, computed from raw logits.

    The element-wise binary cross-entropy is multiplied by
    ``alpha * (1 - p_t) ** gamma`` with ``p_t = exp(-BCE)``, so elements the
    model already predicts well contribute less to the total.

    Parameters
    ----------
    alpha : float
        Constant factor applied to every element (the same value is used for
        both classes).
    gamma : float
        Exponent of the ``(1 - p_t)`` modulating term.
    reduction : str
        ``'mean'`` averages and ``'sum'`` adds up the element-wise losses;
        any other value returns the unreduced tensor.
    """

    def __init__(self, alpha=0.25, gamma=5, reduction='mean'):
        super().__init__()
        self.alpha, self.gamma, self.reduction = alpha, gamma, reduction

    def forward(self, logits, targets):
        """Return the focal loss of ``logits`` against ``targets``.

        ``targets`` is reshaped to a single column and cast to the tensor type
        of ``logits`` before the cross-entropy is evaluated.
        """
        target_col = targets.view(-1, 1).type_as(logits)
        per_elem_bce = nn.functional.binary_cross_entropy_with_logits(
            logits, target_col, reduction='none'
        )
        # exp(-BCE) recovers the probability assigned to the true class.
        prob_true = torch.exp(-per_elem_bce)
        weighted = self.alpha * (1 - prob_true) ** self.gamma * per_elem_bce

        if self.reduction == 'mean':
            return weighted.mean()
        if self.reduction == 'sum':
            return weighted.sum()
        return weighted
        
# Class for the ANN model (binary classification)
class DeepBinary(nn.Module):
    """Fully connected network that emits one raw logit per input row.

    The network is ``num_layers`` hidden blocks of
    ``Linear -> LayerNorm -> ReLU -> Dropout`` followed by a final
    ``Linear(hidden_dim, 1)``. The first block uses ``nn.LazyLinear``, so the
    input width is inferred on the first forward pass.

    Parameters
    ----------
    hidden_dim : int
        Width of every hidden layer.
    num_layers : int
        Number of hidden blocks. The first block is always created, so values
        below 1 still produce one hidden block.
    dropout_rate : float
        Dropout probability used in every hidden block.

    Attributes
    ----------
    hidden_dim, num_layers, dropout_rate
        The constructor arguments, kept so the architecture can be read back
        from an instance (e.g. when writing a config file next to the weights).
        Instances unpickled from files created before these attributes were
        stored will not carry them.
    model : nn.Sequential
        The stacked layers; its layout (and therefore the ``state_dict`` keys)
        is unchanged.
    """

    def __init__(self, hidden_dim=64, num_layers=4, dropout_rate=0.25):
        super().__init__()
        # Plain Python attributes: they do not affect parameters or state_dict.
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate

        layers = []
        # First hidden block: input width is resolved lazily.
        layers.append(nn.LazyLinear(hidden_dim))
        layers.append(nn.LayerNorm(hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_rate))

        # Remaining hidden blocks all map hidden_dim -> hidden_dim.
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.LayerNorm(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))

        layers.append(nn.Linear(hidden_dim, 1))  # final logit
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the logits."""
        return self.model(x)

# Defining NeuralNet class which will be necessary for use with skorch and skopt
class NeuralNetBinaryClassifier(NeuralNetClassifier):
    """``NeuralNetClassifier`` variant for a module that outputs raw logits."""

    def predict_proba(self, X):
        """Return class probabilities derived from the module's logits.

        The logits from ``self.forward(X)`` are moved to a NumPy array, passed
        through the logistic sigmoid, and stacked horizontally as
        ``[1 - p, p]``.
        """
        # assumes forward() yields one logit column per sample (as DeepBinary
        # does), giving a two-column result — TODO confirm for other modules
        raw_scores = self.forward(X)
        logit_array = raw_scores.detach().cpu().numpy()
        positive = expit(logit_array)
        negative = 1 - positive
        return np.hstack([negative, positive])

# Mirror the model classes onto the `__main__` module so that objects pickled
# with references such as `__main__.DeepBinary` can be resolved when loading.
# When this code itself runs as `__main__` (e.g. directly in a notebook
# kernel), `_this` and `_main` are the same object and each name is simply
# re-bound to itself.
_main = sys.modules.get("__main__")
_this = sys.modules.get(__name__)  # module object this code is executing in
for _name in ("FocalLoss", "DeepBinary", "NeuralNetBinaryClassifier"):
    if not hasattr(_this, _name) or _main is None:
        continue
    setattr(_main, _name, getattr(_this, _name))

In [None]:
# Re-export the trained artifacts found under ./models, ./ensemble and
# ./thresholds into per-component files below SAVE.
# NOTE(review): joblib.load unpickles arbitrary Python objects; run this only
# on artifacts you created yourself.
import platform
from importlib import metadata as importlib_metadata


def _read_json(path):
    """Load a JSON document and close the file handle afterwards."""
    with open(path) as fh:
        return json.load(fh)


def _write_json(obj, path):
    """Write ``obj`` as indented JSON; the file is flushed and closed on exit."""
    with open(path, "w") as fh:
        json.dump(obj, fh, indent=2)


SAVE = Path("re_exported_models")
for _subdir in ("models", "ensemble", "thresholds"):
    (SAVE / _subdir).mkdir(parents=True, exist_ok=True)

# --- 1) load your existing artifacts (these load fine in your original env) ---
svm_pipe = joblib.load("./models/SVM.pkl")
rf_pipe  = joblib.load("./models/RandomForest.pkl")
lr_pipe  = joblib.load("./models/LogisticRegression.pkl")

# XGBoost: if you previously saved via joblib, load it
xgb_est = joblib.load("./models/XGBoost.pkl")

# ANN: pipeline that contains preproc + skorch classifier.
# The pickle refers to the custom classes by name, so the cell defining
# FocalLoss / DeepBinary / NeuralNetBinaryClassifier must have been run first;
# otherwise unpickling fails with
# "AttributeError: Can't get attribute ... on <module '__main__'>".
ann_pipeline = joblib.load("./models/ANN_pipeline.pkl")
ann_clf = ann_pipeline.named_steps["clf"]

# Ensemble artifacts
ensemble_scaler = joblib.load("./ensemble/scaler.pkl")
beta_vec = np.load("./ensemble/beta_vec.npy")
ens_json = _read_json("./ensemble/ens_threshold.json")
per_model_thresholds = _read_json("./thresholds/per_model_thresholds.json")

# --- 2) Save sklearn pipelines (ok to joblib) ---
joblib.dump(svm_pipe, SAVE / "models/SVM_pipeline.joblib")
joblib.dump(rf_pipe,  SAVE / "models/RandomForest_pipeline.joblib")
joblib.dump(lr_pipe,  SAVE / "models/LogisticRegression_pipeline.joblib")

# --- 3) Save XGBoost in native format (recommended) ---
try:
    booster = xgb_est.get_booster()
    booster.save_model(str(SAVE / "models/XGBoost.json"))
    print("Saved XGBoost booster JSON.")
except Exception:
    # Deliberate best-effort: fallback to joblib if the loaded object is not
    # an XGBClassifier/Booster-style object or the native export fails.
    joblib.dump(xgb_est, SAVE / "models/XGBoost.joblib")
    print("Saved XGBoost via joblib (fallback).")

# --- 4) Save ANN weights-only and preprocessor parts separately ---
# Save skorch weights via save_params (skorch-style)
ann_clf.save_params(f_params=str(SAVE / "models/ANN_skorch_weights.pt"))
# Also save the raw module state_dict (very portable)
torch.save(ann_clf.module_.state_dict(), SAVE / "models/ANN_state_dict.pt")

# Save preprocessing steps from the ANN pipeline (if present)
ann_scaler = ann_pipeline.named_steps.get("scaler", None)
ann_vt = ann_pipeline.named_steps.get("low_var", None)
if ann_scaler is not None:
    joblib.dump(ann_scaler, SAVE / "models/ANN_preprocessor_scaler.joblib")
if ann_vt is not None:
    joblib.dump(ann_vt, SAVE / "models/ANN_preprocessor_varthresh.joblib")

# skorch usually holds the criterion as a *class* (the instance lives in
# `criterion_`), so `criterion.__class__.__name__` would report "type".
# Resolve the real class name for both the class and the instance case.
_criterion = getattr(ann_clf, "criterion", None)
if _criterion is None:
    _criterion_name = None
elif isinstance(_criterion, type):
    _criterion_name = _criterion.__name__
else:
    _criterion_name = type(_criterion).__name__

# Build and save a small curated config for the ANN (do NOT dump get_params()).
ann_module = ann_clf.module_
ann_config = {
    "module_class": "DeepBinary",   # document the class name you will import in your package
    "module_args": {
        # These attributes might not exist on module_; the fallbacks are then
        # used verbatim. They match the architecture printed by the inspection
        # cell later in this notebook — re-check them if the model is retrained.
        "hidden_dim": getattr(ann_module, "hidden_dim", 117),
        "num_layers": getattr(ann_module, "num_layers", 1),
        "dropout_rate": getattr(ann_module, "dropout_rate", 0.32177566932368323),
    },
    "skorch": {
        "lr": float(getattr(ann_clf, "lr", 1e-3)),
        "max_epochs": int(getattr(ann_clf, "max_epochs", 100)),
        "batch_size": int(getattr(ann_clf, "batch_size", 128)),
        "criterion": _criterion_name,
    },
    "notes": "Recreate DeepBinary with module_args and then load ANN_state_dict.pt"
}
_write_json(ann_config, SAVE / "models/ANN_config.json")

# --- 5) Save ensemble artifacts (scaler, beta_vec, thresholds) ---
joblib.dump(ensemble_scaler, SAVE / "ensemble/scaler.joblib")
np.save(SAVE / "ensemble/beta_vec.npy", beta_vec)
_write_json(ens_json, SAVE / "ensemble/ens_threshold.json")
_write_json(per_model_thresholds, SAVE / "thresholds/per_model_thresholds.json")

# --- 6) Save a small environment manifest to help future reproducibility ---
# importlib.metadata (stdlib) replaces the deprecated pkg_resources API.
# Packages that are not installed are simply left out of the manifest.
_tracked_packages = ("torch", "skorch", "scikit-learn", "xgboost", "numpy", "pandas", "joblib")
_package_versions = {}
for _pkg in _tracked_packages:
    try:
        _package_versions[_pkg] = importlib_metadata.version(_pkg)
    except importlib_metadata.PackageNotFoundError:
        continue

env_info = {
    "python": platform.python_version(),
    "platform": platform.platform(),
    "packages": _package_versions,
}
_write_json(env_info, SAVE / "metadata_env.json")

print("Re-export complete into:", SAVE)


AttributeError: Can't get attribute 'NeuralNetBinaryClassifier' on <module '__main__'>

In [5]:
# ANN: pipeline that contains preproc + skorch classifier
ann = joblib.load("./models/ANN_pipeline.pkl")
ann_clf = ann.named_steps["clf"]
# load_params is called for its effect on the classifier object itself: the
# parameters stored in the checkpoint file are loaded into `ann_clf`.
ann_clf.load_params(f_params="./models/ANN_params.pt")
# The underlying torch module, used below to inspect the architecture.
ann_module = ann_clf.module_

import torch
import torch.nn as nn

def extract_deepbinary_architecture(module: nn.Module):
    """Recover DeepBinary-style constructor arguments from a built module.

    Walks ``module.modules()`` and derives:

    * ``hidden_dim`` – first entry of ``normalized_shape`` on the first
      ``nn.LayerNorm`` encountered;
    * ``num_layers`` – number of ``nn.Linear`` instances minus one (the final
      ``Linear(hidden_dim -> 1)`` is not a hidden layer), never less than 1;
    * ``dropout_rate`` – ``p`` of the first ``nn.Dropout`` encountered, or
      ``0.25`` when the module contains none.

    Returns
    -------
    dict
        Keys ``"hidden_dim"``, ``"num_layers"`` and ``"dropout_rate"``.

    Raises
    ------
    RuntimeError
        If the module contains no ``nn.LayerNorm``.
    """
    submodules = list(module.modules())

    first_norm = next((m for m in submodules if isinstance(m, nn.LayerNorm)), None)
    first_dropout = next((m for m in submodules if isinstance(m, nn.Dropout)), None)
    n_linear = sum(isinstance(m, nn.Linear) for m in submodules)

    if first_norm is None:
        raise RuntimeError("Could not infer hidden_dim from trained ANN")

    return {
        "hidden_dim": int(first_norm.normalized_shape[0]),
        "num_layers": max(1, n_linear - 1),
        # 0.25 is the fallback when no Dropout layer is present.
        "dropout_rate": 0.25 if first_dropout is None else float(first_dropout.p),
    }

# Infer the DeepBinary hyperparameters from the loaded torch module and show
# them; the result is a dict with hidden_dim, num_layers and dropout_rate.
arch = extract_deepbinary_architecture(ann_module)
print(arch)

{'hidden_dim': 117, 'num_layers': 1, 'dropout_rate': 0.32177566932368323}
