<a href="https://colab.research.google.com/github/MestDash/PID/blob/main/notebooks/NN_optimizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing some stuff

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler, OneHotEncoder, RobustScaler, QuantileTransformer, PowerTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.model_selection import train_test_split
import tensorflow as tf, random
from tensorflow.keras import layers, models, optimizers, regularizers, losses, metrics
import os
from collections import Counter, defaultdict
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import balanced_accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import roc_curve
import seaborn as sns
import statistics
import matplotlib.patches as patches
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score

!pip install bayesian-optimization
from bayes_opt import BayesianOptimization

# Some functions

In [3]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='tensorflow')

def find_optimal_threshold(y_true, y_prob, metric="balanced_accuracy", num_thresholds=200):
    """
    Find the threshold that maximizes a given metric.

    Parameters
    ----------
    y_true : array-like
        True binary labels (0/1).
    y_prob : array-like
        Predicted probabilities for the positive class.
    metric : str
        Metric to optimize. One of {"balanced_accuracy", "f1", "precision", "recall"}.
    num_thresholds : int
        Number of thresholds to evaluate between 0 and 1.

    Returns
    -------
    best_thresh : float
        Threshold that maximizes the chosen metric.
    best_score : float
        Score achieved at the best threshold.
    """
    # Choose metric function
    metric_funcs = {
        "balanced_accuracy": balanced_accuracy_score,
        "f1": f1_score,
        "precision": precision_score,
        "recall": recall_score,
    }
    if metric not in metric_funcs:
        raise ValueError(f"Unknown metric '{metric}'. Choose from {list(metric_funcs.keys())}")

    best_thresh, best_score = 0.5, -1
    thresholds = np.linspace(0.0, 1.0, num_thresholds)

    for t in thresholds:
        preds = (y_prob >= t).astype(int)
        try:
            score = metric_funcs[metric](y_true, preds)
        except ValueError:
            # Can happen if no positive predictions
            continue
        if score > best_score:
            best_thresh, best_score = t, score

    #print(f"[Threshold Optimization] Best {metric}: {best_score:.4f} at threshold={best_thresh:.3f}")
    return best_thresh

def stratified_split(df: pd.DataFrame, test_size: float = 0.2, random_state: int = 17):
    X = df.drop(columns=['IUIS', 'IUIS extended', 'PCODE','y'])
    y = df['y']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        stratify=y,
        random_state=random_state
    )

    return X_train, X_test, y_train, y_test


def labelize(df):
    df = df.copy()

    def assign_label(row):
        # Handle both dash and no-dash variants
        if row['IUIS'] in ['No arguments for lymphoid-PID', 'No arguments for lymphoid PID']:
            return 'DC'
        else:
            main_label = row['IUIS'].split(':')[0].strip()
            if main_label == 'III':
                try:
                    first_digit = int(str(row['IUIS extended'])[0])
                    if first_digit == 4:
                        return 'IIIb'
                    else:
                        return 'IIIa'
                except (ValueError, IndexError):
                    return 'IIIa'  # default if parsing fails
            else:
                return main_label

    df['y'] = df.apply(assign_label, axis=1)
    return df


RANDOM_STATE = 42
N_SPLITS = 5
CORR_THRESHOLD = 0.95
N_EPOCHS_BIN = 50
N_EPOCHS_MULTI = 50
BATCH_SIZE = 32
SMOTE_IMBALANCE_THRESHOLD = 0.5  # trigger SMOTE when minority/majority ratio < this

np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# Target classes (canonical order)
ALL_CLASSES = ["DC", "I", "II", "IIIa", "IIIb", "IV"]
STAGE3_CLASSES = ["II", "IIIa", "IIIb", "IV"]  # in this order for softmax head

class FeatureFilter(BaseEstimator, TransformerMixin):
    def __init__(self, corr_threshold=0.95):
        self.corr_threshold = corr_threshold
        self.to_drop_ = []

    def fit(self, X, y=None):
        # Keep only numeric columns
        numeric_df = X.select_dtypes(include=[np.number])

        # Drop constant features
        constant_features = [col for col in numeric_df.columns if numeric_df[col].nunique() <= 1]

        # Drop highly correlated features
        corr_matrix = numeric_df.corr().abs()
        upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
        high_corr = [column for column in upper_triangle.columns if any(upper_triangle[column] > self.corr_threshold)]

        # Merge and store
        self.to_drop_ = list(set(constant_features + high_corr))
        #print('These features will be dropped:')
        #print(self.to_drop_)
        return self

    def transform(self, X):
        return X.drop(columns=self.to_drop_, errors='ignore')


def build_preprocessor_binary(X_train: pd.DataFrame):
    X_train = X_train.copy()
    num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = X_train.select_dtypes(exclude=[np.number]).columns.tolist()

    cat_pipeline = Pipeline(
        steps=[
            ("impute", SimpleImputer(strategy="constant", fill_value="NA")),
            ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
        ]
    )

    numeric_pipe = Pipeline(steps=[
        ("scale",     QuantileTransformer(n_quantiles=X_train.shape[0])),
        ("impute", KNNImputer(n_neighbors=5)),
        ("minmax",      MinMaxScaler())
    ])

    pre = ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, num_cols),
            ("cat", cat_pipeline, cat_cols)
        ],
        remainder="drop"
    )
    pre.fit(X_train)
    return pre


def build_preprocessor_multi(X_train: pd.DataFrame):
    X_train = X_train.copy()
    num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = X_train.select_dtypes(exclude=[np.number]).columns.tolist()

    cat_pipeline = Pipeline(
        steps=[
            ("impute", SimpleImputer(strategy="constant", fill_value="NA")),
            ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
        ]
    )

    numeric_pipe = Pipeline(steps=[
        ("scale",     PowerTransformer()),
        ("impute", KNNImputer(n_neighbors=5))
    ])

    pre = ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, num_cols),
            ("cat", cat_pipeline, cat_cols)
        ],
        remainder="drop"
    )
    pre.fit(X_train)
    return pre

In [4]:
def build_binary_nn(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])
    return model

def build_multiclass_nn(input_dim, n_classes):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(n_classes, activation='softmax')
    ])
    model.compile(
        optimizer=Adam(1e-3),
        loss='sparse_categorical_crossentropy',  # <-- fixed here
        metrics=['accuracy']
    )
    return model

In [5]:
def build_stage1_labels(y):
    # I vs Rest
    return (y == "I").astype(int)

def build_stage2_labels(y):
    # DC vs Rest (but we train Stage2 on the subset y != I)
    mask = (y != "I")
    y2 = (y[mask] == "DC").astype(int)
    return mask, y2

def build_stage3_labels(y):
    # Multiclass on {II, IIIa, IIIb, IV}; we train Stage3 on y not in {I, DC}
    mask = ~y.isin(["I", "DC"])
    y3 = y[mask]
    # Map to indices in STAGE3_CLASSES order
    y3_idx = y3.map({c: i for i, c in enumerate(STAGE3_CLASSES)})
    return mask, y3_idx

def soft_gated_combine_probs(pI, pDC, pStage3_rows):
    """
    pI:    (n,) probability of "I"
    pDC:   (n,) probability of "DC"
    pS3:   (n,4) probabilities over [II, IIIa, IIIb, IV]
    Returns:
      final_probs: (n, 6) in class order ALL_CLASSES
    """
    n = len(pI)
    final = np.zeros((n, 6), dtype=float)
    # indices in ALL_CLASSES
    idx = {c: i for i, c in enumerate(ALL_CLASSES)}
    for i in range(n):
        p_a = pI[i]
        p_b = (1 - p_a) * pDC[i]
        rest = (1 - p_a) * (1 - pDC[i]) * pStage3_rows[i]  # length 4
        final[i, idx["I"]] = p_a
        final[i, idx["DC"]] = p_b
        # map stage3 probs to class indices
        for j, cls in enumerate(STAGE3_CLASSES):
            final[i, idx[cls]] = rest[j]
        # normalize to sum 1 (numerical safety)
        s = final[i].sum()
        if s > 0:
            final[i] /= s
        else:
            final[i, :] = 1.0 / 6.0
    return final

def maybe_smote(X_tr, y_tr, is_multiclass=False):
    cnt = Counter(y_tr)
    if len(cnt) <= 1:
        return X_tr, y_tr  # nothing to do
    minc, maxc = min(cnt.values()), max(cnt.values())
    if (minc / maxc) < SMOTE_IMBALANCE_THRESHOLD:
        if is_multiclass:
            sm = SMOTE(random_state=RANDOM_STATE)
        else:
            sm = SMOTE(random_state=RANDOM_STATE)
        X_tr, y_tr = sm.fit_resample(X_tr, y_tr)
        print("SMOTE oversampling:", Counter(y_tr))
    return X_tr, y_tr


# Optimization

In [6]:
# Custom transformer for conditional log1p
class ConditionalLog1p(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.cols_to_transform = None

    def fit(self, X, y=None):
        X = pd.DataFrame(X)
        # store which columns need log1p (max > 1)
        self.cols_to_transform = [
            col for col in X.columns if X[col].max() > 1
        ]
        return self

    def transform(self, X):
        X = pd.DataFrame(X).copy()
        for col in self.cols_to_transform:
            X[col] = np.log1p(X[col])
        return X.values

# Preprocessing pipeline
def build_preprocessor(X_train: pd.DataFrame, stage):
    X_train = X_train.copy()
    num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = X_train.select_dtypes(exclude=[np.number]).columns.tolist()

    cat_pipeline = Pipeline(
        steps=[
            ("impute", SimpleImputer(strategy="constant", fill_value="NA")),
            ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
        ]
    )

    if stage == 1:
        numeric_pipe = Pipeline(steps=[
            ("scale",     QuantileTransformer(n_quantiles=X_train.shape[0])),
            ("impute", KNNImputer(n_neighbors=5)),
            ("minmax",      MinMaxScaler())
        ])

    elif stage == 2:
        numeric_pipe = Pipeline(steps=[
            ("scale",     ConditionalLog1p()),
            ("impute", KNNImputer(n_neighbors=5)),
            ("minmax",      MinMaxScaler())
        ])

    elif stage == 3:
        numeric_pipe = Pipeline(steps=[
            ("scale",     PowerTransformer()),
            ("impute", KNNImputer(n_neighbors=5))
        ])

    elif stage == 4:
        numeric_pipe = Pipeline(steps=[
            ("scale",     RobustScaler()),
            ("impute", KNNImputer(n_neighbors=5)),
            ("minmax",      MinMaxScaler())
        ])

    pre = ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, num_cols),
            ("cat", cat_pipeline, cat_cols)
        ],
        remainder="drop"
    )
    pre.fit(X_train)
    return pre

# Preprocessing pipeline
def build_preprocessor_multi(X_train: pd.DataFrame):
    X_train = X_train.copy()
    num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = X_train.select_dtypes(exclude=[np.number]).columns.tolist()

    cat_pipeline = Pipeline(
        steps=[
            ("impute", SimpleImputer(strategy="constant", fill_value="NA")),
            ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
        ]
    )

    numeric_pipe = Pipeline(steps=[
        ("scale",     PowerTransformer()),
        ("impute", KNNImputer(n_neighbors=5))
    ])

    pre = ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, num_cols),
            ("cat", cat_pipeline, cat_cols)
        ],
        remainder="drop"
    )
    pre.fit(X_train)
    return pre

In [7]:
from tensorflow.keras import backend as K
import gc
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=3,
    min_lr=1e-7
)

callbacks = [early_stop, reduce_lr]

def run_bayesopt_all(
    X,
    y,
    stage=1,
    label_builder=None,
    mask=None,
    init_points=25,
    n_iter=50,
    random_state=17,
    corr_threshold=0.9,
):
    # --- Apply mask if provided ---
    if mask is not None:
        X = X.loc[mask]
        y = y.loc[mask]

    # --- Build labels (stage-specific if provided) ---
    if label_builder is not None:
        y_labels = label_builder(y)
    else:
        y_labels = y

    # --- Encode labels ---
    le = LabelEncoder()
    y_encoded = le.fit_transform(y_labels)
    num_classes = len(le.classes_)
    is_binary = num_classes == 2

    # --- Categorical search spaces ---
    activation_choices = ["relu", "selu", "elu"]
    optimizer_choices = ["adam", "sgd", "rmsprop"]

    # --- Model builder ---
    def create_model(input_dim, stage, units, learning_rate, dropout_rate, num_hidden_layers, l2_reg, activation, optimizer_name):
        model = Sequential()
        model.add(Input(shape=(input_dim,)))

        # Hidden layers
        for _ in range(num_hidden_layers - 1):
            model.add(Dense(units, activation=activation, kernel_regularizer=l2(l2_reg)))
            if not stage == 1 or stage == 3:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))

        if stage == 1 or stage == 2:
            model.add(Dense(64, activation=activation, kernel_regularizer=l2(l2_reg)))
            if stage == 1:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))
            model.add(Dense(32, activation=activation, kernel_regularizer=l2(l2_reg)))
            if stage == 1:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))
        else:
            model.add(Dense(128, activation=activation, kernel_regularizer=l2(l2_reg)))
            if stage == 3:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))
            model.add(Dense(64, activation=activation, kernel_regularizer=l2(l2_reg)))
            if stage == 3:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))
            model.add(Dense(32, activation=activation, kernel_regularizer=l2(l2_reg)))
            if stage == 3:
                model.add(BatchNormalization())
            model.add(Dropout(dropout_rate))

        # Output layer
        if is_binary:
            model.add(Dense(1, activation="sigmoid", kernel_regularizer=l2(l2_reg)))
            loss_fn = "binary_crossentropy"
        else:
            model.add(Dense(num_classes, activation="softmax", kernel_regularizer=l2(l2_reg)))
            loss_fn = "sparse_categorical_crossentropy"

        # Select optimizer
        if optimizer_name == "adam":
            optimizer = Adam(learning_rate=learning_rate)
        elif optimizer_name == "sgd":
            optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
        elif optimizer_name == "rmsprop":
            optimizer = RMSprop(learning_rate=learning_rate)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer_name}")

        model.compile(
            optimizer=optimizer,
            loss=loss_fn,
            metrics=["accuracy"],
        )

        return model

    def train_and_evaluate(units, learning_rate, dropout_rate, num_hidden_layers,
                       batch_size, epochs, l2_reg, activation_idx, optimizer_idx,
                       threshold=0.5):
        units = int(units)
        num_hidden_layers = int(num_hidden_layers)
        batch_size = int(batch_size)
        epochs = int(epochs)
        activation = activation_choices[int(round(activation_idx))]
        optimizer_name = optimizer_choices[int(round(optimizer_idx))]

        scores_upper = []

        for i in range(5):
            skf_outer = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
            scores = []

            # Outer loop → test fold
            for trainval_idx, test_idx in skf_outer.split(X, y_encoded):
                X_trainval, X_test = X.iloc[trainval_idx], X.iloc[test_idx]
                y_trainval, y_test = y_encoded[trainval_idx], y_encoded[test_idx]

                # Inner split → train vs val (3 vs 1)
                skf_inner = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
                inner_train_idx, val_idx = next(skf_inner.split(X_trainval, y_trainval))

                X_tr, X_val = X_trainval.iloc[inner_train_idx], X_trainval.iloc[val_idx]
                y_tr, y_val = y_trainval[inner_train_idx], y_trainval[val_idx]

                # --- Fit preprocessing only on training ---
                ff = FeatureFilter(corr_threshold=corr_threshold)
                ff.fit(X_tr)
                X_tr = ff.transform(X_tr)
                X_val = ff.transform(X_val)
                X_test = ff.transform(X_test)

                pre = build_preprocessor(X_tr, stage)

                X_tr_prep = pre.fit_transform(X_tr)
                X_val_prep = pre.transform(X_val)
                X_test_prep = pre.transform(X_test)

                input_dim = X_tr_prep.shape[1]

                # --- Compute class weights ---
                classes = np.unique(y_tr)
                class_weights = compute_class_weight("balanced", classes=classes, y=y_tr)
                class_weight_dict = dict(zip(classes, class_weights))

                # --- Build model ---
                model = create_model(input_dim, stage, units, learning_rate, dropout_rate,
                                 num_hidden_layers, l2_reg, activation, optimizer_name)

                # --- Train model (monitor val fold) ---
                model.fit(
                    X_tr_prep,
                    y_tr,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=0,
                    class_weight=class_weight_dict,
                    validation_data=(X_val_prep, y_val),
                    callbacks=callbacks,
                )

                # --- Evaluate on test fold ---
                y_pred_prob = model.predict(X_test_prep, verbose=0)
                if is_binary:
                    p_val = model.predict(X_val_prep, verbose=0).ravel()
                    thresh = find_optimal_threshold(y_val, p_val)
                    y_pred = (y_pred_prob.ravel() >= thresh).astype(int)
                else:
                    y_pred = np.argmax(y_pred_prob, axis=1)

                bal_acc = balanced_accuracy_score(y_test, y_pred)
                scores.append(bal_acc)

            scores_upper.append(np.mean(scores))
            del X_tr, X_val, X_test, X_tr_prep, X_val_prep, X_test_prep, model
            K.clear_session()
            gc.collect()

        return np.mean(scores_upper)


    # --- Define search space ---
    pbounds = {
        "units": (32, 256),
        "learning_rate": (1e-4, 1e-2),
        "dropout_rate": (0.0, 0.5),
        "num_hidden_layers": (1, 5),
        "batch_size": (16, 128),
        "epochs": (30, 120),
        "l2_reg": (1e-6, 1e-2),
        "activation_idx": (0, len(activation_choices) - 1),
        "optimizer_idx": (0, len(optimizer_choices) - 1),
    }


    optimizer = BayesianOptimization(
        f=train_and_evaluate,
        pbounds=pbounds,
        random_state=random_state,
        verbose=2,
    )

    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    return optimizer, le

In [10]:
df = pd.read_csv("df_cluster.csv")
df = labelize(df)
X_train = df.drop(columns=['IUIS', 'IUIS extended', 'PCODE','y'])
y_train = df['y']

optimizer_stage1, le_stage1 = run_bayesopt_all(
    X_train,
    y_train,
    stage=1,
    label_builder=build_stage1_labels,
    init_points=15,
    n_iter=50,
    random_state=17,
    corr_threshold=0.9
)

print("Best Stage 1 result:", optimizer_stage1.max)
print("Stage 1 classes:", le_stage1.classes_)

|   iter    |  target   |   units   | learni... | dropou... | num_hi... | batch_... |  epochs   |  l2_reg   | activa... | optimi... |
-------------------------------------------------------------------------------------------------------------------------------------




| [39m1        [39m | [39m0.8048151[39m | [39m98.004960[39m | [39m0.0053528[39m | [39m0.0957603[39m | [39m1.2716014[39m | [39m104.14237[39m | [39m89.070016[39m | [39m0.0063755[39m | [39m1.1512057[39m | [39m0.0781258[39m |
| [35m2        [39m | [35m0.8532400[39m | [35m112.15024[39m | [35m0.0094622[39m | [35m0.0300223[39m | [35m4.4561684[39m | [35m114.25653[39m | [35m34.607429[39m | [35m0.0065245[39m | [35m1.1035027[39m | [35m1.1950265[39m |


KeyboardInterrupt: 

In [11]:
df = pd.read_csv("df_cluster.csv")
df = labelize(df)
X_train = df.drop(columns=['IUIS', 'IUIS extended', 'PCODE','y'])
y_train = df['y']

mask2, y2 = build_stage2_labels(y_train)

optimizer_stage2, le_stage2 = run_bayesopt_all(
    X_train,
    y_train,
    stage=2,
    label_builder=lambda yy: y2,
    mask=mask2,
    init_points=30,
    n_iter=60,
    corr_threshold=0.9
)

print("Best Stage 2 result:", optimizer_stage2.max)
print("Stage 2 classes:", le_stage2.classes_)

|   iter    |  target   |   units   | learni... | dropou... | num_hi... | batch_... |  epochs   |  l2_reg   | activa... | optimi... |
-------------------------------------------------------------------------------------------------------------------------------------


KeyboardInterrupt: 

In [9]:
df = pd.read_csv("df_cluster.csv")
df = labelize(df)
X_train = df.drop(columns=['IUIS', 'IUIS extended', 'PCODE','y'])
y_train = df['y']

mask3, y3 = build_stage3_labels(y_train)

optimizer_stage3, le_stage3 = run_bayesopt_all(
    X_train,
    y_train,
    stage=3,
    label_builder=lambda yy: y3,
    mask=mask1,
    init_points=15,
    n_iter=50,
    corr_threshold=0.9
)

print("Best Stage 3 result:", optimizer_stage3.max)
print("Stage 3 classes:", le_stage3.classes_)

NameError: name 'mask1' is not defined

In [12]:
df = pd.read_csv("df_cluster.csv")
df = labelize(df)
X_train = df.drop(columns=['IUIS', 'IUIS extended', 'PCODE','y'])
y_train = df['y']

optimizer_all, le_ = run_bayesopt_all(
    X_train,
    y_train,
    stage=4,
    label_builder=None,
    init_points=30,
    n_iter=60,
    corr_threshold=0.9
)

print("Best result:", optimizer_all.max)
print("Classes:", le_.classes_)

|   iter    |  target   |   units   | learni... | dropou... | num_hi... | batch_... |  epochs   |  l2_reg   | activa... | optimi... |
-------------------------------------------------------------------------------------------------------------------------------------


KeyboardInterrupt: 