In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
from torch.utils.data import Dataset
import pandas as pd

In [2]:
class TextTabularDataset(Dataset):
    def __init__(
        self,
        df,
        text_col="text",
        label_col="sepsis",
        drop_cols=("text", "hadm_id", "sepsis"),
        tokenizer=None,
        max_len=256
    ):
        """
        Args:
            df: pandas DataFrame (train / val / test)
            text_col: column containing clinical text
            label_col: target label (sepsis)
            drop_cols: columns to exclude from tabular features
            tokenizer: HuggingFace tokenizer
            max_len: max token length for text
        """
        self.df = df.reset_index(drop=True)
        self.text_col = text_col
        self.label_col = label_col
        self.drop_cols = drop_cols
        self.tokenizer = tokenizer
        self.max_len = max_len

        # Identify tabular feature columns automatically
        self.tabular_cols = [
            c for c in df.columns if c not in drop_cols
        ]

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # -------- TEXT --------
        text = str(row[self.text_col])
        text_enc = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        # -------- TABULAR --------
        tab_features = torch.tensor(
            [float(row[c]) for c in self.tabular_cols],
            dtype=torch.float32
        )

        # -------- LABEL --------
        label = torch.tensor(int(row[self.label_col]), dtype=torch.long)

        return {
            "input_ids": text_enc["input_ids"].squeeze(0),
            "attention_mask": text_enc["attention_mask"].squeeze(0),
            "tab": tab_features,
            "label": label
        }


In [5]:
import os

print(os.getcwd())
print(os.path.exists("saved_text_models/clinicalbert_sepsis"))
print(os.listdir("saved_text_models"))

/home/firuz/Firuz/MM-DES/MIMIC
True
['biobert_sepsis', 'clinicalbert_sepsis', 'pubmedbert_sepsis']


In [7]:
import os
os.listdir("saved_text_models/clinicalbert_sepsis")

['config.json', 'model.safetensors']

In [12]:
import os
from transformers import AutoTokenizer

assert os.path.exists("saved_text_models/clinicalbert_sepsis")

tokenizer = AutoTokenizer.from_pretrained(
    "saved_text_models/clinicalbert_sepsis",
    use_fast=False
)

In [13]:
import pandas as pd
from torch.utils.data import DataLoader

train_df = pd.read_csv("data/train.csv")
val_df   = pd.read_csv("data/dsel.csv")
test_df  = pd.read_csv("data/test.csv")

train_ds = TextTabularDataset(train_df, tokenizer=tokenizer)
val_ds   = TextTabularDataset(val_df, tokenizer=tokenizer)
test_ds  = TextTabularDataset(test_df, tokenizer=tokenizer)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=16)
test_loader  = DataLoader(test_ds, batch_size=16)


### Encoder 

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from transformers import AutoModel

In [21]:
class TextTabularEmbeddingModel(nn.Module):
    def __init__(
        self,
        text_model_path,
        tab_input_dim,
        tab_hidden_dim=128,
        embed_dim=128,
        freeze_text=True
    ):
        super().__init__()

        # ---------- TEXT ENCODER ----------
        self.text_encoder = AutoModel.from_pretrained(text_model_path)

        if freeze_text:
            for p in self.text_encoder.parameters():
                p.requires_grad = False

        text_hidden_dim = self.text_encoder.config.hidden_size
        self.text_proj = nn.Linear(text_hidden_dim, embed_dim)

        # ---------- TABULAR ENCODER ----------
        self.tabular_encoder = nn.Sequential(
            nn.Linear(tab_input_dim, tab_hidden_dim),
            nn.ReLU(),
            nn.Linear(tab_hidden_dim, embed_dim)
        )

        # ---------- LOGIT SCALE ----------
        self.logit_scale = nn.Parameter(
            torch.ones([]) * np.log(1 / 0.07)
        )

    def forward(self, input_ids, attention_mask, tabular):
        # Text embedding
        text_out = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        cls_embed = text_out.last_hidden_state[:, 0]
        text_embed = self.text_proj(cls_embed)

        # Tabular embedding
        tab_embed = self.tabular_encoder(tabular)

        # Normalize (Symile expects normalized embeddings)
        text_embed = F.normalize(text_embed, dim=1)
        tab_embed = F.normalize(tab_embed, dim=1)

        return [text_embed, tab_embed], self.logit_scale.exp()


In [22]:
from tqdm import tqdm

def train_embedding_model(
    embedding_model,
    train_loader,
    val_loader=None,
    loss_fn=None,
    optimizer=None,
    device="cuda",
    num_epochs=20,
    print_every=1
):
    embedding_model.to(device)
    history = []

    for epoch in range(num_epochs):
        embedding_model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            tab = batch["tab"].to(device)

            optimizer.zero_grad()

            embeds, logit_scale = embedding_model(
                input_ids, attention_mask, tab
            )

            loss = loss_fn(embeds, logit_scale)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        history.append(avg_loss)

        if (epoch + 1) % print_every == 0:
            print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_loss:.4f}")

            if val_loader is not None:
                embedding_model.eval()
                val_loss = 0
                with torch.no_grad():
                    for batch in val_loader:
                        input_ids = batch["input_ids"].to(device)
                        attention_mask = batch["attention_mask"].to(device)
                        tab = batch["tab"].to(device)

                        embeds, logit_scale = embedding_model(
                            input_ids, attention_mask, tab
                        )
                        val_loss += loss_fn(embeds, logit_scale).item()

                val_loss /= len(val_loader)
                print(f"Validation Loss: {val_loss:.4f}")
                embedding_model.train()

    return embedding_model, history


In [24]:
from symile import Symile

device = "cuda" if torch.cuda.is_available() else "cpu"

embedding_model = TextTabularEmbeddingModel(
    text_model_path="saved_text_models/clinicalbert_sepsis",
    tab_input_dim=train_ds[0]["tab"].shape[0],
    freeze_text=True
)

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, embedding_model.parameters()),
    lr=1e-3
)

symile_loss_fn = Symile()

embedding_model, history = train_embedding_model(
    embedding_model=embedding_model,
    train_loader=train_loader,
    val_loader=val_loader,   # DSEL is perfect here
    loss_fn=symile_loss_fn,
    optimizer=optimizer,
    device=device,
    num_epochs=5
)


Epoch 1/5: 100%|████████████████████████████████████████████| 357/357 [00:31<00:00, 11.46it/s]


Epoch 1/5 | Train Loss: 2.7993
Validation Loss: 2.7627


Epoch 2/5: 100%|████████████████████████████████████████████| 357/357 [00:31<00:00, 11.49it/s]


Epoch 2/5 | Train Loss: 2.7732
Validation Loss: 2.7621


Epoch 3/5: 100%|████████████████████████████████████████████| 357/357 [00:31<00:00, 11.51it/s]


Epoch 3/5 | Train Loss: 2.7722
Validation Loss: 2.7623


Epoch 4/5: 100%|████████████████████████████████████████████| 357/357 [00:30<00:00, 11.56it/s]


Epoch 4/5 | Train Loss: 2.7712
Validation Loss: 2.7623


Epoch 5/5: 100%|████████████████████████████████████████████| 357/357 [00:31<00:00, 11.47it/s]


Epoch 5/5 | Train Loss: 2.7712
Validation Loss: 2.7625


### DSEL (RoC Defining)

In [25]:
import torch
import torch.nn.functional as F
from tqdm import tqdm

def fit_dsel_embeddings(embedding_model, dsel_loader, device="cuda"):
    """
    Compute text and tabular embeddings for the DSEL dataset.
    
    Returns:
        dsel_embeddings: dict with 'text' and 'tab'
        logit_scale_exp: learned temperature
    """
    embedding_model.eval()
    embedding_model.to(device)

    dsel_embeddings = {
        "text": [],
        "tab": []
    }

    with torch.no_grad():
        for batch in tqdm(dsel_loader, desc="Computing DSEL embeddings"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            tab = batch["tab"].to(device)

            embeds, logit_scale_exp = embedding_model(
                input_ids, attention_mask, tab
            )

            text_embed, tab_embed = embeds

            dsel_embeddings["text"].append(
                F.normalize(text_embed, dim=1)
            )
            dsel_embeddings["tab"].append(
                F.normalize(tab_embed, dim=1)
            )

    # Concatenate
    dsel_embeddings["text"] = torch.cat(dsel_embeddings["text"], dim=0)
    dsel_embeddings["tab"]  = torch.cat(dsel_embeddings["tab"], dim=0)

    return dsel_embeddings, logit_scale_exp


In [None]:
def get_roc_faiss(
    embedding_model,
    test_sample,
    faiss_index_text,
    faiss_index_tab,
    logit_scale_exp,
    k=7,
    weights={"text": 0.5, "tab": 0.5},
    device="cuda"
):
    """
    Compute Region of Competence (RoC) using FAISS for fast NN search.
    Assumes FAISS indices are built on normalized DSEL embeddings.
    """

    embedding_model.eval()

    # --- Prepare test input ---
    input_ids = test_sample["input_ids"].unsqueeze(0).to(device)
    attention_mask = test_sample["attention_mask"].unsqueeze(0).to(device)
    tab = test_sample["tab"].unsqueeze(0).to(device)

    with torch.no_grad():
        embeds, _ = embedding_model(input_ids, attention_mask, tab)
        text_embed, tab_embed = embeds

        # Normalize → cosine similarity via inner product
        text_embed = F.normalize(text_embed, dim=1)
        tab_embed  = F.normalize(tab_embed,  dim=1)

    # --- FAISS search ---
    q_text = text_embed.cpu().numpy()
    q_tab  = tab_embed.cpu().numpy()

    D_text, I_text = faiss_index_text.search(q_text, k)
    D_tab,  I_tab  = faiss_index_tab.search(q_tab,  k)

    # --- Fuse similarities ---
    scores = (
        weights["text"] * D_text +
        weights["tab"]  * D_tab
    ) * logit_scale_exp

    return I_text[0], scores[0]


In [26]:
def get_roc(
    embedding_model,
    test_sample,
    dsel_embeddings,
    logit_scale_exp,
    k=7,
    weights={"text": 0.5, "tab": 0.5},
    device="cuda"
):
    """
    Compute Region of Competence (RoC) for a test sample.
    """
    embedding_model.eval()

    # --- Prepare test input ---
    input_ids = test_sample["input_ids"].unsqueeze(0).to(device)
    attention_mask = test_sample["attention_mask"].unsqueeze(0).to(device)
    tab = test_sample["tab"].unsqueeze(0).to(device)

    with torch.no_grad():
        embeds, _ = embedding_model(
            input_ids, attention_mask, tab
        )
        text_embed, tab_embed = embeds

        text_embed = F.normalize(text_embed, dim=1)
        tab_embed  = F.normalize(tab_embed, dim=1)

    # --- Similarity computation ---
    sim_text = logit_scale_exp * torch.matmul(
        text_embed, dsel_embeddings["text"].T
    )
    sim_tab = logit_scale_exp * torch.matmul(
        tab_embed, dsel_embeddings["tab"].T
    )

    similarity_scores = (
        weights["text"] * sim_text +
        weights["tab"] * sim_tab
    ).squeeze()

    # --- Top-K neighbors ---
    k = min(k, similarity_scores.numel())
    topk_values, topk_indices = torch.topk(similarity_scores, k)

    return topk_indices.cpu().numpy(), topk_values.cpu().numpy()


In [28]:
# Step 1: Compute DSEL embeddings
dsel_embeddings, logit_scale_exp = fit_dsel_embeddings(
    embedding_model,
    val_loader,
    device=device
)

# Step 2: Pick a test sample
test_sample = test_ds[100]

# Step 3: Get RoC
topk_indices, topk_scores = get_roc(
    embedding_model,
    test_sample,
    dsel_embeddings,
    logit_scale_exp,
    k=7,
    weights={"text": 0.6, "tab": 0.4}
)

print(f"True label: {test_sample['label']}")
print("Top-K DSEL indices:", topk_indices)
print("Similarity scores:", topk_scores)


Computing DSEL embeddings: 100%|██████████████████████████████| 77/77 [00:04<00:00, 18.47it/s]


True label: 0
Top-K DSEL indices: [1101 1207  227  310  729 1151  817]
Similarity scores: [13.640659  13.6345005 13.617228  13.615385  13.610577  13.609999
 13.607555 ]


In [33]:
def print_topk_text_from_tokens(
    test_sample,
    topk_indices,
    topk_scores,
    dsel_dataset,
    tokenizer,
    max_chars=400
):
    """
    Print test sample and Top-K DSEL samples by decoding input_ids.
    """

    def decode(sample):
        return tokenizer.decode(
            sample["input_ids"],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True
        )

    def truncate(txt, n=max_chars):
        return txt[:n] + ("..." if len(txt) > n else "")

    print("\n" + "=" * 120)
    print("TEST SAMPLE")
    print("=" * 120)
    print(f"Label: {test_sample['label'].item() if torch.is_tensor(test_sample['label']) else test_sample['label']}")
    print("-" * 120)
    print(truncate(decode(test_sample)))
    print("=" * 120)

    print("\nTOP-K REGION OF COMPETENCE (DSEL)")
    print("=" * 120)

    for rank, (idx, score) in enumerate(zip(topk_indices, topk_scores), start=1):
        dsel_sample = dsel_dataset[idx]

        print(f"\n--- Rank {rank} ---")
        print(f"DSEL index: {idx}")
        print(f"Label: {dsel_sample['label'].item() if torch.is_tensor(dsel_sample['label']) else dsel_sample['label']}")
        print(f"Similarity score: {float(score):.4f}")
        print("-" * 100)
        print(truncate(decode(dsel_sample)))
        print("-" * 100)

    print("=" * 120)


In [34]:
print_topk_text_from_tokens(
    test_sample=test_sample,
    topk_indices=topk_indices,
    topk_scores=topk_scores,
    dsel_dataset=val_ds,
    tokenizer=tokenizer
)



TEST SAMPLE
Label: 1
------------------------------------------------------------------------------------------------------------------------
indication : acute renal insufficiency. history of esophageal mass. technique : renal sonogram. comparison : none available. findings : the right kidney measures 10. 7 cm and appears within normal limits without stones, masses or overt hydronephrosis. the left kidney measures 11. 6 cm and demonstrates moderate hydronephrosis and proximal hydroureter level to which it can be traced on this examinat...

TOP-K REGION OF COMPETENCE (DSEL)

--- Rank 1 ---
DSEL index: 1101
Label: 0
Similarity score: 13.6407
----------------------------------------------------------------------------------------------------
indication : desaturation to 60 %. comparison : no relevant comparisons available. findings : limited study due to patient position and 2 ap portable semi - upright images were taken. there are diffuse bilateral pulmonary opacities, worst at the bas

### Load Base Models 

In [35]:
import numpy as np

X_train_tab = []
y_train = []

for i in range(len(train_ds)):
    sample = train_ds[i]
    X_train_tab.append(sample["tab"].numpy())
    y_train.append(sample["label"].item())

X_train_tab = np.array(X_train_tab)
y_train = np.array(y_train)

print(X_train_tab.shape, y_train.shape)


(5709, 74) (5709,)


In [36]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_tab_scaled = scaler.fit_transform(X_train_tab)


In [37]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from catboost import CatBoostClassifier
from tqdm import tqdm

models = {
    "RandomForest": RandomForestClassifier(
        n_estimators=200, random_state=42
    ),
    "CatBoost": CatBoostClassifier(
        iterations=200, verbose=0, random_state=42
    ),
    "XGBoost": xgb.XGBClassifier(
        n_estimators=200,
        eval_metric="logloss",
        random_state=42
    ),
    "MLP": MLPClassifier(
        hidden_layer_sizes=(128, 64),
        max_iter=500,
        random_state=42
    ),
    "SVC": SVC(
        probability=True,
        random_state=42
    ),
}


In [38]:
base_tabular_models = []

for name, model in tqdm(models.items(), desc="Training tabular models"):
    print(f"\nTraining {name}")
    model.fit(X_train_tab_scaled, y_train)
    base_tabular_models.append(model)

Training tabular models:   0%|                                          | 0/5 [00:00<?, ?it/s]


Training RandomForest


Training tabular models:  20%|██████▊                           | 1/5 [00:03<00:14,  3.53s/it]


Training CatBoost


Training tabular models:  40%|█████████████▌                    | 2/5 [00:04<00:05,  1.80s/it]


Training XGBoost


Training tabular models:  60%|████████████████████▍             | 3/5 [00:04<00:02,  1.13s/it]


Training MLP


Training tabular models:  80%|███████████████████████████▏      | 4/5 [00:07<00:01,  1.93s/it]


Training SVC


Training tabular models: 100%|██████████████████████████████████| 5/5 [00:10<00:00,  2.15s/it]


### Language Base models 

In [42]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_language_models = []
base_language_tokenizers = []

MODEL_ROOT = "saved_text_models"

for model_dir in sorted(os.listdir(MODEL_ROOT)):
    model_path = os.path.join(MODEL_ROOT, model_dir)

    print(f"Loading {model_dir}...")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
    model.eval()

    base_language_models.append(model)
    base_language_tokenizers.append(tokenizer)

print(f"\nLoaded {len(base_language_models)} language models")


Loading biobert_sepsis...
Loading clinicalbert_sepsis...
Loading pubmedbert_sepsis...

Loaded 3 language models


### MM-DES Inference Time 

In [43]:
import torch
import torch.nn.functional as F
import numpy as np

def compute_dsel_outputs_text_tabular(
    language_models,
    tabular_models,
    dsel_loader,
    scaler_tabular,
    device="cuda"
):
    """
    Compute predictions on DSEL dataset for competence estimation
    """
    dsel_preds = {"language": [], "tabular": []}
    dsel_labels = []

    for batch in dsel_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        tab = batch["tab"].cpu().numpy()
        labels = batch["label"].cpu().numpy()

        dsel_labels.extend(labels)

        # -----------------
        # Language models
        # -----------------
        lang_batch = []
        for model in language_models:
            model.eval()
            with torch.no_grad():
                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                probs = F.softmax(outputs.logits, dim=1).cpu().numpy()
                lang_batch.append(probs)

        dsel_preds["language"].append(
            np.stack(lang_batch, axis=0)
        )  # [n_models, batch, n_classes]

        # -----------------
        # Tabular models
        # -----------------
        tab_scaled = scaler_tabular.transform(tab)
        tab_batch = []

        for model in tabular_models:
            probs = model.predict_proba(tab_scaled)
            tab_batch.append(probs)

        dsel_preds["tabular"].append(
            np.stack(tab_batch, axis=0)
        )

    # Concatenate batches
    dsel_preds["language"] = np.concatenate(dsel_preds["language"], axis=1)
    dsel_preds["tabular"] = np.concatenate(dsel_preds["tabular"], axis=1)

    return dsel_preds, np.array(dsel_labels)


In [44]:
def predict_des_weighted_text_tabular(
    test_sample,
    language_models,
    tabular_models,
    dsel_preds,
    dsel_labels,
    topk_indices,
    scaler_tabular,
    device="cuda",
    weights={"language": 0.5, "tabular": 0.5}
):
    """
    DES prediction using text + tabular models
    """

    roc_labels = dsel_labels[topk_indices]

    # -----------------
    # Language competence
    # -----------------
    lang_comp = []
    for i in range(len(language_models)):
        preds = dsel_preds["language"][i][topk_indices].argmax(1)
        comp = (preds == roc_labels).mean()
        lang_comp.append(comp)

    # -----------------
    # Tabular competence
    # -----------------
    tab_comp = []
    for i in range(len(tabular_models)):
        preds = dsel_preds["tabular"][i][topk_indices].argmax(1)
        comp = (preds == roc_labels).mean()
        tab_comp.append(comp)

    # Normalize competences
    lang_w = torch.tensor(lang_comp, device=device)
    tab_w = torch.tensor(tab_comp, device=device)

    if lang_w.sum() > 0:
        lang_w = lang_w / lang_w.sum()
    if tab_w.sum() > 0:
        tab_w = tab_w / tab_w.sum()

    lang_w *= weights["language"]
    tab_w *= weights["tabular"]

    # -----------------
    # Final fusion
    # -----------------
    combined = 0

    # Language inference
    input_ids = test_sample["input_ids"].unsqueeze(0).to(device)
    attention_mask = test_sample["attention_mask"].unsqueeze(0).to(device)

    for w, model in zip(lang_w, language_models):
        with torch.no_grad():
            logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            ).logits
        combined += w * logits

    # Tabular inference
    tab = test_sample["tab"].cpu().numpy().reshape(1, -1)
    tab_scaled = scaler_tabular.transform(tab)

    for w, model in zip(tab_w, tabular_models):
        probs = torch.tensor(
            model.predict_proba(tab_scaled),
            device=device
        )
        combined += w * probs

    final_probs = combined / combined.sum(dim=1, keepdim=True)
    pred = final_probs.argmax(dim=1).item()

    return pred, final_probs


In [47]:
# Step 0: Compute DSEL predictions once
dsel_preds, dsel_labels = compute_dsel_outputs_text_tabular(
    language_models=base_language_models,
    tabular_models=base_tabular_models,
    dsel_loader=val_loader,
    scaler_tabular=scaler,
    device=device
)

In [55]:
results, true_labels, results_probs = [], [], []

for test_sample in tqdm(test_ds):
    topk_indices, _ = get_roc(
        embedding_model,
        test_sample,
        dsel_embeddings,
        logit_scale_exp,
        k=10
    )

    y_pred, y_proba = predict_des_weighted_text_tabular(
        test_sample,
        base_language_models,
        base_tabular_models,
        dsel_preds,
        dsel_labels,
        topk_indices,
        scaler,
        device=device,
        weights={"language": 0.4, "tabular": 0.6}
    )

    results.append(y_pred)
    results_probs.append(y_proba.cpu().numpy())

    label = test_sample["label"]
    true_labels.append(label.item() if torch.is_tensor(label) else label)

100%|█████████████████████████████████████████████████████| 1224/1224 [00:33<00:00, 36.39it/s]


### Evaluation 

In [56]:
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    balanced_accuracy_score,
    roc_auc_score
)
import numpy as np

y_true = np.array(true_labels)
y_pred = np.array(results)

# FIX HERE
y_proba = np.array(results_probs).squeeze(1)

# Metrics
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
bal_acc = balanced_accuracy_score(y_true, y_pred)

# AUROC
if y_proba.shape[1] == 2:
    auroc = roc_auc_score(y_true, y_proba[:, 1])
else:
    auroc = roc_auc_score(y_true, y_proba, multi_class="ovr")

print(f"Accuracy:           {acc:.4f}")
print(f"F1 (macro):         {f1:.4f}")
print(f"Balanced Accuracy:  {bal_acc:.4f}")
print(f"AUROC:              {auroc:.4f}")


Accuracy:           0.7598
F1 (macro):         0.7578
Balanced Accuracy:  0.7598
AUROC:              0.8386
