In [1]:
import torch
import pandas as pd
import numpy as np
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoModel, AdamW, get_scheduler, set_seed
)
from torch.nn import functional as F
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.model_selection import train_test_split, KFold
from scipy.stats import mannwhitneyu
from sklearn.model_selection import StratifiedKFold

# Seed setup
def seed_everything(seed=6):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(6)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Dataset
# class TextDataset(Dataset):
#     def __init__(self, texts, labels, tokenizer, max_len=128):
#         self.texts = texts
#         self.labels = labels
#         self.tokenizer = tokenizer
#         self.max_len = max_len

#     def __len__(self):
#         return len(self.texts)

#     def __getitem__(self, idx):
#         encoding = self.tokenizer.encode_plus(
#             self.texts[idx],
#             add_special_tokens=True,
#             max_length=self.max_len,
#             padding="max_length",
#             truncation=True,
#             return_attention_mask=True,
#             return_tensors="pt",
#         )
#         return {
#             "input_ids": encoding["input_ids"].squeeze(0),
#             "attention_mask": encoding["attention_mask"].squeeze(0),
#             "label": torch.tensor(self.labels[idx], dtype=torch.long),
#         }
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=512, overlap=128, cases=None):
        self.samples = []

        for idx, (text, label) in enumerate(zip(texts, labels)):
            case_id = cases[idx] if cases is not None else idx
            encoding = tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                return_attention_mask=False,
                return_tensors=None,
            )
            input_ids = encoding["input_ids"]
            
            # Chunking
            start = 0
            while start < len(input_ids):
                end = start + max_len
                chunk = input_ids[start:end]
                
                if len(chunk) < max_len:
                    chunk += [tokenizer.pad_token_id] * (max_len - len(chunk))
                
                self.samples.append({
                    "input_ids": chunk,
                    "label": label,
                    "case_id": case_id,
                    "chunk_id": start // (max_len - overlap)  # optional: which chunk in that case
                })

                if end >= len(input_ids):
                    break
                start += max_len - overlap

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        input_ids = torch.tensor(sample["input_ids"], dtype=torch.long)
        attention_mask = (input_ids != 0).long()  # assuming pad_token_id = 0
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "label": torch.tensor(sample["label"], dtype=torch.long),
            "case_id": sample["case_id"],
            "chunk_id": sample["chunk_id"],  # 可用于后续组合分析
        }


# Fallback for models without classification head
class CustomClassifier(torch.nn.Module):
    def __init__(self, base_model, hidden_size, num_labels=2):
        super().__init__()
        self.base = base_model
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # CLS
        logits = self.classifier(self.dropout(pooled_output))
        loss = None
        if labels is not None:
            loss = F.cross_entropy(logits, labels)
        return type('Output', (), {'loss': loss, 'logits': logits})
def dataset_to_dataframe(dataset, tokenizer):
    records = []
    for sample in dataset:
        input_ids = sample["input_ids"]
        text = tokenizer.decode(input_ids, skip_special_tokens=True)
        records.append({
            "case_id": sample["case_id"],
            "chunk_id": sample["chunk_id"],
            "label": sample["label"].item(),
            "chunk_text": text
        })
    return pd.DataFrame(records)
# Metrics
from sklearn.metrics import (
    roc_auc_score, f1_score, accuracy_score,
    precision_score, recall_score, confusion_matrix
)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
import pandas as pd
import numpy as np

def compute_metrics(y_true, y_prob, y_pred):
    auc = roc_auc_score(y_true, y_prob)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0
    return {
        "AUC": auc,
        "F1": f1,
        "Sensitivity": recall,
        "Specificity": specificity,
        "PPV": precision,
        "NPV": npv
    }

def evaluate_case_level(fold_results):
    results = {}

    # 1. Majority Voting
    voting_df = (
        fold_results
        .assign(pred=(fold_results["prob1"] > 0.5).astype(int))
        .groupby("cased")
        .agg({
            "true_label": "first",
            "pred": lambda x: x.value_counts().idxmax()
        })
        .reset_index()
    )
    y_true = voting_df["true_label"]
    y_pred = voting_df["pred"]
    # For AUC we average prob1 by case
    prob_df = fold_results.groupby("cased").agg({"prob1": "mean"}).reset_index()
    y_prob = prob_df["prob1"]
    results["Majority Voting"] = compute_metrics(y_true, y_prob, y_pred)

    # 2. Soft Voting (average prob)
    soft_df = (
        fold_results
        .groupby("cased")
        .agg({
            "true_label": "first",
            "prob1": "mean"
        })
        .reset_index()
    )
    y_true = soft_df["true_label"]
    y_prob = soft_df["prob1"]
    y_pred = (y_prob > 0.5).astype(int)
    results["Soft Voting"] = compute_metrics(y_true, y_prob, y_pred)

    # 3. Stacking (Logistic Regression)
    stack_df = (
        fold_results
        .groupby("cased")
        .agg({
            "true_label": "first",
            "prob0": "mean",
            "prob1": "mean"
        })
        .reset_index()
    )
    X = stack_df[["prob0", "prob1"]]
    y = stack_df["true_label"]

    # 5-fold CV predicted probabilities
    clf = LogisticRegression()
    y_prob = cross_val_predict(clf, X, y, cv=5, method="predict_proba")[:, 1]
    y_pred = (y_prob > 0.5).astype(int)
    results["Stacking (LR)"] = compute_metrics(y, y_prob, y_pred)

    # Print all results
    for method, metrics in results.items():
        print(f"\n=== {method} ===")
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")


# Training
def train_epoch(model, data_loader, optimizer, scheduler, device, scaler):
    model.train()
    losses = []
    correct_predictions = 0

    for batch in tqdm(data_loader, desc="Training"):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()

        with autocast():
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            logits = outputs.logits

        _, preds = torch.max(logits, dim=1)
        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

    return correct_predictions.double() / len(data_loader.dataset), np.mean(losses)

# Evaluation
def eval_model(model, data_loader, device):
    model.eval()
    true_labels, pred_probs, cased = [], [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Validation"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            probs = F.softmax(outputs.logits, dim=1)

            true_labels.extend(labels.cpu().numpy())
            pred_probs.extend(probs.cpu().numpy())
            cased.extend(input_ids.cpu().numpy())

    return np.array(cased), np.array(true_labels), np.array(pred_probs)

# Model list
# namess = ['deepseekr1','Cluade-3.7','qwen','chatgpt_4o','gemini1.5_pro','LLama3.3']
models = [
    'ckiplab/albert-base-chinese',
    'indiejoseph/bert-base-cantonese',
    'zwzzz/Chinese-MentalBERT',
    'Geotrend/distilbert-base-zh-cased',
    'hfl/chinese-roberta-wwm-ext',
    'hfl/chinese-xlnet-base',
    'hfl/chinese-electra-base-discriminator'
]

names=['text','interviewee_text','new_text','new_interviewee_text']

all_model_metrics = []

for name in names:
    originaldata = pd.read_csv("combine.csv")
    texts = originaldata[name].tolist()
    labels = originaldata["label"].tolist()
    cases = originaldata["case"].tolist()

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for model_name in models:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        result_all_folds = pd.DataFrame()

        for fold_idx, (train_index, val_index) in enumerate(skf.split(texts, labels)):
            train_texts = [texts[i] for i in train_index]
            train_labels = [labels[i] for i in train_index]

            val_texts = [texts[i] for i in val_index]
            val_labels = [labels[i] for i in val_index]
            val_cases = [cases[i] for i in val_index]

            train_dataset = TextDataset(train_texts, train_labels, tokenizer)
            val_dataset = TextDataset(val_texts, val_labels, tokenizer, cases=val_cases)

            train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, generator=torch.Generator().manual_seed(6))
            val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

            try:
                model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)
            except:
                base = AutoModel.from_pretrained(model_name)
                model = CustomClassifier(base_model=base, hidden_size=base.config.hidden_size).to(device)

            optimizer = AdamW(model.parameters(), lr=1e-5, correct_bias=False)
            scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * 5)
            scaler = GradScaler()

            for epoch in range(20):
                print(f"Model: {model_name}, Fold: {fold_idx + 1}, Epoch {epoch + 1}")
                train_acc, train_loss = train_epoch(model, train_loader, optimizer, scheduler, device, scaler)
                print(f"Train loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

            # Evaluation
            cased, true_labels, pred_probs = eval_model(model, val_loader, device)
            val_chunks = dataset_to_dataframe(val_dataset, tokenizer)

            fold_results = pd.DataFrame({
                "cased": val_chunks["case_id"],
                "true_label": val_chunks["label"],
                "prob0": pred_probs[:, 0],
                "prob1": pred_probs[:, 1],
            })

            result_all_folds = pd.concat([result_all_folds, fold_results], ignore_index=True)

        # Majority Voting
        voting_df = (
            result_all_folds
            .assign(pred=(result_all_folds["prob1"] > 0.5).astype(int))
            .groupby("cased")
            .agg({
                "true_label": "first",
                "pred": lambda x: x.value_counts().idxmax(),
                "prob1": "mean"
            })
            .reset_index()
            .rename(columns={"cased": "case", "true_label": "label", "prob1": "prob"})
        )

        y_true = voting_df["label"]
        y_pred = voting_df["pred"]
        y_prob = voting_df["prob"]
        y_case = voting_df["case"]
        metrics = compute_metrics(y_true, y_prob, y_pred)
        model_short = model_name.split("/")[-1]
        print(f"\n=== {model_short} on {name} ===")
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")

        metrics["Model"] = model_short
        metrics["TextType"] = name
        all_model_metrics.append(metrics)

        # Save majority voting results
#         voting_df.to_csv(f"./newresult/{name}_{model_short}_majority_voting_results.csv", index=False)

# # Save all metrics
# metrics_df = pd.DataFrame(all_model_metrics)
# metrics_df.to_csv("./all_model_metrics_summary.csv", index=False)

Using device: cuda


Token indices sequence length is longer than the specified maximum sequence length for this model (1546 > 512). Running this sequence through the model will result in indexing errors
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 94/94 [00:21<00:00,  4.36it/s]


Train loss: 0.5747, Accuracy: 0.7253
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 2


Training: 100%|██████████| 94/94 [00:20<00:00,  4.62it/s]


Train loss: 0.5487, Accuracy: 0.7527
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 3


Training: 100%|██████████| 94/94 [00:20<00:00,  4.56it/s]


Train loss: 0.5258, Accuracy: 0.7520
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 4


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4722, Accuracy: 0.7694
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 5


Training: 100%|██████████| 94/94 [00:20<00:00,  4.55it/s]


Train loss: 0.4232, Accuracy: 0.8068
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 6


Training: 100%|██████████| 94/94 [00:20<00:00,  4.53it/s]


Train loss: 0.4035, Accuracy: 0.8242
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 7


Training: 100%|██████████| 94/94 [00:20<00:00,  4.54it/s]


Train loss: 0.4072, Accuracy: 0.8235
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 8


Training: 100%|██████████| 94/94 [00:20<00:00,  4.55it/s]


Train loss: 0.4064, Accuracy: 0.8229
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 9


Training: 100%|██████████| 94/94 [00:20<00:00,  4.65it/s]


Train loss: 0.4057, Accuracy: 0.8249
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 10


Training: 100%|██████████| 94/94 [00:20<00:00,  4.63it/s]


Train loss: 0.4035, Accuracy: 0.8235
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 11


Training: 100%|██████████| 94/94 [00:20<00:00,  4.62it/s]


Train loss: 0.4047, Accuracy: 0.8229
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 12


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4048, Accuracy: 0.8209
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 13


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4056, Accuracy: 0.8215
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 14


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4063, Accuracy: 0.8222
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 15


Training: 100%|██████████| 94/94 [00:20<00:00,  4.58it/s]


Train loss: 0.4058, Accuracy: 0.8249
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 16


Training: 100%|██████████| 94/94 [00:20<00:00,  4.60it/s]


Train loss: 0.4043, Accuracy: 0.8309
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 17


Training: 100%|██████████| 94/94 [00:20<00:00,  4.56it/s]


Train loss: 0.4036, Accuracy: 0.8235
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 18


Training: 100%|██████████| 94/94 [00:20<00:00,  4.60it/s]


Train loss: 0.4048, Accuracy: 0.8222
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 19


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4042, Accuracy: 0.8215
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 20


Training: 100%|██████████| 94/94 [00:20<00:00,  4.57it/s]


Train loss: 0.4035, Accuracy: 0.8242


Validation: 100%|██████████| 23/23 [00:04<00:00,  4.93it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 1


Training: 100%|██████████| 95/95 [00:20<00:00,  4.57it/s]


Train loss: 0.5676, Accuracy: 0.7317
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 2


Training: 100%|██████████| 95/95 [00:20<00:00,  4.69it/s]


Train loss: 0.5599, Accuracy: 0.7510
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 3


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5382, Accuracy: 0.7610
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 4


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5339, Accuracy: 0.7669
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 5


Training: 100%|██████████| 95/95 [00:20<00:00,  4.75it/s]


Train loss: 0.5114, Accuracy: 0.7729
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 6


Training: 100%|██████████| 95/95 [00:20<00:00,  4.75it/s]


Train loss: 0.5106, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 7


Training: 100%|██████████| 95/95 [00:19<00:00,  4.75it/s]


Train loss: 0.5113, Accuracy: 0.7749
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 8


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5140, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 9


Training: 100%|██████████| 95/95 [00:19<00:00,  4.75it/s]


Train loss: 0.5099, Accuracy: 0.7749
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 10


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5075, Accuracy: 0.7736
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 11


Training: 100%|██████████| 95/95 [00:20<00:00,  4.75it/s]


Train loss: 0.5064, Accuracy: 0.7736
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 12


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5137, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 13


Training: 100%|██████████| 95/95 [00:20<00:00,  4.74it/s]


Train loss: 0.5055, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 14


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5123, Accuracy: 0.7736
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 15


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5121, Accuracy: 0.7749
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 16


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5134, Accuracy: 0.7749
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 17


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5090, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 18


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5130, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 19


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5056, Accuracy: 0.7742
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 20


Training: 100%|██████████| 95/95 [00:20<00:00,  4.73it/s]


Train loss: 0.5045, Accuracy: 0.7749


Validation: 100%|██████████| 22/22 [00:04<00:00,  4.98it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 1


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.5672, Accuracy: 0.7424
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 2


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.5399, Accuracy: 0.7576
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 3


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.5337, Accuracy: 0.7611
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 4


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.5205, Accuracy: 0.7611
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 5


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.5036, Accuracy: 0.7632
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 6


Training: 100%|██████████| 91/91 [00:19<00:00,  4.72it/s]


Train loss: 0.4975, Accuracy: 0.7645
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 7


Training: 100%|██████████| 91/91 [00:19<00:00,  4.72it/s]


Train loss: 0.4952, Accuracy: 0.7652
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 8


Training: 100%|██████████| 91/91 [00:19<00:00,  4.72it/s]


Train loss: 0.4933, Accuracy: 0.7652
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 9


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4959, Accuracy: 0.7652
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 10


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4987, Accuracy: 0.7645
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 11


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4978, Accuracy: 0.7652
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 12


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4944, Accuracy: 0.7645
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 13


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4957, Accuracy: 0.7632
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 14


Training: 100%|██████████| 91/91 [00:19<00:00,  4.71it/s]


Train loss: 0.4940, Accuracy: 0.7639
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 15


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4925, Accuracy: 0.7645
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 16


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.4971, Accuracy: 0.7625
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 17


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.4944, Accuracy: 0.7625
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 18


Training: 100%|██████████| 91/91 [00:19<00:00,  4.74it/s]


Train loss: 0.4927, Accuracy: 0.7632
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 19


Training: 100%|██████████| 91/91 [00:19<00:00,  4.72it/s]


Train loss: 0.4986, Accuracy: 0.7652
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 20


Training: 100%|██████████| 91/91 [00:19<00:00,  4.73it/s]


Train loss: 0.5006, Accuracy: 0.7659


Validation: 100%|██████████| 26/26 [00:05<00:00,  4.99it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 1


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.5878, Accuracy: 0.7285
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 2


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.5682, Accuracy: 0.7400
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 3


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.5512, Accuracy: 0.7400
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 4


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.5276, Accuracy: 0.7387
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 5


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4924, Accuracy: 0.7481
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 6


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4790, Accuracy: 0.7515
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 7


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4818, Accuracy: 0.7508
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 8


Training: 100%|██████████| 93/93 [00:19<00:00,  4.73it/s]


Train loss: 0.4741, Accuracy: 0.7556
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 9


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4760, Accuracy: 0.7522
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 10


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4762, Accuracy: 0.7542
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 11


Training: 100%|██████████| 93/93 [00:19<00:00,  4.71it/s]


Train loss: 0.4770, Accuracy: 0.7536
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 12


Training: 100%|██████████| 93/93 [00:19<00:00,  4.71it/s]


Train loss: 0.4777, Accuracy: 0.7542
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 13


Training: 100%|██████████| 93/93 [00:19<00:00,  4.70it/s]


Train loss: 0.4754, Accuracy: 0.7542
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 14


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4747, Accuracy: 0.7502
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 15


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4779, Accuracy: 0.7549
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 16


Training: 100%|██████████| 93/93 [00:19<00:00,  4.71it/s]


Train loss: 0.4749, Accuracy: 0.7549
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 17


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4764, Accuracy: 0.7508
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 18


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4768, Accuracy: 0.7529
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 19


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4755, Accuracy: 0.7549
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 20


Training: 100%|██████████| 93/93 [00:19<00:00,  4.72it/s]


Train loss: 0.4777, Accuracy: 0.7529


Validation: 100%|██████████| 24/24 [00:04<00:00,  5.02it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 1


Training: 100%|██████████| 94/94 [00:19<00:00,  4.71it/s]


Train loss: 0.5798, Accuracy: 0.7288
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 2


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.5625, Accuracy: 0.7415
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 3


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.5467, Accuracy: 0.7415
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 4


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.5284, Accuracy: 0.7375
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 5


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4953, Accuracy: 0.7508
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 6


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4829, Accuracy: 0.7669
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 7


Training: 100%|██████████| 94/94 [00:19<00:00,  4.70it/s]


Train loss: 0.4836, Accuracy: 0.7689
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 8


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4830, Accuracy: 0.7649
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 9


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4834, Accuracy: 0.7642
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 10


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4842, Accuracy: 0.7642
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 11


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4854, Accuracy: 0.7662
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 12


Training: 100%|██████████| 94/94 [00:19<00:00,  4.71it/s]


Train loss: 0.4844, Accuracy: 0.7669
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 13


Training: 100%|██████████| 94/94 [00:19<00:00,  4.71it/s]


Train loss: 0.4841, Accuracy: 0.7682
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 14


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4851, Accuracy: 0.7615
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 15


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4841, Accuracy: 0.7675
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 16


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4839, Accuracy: 0.7655
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 17


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4850, Accuracy: 0.7622
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 18


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4836, Accuracy: 0.7642
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 19


Training: 100%|██████████| 94/94 [00:20<00:00,  4.69it/s]


Train loss: 0.4828, Accuracy: 0.7615
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 20


Training: 100%|██████████| 94/94 [00:20<00:00,  4.70it/s]


Train loss: 0.4843, Accuracy: 0.7655


Validation: 100%|██████████| 23/23 [00:04<00:00,  5.06it/s]



=== albert-base-chinese on text ===
AUC: 0.7138
F1: 0.7780
Sensitivity: 0.9684
Specificity: 0.0917
PPV: 0.6502
NPV: 0.6250


Token indices sequence length is longer than the specified maximum sequence length for this model (1546 > 512). Running this sequence through the model will result in indexing errors
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indiejoseph/bert-base-cantonese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 94/94 [00:25<00:00,  3.68it/s]


Train loss: 0.5691, Accuracy: 0.7373
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 2


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5086, Accuracy: 0.7721
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 3


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.4170, Accuracy: 0.8255
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 4


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.3036, Accuracy: 0.8803
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 5


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.2216, Accuracy: 0.9184
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 6


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1970, Accuracy: 0.9338
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 7


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.2000, Accuracy: 0.9332
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 8


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1937, Accuracy: 0.9332
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 9


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.1983, Accuracy: 0.9398
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 10


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1939, Accuracy: 0.9332
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 11


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1988, Accuracy: 0.9412
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 12


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.1960, Accuracy: 0.9332
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 13


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.1971, Accuracy: 0.9325
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 14


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.2005, Accuracy: 0.9338
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 15


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.1997, Accuracy: 0.9325
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 16


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1924, Accuracy: 0.9412
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 17


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.1915, Accuracy: 0.9325
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 18


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.2007, Accuracy: 0.9285
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 19


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.1944, Accuracy: 0.9352
Model: indiejoseph/bert-base-cantonese, Fold: 1, Epoch 20


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.2007, Accuracy: 0.9311


Validation: 100%|██████████| 23/23 [00:04<00:00,  4.98it/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indiejoseph/bert-base-cantonese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 1


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.5648, Accuracy: 0.7477
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 2


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.5346, Accuracy: 0.7722
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 3


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.4923, Accuracy: 0.7855
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 4


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.4544, Accuracy: 0.8101
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 5


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3964, Accuracy: 0.8300
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 6


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3713, Accuracy: 0.8506
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 7


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3878, Accuracy: 0.8473
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 8


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3722, Accuracy: 0.8519
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 9


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3813, Accuracy: 0.8433
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 10


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3793, Accuracy: 0.8426
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 11


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3740, Accuracy: 0.8440
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 12


Training: 100%|██████████| 95/95 [00:25<00:00,  3.74it/s]


Train loss: 0.3783, Accuracy: 0.8453
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 13


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3776, Accuracy: 0.8486
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 14


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3734, Accuracy: 0.8473
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 15


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3784, Accuracy: 0.8473
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 16


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3739, Accuracy: 0.8440
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 17


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3760, Accuracy: 0.8426
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 18


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.3738, Accuracy: 0.8446
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 19


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.3731, Accuracy: 0.8426
Model: indiejoseph/bert-base-cantonese, Fold: 2, Epoch 20


Training: 100%|██████████| 95/95 [00:25<00:00,  3.71it/s]


Train loss: 0.3749, Accuracy: 0.8433


Validation: 100%|██████████| 22/22 [00:04<00:00,  4.90it/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indiejoseph/bert-base-cantonese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 1


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.5636, Accuracy: 0.7493
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 2


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.4604, Accuracy: 0.7922
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 3


Training: 100%|██████████| 91/91 [00:24<00:00,  3.71it/s]


Train loss: 0.3753, Accuracy: 0.8317
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 4


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.2634, Accuracy: 0.9003
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 5


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.2015, Accuracy: 0.9217
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 6


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1662, Accuracy: 0.9432
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 7


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1643, Accuracy: 0.9446
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 8


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.1699, Accuracy: 0.9453
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 9


Training: 100%|██████████| 91/91 [00:24<00:00,  3.71it/s]


Train loss: 0.1700, Accuracy: 0.9398
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 10


Training: 100%|██████████| 91/91 [00:24<00:00,  3.71it/s]


Train loss: 0.1696, Accuracy: 0.9404
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 11


Training: 100%|██████████| 91/91 [00:24<00:00,  3.74it/s]


Train loss: 0.1733, Accuracy: 0.9370
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 12


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.1631, Accuracy: 0.9418
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 13


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1634, Accuracy: 0.9432
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 14


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.1651, Accuracy: 0.9432
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 15


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1685, Accuracy: 0.9384
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 16


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1698, Accuracy: 0.9349
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 17


Training: 100%|██████████| 91/91 [00:24<00:00,  3.73it/s]


Train loss: 0.1741, Accuracy: 0.9363
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 18


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.1671, Accuracy: 0.9384
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 19


Training: 100%|██████████| 91/91 [00:24<00:00,  3.72it/s]


Train loss: 0.1698, Accuracy: 0.9391
Model: indiejoseph/bert-base-cantonese, Fold: 3, Epoch 20


Training: 100%|██████████| 91/91 [00:24<00:00,  3.68it/s]


Train loss: 0.1816, Accuracy: 0.9404


Validation: 100%|██████████| 26/26 [00:05<00:00,  4.92it/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indiejoseph/bert-base-cantonese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 1


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.5666, Accuracy: 0.7339
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 2


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.5057, Accuracy: 0.7718
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 3


Training: 100%|██████████| 93/93 [00:25<00:00,  3.71it/s]


Train loss: 0.4330, Accuracy: 0.8111
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 4


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.3327, Accuracy: 0.8639
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 5


Training: 100%|██████████| 93/93 [00:24<00:00,  3.73it/s]


Train loss: 0.2819, Accuracy: 0.8917
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 6


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.2510, Accuracy: 0.9052
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 7


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.2582, Accuracy: 0.9045
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 8


Training: 100%|██████████| 93/93 [00:24<00:00,  3.73it/s]


Train loss: 0.2520, Accuracy: 0.9018
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 9


Training: 100%|██████████| 93/93 [00:25<00:00,  3.70it/s]


Train loss: 0.2572, Accuracy: 0.8991
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 10


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.2496, Accuracy: 0.9059
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 11


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.2597, Accuracy: 0.8978
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 12


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.2484, Accuracy: 0.9066
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 13


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.2503, Accuracy: 0.9012
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 14


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.2641, Accuracy: 0.8978
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 15


Training: 100%|██████████| 93/93 [00:24<00:00,  3.73it/s]


Train loss: 0.2598, Accuracy: 0.8978
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 16


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.2487, Accuracy: 0.9072
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 17


Training: 100%|██████████| 93/93 [00:24<00:00,  3.72it/s]


Train loss: 0.2550, Accuracy: 0.9039
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 18


Training: 100%|██████████| 93/93 [00:25<00:00,  3.72it/s]


Train loss: 0.2516, Accuracy: 0.9086
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 19


Training: 100%|██████████| 93/93 [00:25<00:00,  3.71it/s]


Train loss: 0.2456, Accuracy: 0.9093
Model: indiejoseph/bert-base-cantonese, Fold: 4, Epoch 20


Training: 100%|██████████| 93/93 [00:24<00:00,  3.73it/s]


Train loss: 0.2552, Accuracy: 0.9012


Validation: 100%|██████████| 24/24 [00:04<00:00,  4.93it/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indiejoseph/bert-base-cantonese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 1


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5837, Accuracy: 0.7208
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 2


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5615, Accuracy: 0.7448
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 3


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.4973, Accuracy: 0.7695
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 4


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.4113, Accuracy: 0.8176
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 5


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3521, Accuracy: 0.8564
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 6


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3301, Accuracy: 0.8657
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 7


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3354, Accuracy: 0.8664
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 8


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.3365, Accuracy: 0.8631
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 9


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3340, Accuracy: 0.8717
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 10


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3312, Accuracy: 0.8671
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 11


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.3391, Accuracy: 0.8611
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 12


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3317, Accuracy: 0.8657
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 13


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3314, Accuracy: 0.8697
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 14


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.3339, Accuracy: 0.8731
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 15


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3347, Accuracy: 0.8684
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 16


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.3357, Accuracy: 0.8617
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 17


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.3348, Accuracy: 0.8624
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 18


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.3330, Accuracy: 0.8671
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 19


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3397, Accuracy: 0.8624
Model: indiejoseph/bert-base-cantonese, Fold: 5, Epoch 20


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.3382, Accuracy: 0.8604


Validation: 100%|██████████| 23/23 [00:04<00:00,  4.99it/s]



=== bert-base-cantonese on text ===
AUC: 0.8103
F1: 0.7927
Sensitivity: 0.7947
Specificity: 0.6330
PPV: 0.7906
NPV: 0.6389


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at zwzzz/Chinese-MentalBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 94/94 [00:25<00:00,  3.68it/s]


Train loss: 0.5701, Accuracy: 0.7473
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 2


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5741, Accuracy: 0.7386
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 3


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5366, Accuracy: 0.7433
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 4


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5229, Accuracy: 0.7480
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 5


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5161, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 6


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5036, Accuracy: 0.7520
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 7


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5010, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 8


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5026, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 9


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5000, Accuracy: 0.7533
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 10


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.4995, Accuracy: 0.7540
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 11


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5052, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 12


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5012, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 13


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5041, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 14


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5029, Accuracy: 0.7520
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 15


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5039, Accuracy: 0.7540
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 16


Training: 100%|██████████| 94/94 [00:25<00:00,  3.70it/s]


Train loss: 0.5036, Accuracy: 0.7527
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 17


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5083, Accuracy: 0.7520
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 18


Training: 100%|██████████| 94/94 [00:25<00:00,  3.72it/s]


Train loss: 0.5011, Accuracy: 0.7533
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 19


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.4994, Accuracy: 0.7547
Model: zwzzz/Chinese-MentalBERT, Fold: 1, Epoch 20


Training: 100%|██████████| 94/94 [00:25<00:00,  3.71it/s]


Train loss: 0.5033, Accuracy: 0.7533


Validation: 100%|██████████| 23/23 [00:04<00:00,  5.02it/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at zwzzz/Chinese-MentalBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 1


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.5747, Accuracy: 0.7384
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 2


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.5672, Accuracy: 0.7463
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 3


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.5608, Accuracy: 0.7483
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 4


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.5394, Accuracy: 0.7430
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 5


Training: 100%|██████████| 95/95 [00:25<00:00,  3.72it/s]


Train loss: 0.5205, Accuracy: 0.7497
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 6


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.5001, Accuracy: 0.7550
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 7


Training: 100%|██████████| 95/95 [00:25<00:00,  3.73it/s]


Train loss: 0.5067, Accuracy: 0.7543
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 8


Training: 100%|██████████| 95/95 [00:25<00:00,  3.69it/s]


Train loss: 0.5002, Accuracy: 0.7610
Model: zwzzz/Chinese-MentalBERT, Fold: 2, Epoch 9


Training:  63%|██████▎   | 60/95 [00:16<00:09,  3.64it/s]


KeyboardInterrupt: 

In [18]:
import torch
import pandas as pd
import numpy as np
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoModel, AdamW, get_scheduler, set_seed
)
from torch.nn import functional as F
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from opencc import OpenCC
import jieba

# --- Seed setup ---
def seed_everything(seed=6):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(6)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def tokenize_cantonese(text):
    return list(jieba.cut(text))
# --- Preprocessing function using Weikit and OpenCC ---
def preprocess_cantonese(texts, opencc_config='hk2s'):
    cc = OpenCC(opencc_config)
    processed = []
    for text in texts:
        text_conv = cc.convert(text)  # Convert to Mandarin/Simplified
        tokens = tokenize_cantonese(text_conv)  # Tokenize (still useful for segmentation)
        processed.append(" ".join(tokens))
    return processed

# --- Dataset ---
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=512, overlap=128, cases=None, pad_token_id=None):
        self.samples = []
        pad_token_id = pad_token_id if pad_token_id is not None else tokenizer.pad_token_id or 0
        for idx, (text, label) in enumerate(zip(texts, labels)):
            case_id = cases[idx] if cases is not None else idx
            encoding = tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                return_attention_mask=False,
                return_tensors=None,
            )
            input_ids = encoding["input_ids"]
            # Chunking
            start = 0
            while start < len(input_ids):
                end = start + max_len
                chunk = input_ids[start:end]
                if len(chunk) < max_len:
                    chunk += [pad_token_id] * (max_len - len(chunk))
                self.samples.append({
                    "input_ids": chunk,
                    "label": label,
                    "case_id": case_id,
                    "chunk_id": start // (max_len - overlap)
                })
                if end >= len(input_ids):
                    break
                start += max_len - overlap

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        input_ids = torch.tensor(sample["input_ids"], dtype=torch.long)
        attention_mask = (input_ids != 0).long()  # assumes pad_token_id is 0
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "label": torch.tensor(sample["label"], dtype=torch.long),
            "case_id": sample["case_id"],
            "chunk_id": sample["chunk_id"],
        }

# --- Model fallback if classification head is missing ---
class CustomClassifier(torch.nn.Module):
    def __init__(self, base_model, hidden_size, num_labels=2):
        super().__init__()
        self.base = base_model
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # CLS token
        logits = self.classifier(self.dropout(pooled_output))
        loss = None
        if labels is not None:
            loss = F.cross_entropy(logits, labels)
        return type('Output', (), {'loss': loss, 'logits': logits})

def dataset_to_dataframe(dataset, tokenizer):
    records = []
    for sample in dataset:
        input_ids = sample["input_ids"]
        text = tokenizer.decode(input_ids, skip_special_tokens=True)
        records.append({
            "case_id": sample["case_id"],
            "chunk_id": sample["chunk_id"],
            "label": sample["label"].item(),
            "chunk_text": text
        })
    return pd.DataFrame(records)

# --- Metrics ---
def compute_metrics(y_true, y_prob, y_pred):
    auc = roc_auc_score(y_true, y_prob)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0
    return {
        "AUC": auc,
        "F1": f1,
        "Sensitivity": recall,
        "Specificity": specificity,
        "PPV": precision,
        "NPV": npv
    }

# --- Training ---
def train_epoch(model, data_loader, optimizer, scheduler, device, scaler):
    model.train()
    losses = []
    correct_predictions = 0

    for batch in tqdm(data_loader, desc="Training"):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()

        with autocast():
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            logits = outputs.logits

        _, preds = torch.max(logits, dim=1)
        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

    return correct_predictions.double() / len(data_loader.dataset), np.mean(losses)

# --- Evaluation ---
def eval_model(model, data_loader, device):
    model.eval()
    true_labels, pred_probs, cased = [], [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Validation"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            probs = F.softmax(outputs.logits, dim=1)

            true_labels.extend(labels.cpu().numpy())
            pred_probs.extend(probs.cpu().numpy())
            # FIX HERE:
            cased.extend(batch["case_id"])  # No .cpu().numpy() needed
    return np.array(cased), np.array(true_labels), np.array(pred_probs)

# --- Model list ---
models = [
    'ckiplab/albert-base-chinese',
    # 'indiejoseph/bert-base-cantonese',
    # 'zwzzz/Chinese-MentalBERT',
    # 'Geotrend/distilbert-base-zh-cased',
    # 'hfl/chinese-roberta-wwm-ext',
    # 'hfl/chinese-xlnet-base',
    # 'hfl/chinese-electra-base-discriminator'
]

names=['text','interviewee_text','new_text','new_interviewee_text']

all_model_metrics = []

for name in names:
    originaldata = pd.read_csv("combine.csv")
    raw_texts = originaldata[name].tolist()
    labels = originaldata["label"].tolist()
    cases = originaldata["case"].tolist()

    # --- Preprocess using Weikit + OpenCC for Cantonese ---
    texts = preprocess_cantonese(raw_texts, opencc_config='hk2s')  # or 't2s', depending on your text

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for model_name in models:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        result_all_folds = pd.DataFrame()

        for fold_idx, (train_index, val_index) in enumerate(skf.split(texts, labels)):
            train_texts = [texts[i] for i in train_index]
            train_labels = [labels[i] for i in train_index]
            val_texts = [texts[i] for i in val_index]
            val_labels = [labels[i] for i in val_index]
            val_cases = [cases[i] for i in val_index]

            train_dataset = TextDataset(train_texts, train_labels, tokenizer)
            val_dataset = TextDataset(val_texts, val_labels, tokenizer, cases=val_cases)

            train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, generator=torch.Generator().manual_seed(6))
            val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

            try:
                model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)
            except:
                base = AutoModel.from_pretrained(model_name)
                model = CustomClassifier(base_model=base, hidden_size=base.config.hidden_size).to(device)

            optimizer = AdamW(model.parameters(), lr=1e-5, correct_bias=False)
            scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * 5)
            scaler = GradScaler()

            for epoch in range(5):  # Reduce epochs for speed; adjust as needed
                print(f"Model: {model_name}, Fold: {fold_idx + 1}, Epoch {epoch + 1}")
                train_acc, train_loss = train_epoch(model, train_loader, optimizer, scheduler, device, scaler)
                print(f"Train loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

            # Evaluation
            cased, true_labels, pred_probs = eval_model(model, val_loader, device)
            val_chunks = dataset_to_dataframe(val_dataset, tokenizer)

            fold_results = pd.DataFrame({
                "cased": val_chunks["case_id"],
                "true_label": val_chunks["label"],
                "prob0": pred_probs[:, 0],
                "prob1": pred_probs[:, 1],
            })

            result_all_folds = pd.concat([result_all_folds, fold_results], ignore_index=True)

        # Majority Voting
        voting_df = (
            result_all_folds
            .assign(pred=(result_all_folds["prob1"] > 0.5).astype(int))
            .groupby("cased")
            .agg({
                "true_label": "first",
                "pred": lambda x: x.value_counts().idxmax(),
                "prob1": "mean"
            })
            .reset_index()
            .rename(columns={"cased": "case", "true_label": "label", "prob1": "prob"})
        )

        y_true = voting_df["label"]
        y_pred = voting_df["pred"]
        y_prob = voting_df["prob"]
        y_case = voting_df["case"]
        metrics = compute_metrics(y_true, y_prob, y_pred)
        model_short = model_name.split("/")[-1]
        print(f"\n=== {model_short} on {name} ===")
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")

        metrics["Model"] = model_short
        metrics["TextType"] = name
        all_model_metrics.append(metrics)

        # Optionally save results for each model
        # voting_df.to_csv(f"./newresult/{name}_{model_short}_majority_voting_results.csv", index=False)

# # Save all metrics summary
# metrics_df = pd.DataFrame(all_model_metrics)
# metrics_df.to_csv("./all_model_metrics_summary.csv", index=False)

Using device: cuda


Token indices sequence length is longer than the specified maximum sequence length for this model (1546 > 512). Running this sequence through the model will result in indexing errors
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 94/94 [00:47<00:00,  1.99it/s]


Train loss: 0.5848, Accuracy: 0.7360
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 2


Training: 100%|██████████| 94/94 [00:47<00:00,  1.98it/s]


Train loss: 0.5556, Accuracy: 0.7527
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 3


Training: 100%|██████████| 94/94 [00:47<00:00,  1.97it/s]


Train loss: 0.5331, Accuracy: 0.7527
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 4


Training: 100%|██████████| 94/94 [00:48<00:00,  1.94it/s]


Train loss: 0.5435, Accuracy: 0.7527
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 5


Training: 100%|██████████| 94/94 [00:47<00:00,  1.96it/s]


Train loss: 0.5101, Accuracy: 0.7527


Validation: 100%|██████████| 23/23 [00:04<00:00,  5.11it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 1


Training: 100%|██████████| 95/95 [00:53<00:00,  1.77it/s]


Train loss: 0.5858, Accuracy: 0.7344
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 2


Training: 100%|██████████| 95/95 [00:53<00:00,  1.78it/s]


Train loss: 0.5589, Accuracy: 0.7477
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 3


Training: 100%|██████████| 95/95 [00:53<00:00,  1.78it/s]


Train loss: 0.5304, Accuracy: 0.7477
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 4


Training: 100%|██████████| 95/95 [00:53<00:00,  1.77it/s]


Train loss: 0.5174, Accuracy: 0.7483
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 5


Training: 100%|██████████| 95/95 [00:53<00:00,  1.77it/s]


Train loss: 0.4867, Accuracy: 0.7503


Validation: 100%|██████████| 22/22 [00:04<00:00,  5.01it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 1


Training: 100%|██████████| 91/91 [00:49<00:00,  1.82it/s]


Train loss: 0.5716, Accuracy: 0.7368
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 2


Training: 100%|██████████| 91/91 [00:49<00:00,  1.82it/s]


Train loss: 0.5575, Accuracy: 0.7562
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 3


Training: 100%|██████████| 91/91 [00:50<00:00,  1.82it/s]


Train loss: 0.5510, Accuracy: 0.7569
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 4


Training: 100%|██████████| 91/91 [00:50<00:00,  1.81it/s]


Train loss: 0.5260, Accuracy: 0.7569
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 5


Training: 100%|██████████| 91/91 [00:49<00:00,  1.83it/s]


Train loss: 0.4992, Accuracy: 0.7583


Validation: 100%|██████████| 26/26 [00:05<00:00,  5.07it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 1


Training: 100%|██████████| 93/93 [01:08<00:00,  1.36it/s]


Train loss: 0.5854, Accuracy: 0.7156
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 2


Training: 100%|██████████| 93/93 [01:08<00:00,  1.35it/s]


Train loss: 0.5641, Accuracy: 0.7407
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 3


Training: 100%|██████████| 93/93 [01:08<00:00,  1.35it/s]


Train loss: 0.5650, Accuracy: 0.7407
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 4


Training: 100%|██████████| 93/93 [01:08<00:00,  1.35it/s]


Train loss: 0.5559, Accuracy: 0.7407
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 5


Training: 100%|██████████| 93/93 [01:08<00:00,  1.36it/s]


Train loss: 0.5436, Accuracy: 0.7407


Validation: 100%|██████████| 24/24 [00:04<00:00,  5.10it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 1


Training: 100%|██████████| 94/94 [00:55<00:00,  1.70it/s]


Train loss: 0.5811, Accuracy: 0.7335
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 2


Training: 100%|██████████| 94/94 [00:55<00:00,  1.69it/s]


Train loss: 0.5839, Accuracy: 0.7415
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 3


Training: 100%|██████████| 94/94 [00:56<00:00,  1.68it/s]


Train loss: 0.5703, Accuracy: 0.7415
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 4


Training: 100%|██████████| 94/94 [00:55<00:00,  1.68it/s]


Train loss: 0.5607, Accuracy: 0.7415
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 5


Training: 100%|██████████| 94/94 [00:56<00:00,  1.66it/s]


Train loss: 0.5268, Accuracy: 0.7415


Validation: 100%|██████████| 23/23 [00:04<00:00,  5.12it/s]



=== albert-base-chinese on text ===
AUC: 0.7417
F1: 0.7771
Sensitivity: 1.0000
Specificity: 0.0000
PPV: 0.6355
NPV: 0.0000


Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 43/43 [00:27<00:00,  1.57it/s]


Train loss: 0.5546, Accuracy: 0.7651
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 2


Training: 100%|██████████| 43/43 [00:26<00:00,  1.60it/s]


Train loss: 0.5359, Accuracy: 0.7489
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 3


Training: 100%|██████████| 43/43 [00:26<00:00,  1.61it/s]


Train loss: 0.4943, Accuracy: 0.7680
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 4


Training: 100%|██████████| 43/43 [00:26<00:00,  1.60it/s]


Train loss: 0.4555, Accuracy: 0.8076
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 5


Training: 100%|██████████| 43/43 [00:27<00:00,  1.59it/s]


Train loss: 0.4205, Accuracy: 0.8253


Validation: 100%|██████████| 10/10 [00:02<00:00,  5.00it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 1


Training: 100%|██████████| 44/44 [00:32<00:00,  1.37it/s]


Train loss: 0.5854, Accuracy: 0.7157
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 2


Training: 100%|██████████| 44/44 [00:32<00:00,  1.36it/s]


Train loss: 0.5231, Accuracy: 0.7648
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 3


Training: 100%|██████████| 44/44 [00:32<00:00,  1.34it/s]


Train loss: 0.5266, Accuracy: 0.7561
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 4


Training: 100%|██████████| 44/44 [00:32<00:00,  1.36it/s]


Train loss: 0.5009, Accuracy: 0.7835
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 5


Training: 100%|██████████| 44/44 [00:32<00:00,  1.34it/s]


Train loss: 0.4703, Accuracy: 0.7908


Validation: 100%|██████████| 10/10 [00:01<00:00,  5.45it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 1


Training: 100%|██████████| 41/41 [00:26<00:00,  1.55it/s]


Train loss: 0.5472, Accuracy: 0.7601
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 2


Training: 100%|██████████| 41/41 [00:26<00:00,  1.52it/s]


Train loss: 0.4829, Accuracy: 0.8006
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 3


Training: 100%|██████████| 41/41 [00:26<00:00,  1.52it/s]


Train loss: 0.4642, Accuracy: 0.8022
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 4


Training: 100%|██████████| 41/41 [00:27<00:00,  1.51it/s]


Train loss: 0.4642, Accuracy: 0.8131
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 5


Training: 100%|██████████| 41/41 [00:26<00:00,  1.52it/s]


Train loss: 0.4261, Accuracy: 0.8209


Validation: 100%|██████████| 13/13 [00:02<00:00,  5.28it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 1


Training: 100%|██████████| 43/43 [00:31<00:00,  1.36it/s]


Train loss: 0.5873, Accuracy: 0.7134
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 2


Training: 100%|██████████| 43/43 [00:32<00:00,  1.34it/s]


Train loss: 0.5457, Accuracy: 0.7518
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 3


Training: 100%|██████████| 43/43 [00:32<00:00,  1.34it/s]


Train loss: 0.5423, Accuracy: 0.7607
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 4


Training: 100%|██████████| 43/43 [00:31<00:00,  1.34it/s]


Train loss: 0.5245, Accuracy: 0.7651
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 5


Training: 100%|██████████| 43/43 [00:32<00:00,  1.34it/s]


Train loss: 0.5341, Accuracy: 0.7770


Validation: 100%|██████████| 11/11 [00:02<00:00,  5.42it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 1


Training: 100%|██████████| 42/42 [00:25<00:00,  1.67it/s]


Train loss: 0.5601, Accuracy: 0.7168
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 2


Training: 100%|██████████| 42/42 [00:25<00:00,  1.66it/s]


Train loss: 0.5186, Accuracy: 0.7765
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 3


Training: 100%|██████████| 42/42 [00:25<00:00,  1.66it/s]


Train loss: 0.5049, Accuracy: 0.7765
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 4


Training: 100%|██████████| 42/42 [00:25<00:00,  1.66it/s]


Train loss: 0.4875, Accuracy: 0.7750
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 5


Training: 100%|██████████| 42/42 [00:25<00:00,  1.66it/s]


Train loss: 0.4833, Accuracy: 0.7779


Validation: 100%|██████████| 11/11 [00:02<00:00,  5.23it/s]



=== albert-base-chinese on interviewee_text ===
AUC: 0.7061
F1: 0.7765
Sensitivity: 0.8684
Specificity: 0.3578
PPV: 0.7021
NPV: 0.6094


Token indices sequence length is longer than the specified maximum sequence length for this model (2350 > 512). Running this sequence through the model will result in indexing errors
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 82/82 [00:45<00:00,  1.78it/s]


Train loss: 0.5895, Accuracy: 0.7215
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 2


Training: 100%|██████████| 82/82 [00:46<00:00,  1.77it/s]


Train loss: 0.5702, Accuracy: 0.7353
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 3


Training: 100%|██████████| 82/82 [00:46<00:00,  1.77it/s]


Train loss: 0.5629, Accuracy: 0.7368
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 4


Training: 100%|██████████| 82/82 [00:45<00:00,  1.79it/s]


Train loss: 0.5463, Accuracy: 0.7399
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 5


Training: 100%|██████████| 82/82 [00:46<00:00,  1.77it/s]


Train loss: 0.5225, Accuracy: 0.7429


Validation: 100%|██████████| 18/18 [00:03<00:00,  5.14it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 1


Training: 100%|██████████| 82/82 [00:51<00:00,  1.60it/s]


Train loss: 0.6203, Accuracy: 0.6925
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 2


Training: 100%|██████████| 82/82 [00:51<00:00,  1.59it/s]


Train loss: 0.5961, Accuracy: 0.7202
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 3


Training: 100%|██████████| 82/82 [00:51<00:00,  1.59it/s]


Train loss: 0.5773, Accuracy: 0.7218
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 4


Training: 100%|██████████| 82/82 [00:51<00:00,  1.59it/s]


Train loss: 0.5474, Accuracy: 0.7325
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 5


Training: 100%|██████████| 82/82 [00:51<00:00,  1.59it/s]


Train loss: 0.5139, Accuracy: 0.7679


Validation: 100%|██████████| 18/18 [00:03<00:00,  5.03it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 1


Training: 100%|██████████| 77/77 [00:57<00:00,  1.34it/s]


Train loss: 0.6031, Accuracy: 0.7281
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 2


Training: 100%|██████████| 77/77 [00:57<00:00,  1.34it/s]


Train loss: 0.5820, Accuracy: 0.7379
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 3


Training: 100%|██████████| 77/77 [00:56<00:00,  1.36it/s]


Train loss: 0.5666, Accuracy: 0.7379
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 4


Training: 100%|██████████| 77/77 [00:58<00:00,  1.33it/s]


Train loss: 0.5336, Accuracy: 0.7379
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 5


Training: 100%|██████████| 77/77 [00:57<00:00,  1.34it/s]


Train loss: 0.4910, Accuracy: 0.7838


Validation: 100%|██████████| 23/23 [00:04<00:00,  5.00it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 1


Training: 100%|██████████| 79/79 [01:05<00:00,  1.20it/s]


Train loss: 0.6104, Accuracy: 0.6986
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 2


Training: 100%|██████████| 79/79 [01:06<00:00,  1.19it/s]


Train loss: 0.5869, Accuracy: 0.7170
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 3


Training: 100%|██████████| 79/79 [01:07<00:00,  1.18it/s]


Train loss: 0.5847, Accuracy: 0.7186
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 4


Training: 100%|██████████| 79/79 [01:06<00:00,  1.19it/s]


Train loss: 0.5684, Accuracy: 0.7290
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 5


Training: 100%|██████████| 79/79 [01:05<00:00,  1.20it/s]


Train loss: 0.5518, Accuracy: 0.7386


Validation: 100%|██████████| 21/21 [00:04<00:00,  5.00it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 1


Training: 100%|██████████| 80/80 [00:48<00:00,  1.66it/s]


Train loss: 0.6287, Accuracy: 0.6822
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 2


Training: 100%|██████████| 80/80 [00:48<00:00,  1.65it/s]


Train loss: 0.5995, Accuracy: 0.7271
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 3


Training: 100%|██████████| 80/80 [00:48<00:00,  1.65it/s]


Train loss: 0.5793, Accuracy: 0.7279
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 4


Training: 100%|██████████| 80/80 [00:48<00:00,  1.65it/s]


Train loss: 0.5753, Accuracy: 0.7279
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 5


Training: 100%|██████████| 80/80 [00:47<00:00,  1.67it/s]


Train loss: 0.5450, Accuracy: 0.7248


Validation: 100%|██████████| 20/20 [00:03<00:00,  5.00it/s]



=== albert-base-chinese on new_text ===
AUC: 0.6744
F1: 0.7667
Sensitivity: 0.9684
Specificity: 0.0275
PPV: 0.6345
NPV: 0.3333


Token indices sequence length is longer than the specified maximum sequence length for this model (1475 > 512). Running this sequence through the model will result in indexing errors
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler()


Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 1


  with autocast():
Training: 100%|██████████| 43/43 [00:31<00:00,  1.35it/s]


Train loss: 0.5786, Accuracy: 0.7137
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 2


Training: 100%|██████████| 43/43 [00:33<00:00,  1.30it/s]


Train loss: 0.5630, Accuracy: 0.7442
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 3


Training: 100%|██████████| 43/43 [00:33<00:00,  1.30it/s]


Train loss: 0.5564, Accuracy: 0.7442
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 4


Training: 100%|██████████| 43/43 [00:32<00:00,  1.31it/s]


Train loss: 0.5430, Accuracy: 0.7442
Model: ckiplab/albert-base-chinese, Fold: 1, Epoch 5


Training: 100%|██████████| 43/43 [00:32<00:00,  1.30it/s]


Train loss: 0.5285, Accuracy: 0.7442


Validation: 100%|██████████| 10/10 [00:01<00:00,  5.07it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 1


Training: 100%|██████████| 45/45 [00:27<00:00,  1.66it/s]


Train loss: 0.5779, Accuracy: 0.7336
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 2


Training: 100%|██████████| 45/45 [00:27<00:00,  1.66it/s]


Train loss: 0.5609, Accuracy: 0.7406
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 3


Training: 100%|██████████| 45/45 [00:26<00:00,  1.67it/s]


Train loss: 0.5417, Accuracy: 0.7462
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 4


Training: 100%|██████████| 45/45 [00:26<00:00,  1.67it/s]


Train loss: 0.5357, Accuracy: 0.7587
Model: ckiplab/albert-base-chinese, Fold: 2, Epoch 5


Training: 100%|██████████| 45/45 [00:26<00:00,  1.67it/s]


Train loss: 0.5167, Accuracy: 0.7601


Validation: 100%|██████████| 9/9 [00:01<00:00,  5.63it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 1


Training: 100%|██████████| 40/40 [00:25<00:00,  1.59it/s]


Train loss: 0.5609, Accuracy: 0.7203
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 2


Training: 100%|██████████| 40/40 [00:25<00:00,  1.56it/s]


Train loss: 0.5320, Accuracy: 0.7734
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 3


Training: 100%|██████████| 40/40 [00:25<00:00,  1.55it/s]


Train loss: 0.5215, Accuracy: 0.7656
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 4


Training: 100%|██████████| 40/40 [00:26<00:00,  1.53it/s]


Train loss: 0.4814, Accuracy: 0.7828
Model: ckiplab/albert-base-chinese, Fold: 3, Epoch 5


Training: 100%|██████████| 40/40 [00:25<00:00,  1.56it/s]


Train loss: 0.4464, Accuracy: 0.7828


Validation: 100%|██████████| 13/13 [00:02<00:00,  5.06it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 1


Training: 100%|██████████| 42/42 [00:23<00:00,  1.77it/s]


Train loss: 0.5869, Accuracy: 0.7046
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 2


Training: 100%|██████████| 42/42 [00:23<00:00,  1.77it/s]


Train loss: 0.5937, Accuracy: 0.7241
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 3


Training: 100%|██████████| 42/42 [00:23<00:00,  1.78it/s]


Train loss: 0.5914, Accuracy: 0.7256
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 4


Training: 100%|██████████| 42/42 [00:23<00:00,  1.76it/s]


Train loss: 0.5852, Accuracy: 0.7256
Model: ckiplab/albert-base-chinese, Fold: 4, Epoch 5


Training: 100%|██████████| 42/42 [00:23<00:00,  1.78it/s]


Train loss: 0.5814, Accuracy: 0.7256


Validation: 100%|██████████| 12/12 [00:02<00:00,  5.39it/s]
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ckiplab/albert-base-chinese and are newly initialized: ['albert.pooler.bias', 'albert.pooler.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 1


Training: 100%|██████████| 42/42 [00:23<00:00,  1.81it/s]


Train loss: 0.5864, Accuracy: 0.7321
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 2


Training: 100%|██████████| 42/42 [00:22<00:00,  1.83it/s]


Train loss: 0.5631, Accuracy: 0.7351
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 3


Training: 100%|██████████| 42/42 [00:22<00:00,  1.83it/s]


Train loss: 0.5161, Accuracy: 0.7619
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 4


Training: 100%|██████████| 42/42 [00:23<00:00,  1.82it/s]


Train loss: 0.4402, Accuracy: 0.7932
Model: ckiplab/albert-base-chinese, Fold: 5, Epoch 5


Training: 100%|██████████| 42/42 [00:22<00:00,  1.83it/s]


Train loss: 0.3436, Accuracy: 0.8586


Validation: 100%|██████████| 11/11 [00:02<00:00,  5.07it/s]


=== albert-base-chinese on new_interviewee_text ===
AUC: 0.6537
F1: 0.7748
Sensitivity: 0.9053
Specificity: 0.2477
PPV: 0.6772
NPV: 0.6000





In [17]:
def compute_metrics(y_true, y_prob, y_pred):
    auc = roc_auc_score(y_true, y_prob)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0
    return {
        "AUC": auc,
        "F1": f1,
        "Sensitivity": recall,
        "Specificity": specificity,
        "PPV": precision,
        "NPV": npv
    }
data=pd.read_csv('./newresult/text_albert-base-chinese_majority_voting_results.csv')
compute_metrics(data['label'], data['prob'], data['pred'])


{'AUC': np.float64(0.7530661516175761),
 'F1': 0.7903930131004366,
 'Sensitivity': 0.9526315789473684,
 'Specificity': np.float64(0.2018348623853211),
 'PPV': 0.6753731343283582,
 'NPV': np.float64(0.7096774193548387)}