In [1]:
#pip install -q datasets

In [2]:
#pip install -q evaluate

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset
import evaluate
import numpy as np
from sklearn.model_selection import KFold
import torch

In [2]:
import transformers
print(transformers.__version__)

4.56.1


In [3]:
import os
os.environ["WANDB_DISABLED"] = "true"

from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer

In [4]:
df = pd.read_csv("clickbait_title_classification.csv")
df = df.rename(columns={"clickbait": "labels"})

In [5]:
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [6]:
def tokenize_fn(examples):
    return tokenizer(
        examples["title"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )

In [7]:
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")
roc_auc = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    logits, labels = eval_pred

    # Convert logits to predicted class
    preds = np.argmax(logits, axis=-1)

    # Convert logits to probabilities for ROC AUC
    probs = 1 / (1 + np.exp(-logits[:, 1]))  # sigmoid on class 1 logits

    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "precision": precision.compute(predictions=preds, references=labels, average="binary")["precision"],
        "recall": recall.compute(predictions=preds, references=labels, average="binary")["recall"],
        "f1": f1.compute(predictions=preds, references=labels, average="binary")["f1"],
        "roc_auc": roc_auc.compute(prediction_scores=probs, references=labels)["roc_auc"]
    }

In [10]:
#pip show transformers accelerate torch

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

for param in model.bert.parameters():
        param.requires_grad = False
    
for name, param in model.named_parameters():
    print(name, param.requires_grad)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


bert.embeddings.word_embeddings.weight False
bert.embeddings.position_embeddings.weight False
bert.embeddings.token_type_embeddings.weight False
bert.embeddings.LayerNorm.weight False
bert.embeddings.LayerNorm.bias False
bert.encoder.layer.0.attention.self.query.weight False
bert.encoder.layer.0.attention.self.query.bias False
bert.encoder.layer.0.attention.self.key.weight False
bert.encoder.layer.0.attention.self.key.bias False
bert.encoder.layer.0.attention.self.value.weight False
bert.encoder.layer.0.attention.self.value.bias False
bert.encoder.layer.0.attention.output.dense.weight False
bert.encoder.layer.0.attention.output.dense.bias False
bert.encoder.layer.0.attention.output.LayerNorm.weight False
bert.encoder.layer.0.attention.output.LayerNorm.bias False
bert.encoder.layer.0.intermediate.dense.weight False
bert.encoder.layer.0.intermediate.dense.bias False
bert.encoder.layer.0.output.dense.weight False
bert.encoder.layer.0.output.dense.bias False
bert.encoder.layer.0.output.Lay

In [9]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)
fold_results = []

for fold, (train_val_index, test_index) in enumerate(kf.split(df)):
    print(f"=== Fold {fold+1} ===")

    test_df = df.iloc[test_index]

    # Remaining 9 folds for training+validation
    train_val_df = df.iloc[train_val_index]

    # --------------------------
    # Internal validation fold (8/1 split)
    # --------------------------
    train_df, val_df = train_test_split(train_val_df, test_size=1/9, random_state=42, stratify=train_val_df["labels"])

    # --------------------------
    # Convert to Hugging Face Datasets
    # --------------------------
    train_ds = Dataset.from_pandas(train_df)
    val_ds   = Dataset.from_pandas(val_df)
    test_ds  = Dataset.from_pandas(test_df)

    # --------------------------
    # Tokenize datasets
    # --------------------------
    train_ds = train_ds.map(tokenize_fn, batched=True)
    val_ds   = val_ds.map(tokenize_fn, batched=True)
    test_ds  = test_ds.map(tokenize_fn, batched=True)

    for ds in [train_ds, val_ds, test_ds]:
        ds.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    #----------------------------------------------------------------------------------    
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    for param in model.bert.parameters():
        param.requires_grad = False
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    #-----------------------------------------------------------------------------------

    training_args = TrainingArguments(
        output_dir=f"./results/fold_{fold+1}",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-4,
        weight_decay=0.01,
        load_best_model_at_end=True,
        logging_steps=10,
        fp16=True,
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()
    metrics = trainer.evaluate(test_ds)
    fold_results.append(metrics)
    print(f"\n Fold {fold+1} complete.")

=== Fold 1 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.3057,0.203647,0.93625,0.935122,0.937461,0.93629,0.982516
2,0.1976,0.17858,0.942187,0.959091,0.923702,0.941064,0.986049
3,0.2253,0.163655,0.94875,0.952111,0.944966,0.948525,0.987155



 Fold 1 complete.
=== Fold 2 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2488,0.216972,0.9325,0.936869,0.9275,0.932161,0.977428
2,0.2198,0.186483,0.939688,0.95183,0.92625,0.938866,0.982694
3,0.1913,0.174763,0.943438,0.945386,0.94125,0.943313,0.983846



 Fold 2 complete.
=== Fold 3 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2397,0.214993,0.929688,0.928349,0.93125,0.929797,0.978492
2,0.192,0.181589,0.93625,0.933002,0.94,0.936488,0.983064
3,0.1906,0.174734,0.9375,0.939147,0.935625,0.937383,0.984165



 Fold 3 complete.
=== Fold 4 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.237,0.21074,0.928438,0.941935,0.913071,0.927279,0.98034
2,0.2489,0.171354,0.94375,0.9482,0.938712,0.943432,0.986489
3,0.2706,0.162868,0.946875,0.949089,0.94434,0.946708,0.987472



 Fold 4 complete.
=== Fold 5 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2403,0.211845,0.92875,0.92542,0.932373,0.928883,0.978731
2,0.2127,0.182574,0.934375,0.942565,0.924859,0.933628,0.983834
3,0.1849,0.170115,0.938438,0.938596,0.938009,0.938303,0.985209



 Fold 5 complete.
=== Fold 6 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2297,0.21547,0.927813,0.916109,0.941875,0.928814,0.97901
2,0.224,0.182523,0.9375,0.934243,0.94125,0.937733,0.983568
3,0.2055,0.175282,0.94,0.941103,0.93875,0.939925,0.984635



 Fold 6 complete.
=== Fold 7 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.277,0.211603,0.929063,0.939744,0.916823,0.928142,0.980522
2,0.2119,0.181546,0.935312,0.952536,0.916198,0.934013,0.985049
3,0.1995,0.169763,0.942187,0.950318,0.933083,0.941622,0.986324



 Fold 7 complete.
=== Fold 8 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2307,0.215213,0.930625,0.930582,0.930582,0.930582,0.979252
2,0.2412,0.18478,0.936562,0.94801,0.923702,0.935698,0.984048
3,0.1823,0.173354,0.939375,0.946599,0.931207,0.93884,0.985325



 Fold 8 complete.
=== Fold 9 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2453,0.203488,0.935312,0.910483,0.965646,0.937254,0.982983
2,0.2323,0.16688,0.945312,0.940123,0.95128,0.945669,0.987107
3,0.2242,0.160157,0.9475,0.948654,0.946284,0.947467,0.988099



 Fold 9 complete.
=== Fold 10 ===


Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2347,0.239092,0.911563,0.942953,0.876482,0.908503,0.975241
2,0.1914,0.186116,0.934063,0.932298,0.936369,0.934329,0.982157
3,0.2332,0.179043,0.93625,0.938009,0.934498,0.93625,0.98339



 Fold 10 complete.


In [10]:
results_df = pd.DataFrame(fold_results)
print(results_df.mean())

eval_loss                     0.167488
eval_accuracy                 0.942469
eval_precision                0.944246
eval_recall                   0.940520
eval_f1                       0.942363
eval_roc_auc                  0.986352
eval_runtime                  3.286270
eval_samples_per_second    1047.401700
eval_steps_per_second        65.462700
epoch                         3.000000
dtype: float64


In [11]:
metrics_names = ["eval_accuracy", "eval_precision", "eval_recall", "eval_f1", "eval_roc_auc"]

for metric in metrics_names:
    values = [fold[metric] for fold in fold_results]
    avg = np.mean(values)
    std = np.std(values)
    print(f"{metric}: {avg:.4f} ± {std:.4f}")


eval_accuracy: 0.9425 ± 0.0045
eval_precision: 0.9442 ± 0.0049
eval_recall: 0.9405 ± 0.0066
eval_f1: 0.9424 ± 0.0043
eval_roc_auc: 0.9864 ± 0.0020
