In [1]:
!pip install transformers datasets scikit-learn pandas torch



In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
import torch
import time

In [3]:
# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the labelled corpus and coerce the humor column to integer class ids.
df = pd.read_csv("data/dataset.csv")
df["humor"] = df["humor"].astype(int)
print(df.head())

# Stratified 80/20 split so both parts keep the same class balance.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["humor"])

# preserve_index=False: after the shuffled split the frames no longer have a
# RangeIndex, and from_pandas would otherwise store that index as an extra
# "__index_level_0__" column in each dataset.
train_ds = Dataset.from_pandas(train_df, preserve_index=False)
test_ds = Dataset.from_pandas(test_df, preserve_index=False)

                                                text  humor
0  Joe biden rules out 2020 bid: 'guys, i'm not r...      0
1  Watch: darvish gave hitter whiplash with slow ...      0
2  What do you call a turtle without its shell? d...      1
3      5 reasons the 2016 election feels so personal      0
4  Pasco police shot mexican migrant from behind,...      0


In [4]:
# Tokenizer matching the "bert-base-uncased" checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_fn(batch):
    """Tokenize the "text" entries of a batch.

    Every sequence is truncated and padded to a fixed length of 64 tokens.

    Args:
        batch: mapping that exposes the raw strings under the "text" key.

    Returns:
        Whatever the tokenizer call produces for those texts.
    """
    texts = batch["text"]
    encoded = tokenizer(
        texts,
        max_length=64,
        padding="max_length",
        truncation=True,
    )
    return encoded

def _to_model_inputs(split):
    """Tokenize one split, rename its target column and expose torch tensors."""
    encoded = split.map(tokenize_fn, batched=True)
    encoded = encoded.rename_column("humor", "labels")
    # set_format works in place; only these three columns are exposed.
    encoded.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return encoded


# Run the same preparation on the train split first, then on the test split.
train_ds = _to_model_inputs(train_ds)
test_ds = _to_model_inputs(test_ds)




Map: 100%|██████████| 160000/160000 [00:25<00:00, 6325.93 examples/s]
Map: 100%|██████████| 40000/40000 [00:06<00:00, 6313.13 examples/s]


In [5]:
# Pretrained BERT encoder with a two-label sequence-classification head,
# placed on the device chosen earlier in the notebook.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
)
model = model.to(device)

def compute_metrics(p: EvalPrediction):
    """Score a batch of predictions with four classification metrics.

    Args:
        p: prediction container. ``p.predictions`` is arg-maxed over its last
            axis to obtain predicted class ids, and ``p.label_ids`` supplies
            the reference labels.

    Returns:
        A dict with the keys "accuracy", "f1", "precision" and "recall".
    """
    y_true = p.label_ids
    y_pred = p.predictions.argmax(-1)

    # Every scorer shares the (labels, predictions) call signature.
    scorers = {
        "accuracy": accuracy_score,
        "f1": f1_score,
        "precision": precision_score,
        "recall": recall_score,
    }
    return {name: scorer(y_true, y_pred) for name, scorer in scorers.items()}

# Hyperparameters for the fine-tuning run, grouped by concern.
hyperparams = dict(
    # Output, evaluation and checkpoint cadence
    output_dir="./bert-humor",
    eval_strategy="epoch",
    save_strategy="epoch",
    # Optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    fp16=False,
    # Logging
    logging_dir="./logs",
    logging_steps=50,
)
training_args = TrainingArguments(**hyperparams)

# Bundle the model, data splits and metric callback into one Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_ds,
    eval_dataset=test_ds,
)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Fine-tune the model, timing the run and (on CUDA) recording peak memory.
cuda_available = torch.cuda.is_available()
if cuda_available:
    # Reset the peak counters so the reading below covers this run only.
    torch.cuda.reset_peak_memory_stats()

# perf_counter is a monotonic clock, so the measured duration cannot be
# skewed by system clock adjustments that happen during a long training run.
start_time = time.perf_counter()
trainer.train()
end_time = time.perf_counter()

if cuda_available:
    # Read the peak right after training, before evaluation allocates memory.
    peak_gpu_memory_gb = torch.cuda.max_memory_allocated() / 1024**3

# Final evaluation on the dataset the Trainer was given as eval_dataset.
results = trainer.evaluate()

print(f"Training time: {end_time - start_time:.2f} seconds")
if cuda_available:
    print(f"Peak GPU memory usage: {peak_gpu_memory_gb:.2f} GB")
print("Performance metrics:", results)

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0429,0.061654,0.98805,0.98804,0.988881,0.9872
2,0.0114,0.073127,0.988375,0.988334,0.991793,0.9849
3,0.0001,0.080237,0.9891,0.989061,0.992647,0.9855




Training time: 8362.04 seconds
Performance metrics: {'eval_loss': 0.08023671805858612, 'eval_accuracy': 0.9891, 'eval_f1': 0.9890606182256122, 'eval_precision': 0.9926470588235294, 'eval_recall': 0.9855, 'eval_runtime': 148.0888, 'eval_samples_per_second': 270.108, 'eval_steps_per_second': 8.441, 'epoch': 3.0}
