In [2]:
import random
import os
import json
import numpy as np
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
from transformers import ElectraTokenizer, ElectraForSequenceClassification, AutoConfig, DataCollatorWithPadding, AdamW

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

def set_seed(seed_value):
    """Set seed for reproducibility"""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

def tokenize_function(examples, tokenizer, max_length):
    """Tokenize the input text"""
    return tokenizer(examples["text"], truncation=True, max_length=max_length)

def evaluate(dataloader, model):
    """Evaluate the model on a given dataloader"""
    ground_truth = []
    preds = []

    model.eval()
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        labels = batch.pop("labels")
        labels = labels.detach().cpu().numpy()

        with torch.no_grad():
            outputs = model(**batch)
            logits = outputs.logits
            pred = torch.argmax(logits, dim=-1).detach().cpu().numpy()

        ground_truth.extend(labels.tolist())
        preds.extend(pred.tolist())

    acc = accuracy_score(ground_truth, preds)
    return acc

def train(train_dataloader, val_dataloader, model, optimizer, epochs):
    """Train the model"""
    max_acc = 0
    for epoch in range(epochs):
        model.train()
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch, output_hidden_states=True)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        acc = evaluate(val_dataloader, model)
        if acc > max_acc:
            print(f'Validation Accuracy: from {max_acc} to {acc}')
            max_acc = acc

    return model

set_seed(1234)
model_checkpoint = "google/electra-base-discriminator"
max_length = 512
epochs = 5
batch_size = 8
lr = 5e-5

prompt = 1
seed = 1

train_path = f'prompt{prompt}_seed{seed}_train.csv'
val_path = f'prompt{prompt}_seed{seed}_val.csv'
test_path = f'prompt{prompt}_seed{seed}_test.csv'

config = AutoConfig.from_pretrained(
    model_checkpoint,
    label2id={'human': 0, 'chatgpt': 1},
    id2label={0: 'human', 1: 'chatgpt'}
)

tokenizer = ElectraTokenizer.from_pretrained(
    model_checkpoint,
    padding=True,
    truncation=True,
    model_max_length=max_length
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

raw_datasets = load_dataset("csv", data_files={"train": train_path, "val": val_path, "test": test_path})

tokenized_datasets = raw_datasets.map(
    lambda x: tokenize_function(x, tokenizer, max_length),
    batched=True,
    remove_columns=['text']
)

model = ElectraForSequenceClassification.from_pretrained(
    model_checkpoint, config=config
)
model.to(device)

optimizer = AdamW(model.parameters(), lr=lr)

train_dataloader = DataLoader(
    tokenized_datasets["train"],
    batch_size=batch_size,
    collate_fn=data_collator,
    shuffle=True
)

val_dataloader = DataLoader(
    tokenized_datasets["val"],
    batch_size=batch_size,
    collate_fn=data_collator
)

test_dataloader = DataLoader(
    tokenized_datasets["test"],
    batch_size=batch_size,
    collate_fn=data_collator
)

model = train(train_dataloader, val_dataloader, model, optimizer, epochs)

test_acc = evaluate(test_dataloader, model)
print(f'Test Accuracy: {test_acc}')

ground_truth, preds = [], []
model.eval()
for batch in test_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    labels = batch.pop("labels")
    labels = labels.detach().cpu().numpy()

    with torch.no_grad():
        outputs = model(**batch)
        logits = outputs.logits
        pred = torch.argmax(logits, dim=-1).detach().cpu().numpy()

    ground_truth.extend(labels.tolist())
    preds.extend(pred.tolist())

print("Unique labels in ground truth:", np.unique(ground_truth))
print("Unique labels in predictions:", np.unique(preds))

print(classification_report(ground_truth, preds, digits=3))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/3080 [00:00<?, ? examples/s]

Map:   0%|          | 0/440 [00:00<?, ? examples/s]

Map:   0%|          | 0/880 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: from 0 to 0.975
Test Accuracy: 0.9784090909090909
Unique labels in ground truth: [0 1]
Unique labels in predictions: [0 1]
              precision    recall  f1-score   support

           0      0.998     0.959     0.978       440
           1      0.961     0.998     0.979       440

    accuracy                          0.978       880
   macro avg      0.979     0.978     0.978       880
weighted avg      0.979     0.978     0.978       880

