In [1]:
from transformers import ElectraTokenizerFast, ElectraForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm





In [None]:


# ===============================================================
# ELECTRA SMALL – FULL TRAINING PIPELINE (ONE RUN)
# ===============================================================

# 1. Imports
from transformers import ElectraTokenizerFast, ElectraForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import numpy as np

# 2. Tokenizer matching the ELECTRA-small discriminator checkpoint
tokenizer_bert = ElectraTokenizerFast.from_pretrained("google/electra-small-discriminator")

# 3. Encode both splits with identical settings: inputs are truncated to at most
#    256 tokens and padding is enabled. Train is encoded first, then test.
train_encodings, test_encodings = (
    tokenizer_bert(texts.tolist(), max_length=256, truncation=True, padding=True)
    for texts in (X_train_bert, X_test_bert)
)

# 4. PyTorch Dataset Class
class NewsDataset(Dataset):
    """Map-style dataset pairing per-sample encodings with integer class labels.

    Args:
        encodings: Mapping (anything exposing ``.items()``) from feature name to a
            per-sample sequence, so that ``encodings[key][i]`` is the feature of
            sample ``i``.
        labels: Indexable, sized sequence with one label per sample. Each label
            is converted with ``int`` when an item is fetched.

    Raises:
        ValueError: If any feature does not contain exactly ``len(labels)`` entries.
    """

    def __init__(self, encodings, labels):
        # Fail fast on misaligned inputs: otherwise a mismatch only surfaces as an
        # IndexError mid-training, or silently ignores trailing samples.
        n_labels = len(labels)
        for key, values in encodings.items():
            if len(values) != n_labels:
                raise ValueError(
                    f"encodings[{key!r}] has {len(values)} entries but there are "
                    f"{n_labels} labels"
                )
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return a dict of tensors for sample ``idx``, including a ``"labels"`` entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Explicit int64 (torch.long) label; this is also what torch.tensor infers
        # from a Python int, so the resulting tensor is unchanged.
        item["labels"] = torch.tensor(int(self.labels[idx]), dtype=torch.long)
        return item

    def __len__(self):
        """Number of samples, defined by the number of labels."""
        return len(self.labels)

# 5. Create train/test datasets
train_dataset = NewsDataset(train_encodings, y_train_bert.values)
test_dataset  = NewsDataset(test_encodings,  y_test_bert.values)

# 6. Load ELECTRA SMALL model
# Seed torch before loading: the sequence-classification head is expected to be
# freshly initialised (it is not part of the pretrained discriminator weights),
# so without a seed every run starts from different head weights.
SEED = 42
torch.manual_seed(SEED)
model = ElectraForSequenceClassification.from_pretrained(
    "google/electra-small-discriminator",
    num_labels=2
)

# 7. Training arguments
# Evaluate, log and checkpoint once per epoch; the evaluation and save strategies
# must match for load_best_model_at_end to work.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` - switch the keyword if TrainingArguments rejects it.
training_args = TrainingArguments(
    output_dir="./electra_results",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    seed=SEED
)


def compute_metrics(eval_pred):
    """Compute classification accuracy from the Trainer's evaluation output.

    Args:
        eval_pred: Pair ``(logits, labels)`` as passed by ``Trainer``; logits are
            expected to have one column per class.

    Returns:
        dict with a single ``"accuracy"`` entry (fraction of correct predictions).
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}


# 8. Trainer object
# NOTE(review): the test split is used both for per-epoch evaluation and for
# best-checkpoint selection, so the final score is optimistic; consider carving a
# validation split out of the training data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

# 9. Train model
trainer.train()

# 10. Evaluate model (loss plus the accuracy from compute_metrics)
eval_results = trainer.evaluate()
print("ELECTRA Evaluation:", eval_results)

# 11. Save model + tokenizer
model.save_pretrained("../models/electra_model")
tokenizer_bert.save_pretrained("../models/electra_tokenizer")

# ===============================================================
# END OF ONE-RUN ELECTRA PIPELINE
# ===============================================================
