In [None]:
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"

import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    TrainingArguments,
    Trainer,
    TrainerCallback,
    is_torch_available
)
from datasets import Dataset
from transformers.trainer_utils import get_last_checkpoint
import logging

# ✅ Logging
# Root logger at INFO level plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ✅ STEP 1: Load & Prepare Dataset
# Read the CSV and turn the "clause_type" column into integer class ids.
df = pd.read_csv("cuad_final.csv")
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["clause_type"])

# Save label map: one row per class, pairing each integer id with its
# original clause_type value so predictions can be decoded later.
label_map_df = pd.DataFrame(
    list(enumerate(label_encoder.classes_)),
    columns=["id", "clause_type"],
)
label_map_df.to_csv("label_mapping.csv", index=False)

# ✅ STEP 2: Tokenizer & Dataset
tokenizer = BertTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")

def tokenize(batch):
    """Tokenize the ``answer_text`` entries of a batch.

    Uses the module-level ``tokenizer``. Each text is truncated and padded
    to ``max_length=256`` (``padding="max_length"``).

    Args:
        batch: Mapping with an ``"answer_text"`` entry holding the texts.

    Returns:
        Whatever the tokenizer call returns for those texts.
    """
    texts = batch["answer_text"]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=256,
    )

# Build a Hugging Face dataset from the text and label columns only.
dataset = Dataset.from_pandas(df[["answer_text", "label"]])
# Fixed seed: this script may resume from a checkpoint written by an earlier
# run. Without a seed every run draws a different 80/20 split, so a resumed
# model would be evaluated on rows it was trained on, inflating the metrics.
dataset = dataset.train_test_split(test_size=0.2, seed=42)
# Tokenize both splits in batches, then expose only the tensors the model
# consumes, as torch tensors.
dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# ✅ STEP 3: Load Model
# The classifier gets one output per class known to the label encoder.
model = BertForSequenceClassification.from_pretrained(
    "nlpaueb/legal-bert-base-uncased",
    num_labels=label_encoder.classes_.shape[0],
)
# NOTE(review): gradient checkpointing is presumably enabled to reduce
# training memory at the cost of extra compute; confirm it is still needed.
model.gradient_checkpointing_enable()

# ✅ STEP 4: Metrics
def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class is the
            argmax of ``logits`` along axis 1.

    Returns:
        Dict with keys ``"accuracy"`` and ``"f1"`` (weighted average).
    """
    logits, labels = eval_pred
    predicted = torch.tensor(logits).argmax(dim=1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "f1": f1_score(labels, predicted, average="weighted"),
    }

# ✅ STEP 5: Training Arguments
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./legalbert_model",
    logging_dir="./logs",
    logging_steps=10,
    # Evaluate and checkpoint once per epoch; at the end, reload the
    # checkpoint with the best "accuracy" (the key from compute_metrics).
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Optimisation settings.
    learning_rate=3e-5,  #  Slightly lower learning rate
    weight_decay=0.01,
    num_train_epochs=4,  #  Increase epochs for better learning
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
)

# ✅ STEP 6: Trainer
# Wire model, tokenizer, configuration, metrics and both splits together.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

# ✅ Allow all required NumPy types for PyTorch 2.6 unpickling
import torch.serialization
import numpy as np

torch.serialization.add_safe_globals([
    np.ndarray,
    np.dtype,
    np.generic,
    np.float64,
    np.int64,
    np.uint32,
    np.dtypes.UInt32DType
])

# Check for existing checkpoint (if any)
def find_latest_checkpoint(checkpoint_root):
    """Return the ``checkpoint-<step>`` directory with the highest step.

    Steps are compared numerically. A plain string sort would rank
    ``checkpoint-999`` above ``checkpoint-3788`` and resume from a stale
    checkpoint.

    Args:
        checkpoint_root: Directory that may contain ``checkpoint-<step>``
            sub-directories.

    Returns:
        The joined path of the newest checkpoint directory, or ``None`` when
        ``checkpoint_root`` is missing, is not a directory, or holds no
        ``checkpoint-<digits>`` sub-directory.
    """
    try:
        entries = os.listdir(checkpoint_root)
    except (FileNotFoundError, NotADirectoryError):
        return None

    latest_step = -1
    latest_path = None
    for entry in entries:
        prefix, _, step_text = entry.partition("-")
        # Only names of the exact form "checkpoint-<digits>" qualify.
        if prefix != "checkpoint" or not step_text.isdecimal():
            continue
        candidate = os.path.join(checkpoint_root, entry)
        # Skip stray files that merely share the naming pattern.
        if not os.path.isdir(candidate):
            continue
        step = int(step_text)
        if step > latest_step:
            latest_step, latest_path = step, candidate
    return latest_path


# NOTE(review): transformers.trainer_utils.get_last_checkpoint (already
# imported above) appears to perform the same numeric selection; consider
# switching to it once confirmed against the installed version.
output_dir = "./legalbert_model"
last_checkpoint = find_latest_checkpoint(output_dir)

# ✅ Resume from checkpoint
# Train from scratch when no checkpoint was found; otherwise continue from
# the discovered checkpoint directory.
if last_checkpoint is None:
    logger.info("🚀 Starting training from scratch...")
    trainer.train()
else:
    logger.info(f"🔁 Resuming training from checkpoint: {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)


# ✅ STEP 8: Evaluate
# Run evaluation on the eval split and report each metric to 4 decimals.
metrics = trainer.evaluate()
print("✅ Final Accuracy:", format(metrics["eval_accuracy"], ".4f"))
print("✅ Final F1 Score:", format(metrics["eval_f1"], ".4f"))
print("📉 Eval Loss:", format(metrics["eval_loss"], ".4f"))

# ✅ STEP 9: Save model
# Write the model first, then the tokenizer, into the same directory so the
# pair can be reloaded together.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./legalbert_model")
print("✅ Model and tokenizer saved!")



  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 3787/3787 [00:07<00:00, 539.32 examples/s]
Map: 100%|██████████| 947/947 [00:01<00:00, 524.78 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:__main__:🔁 Resuming training from checkpoint: ./legalbert_model\checkpoint-3788
  0%|          | 0/3788 [00:00<?, ?it/s]


{'train_runtime': 0.2317, 'train_samples_per_second': 65389.233, 'train_steps_per_second': 16351.625, 'train_loss': 0.0, 'epoch': 4.0}


100%|██████████| 237/237 [03:41<00:00,  1.07it/s]


✅ Final Accuracy: 0.9187
✅ Final F1 Score: 0.9091
📉 Eval Loss: 0.2217
✅ Model and tokenizer saved!
