In [None]:
# 1. Imports
import json
import wandb
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, DatasetDict
from evaluate import load as load_metric
from typing import Dict, List

# 🧠 Enable W&B
# No API key is written into the notebook. With no `key` argument, wandb.login()
# resolves the key from the WANDB_API_KEY environment variable or a previously
# stored login, and otherwise prompts for it, so the secret never ends up in
# the saved file or in version control.
wandb.login()

  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/taz/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mtasdeequerockz[0m ([33mtaz-jec[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# 2. Load CUAD data
# The encoding is given explicitly so the JSON file is decoded as UTF-8 on every
# platform instead of falling back to the locale's default encoding.
with open("CUAD_v1/CUAD_v1.json", encoding="utf-8") as f:
    raw = json.load(f)

In [None]:
# 3. Prepare data list
# Flatten the nested data -> paragraphs -> qas -> answers layout into one record
# per answer. Questions flagged `is_impossible` contribute no records.
data = [
    {
        "clause": answer["text"],     # the extracted clause span
        "context": para["context"],   # the full paragraph/context the span comes from
        "label": qa["question"],      # the question text is used as the class label
    }
    for doc in raw["data"]
    for para in doc["paragraphs"]
    for qa in para["qas"]
    if not qa["is_impossible"]
    for answer in qa["answers"]
]

In [None]:
# 4. Encode labels
# Split the records into parallel lists and turn every label string into an
# integer class id. The two lookup tables are inverses of each other and follow
# the order of `le.classes_`.
le = LabelEncoder()
clauses = [record["clause"] for record in data]
contexts = [record["context"] for record in data]
labels = le.fit_transform([record["label"] for record in data])

id2label = dict(enumerate(le.classes_))
label2id = {name: idx for idx, name in id2label.items()}

In [None]:
# 5. Stratified split (including context)
# 80/20 train/validation split. Clauses, contexts and labels go through the same
# call so the three sequences stay aligned; `stratify=labels` splits per label,
# and the fixed random_state makes the split repeatable.
(
    train_clauses, val_clauses,
    train_contexts, val_contexts,
    y_train, y_val,
) = train_test_split(
    clauses,
    contexts,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.2,
)

In [None]:
# 6. Convert to HuggingFace datasets
# Each model input is the clause followed by its context, joined by a literal
# " [SEP] " marker inside a single string.
# NOTE(review): presumably the tokenizer maps the literal "[SEP]" to its
# separator token - confirm against the ./legal-bert vocabulary.
pair_template = "{} [SEP] {}"
train_texts = list(map(pair_template.format, train_clauses, train_contexts))
val_texts = list(map(pair_template.format, val_clauses, val_contexts))

train_dataset = Dataset.from_dict({"label": y_train, "text": train_texts}.copy() and {"text": train_texts, "label": y_train})
val_dataset = Dataset.from_dict({"text": val_texts, "label": y_val})
dataset = DatasetDict({"train": train_dataset, "validation": val_dataset})

In [None]:
# 7. Load tokenizer
# The tokenizer comes from the same "./legal-bert" path that the model weights
# are loaded from, so both are taken from one checkpoint.
tokenizer = AutoTokenizer.from_pretrained("./legal-bert")

def preprocess(examples):
    """Tokenize the "text" column of a batch of examples.

    Sequences are truncated and padded to a fixed length of 512 tokens.
    Returns the tokenizer's output for the batch unchanged.
    """
    tokenizer_options = {"truncation": True, "padding": "max_length", "max_length": 512}
    texts = examples["text"]
    return tokenizer(texts, **tokenizer_options)

# Tokenize both splits of `dataset`. With batched=True, preprocess() receives a
# batch of examples per call rather than a single example.
encoded_dataset = dataset.map(preprocess, batched=True)

Map: 100%|██████████| 11058/11058 [01:55<00:00, 95.84 examples/s] 
Map: 100%|██████████| 2765/2765 [00:27<00:00, 100.90 examples/s]


In [None]:
# 8. Load model
# Sequence-classification model loaded from the local checkpoint. The number of
# labels equals the number of distinct labels, and both label maps are passed in
# so they are stored on the model config.
model = AutoModelForSequenceClassification.from_pretrained(
    "./legal-bert", label2id=label2id, id2label=id2label, num_labels=len(label2id)
)

In [None]:
# 9. Metrics
# The "f1" metric is loaded once here; compute_metrics() reuses this object on
# every evaluation instead of reloading it.
metric = load_metric("f1")

def compute_metrics(p):
    """Return accuracy and macro-averaged F1 for one evaluation pass.

    `p` must expose `predictions` (one row of per-class scores per example) and
    `label_ids` (the reference class ids). The predicted class is the argmax of
    each row.
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    reference_ids = p.label_ids
    accuracy = (predicted_ids == reference_ids).mean()
    macro_f1 = metric.compute(
        predictions=predicted_ids, references=reference_ids, average="macro"
    )["f1"]
    return {"accuracy": accuracy, "f1": macro_f1}

In [None]:
# 10. Training config
# Evaluation and checkpointing both happen once per epoch, which lets
# load_best_model_at_end pick a checkpoint at the end of training. Early stopping
# itself is attached to the Trainer as a callback, not configured here.
args = TrainingArguments(
    # --- bookkeeping / logging ---
    run_name="legal-bert-cuad-context",
    output_dir="./results",
    report_to="wandb",
    # --- schedule ---
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    # --- optimisation ---
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=8,  # adjust to the available GPU memory
    per_device_eval_batch_size=8,   # adjust to the available GPU memory
    # --- model selection ---
    load_best_model_at_end=True,
    metric_for_best_model="f1",  # the "f1" entry returned by compute_metrics
)

In [None]:
# 11. Trainer
# The tokenizer is passed as `processing_class`: the older `tokenizer=` keyword
# is deprecated on Trainer and raises a FutureWarning on this call. This needs a
# transformers release that accepts `processing_class`.
# EarlyStoppingCallback ends training once the model-selection metric has failed
# to improve for 3 evaluations in a row.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

  trainer = Trainer(


In [None]:
# 12. Train
# Runs the fine-tuning loop. The call is the last expression of the cell, so its
# return value (a TrainOutput) is shown as the cell result.
# NOTE(review): in the recorded run validation loss rises after epoch 2 while
# training loss keeps falling, which looks like overfitting - worth confirming
# on a re-run.
trainer.train()

  return forward_call(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0798,0.78839,0.774684,0.528229
2,0.6514,0.668297,0.790235,0.628281
3,0.481,0.682774,0.790958,0.644061
4,0.3997,0.719216,0.788427,0.668365
5,0.3844,0.752847,0.78264,0.658992
6,0.348,0.823402,0.77613,0.663802
7,0.3284,0.830047,0.765642,0.668409
8,0.2906,0.850905,0.76528,0.664612
9,0.2612,0.889018,0.760579,0.657742
10,0.2287,0.896487,0.751537,0.651633


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


TrainOutput(global_step=13830, training_loss=0.480028378368717, metrics={'train_runtime': 11827.2607, 'train_samples_per_second': 9.35, 'train_steps_per_second': 1.169, 'total_flos': 2.910500849719296e+16, 'train_loss': 0.480028378368717, 'epoch': 10.0})

In [None]:
# 💾 13. Save
# The label maps are not re-assigned here: they were already stored on the model
# config when the model was loaded with id2label/label2id, and re-assigning them
# made this cell fail with a NameError whenever those globals were missing from
# the kernel. The cell now only needs `trainer` and `tokenizer`.
# Model weights + config and the tokenizer files go to the same directory so the
# result can be reloaded from a single path.
trainer.save_model("./legal-bert-finetuned")
tokenizer.save_pretrained("./legal-bert-finetuned")


NameError: name 'id2label' is not defined