In [7]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

In [8]:
MODEL_PATH = r"D:\AI\Projects\Contract_NLP\legalbert_cuad_paragraph"
DATA_DIR = r"D:\AI\Projects\Contract_NLP\CUAD_v1\processed"
TEST_CSV = os.path.join(DATA_DIR, "test_clauses.csv")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 8
MAX_LEN = 512



In [9]:
tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
model = BertForSequenceClassification.from_pretrained(MODEL_PATH)
model.to(DEVICE)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [12]:
class CUADDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=512):
        self.texts = df["paragraph"].tolist()
        self.labels = df["label"].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item



In [13]:
test_df = pd.read_csv(TEST_CSV)
print(f"✅ Loaded Test Set: {len(test_df)} samples")
print(test_df.head())

test_dataset = CUADDataset(test_df, tokenizer, max_len=MAX_LEN)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



✅ Loaded Test Set: 2201 samples
                                           paragraph  \
0  CORPORATE SPONSORSHIP AGREEMENT\n\nThis agreem...   
1  CORPORATE SPONSORSHIP AGREEMENT\n\nThis agreem...   
2  CORPORATE SPONSORSHIP AGREEMENT\n\nThis agreem...   
3  CORPORATE SPONSORSHIP AGREEMENT\n\nThis agreem...   
4  CORPORATE SPONSORSHIP AGREEMENT\n\nThis agreem...   

                            clause          label  
0  CORPORATE SPONSORSHIP AGREEMENT  Document Name  
1         Phoenix Performance, LLC        Parties  
2                           Vendor        Parties  
3                           Torvec        Parties  
4                      Torvec Inc.        Parties  


In [14]:
def evaluate(model, dataloader):
    preds, labels = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            outputs = model(**batch)
            logits = outputs.logits
            batch_preds = torch.argmax(logits, dim=-1)
            preds.extend(batch_preds.cpu().numpy())
            labels.extend(batch["labels"].cpu().numpy())
    return labels, preds




In [16]:
y_true, y_pred = evaluate(model, test_loader)
print("\n📊 Evaluation Report:")
print(classification_report(y_true, y_pred, digits=4))
print(f"✅ Accuracy: {accuracy_score(y_true, y_pred):.4f}")

Evaluating:   0%|          | 0/276 [00:00<?, ?it/s]


TypeError: new(): invalid data type 'str'