In [1]:
import json, csv
from transformers import AutoTokenizer, LongformerForSequenceClassification
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import LongformerTokenizer, LongformerForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [2]:
# 1. Load and prepare data
class ContractNLIDataset(Dataset):
    """Map-style dataset that tokenizes (document, hypothesis) pairs on access.

    Args:
        texts: Indexable collection of documents (the NLI premises).
        hypotheses: Indexable collection of hypotheses, aligned with ``texts``.
        labels: Indexable collection of integer class ids, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, text_pair, **kwargs)``
            that returns ``input_ids`` and ``attention_mask`` tensors.
        max_length: Length every example is padded or truncated to.

    Raises:
        ValueError: If ``texts``, ``hypotheses`` and ``labels`` differ in length.
    """

    def __init__(self, texts, hypotheses, labels, tokenizer, max_length=4096):
        if not (len(texts) == len(hypotheses) == len(labels)):
            raise ValueError(
                'texts, hypotheses and labels must have the same length, got '
                f'{len(texts)}, {len(hypotheses)} and {len(labels)}'
            )
        self.tokenizer = tokenizer
        self.texts = texts
        self.hypotheses = hypotheses
        self.labels = labels
        self.max_length = max_length

    def __len__(self):
        """Return the number of (document, hypothesis) examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return its model inputs and label.

        Returns:
            dict with 1-D ``input_ids`` and ``attention_mask`` tensors and a
            scalar ``labels`` tensor of dtype ``torch.long``.
        """
        text = str(self.texts[idx])
        hypothesis = str(self.hypotheses[idx])

        # Encode as a true sequence pair and truncate only the document.
        # Concatenating both into one string and truncating from the right
        # silently drops the hypothesis whenever the document alone exceeds
        # max_length, leaving the model nothing to classify against.
        encoding = self.tokenizer(
            text,
            hypothesis,
            max_length=self.max_length,
            padding='max_length',
            truncation='only_first',
            return_tensors='pt'
        )

        return {
            # The tokenizer returns a leading batch dimension of 1; drop it so
            # the DataLoader can add its own.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }
    
def prepare_data(json_path, label_encoder=None):
    """Load one dataset split from JSON and encode its labels as integers.

    The JSON must be loadable into a DataFrame with ``document``,
    ``hypothesis`` and ``label`` columns.

    Args:
        json_path: Path to the JSON file of the split.
        label_encoder: Optional, already-fitted encoder. When given, it is
            reused via ``transform`` so that this split shares the label ids
            of the split it was fitted on. When omitted, a new
            ``LabelEncoder`` is fitted on this split's labels.

    Returns:
        ``((texts, hypotheses, labels), label_encoder)`` where the first three
        are aligned arrays and ``label_encoder`` is the encoder that produced
        ``labels``.

    Raises:
        KeyError: If a required field is missing from the data.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Convert JSON to DataFrame
    df = pd.DataFrame(data)

    missing = [col for col in ('document', 'hypothesis', 'label') if col not in df.columns]
    if missing:
        raise KeyError(f'{json_path} is missing required field(s): {missing}')

    # Convert labels to integer ids, reusing the caller's mapping if provided.
    if label_encoder is None:
        label_encoder = LabelEncoder()
        labels = label_encoder.fit_transform(df['label'])
    else:
        labels = label_encoder.transform(df['label'])

    texts = df['document'].values
    hypotheses = df['hypothesis'].values

    return (texts, hypotheses, labels), label_encoder


In [3]:
# Initialize tokenizer and model
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
model = LongformerForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096',
    num_labels=3  # Neutral, Entailment, Contradiction
)

# Load the three splits. prepare_data() returns the encoder it used for each
# split, so keep all of them to reconcile the label ids below.
(train_texts, train_hypotheses, train_labels), label_encoder = prepare_data('train.json')
(test_texts, test_hypotheses, test_labels), test_label_encoder = prepare_data('test.json')
(val_texts, val_hypotheses, val_labels), val_label_encoder = prepare_data('dev.json')

# Re-express test/dev labels in the training encoder's id space. The model is
# trained on the training ids and evaluation decodes predictions with
# `label_encoder`, so a split whose own encoder assigned different ids (e.g.
# because one class is absent from it) would otherwise be scored wrongly.
# transform() raises if a split contains a label never seen in training.
test_labels = label_encoder.transform(test_label_encoder.inverse_transform(test_labels))
val_labels = label_encoder.transform(val_label_encoder.inverse_transform(val_labels))

# Every training label id must be addressable by the classification head.
if len(label_encoder.classes_) > model.config.num_labels:
    raise ValueError(
        f'train.json has {len(label_encoder.classes_)} classes but the model '
        f'was created with num_labels={model.config.num_labels}'
    )

# Create datasets
train_dataset = ContractNLIDataset(
    train_texts, train_hypotheses, train_labels, tokenizer
)

test_dataset = ContractNLIDataset(
    test_texts, test_hypotheses, test_labels, tokenizer
)

val_dataset = ContractNLIDataset(
    val_texts, val_hypotheses, val_labels, tokenizer
)

# Create dataloaders - Note: Smaller batch size due to longer sequences
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1)
val_loader = DataLoader(val_dataset, batch_size=1)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:

def train_model():
    """Fine-tune the module-level ``model`` and keep its best checkpoint.

    Reads the module-level ``model``, ``train_loader`` and ``val_loader``.
    Trains for 3 epochs with AdamW (lr=2e-5), using mixed precision when CUDA
    is available. After each epoch the model is evaluated on ``val_loader``
    and its state dict is written to ``'Longformer.pth'`` only if the mean
    validation loss improved on the best value seen so far.

    Returns:
        None. Per-epoch training loss, validation loss and validation
        accuracy are printed.
    """
    # Training setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # torch.cuda.amp is only meaningful on CUDA; disable it explicitly on CPU.
    use_amp = device.type == 'cuda'
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    num_epochs = 3
    best_val_loss = float('inf')
    scaler = GradScaler(enabled=use_amp)

    # Training loop
    # NOTE: no checkpoint is written at the start of an epoch. Doing so would
    # overwrite the best-validation checkpoint with whatever weights the
    # previous epoch ended on (and, before epoch 1, with untrained weights).
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        for batch in train_pbar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            with autocast(enabled=use_amp):
                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )
                loss = outputs.loss

            # Scale the loss so fp16 gradients do not underflow; step() skips
            # the update if the unscaled gradients contain inf/NaN.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            batch_loss = loss.item()
            train_loss += batch_loss

            # Update progress bar with current loss
            train_pbar.set_postfix({'loss': f'{batch_loss:.4f}'})

        # max(..., 1) avoids ZeroDivisionError on an empty loader.
        avg_train_loss = train_loss / max(len(train_loader), 1)

        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Valid]')
        with torch.no_grad():
            for batch in val_pbar:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )

                batch_val_loss = outputs.loss.item()
                val_loss += batch_val_loss
                predictions = torch.argmax(outputs.logits, dim=1)
                correct += (predictions == labels).sum().item()
                total += labels.size(0)

                # Update validation progress bar
                val_pbar.set_postfix({
                    'loss': f'{batch_val_loss:.4f}',
                    'acc': f'{100*correct/total:.2f}%'
                })

        avg_val_loss = val_loss / max(len(val_loader), 1)
        accuracy = 100 * correct / total if total else 0.0

        # Persist only the weights with the lowest validation loss so far.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), 'Longformer.pth')

        print(f'\nEpoch {epoch+1} Summary:')
        print(f'Training Loss: {avg_train_loss:.4f}')
        print(f'Validation Loss: {avg_val_loss:.4f}')
        print(f'Validation Accuracy: {accuracy:.2f}%\n')

In [5]:
# Run fine-tuning. train_model() writes the model weights to 'Longformer.pth',
# which the evaluation cell further down loads with torch.load().
train_model()

Epoch 1/3 [Train]:   0%|          | 3/7191 [00:04<3:02:12,  1.52s/it, loss=1.0488]


KeyboardInterrupt: 

In [6]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def evaluate_model(model, test_loader, device, label_encoder):
    """Run ``model`` over ``test_loader`` and compute classification metrics.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        test_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        device: Device the model lives on; inputs are moved to it.
        label_encoder: Fitted encoder used to map integer ids back to the
            original class names.

    Returns:
        dict with ``accuracy``, ``macro_precision``, ``macro_recall`` and
        ``macro_f1`` as Python floats, plus ``class_metrics`` holding the
        per-class ``classes``, ``precision``, ``recall`` and ``f1`` as plain
        lists. Only native Python types are returned so the result can be
        passed straight to ``json.dump``.
    """
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Evaluating'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )

            predictions = torch.argmax(outputs.logits, dim=1)

            all_predictions.extend(predictions.cpu().tolist())
            # Labels are only compared on the host, so they are never moved
            # to the device; .cpu() is a no-op if they are already there.
            all_labels.extend(batch['labels'].cpu().tolist())

    # Convert predictions and labels back to original classes
    pred_classes = label_encoder.inverse_transform(all_predictions)
    true_classes = label_encoder.inverse_transform(all_labels)

    # Per-class metrics, ordered like label_encoder.classes_
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_classes,
        pred_classes,
        average=None,
        labels=label_encoder.classes_
    )

    # Macro averages
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
        true_classes,
        pred_classes,
        average='macro'
    )

    accuracy = accuracy_score(true_classes, pred_classes)

    # NumPy arrays are not JSON serializable; convert everything to native
    # Python lists and floats before returning.
    return {
        'accuracy': float(accuracy),
        'macro_precision': float(macro_precision),
        'macro_recall': float(macro_recall),
        'macro_f1': float(macro_f1),
        'class_metrics': {
            'classes': label_encoder.classes_.tolist(),
            'precision': precision.tolist(),
            'recall': recall.tolist(),
            'f1': f1.tolist()
        }
    }

In [7]:
 # Load the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LongformerForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096',
    num_labels=3
)

# map_location lets a checkpoint that was saved from a GPU be restored on a
# CPU-only host; without it torch.load tries to rebuild the CUDA tensors.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
model.load_state_dict(torch.load('Longformer.pth', map_location=device))
model.to(device)

# Evaluate the model
results = evaluate_model(model, test_loader, device, label_encoder)


# Save the results to a JSON file
def _to_jsonable(value):
    """json.dump fallback: turn NumPy arrays/scalars into native Python values."""
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable')


with open('results.json', 'w') as f:
    json.dump(results, f, indent=4, default=_to_jsonable)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Evaluating:   0%|          | 7/2091 [00:16<1:23:32,  2.41s/it]


KeyboardInterrupt: 