<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Using_a_pre_trained_model_like_BERT_for_analyzing_legal_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import numpy as np

def get_contract_text():
    """Return the sample contract clauses as a list of strings.

    This is a simulated source: the clauses are hard-coded here rather than
    read from an actual document.
    """
    clauses = [
        "This contract is binding and must be adhered to by both parties.",
        "In case of disputes, arbitration will be sought.",
    ]
    return clauses

# Loaded (tokenizer, model) pairs keyed by (model name, label count), so that
# repeated calls reuse one set of weights instead of reloading them each time.
_LEGAL_BERT_CACHE = {}

def _load_legal_bert(model_name, num_labels):
    """Return the cached (tokenizer, model) pair, loading it on first use."""
    cache_key = (model_name, num_labels)
    if cache_key not in _LEGAL_BERT_CACHE:
        tokenizer = BertTokenizer.from_pretrained(model_name)
        # Size the classification head explicitly. Without num_labels the
        # library default applies (two labels per the transformers config
        # docs), which would make the third entry of the label map
        # unreachable.
        model = BertForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        _LEGAL_BERT_CACHE[cache_key] = (tokenizer, model)
    return _LEGAL_BERT_CACHE[cache_key]

def analyze_legal_text(text):
    """Classify legal text with a BERT sequence classifier.

    Args:
        text: A single string or a list/tuple of strings to classify.

    Returns:
        A list with one label per input text, each drawn from
        'neutral', 'positive' or 'negative'. An empty list/tuple yields [].

    NOTE(review): the label mapping is hypothetical (as the original comments
    state). Whether this checkpoint ships a fine-tuned classification head is
    not visible here; if it does not, the head weights are freshly
    initialised and the labels are illustrative only. TODO confirm.
    """
    # Nothing to classify: skip loading the model and tokenizing an empty batch.
    if isinstance(text, (list, tuple)) and not text:
        return []

    # Hypothetical mapping from class index to a readable label.
    label_map = {0: 'neutral', 1: 'positive', 2: 'negative'}

    model_name = "nlpaueb/legal-bert-small-uncased"
    tokenizer, model = _load_legal_bert(model_name, len(label_map))

    # Tokenize and encode the text as a padded, truncated batch.
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

    # Inference only, so no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax is monotonic, so the arg-max of the raw logits selects the same
    # class as the arg-max of the probabilities.
    label_ids = outputs.logits.argmax(dim=-1).tolist()
    analysis_results = [label_map[label_id] for label_id in label_ids]

    return analysis_results

# Demo run: fetch the sample contract clauses, classify them, and print the
# resulting list of labels.
# NOTE(review): the visible code uses a single BERT classifier; nothing here
# combines it with a second model.
legal_text = get_contract_text()
analysis = analyze_legal_text(legal_text)
print("Legal Document Analysis:", analysis)