In [1]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, BartTokenizer, BartForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load tokenizer and models
# 1. RoBERTa for classification
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

# 2. LegalBERT for classification (better suited for legal texts)
legalbert_tokenizer = AutoTokenizer.from_pretrained('nlpaueb/legal-bert-base-uncased')
legalbert_model = AutoModelForSequenceClassification.from_pretrained('nlpaueb/legal-bert-base-uncased', num_labels=2)

# 3. BART for summarization
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

print("Models and tokenizers loaded successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Models and tokenizers loaded successfully!


In [16]:
# Load the legal text file
with open('/legal_document2.txt', 'r') as f:
    legal_text2 = f.read()

print(legal_text2[:])

with open('/legal_document.txt', 'r') as f:
    legal_text = f.read()

print( legal_text[:])

In the case of Johnson v. The City of Redwood, the plaintiff, Ms. Emily Johnson, filed a civil lawsuit against the city, alleging negligence on the part of the city’s maintenance department. Ms. Johnson sustained serious injuries after tripping and falling on an uneven sidewalk in a residential area. She claimed that the city failed to properly maintain the sidewalks, leading to hazardous conditions that caused her fall.

The defendant, the City of Redwood, argued that they were unaware of the specific sidewalk defect and that they cannot be held liable for every potential hazard within city limits. They contended that they had a reasonable inspection schedule in place and that the defect had not been reported before Ms. Johnson’s accident.

The plaintiff, however, presented evidence showing that several residents had previously reported the uneven sidewalk to the city on multiple occasions over a period of six months, but no action was taken to repair it. Additionally, expert testimon

In [33]:
# Summarization using BART
inputs = bart_tokenizer([legal_text], max_length=1024, return_tensors='pt', truncation=True)
summary_ids = bart_model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)

# Decode and print summary
summary = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary:", summary)

inputs2 = bart_tokenizer([legal_text2], max_length=1024, return_tensors='pt', truncation=True)
summary_ids2 = bart_model.generate(inputs2['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)

# Decode and print summary
summary2 = bart_tokenizer.decode(summary_ids2[0], skip_special_tokens=True)
print("\n Summary:", summary2)

Summary: The court ruled in favor of the defendant, Omega Pharmaceuticals, and dismissed the plaintiff's claims. The court found that the plaintiff had failed to prove that the product was unreasonably dangerous or that the defendant breached their duty.

 Summary: Emily Johnson sustained serious injuries after tripping and falling on an uneven sidewalk in a residential area. She claimed that the city failed to properly maintain the sidewalks, leading to hazardous conditions that caused her fall. The defendant, the City of Redwood, argued that they were unaware of the specific sidewalk defect.


In [34]:
# Tokenize the text for RoBERTa
inputs = roberta_tokenizer(legal_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs = roberta_model(**inputs)
logits = outputs.logits
predicted_class = logits.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
roberta_label = "In Favor of Plaintiff" if predicted_class == 1 else "In Favor of Defendant"
print("RoBERTa Prediction:", roberta_label)



RoBERTa Prediction: In Favor of Plaintiff


In [35]:
inputs2 = roberta_tokenizer(legal_text2, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs2 = roberta_model(**inputs2)  # Use inputs2 here
logits2 = outputs2.logits           # Use outputs2 here
predicted_class2 = logits2.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
roberta_label2 = "In Favor of Defendant" if predicted_class2 == 0 else "In Favor of Plaintiff"
print("RoBERTa Prediction :", roberta_label2)

RoBERTa Prediction : In Favor of Defendant


In [36]:
# Tokenize the text for LegalBERT
inputs = legalbert_tokenizer(legal_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs = legalbert_model(**inputs)
logits = outputs.logits
predicted_class = logits.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
legalbert_label = "In Favor of Plaintiff" if predicted_class == 1 else "In Favor of Defendant"
print("LegalBERT Prediction:", legalbert_label)



LegalBERT Prediction: In Favor of Defendant


In [37]:
# Tokenize the text for LegalBERT
inputs2 = legalbert_tokenizer(legal_text2, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs2 = legalbert_model(**inputs2)
logits2 = outputs2.logits
predicted_class2 = logits2.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
legalbert_label2 = "In Favor of Defendant" if predicted_class2 == 0 else "In Favor of Plaintiff"
print("LegalBERT Prediction:", legalbert_label2)

LegalBERT Prediction: In Favor of Plaintiff


In [38]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Load DistilBERT for classification
distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
distilbert_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

# Tokenize the text for DistilBERT
inputs = distilbert_tokenizer(legal_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs = distilbert_model(**inputs)
logits = outputs.logits
predicted_class = logits.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
distilbert_label = "In Favor of Plaintiff" if predicted_class == 1 else "In Favor of Defendant"
print("DistilBERT Prediction:", distilbert_label)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBERT Prediction: In Favor of Defendant


In [39]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Load DistilBERT for classification
distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
distilbert_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

# Tokenize the text for DistilBERT
inputs2 = distilbert_tokenizer(legal_text2, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Perform classification
outputs2 = distilbert_model(**inputs2)
logits2 = outputs2.logits
predicted_class2 = logits2.argmax(dim=-1).item()

# Display the result (0 for against, 1 for in favor)
distilbert_label2 = "In Favor of Defendant" if predicted_class2 == 0 else "In Favor of Plaintiff"
print("DistilBERT Prediction for legal_text2:", distilbert_label2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBERT Prediction for legal_text2: In Favor of Defendant
