# Translation

### English to Hindi

In [1]:
from transformers import MarianTokenizer, MarianMTModel
import zipfile
import os

# Directory of the English -> Hindi Marian checkpoint (Kaggle input dataset).
translator_model_English_to_Hindi = "/kaggle/input/english-to-hindi/2000"

# Load the tokenizer first, then the model, both from the same checkpoint directory.
tokenizer_English_to_Hindi, model_English_to_Hindi = (
    loader.from_pretrained(translator_model_English_to_Hindi)
    for loader in (MarianTokenizer, MarianMTModel)
)

# Simple marker so the cell output shows when loading has finished.
print("done")


done


In [9]:
def translate_English_to_Hindi(src_text):
  """Translate English text to Hindi with the loaded Marian model.

  Args:
    src_text: English source text, normally a single sentence as a string.
      If a list of strings is passed, only the translation of the first
      element is returned (the function returns ``hindi_text[0]``).

  Returns:
    The Hindi translation as a string, or "" when ``src_text`` is ``None``,
    empty, or whitespace-only.
  """
  # Nothing to translate: skip the model call rather than decoding an empty input.
  if src_text is None or (isinstance(src_text, str) and not src_text.strip()):
    return ""

  # Call the tokenizer directly: `prepare_seq2seq_batch` is deprecated in
  # transformers, and for source-only input the plain call is the documented
  # replacement.
  inputs = tokenizer_English_to_Hindi(
    src_text,
    return_tensors="pt",
    padding=True,
    truncation=True
  )
  # Cap the generated sequence at 128 tokens.
  translated = model_English_to_Hindi.generate(**inputs, max_length=128)
  hindi_text = tokenizer_English_to_Hindi.batch_decode(translated, skip_special_tokens=True)
  return hindi_text[0]


In [10]:
from nltk.tokenize import sent_tokenize



# Example paragraph
paragraph = "Hello, how are you? I hope you're doing well. It's nice to meet you."

# Break the paragraph into sentences so each one is translated on its own
sentences = sent_tokenize(paragraph)

# Translate sentence by sentence, then stitch the pieces back together with spaces
translated_sentences = map(translate_English_to_Hindi, sentences)
translated_paragraph = " ".join(translated_sentences)

print(translated_paragraph)


आप कैसे हैं? उम्मीद है कि आप अच्छी तरह से कर रहे हैं। यह आपसे मिलने के लिए अच्छा है।


### Hindi to English

In [12]:
# Directory of the Hindi -> English Marian checkpoint (Kaggle input dataset).
translator_model_Hindi_to_English = "/kaggle/input/hindi-to-english-model/hi-en"

# Load the tokenizer first, then the model, both from the same checkpoint directory.
tokenizer_Hindi_to_English, model_Hindi_to_English = (
    loader.from_pretrained(translator_model_Hindi_to_English)
    for loader in (MarianTokenizer, MarianMTModel)
)

# Simple marker so the cell output shows when loading has finished.
print("done")

done


In [13]:
def translate_Hindi_to_English(src_text):
  """Translate Hindi text to English with the loaded Marian model.

  Args:
    src_text: Hindi source text, normally a single sentence as a string.
      If a list of strings is passed, only the translation of the first
      element is returned (the function returns ``english_text[0]``).

  Returns:
    The English translation as a string, or "" when ``src_text`` is ``None``,
    empty, or whitespace-only.
  """
  # Nothing to translate: skip the model call rather than decoding an empty input.
  if src_text is None or (isinstance(src_text, str) and not src_text.strip()):
    return ""

  # Call the tokenizer directly: `prepare_seq2seq_batch` is deprecated in
  # transformers, and for source-only input the plain call is the documented
  # replacement.
  inputs = tokenizer_Hindi_to_English(
    src_text,
    return_tensors="pt",
    padding=True,
    truncation=True
  )
  # Cap the generated sequence at 128 tokens.
  translated = model_Hindi_to_English.generate(**inputs, max_length=128)
  english_text = tokenizer_Hindi_to_English.batch_decode(translated, skip_special_tokens=True)
  return english_text[0]


In [14]:
import re

from nltk.tokenize import sent_tokenize



# Example paragraph
paragraph = "आप कैसे हैं? उम्मीद है कि आप अच्छी तरह से कर रहे हैं। यह आपसे मिलने के लिए अच्छा है।"

# Tokenize the paragraph into sentences.
# NLTK's default Punkt tokenizer only treats ".", "?" and "!" as sentence
# terminators, so the Devanagari danda ("।") that ends Hindi sentences is not
# a boundary for it. Split every chunk again after each danda (or double
# danda) so the translator receives one sentence at a time.
sentences = [
    sentence.strip()
    for chunk in sent_tokenize(paragraph)
    for sentence in re.split(r"(?<=[।॥])\s*", chunk)
    if sentence.strip()  # drop the empty piece left after a trailing danda
]

# Translate each sentence and combine the result
translated_paragraph = " ".join([translate_Hindi_to_English(sentence) for sentence in sentences])

print(translated_paragraph)


How are you? well expected that you are well paying tax. It is good for you to receive it.


# Extractive Summary

In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from nltk.tokenize import sent_tokenize

# Location of the fine-tuned LegalBERT checkpoint (trained to predict similarity score)
extractive_model_dir = "/kaggle/input/legalbert-extractive-with-model-and-checkpoint/legalbert-extractive/final-model"

# Tokenizer and model both come from the same checkpoint directory
tokenizer_extractive = AutoTokenizer.from_pretrained(extractive_model_dir)
model_extractive = AutoModelForSequenceClassification.from_pretrained(extractive_model_dir)

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Inference only: move the model to the chosen device and switch to eval mode
model_extractive.to(device)
model_extractive.eval()

MAX_TOKENS = 1024  # Total token limit for the summary

def score_sentences(sentences, batch_size=32):
    """Score every sentence with the extractive model.

    Args:
        sentences: Iterable of sentence strings to score.
        batch_size: Number of sentences sent through the model per forward
            pass. Batching keeps memory use bounded for long documents
            instead of pushing every sentence through in a single pass.

    Returns:
        A list of ``(sentence, score)`` tuples in the same order as
        ``sentences``. An empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    sentences = list(sentences)
    # The tokenizer cannot build a batch out of zero sentences.
    if not sentences:
        return []

    scores = []
    for start in range(0, len(sentences), batch_size):
        batch = sentences[start:start + batch_size]
        inputs = tokenizer_extractive(batch, padding=True, truncation=True, max_length=256, return_tensors="pt")
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad():
            logits = model_extractive(**inputs).logits

        # Drop only the trailing logit dimension so a one-sentence batch still
        # produces a list rather than a bare float.
        # NOTE(review): assumes the model head emits a single logit per
        # sentence (regression-style score) -- confirm against the checkpoint.
        scores.extend(logits.squeeze(-1).tolist())

    return list(zip(sentences, scores))

def generate_extractive_summary_from_model(judgement_text, max_tokens=None, keep_document_order=False):
    """Build an extractive summary from the highest-scoring sentences.

    The text is split into sentences, each sentence is scored by
    ``score_sentences``, and sentences are taken in descending score order
    until the next one would exceed the token budget.

    Args:
        judgement_text: Full text of the judgement.
        max_tokens: Token budget for the summary, counted with
            ``tokenizer_extractive.tokenize``. ``None`` (default) uses the
            module-level ``MAX_TOKENS``.
        keep_document_order: When True, the selected sentences are emitted in
            the order they appear in the judgement. When False (default) they
            are emitted in descending score order.

    Returns:
        The selected sentences joined with single spaces, or "" when the
        input is empty or contains no sentences.
    """
    # Guard against empty input before touching the tokenizer or the model.
    if not judgement_text or not judgement_text.strip():
        return ""

    budget = MAX_TOKENS if max_tokens is None else max_tokens

    # Step 1: Split judgment into sentences
    sentences = sent_tokenize(judgement_text)
    if not sentences:
        return ""

    # Step 2: Score each sentence using the model
    scored_sentences = score_sentences(sentences)

    # Step 3: Rank sentence positions by score (descending). The sort is
    # stable, so equally scored sentences keep their document order.
    ranked_positions = sorted(
        range(len(scored_sentences)),
        key=lambda position: scored_sentences[position][1],
        reverse=True,
    )

    # Step 4: Select top sentences within the token budget; stop at the first
    # sentence that does not fit.
    selected_positions = []
    total_tokens = 0
    for position in ranked_positions:
        sentence = scored_sentences[position][0]
        token_count = len(tokenizer_extractive.tokenize(sentence))
        if total_tokens + token_count > budget:
            break
        selected_positions.append(position)
        total_tokens += token_count

    if keep_document_order:
        selected_positions.sort()

    # Step 5: Return the extractive summary
    return " ".join(scored_sentences[position][0] for position in selected_positions)

# Example usage:
if __name__ == "__main__":
    # Sample judgement from the IN-Abs test split
    judgement_path = "/kaggle/input/testing-legal-dataset/dataset/IN-Abs/test-data/judgement/1195.txt"
    with open(judgement_path, mode="r", encoding="utf-8") as judgement_file:
        judgement_text = judgement_file.read()

    extractive_summary = generate_extractive_summary_from_model(judgement_text)

    # Header and summary are written on separate lines, exactly as two prints would
    print("\n--- Extractive Summary ---\n", extractive_summary, sep="\n")


--- Extractive Summary ---

Learned counsel for the appellant contends that under the Rules the respondents could request the mining authorities to exempt them from the operation of rule 76 of the Indian Coal Mines Regulation, 1946, and if exemption was granted, they could remove the coal left by the appellant in the encroached area. Learned counsel for the appellant contends that the said boundary should be fixed solely with reference to the boundaries given in the lease of 1894, whereas learned counsel for the respondents contends that no plan has been annexed to the said lease and, therefore, the boundary could more satisfactorily and definitely be fixed with reference to the plans annexed to the subsequent lease deeds executed in favour of the successors in interest of the appellant and the respondents. The plaintiffs case was that as a result of a letter written by the Inspector of Mines on August 18, 1941, the plaintiffs made an inquiry and came to know that the defendants had e

# Abstractive Summary

In [19]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
import torch

# Directory holding the fine-tuned Pegasus summarizer and its tokenizer
model_path = "/kaggle/input/legal-pegasus-summarizer/legal-pegasus-summarizer"

tokenizer_abstractive = PegasusTokenizer.from_pretrained(model_path)

# Load the weights, then place the model on the GPU when available (CPU otherwise)
model_abstractive = PegasusForConditionalGeneration.from_pretrained(model_path)
model_abstractive = model_abstractive.to("cuda" if torch.cuda.is_available() else "cpu")

def summarize(text, max_input_length=1024, max_output_length=256):
    """Generate an abstractive summary of ``text`` with the Pegasus model.

    Args:
        text: Source text to summarize.
        max_input_length: Maximum number of input tokens; longer inputs are
            truncated.
        max_output_length: Maximum number of tokens in the generated summary.

    Returns:
        The decoded summary string, or "" when ``text`` is empty or
        whitespace-only.
    """
    # Nothing to summarize: avoid generating text from an empty input.
    if not text or not text.strip():
        return ""

    # Tokenize the input text. Pad only to the longest sequence in the batch:
    # padding a single text out to `max_input_length` makes the encoder
    # process pad tokens that the attention mask hides anyway.
    inputs = tokenizer_abstractive(
        text,
        max_length=max_input_length,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    )

    # Keep the tensors on the same device as the model.
    input_ids = inputs["input_ids"].to(model_abstractive.device)
    attention_mask = inputs["attention_mask"].to(model_abstractive.device)

    # Generate summary
    summary_ids = model_abstractive.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
        num_beams=5, # Using beam search for better summaries
        early_stopping=True
    )

    # Decode and return summary
    summary = tokenizer_abstractive.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Example usage
if __name__ == "__main__":
    # Summarize the extractive summary produced by the earlier cell
    generated_summary = summarize(extractive_summary)
    print(f"Summary: {generated_summary}")


Summary: According to the Indian Mines Act and the Indian Coal Mines Regulation, 1946, the respondents encroached upon the appellant 's coal mines on the northern side and removed coal from the encroached portion and rendered the remaining coal of the encroached portion unworkable. The appellant gave evidence to show that the encroachment was prior to 1932, but there is no acceptable evidence on their part to establish that the respondents came to know of the removal of coal by the appellant or their possession of the coal removed beyond three years prior to the suit. The map, exhibit 3(b), annexed to the lease deed executed in favour of the respondents in interest clearly demarcates the boundary line between the holdings of the appellant and the respondents, and according to that plan, the disputed extent falls within the boundary of the respondents ' holding. In other words, he has to prove that he obtained the knowledge of the defendant 's possession of the property within three yea