In [1]:
!pip install -q --upgrade transformers datasets pandas evaluate rouge_score sentencepiece bert-score accelerate

# Install BLEURT directly from its GitHub repository
!pip install -q git+https://github.com/google-research/bleurt.git

In [None]:
import pandas as pd
import torch
import evaluate
from tqdm.notebook import tqdm
from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer, MT5ForConditionalGeneration

# --- Configuration ---
MODEL_PATH = "mt5-base-cnn-summarizer-en-hi_v3/final_model"
FULL_DATA_PATH = "../Dataset/final_cleaned_dataset_CNN.csv"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

PREFIX_ENG = "summarize English: "
PREFIX_HIN = "summarize Hindi: "

Using device: cuda


In [None]:
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

print(f"Loading model and moving to {DEVICE}...")

model = MT5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(DEVICE)
model.eval()  # Set model to evaluation mode

print("Model loaded successfully.")

Loading tokenizer...


You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


Loading model and moving to cuda...
Model loaded successfully.


Quantitative Evaluation - Trial 1

In [None]:
# Load the full original dataset
print(f"Loading full dataset from: {FULL_DATA_PATH}")
df = pd.read_csv(FULL_DATA_PATH, engine="python", on_bad_lines="skip")
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
raw_dataset = Dataset.from_pandas(df)


# Define the same formatting function used in training
def format_dataset(batch):
    inputs, targets = [], []
    for article, eng_summary, hin_summary in zip(
        batch["raw_news_article"], batch["english_summary"], batch["hindi_summary"]
    ):
        if isinstance(article, str):
            inputs.append(PREFIX_ENG + article)
            targets.append(eng_summary)
            inputs.append(PREFIX_HIN + article)
            targets.append(hin_summary)
    return {"inputs": inputs, "targets": targets}


# Process and split the dataset
print("Processing and splitting the dataset...")
processed_dataset = raw_dataset.map(
    format_dataset, batched=True, remove_columns=raw_dataset.column_names
).flatten()

# IMPORTANT: Use the same test_size and seed to get the identical test set
train_test_split = processed_dataset.train_test_split(test_size=0.1, seed=42)
test_dataset = train_test_split["test"]

# For a quick test, let's use a smaller sample. Remove .select() for the full evaluation.
test_sample = test_dataset.select(range(100))

print(f"Recreated test set with {len(test_sample)} samples for evaluation.")

Loading full dataset from: ../Dataset/final_cleaned_dataset_CNN.csv
Processing and splitting the dataset...


Map:   0%|          | 0/4919 [00:00<?, ? examples/s]

Recreated test set with 100 samples for evaluation.


In [7]:
# Load metrics
print("Loading evaluation metrics...")
rouge_metric = evaluate.load("rouge")
bleurt_metric = evaluate.load("bleurt", module_type="metric", checkpoint="BLEURT-20")
bertscore_metric = evaluate.load("bertscore")

# Generate predictions
predictions = []
references = []
print("Generating summaries for the test set...")

for example in tqdm(test_sample):
    inputs = tokenizer(
        example["inputs"], return_tensors="pt", max_length=1024, truncation=True
    ).to(DEVICE)
    summary_ids = model.generate(
        inputs.input_ids, max_length=256, num_beams=4, early_stopping=True
    )
    prediction = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    predictions.append(prediction)
    references.append(example["targets"])

# Separate by language (even indices are English, odd are Hindi)
eng_preds = predictions[::2]
hin_preds = predictions[1::2]
eng_refs = references[::2]
hin_refs = references[1::2]

# Compute and display results
print("\n--- Computing English Metrics ---")
rouge_eng = rouge_metric.compute(predictions=eng_preds, references=eng_refs)
bleurt_eng = bleurt_metric.compute(predictions=eng_preds, references=eng_refs)
bert_eng = bertscore_metric.compute(
    predictions=eng_preds, references=eng_refs, lang="en"
)

print(f"  ROUGE-2: {rouge_eng['rouge2'] * 100:.2f}")
print(f"  BLEURT Score: {sum(bleurt_eng['scores']) / len(bleurt_eng['scores']):.4f}")
print(
    f"  BERTScore Precision: {sum(bert_eng['precision']) / len(bert_eng['precision']):.4f}"
)


print("\n--- Computing Hindi Metrics ---")
rouge_hin = rouge_metric.compute(predictions=hin_preds, references=hin_refs)
bleurt_hin = bleurt_metric.compute(predictions=hin_preds, references=hin_refs)
bert_hin = bertscore_metric.compute(
    predictions=hin_preds, references=hin_refs, lang="hi"
)

print(f"  ROUGE-2: {rouge_hin['rouge2'] * 100:.2f}")
print(f"  BLEURT Score: {sum(bleurt_hin['scores']) / len(bleurt_hin['scores']):.4f}")
print(
    f"  BERTScore Precision: {sum(bert_hin['precision']) / len(bert_hin['precision']):.4f}"
)

Loading evaluation metrics...


Downloading builder script: 0.00B [00:00, ?B/s]

Using default BLEURT-Base checkpoint for sequence maximum length 128. You can use a bigger model for better results with e.g.: evaluate.load('bleurt', 'bleurt-large-512').


Downloading data:   0%|          | 0.00/405M [00:00<?, ?B/s]


INFO:tensorflow:Reading checkpoint C:\Users\admin\.cache\huggingface\metrics\bleurt\default\downloads\extracted\64a145a740562dda9fae1ce4fb71155ccaf922d41c2355bee049709b8590e973\bleurt-base-128.
INFO:tensorflow:Config file found, reading.
INFO:tensorflow:Will load checkpoint bert_custom
INFO:tensorflow:Loads full paths and checks that files exists.
INFO:tensorflow:... name:bert_custom
INFO:tensorflow:... vocab_file:vocab.txt
INFO:tensorflow:... bert_config_file:bert_config.json
INFO:tensorflow:... do_lower_case:True
INFO:tensorflow:... max_seq_length:128
INFO:tensorflow:Creating BLEURT scorer.
INFO:tensorflow:Creating WordPiece tokenizer.

INFO:tensorflow:WordPiece tokenizer instantiated.
INFO:tensorflow:Creating Eager Mode predictor.
INFO:tensorflow:Loading model.
INFO:tensorflow:BLEURT initialized.


INFO:tensorflow:BLEURT initialized.


Downloading builder script: 0.00B [00:00, ?B/s]

Generating summaries for the test set...


  0%|          | 0/100 [00:00<?, ?it/s]


--- Computing English Metrics ---


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  ROUGE-2: 17.94
  BLEURT Score: -0.3045
  BERTScore Precision: 0.8991

--- Computing Hindi Metrics ---


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

  ROUGE-2: 18.13
  BLEURT Score: -0.2792
  BERTScore Precision: 0.7319


Quantitative Evaluation - Trial 2

In [10]:
# Load the full original dataset
print(f"Loading full dataset from: {FULL_DATA_PATH}")
df = pd.read_csv(FULL_DATA_PATH, engine="python", on_bad_lines="skip")
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
raw_dataset = Dataset.from_pandas(df)


# Define the same formatting function used in training
def format_dataset(batch):
    inputs, targets = [], []
    for article, eng_summary, hin_summary in zip(
        batch["raw_news_article"], batch["english_summary"], batch["hindi_summary"]
    ):
        if isinstance(article, str):
            inputs.append(PREFIX_ENG + article)
            targets.append(eng_summary)
            inputs.append(PREFIX_HIN + article)
            targets.append(hin_summary)
    return {"inputs": inputs, "targets": targets}


# Process and split the dataset
print("Processing and splitting the dataset...")
processed_dataset = raw_dataset.map(
    format_dataset, batched=True, remove_columns=raw_dataset.column_names
).flatten()

# IMPORTANT: Use the same test_size and seed to get the identical test set
train_test_split = processed_dataset.train_test_split(test_size=0.1, seed=42)
test_dataset = train_test_split["test"]

# For a quick test, let's use a smaller sample. Remove .select() for the full evaluation.
test_sample = test_dataset.select(range(100))

print(f"Recreated test set with {len(test_sample)} samples for evaluation.")

Loading full dataset from: ../Dataset/final_cleaned_dataset_CNN.csv
Processing and splitting the dataset...


Map:   0%|          | 0/4919 [00:00<?, ? examples/s]

Recreated test set with 100 samples for evaluation.


In [11]:
# Load metrics
print("Loading evaluation metrics...")
rouge_metric = evaluate.load("rouge")
bleurt_metric = evaluate.load("bleurt", module_type="metric", checkpoint="BLEURT-20")
bertscore_metric = evaluate.load("bertscore")

# Generate predictions
predictions = []
references = []
print("Generating summaries for the test set...")

for example in tqdm(test_sample):
    inputs = tokenizer(
        example["inputs"], return_tensors="pt", max_length=1024, truncation=True
    ).to(DEVICE)
    summary_ids = model.generate(
        inputs.input_ids, max_length=256, num_beams=4, early_stopping=True
    )
    prediction = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    predictions.append(prediction)
    references.append(example["targets"])

# Separate by language (even indices are English, odd are Hindi)
eng_preds = predictions[::2]
hin_preds = predictions[1::2]
eng_refs = references[::2]
hin_refs = references[1::2]

# Compute and display results
print("\n--- Computing English Metrics ---")
rouge_eng = rouge_metric.compute(predictions=eng_preds, references=eng_refs)
bleurt_eng = bleurt_metric.compute(predictions=eng_preds, references=eng_refs)
bert_eng = bertscore_metric.compute(
    predictions=eng_preds, references=eng_refs, lang="en"
)

print(f"  ROUGE-2: {rouge_eng['rouge2'] * 100:.2f}")
print(f"  BLEURT Score: {sum(bleurt_eng['scores']) / len(bleurt_eng['scores']):.4f}")
print(
    f"  BERTScore Precision: {sum(bert_eng['precision']) / len(bert_eng['precision']):.4f}"
)


print("\n--- Computing Hindi Metrics ---")
rouge_hin = rouge_metric.compute(predictions=hin_preds, references=hin_refs)
bleurt_hin = bleurt_metric.compute(predictions=hin_preds, references=hin_refs)
bert_hin = bertscore_metric.compute(
    predictions=hin_preds, references=hin_refs, lang="hi"
)

print(f"  ROUGE-2: {rouge_hin['rouge2'] * 100:.2f}")
print(f"  BLEURT Score: {sum(bleurt_hin['scores']) / len(bleurt_hin['scores']):.4f}")
print(
    f"  BERTScore Precision: {sum(bert_hin['precision']) / len(bert_hin['precision']):.4f}"
)

Loading evaluation metrics...




INFO:tensorflow:Reading checkpoint C:\Users\admin\.cache\huggingface\metrics\bleurt\default\downloads\extracted\64a145a740562dda9fae1ce4fb71155ccaf922d41c2355bee049709b8590e973\bleurt-base-128.


INFO:tensorflow:Reading checkpoint C:\Users\admin\.cache\huggingface\metrics\bleurt\default\downloads\extracted\64a145a740562dda9fae1ce4fb71155ccaf922d41c2355bee049709b8590e973\bleurt-base-128.


INFO:tensorflow:Config file found, reading.


INFO:tensorflow:Config file found, reading.


INFO:tensorflow:Will load checkpoint bert_custom


INFO:tensorflow:Will load checkpoint bert_custom


INFO:tensorflow:Loads full paths and checks that files exists.


INFO:tensorflow:Loads full paths and checks that files exists.


INFO:tensorflow:... name:bert_custom


INFO:tensorflow:... name:bert_custom


INFO:tensorflow:... vocab_file:vocab.txt


INFO:tensorflow:... vocab_file:vocab.txt


INFO:tensorflow:... bert_config_file:bert_config.json


INFO:tensorflow:... bert_config_file:bert_config.json


INFO:tensorflow:... do_lower_case:True


INFO:tensorflow:... do_lower_case:True


INFO:tensorflow:... max_seq_length:128


INFO:tensorflow:... max_seq_length:128


INFO:tensorflow:Creating BLEURT scorer.


INFO:tensorflow:Creating BLEURT scorer.


INFO:tensorflow:Creating WordPiece tokenizer.


INFO:tensorflow:Creating WordPiece tokenizer.


INFO:tensorflow:WordPiece tokenizer instantiated.


INFO:tensorflow:WordPiece tokenizer instantiated.


INFO:tensorflow:Creating Eager Mode predictor.


INFO:tensorflow:Creating Eager Mode predictor.


INFO:tensorflow:Loading model.


INFO:tensorflow:Loading model.


INFO:tensorflow:BLEURT initialized.


INFO:tensorflow:BLEURT initialized.


Generating summaries for the test set...


  0%|          | 0/100 [00:00<?, ?it/s]


--- Computing English Metrics ---


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  ROUGE-2: 17.94
  BLEURT Score: -0.3045
  BERTScore Precision: 0.8991

--- Computing Hindi Metrics ---
  ROUGE-2: 18.13
  BLEURT Score: -0.2792
  BERTScore Precision: 0.7319


Interactive Summarization

In [8]:
def summarize_article(article_text):
    """
    Generates and prints English and Hindi summaries for a given article text.
    """
    print("=" * 50)
    print("               SOURCE ARTICLE")
    print("=" * 50)
    print(article_text)

    # --- Generate English Summary ---
    english_input = PREFIX_ENG + article_text
    eng_inputs = tokenizer(
        english_input, return_tensors="pt", max_length=1024, truncation=True
    ).to(DEVICE)
    eng_summary_ids = model.generate(
        eng_inputs.input_ids, max_length=150, num_beams=5, early_stopping=True
    )
    english_summary = tokenizer.decode(eng_summary_ids[0], skip_special_tokens=True)

    print("\n" + "=" * 50)
    print("               ENGLISH SUMMARY")
    print("=" * 50)
    print(english_summary)

    # --- Generate Hindi Summary ---
    hindi_input = PREFIX_HIN + article_text
    hin_inputs = tokenizer(
        hindi_input, return_tensors="pt", max_length=1024, truncation=True
    ).to(DEVICE)
    hin_summary_ids = model.generate(
        hin_inputs.input_ids, max_length=200, num_beams=5, early_stopping=True
    )
    hindi_summary = tokenizer.decode(hin_summary_ids[0], skip_special_tokens=True)

    print("\n" + "=" * 50)
    print("                 HINDI SUMMARY")
    print("=" * 50)
    print(hindi_summary)
    print("\n" + "=" * 50)

In [9]:
# Paste any news article here to test the model
article_to_test = """
India's Chandrayaan-3 mission has successfully soft-landed on the lunar surface, making it the fourth country to achieve this feat. The Vikram lander touched down near the Moon's south pole, an unexplored region believed to contain water ice. The successful landing is a historic moment for India's space program, demonstrating advanced capabilities in landing technology. The Pragyan rover will now descend from the lander to explore the lunar terrain and conduct scientific experiments for one lunar day, which is equivalent to 14 Earth days. The mission aims to study the Moon's geology and the potential for a sustained human presence.
"""

summarize_article(article_to_test)

               SOURCE ARTICLE

India's Chandrayaan-3 mission has successfully soft-landed on the lunar surface, making it the fourth country to achieve this feat. The Vikram lander touched down near the Moon's south pole, an unexplored region believed to contain water ice. The successful landing is a historic moment for India's space program, demonstrating advanced capabilities in landing technology. The Pragyan rover will now descend from the lander to explore the lunar terrain and conduct scientific experiments for one lunar day, which is equivalent to 14 Earth days. The mission aims to study the Moon's geology and the potential for a sustained human presence.


               ENGLISH SUMMARY
Maharashtra's Chandrayaan-3 mission has successfully soft-landed on the lunar surface, making it the fourth country to achieve this feat. The Vikram lander touched down near the Moon's south pole, an unexplored region believed to contain water ice. The mission aims to study the Moon's geology an

In [14]:
def generate_high_quality_summary(article_text):
    """
    Takes a news article string and prints high-quality English and Hindi summaries.
    """
    PREFIX_ENG = "summarize English: "
    PREFIX_HIN = "summarize Hindi: "

    # --- Generation Hyperparameters ---
    NUM_BEAMS = 8
    LENGTH_PENALTY = 2.0
    NO_REPEAT_NGRAM_SIZE = 3
    MIN_SUMMARY_LENGTH = 50
    MAX_SUMMARY_LENGTH = 256
    
    # --- Print Source Article ---
    print("="*80)
    print("SOURCE ARTICLE:")
    print("="*80)
    print(article_text)

    # --- Generate English Summary ---
    eng_input_text = PREFIX_ENG + article_text
    eng_inputs = tokenizer(eng_input_text, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
    
    # eng_summary_ids = model.generate(
    #     eng_inputs.input_ids,
    #     num_beams=NUM_BEAMS,
    #     max_length=MAX_SUMMARY_LENGTH,
    #     min_length=MIN_SUMMARY_LENGTH,
    #     length_penalty=LENGTH_PENALTY,
    #     no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,
    #     early_stopping=True
    # )
    # english_summary = tokenizer.decode(eng_summary_ids[0], skip_special_tokens=True)
    
    # print("\n" + "="*80)
    # print("GENERATED ENGLISH SUMMARY:")
    # print("="*80)
    # print(english_summary)

    # --- Generate Hindi Summary ---
    hin_input_text = PREFIX_HIN + article_text
    hin_inputs = tokenizer(hin_input_text, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)

    hin_summary_ids = model.generate(
        hin_inputs.input_ids,
        num_beams=NUM_BEAMS,
        max_length=MAX_SUMMARY_LENGTH,
        min_length=MIN_SUMMARY_LENGTH,
        length_penalty=LENGTH_PENALTY,
        no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,
        early_stopping=True
    )
    hindi_summary = tokenizer.decode(hin_summary_ids[0], skip_special_tokens=True)
    
    print("\n" + "="*80)
    print("GENERATED HINDI SUMMARY:")
    print("="*80)
    print(hindi_summary)
    print("\n" + "="*80)

In [15]:
article_to_test = """
India secured a decisive victory over Australia in the final match of the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted a competitive total of 198 for 4, thanks to a powerful half-century from captain Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's chase faltered early as they lost key wickets to India's fast bowlers.
"""

generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:

India secured a decisive victory over Australia in the final match of the T20 series, winning by a margin of 35 runs in Bengaluru. Batting first, India posted a competitive total of 198 for 4, thanks to a powerful half-century from captain Suryakumar Yadav, who scored 78 off just 45 balls. In response, Australia's chase faltered early as they lost key wickets to India's fast bowlers.


GENERATED HINDI SUMMARY:
इंडिया ने ऑस्ट्रेलिया के खिलाफ ट20 सीज़न के फाइनल मैच में एक महत्वपूर्ण जीत हासिल की, जिसमें भारत ने 35 रन की बढ़त बनाए रखी। यह जीत इंडिया के कप्तान सुंदरकुमार यदव के नेतृत्व में हुई, जिन्होंने 78 से 45 बॉक्सों में 78 रन बनाए। इस जीत के बावजूद, आस्ट्रेलिया ने पहले ही एक प्रभावशाली हाफ-स्ट्राइकर के साथ शानदार वापसी की। बाद में, भारत ने मुख्य विकेटों को गंवाने के लिए कई विकेट हासिल किए, जबकि अन्य विकेट उनके पास थे। इस हार के बाद, इंडिया का प्रदर्शन तेज़ हो गया, और इंडिया की आक्रामक बल्लेबाजी कमजोर हुई।



In [None]:
article_to_test = """
A landmark international treaty to combat plastic pollution has been agreed upon by delegates from over 170 countries at a United Nations Environment Assembly session held in Nairobi. Hailed as the most significant environmental pact since the Paris Agreement, the resolution establishes an Intergovernmental Negotiating Committee (INC) tasked with drafting a legally binding agreement by the end of 2026. The future treaty aims to address the full lifecycle of plastic, from its production and design to its disposal and recycling. The negotiations were complex, with debates centering on whether the treaty should focus solely on plastic waste management or include caps on virgin plastic production. Major plastic-producing nations and fossil fuel companies had advocated for a focus on recycling, while a coalition of environmental groups and many developing nations pushed for stricter controls on production itself. The final resolution provides a broad mandate for the INC to consider all options.
"""

generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:

A landmark international treaty to combat plastic pollution has been agreed upon by delegates from over 170 countries at a United Nations Environment Assembly session held in Nairobi. Hailed as the most significant environmental pact since the Paris Agreement, the resolution establishes an Intergovernmental Negotiating Committee (INC) tasked with drafting a legally binding agreement by the end of 2026. The future treaty aims to address the full lifecycle of plastic, from its production and design to its disposal and recycling. The negotiations were complex, with debates centering on whether the treaty should focus solely on plastic waste management or include caps on virgin plastic production. Major plastic-producing nations and fossil fuel companies had advocated for a focus on recycling, while a coalition of environmental groups and many developing nations pushed for stricter controls on production itself. The final resolution provides a broad mandate for the INC to 

In [18]:
article_to_test = """
India's Chandrayaan-3 mission has successfully soft-landed on the lunar surface, making it the fourth country to achieve this feat. The Vikram lander touched down near the Moon's south pole, an unexplored region believed to contain water ice. The successful landing is a historic moment for India's space program, demonstrating advanced capabilities in landing technology. The Pragyan rover will now descend from the lander to explore the lunar terrain and conduct scientific experiments for one lunar day, which is equivalent to 14 Earth days. The mission aims to study the Moon's geology and the potential for a sustained human presence.
"""

generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:

India's Chandrayaan-3 mission has successfully soft-landed on the lunar surface, making it the fourth country to achieve this feat. The Vikram lander touched down near the Moon's south pole, an unexplored region believed to contain water ice. The successful landing is a historic moment for India's space program, demonstrating advanced capabilities in landing technology. The Pragyan rover will now descend from the lander to explore the lunar terrain and conduct scientific experiments for one lunar day, which is equivalent to 14 Earth days. The mission aims to study the Moon's geology and the potential for a sustained human presence.


GENERATED HINDI SUMMARY:
इंडिया के चंद्रयान-3 मिशन ने लौटने की तैयारी की है, जिससे यह भारत के चौथे राष्ट्र बन गया है। यह मिशन एक शानदार क्षण है, जिसका उद्देश्य मौन के दक्षिण पोल के पास पानी की ऊर्जा का अध्ययन करना है। मिशन का लक्ष्य मानव समुदाय के लिए एक महत्वपूर्ण भूमिका निभाना है, जो एक lunar सप्ताह की तुलना में 14 ईर्घ दिनों के रूप में 

In [21]:
article_to_test = """
The Indian Space Research Organisation (ISRO) has successfully completed a critical test for its ambitious Gaganyaan mission, which aims to send Indian astronauts to space. The test involved the final integrated validation of the crew module's parachute system at a facility in Chandigarh. The parachutes are essential for ensuring the safe return and landing of the crew module. Officials confirmed that the system performed flawlessly under simulated flight conditions. This milestone moves India one step closer to launching its first crewed spaceflight, which is currently scheduled for late 2025. The Gaganyaan programme is a top priority for the nation's space agency, marking its entry into human space exploration.
"""

generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:

The Indian Space Research Organisation (ISRO) has successfully completed a critical test for its ambitious Gaganyaan mission, which aims to send Indian astronauts to space. The test involved the final integrated validation of the crew module's parachute system at a facility in Chandigarh. The parachutes are essential for ensuring the safe return and landing of the crew module. Officials confirmed that the system performed flawlessly under simulated flight conditions. This milestone moves India one step closer to launching its first crewed spaceflight, which is currently scheduled for late 2025. The Gaganyaan programme is a top priority for the nation's space agency, marking its entry into human space exploration.


GENERATED HINDI SUMMARY:
The Indian Space Research Organisation (ISRO) ने अपने नए गगनयान मिशन के लिए एक महत्वपूर्ण परीक्षण किया है, जिसका उद्देश्य इंडोनेशियाई एथलीटों को अंतरिक्ष में लौटने की तैयारी करना है। उन्होंने कैंडीज में एक प्रशिक्षण केंद्र में तैयार 

In [22]:
article_to_test = """
Google has announced a significant upgrade to its core AI model, Gemini. The new version, named Gemini 1.5 Pro, is designed to handle a much larger amount of information at once. The company claims it can process up to 1 million tokens, which is equivalent to an entire feature-length movie or over 700,000 words of text. This massive context window allows the model to understand and reason about very large documents, codebases, or hours of video content without forgetting earlier details. The new model is initially being made available to developers and enterprise customers through Google's AI Studio and Vertex AI platforms. This development is seen as a major step in the competition against other leading AI models like OpenAI's GPT-4."""

generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:

Google has announced a significant upgrade to its core AI model, Gemini. The new version, named Gemini 1.5 Pro, is designed to handle a much larger amount of information at once. The company claims it can process up to 1 million tokens, which is equivalent to an entire feature-length movie or over 700,000 words of text. This massive context window allows the model to understand and reason about very large documents, codebases, or hours of video content without forgetting earlier details. The new model is initially being made available to developers and enterprise customers through Google's AI Studio and Vertex AI platforms. This development is seen as a major step in the competition against other leading AI models like OpenAI's GPT-4.

GENERATED HINDI SUMMARY:
मैनचेस्टर गूगल ने अपनी प्रमुख AI मशीन, Gemini 1.5 Pro, को एक महत्वपूर्ण वृद्धि की घोषणा की है। इस नए संस्करण, जिसमें 1 मिलियन tokens शामिल हैं, यह एक पूर्ण समूह-लंबी वीडियो या 700,000 शब्दों के वीडियो के रूप में 

In [24]:
article_to_test = """TThe Reserve Bank of India (RBI) has announced that it will keep the repo rate unchanged at 6.5% for the eighth consecutive time. The decision was made by the Monetary Policy Committee (MPC) following its recent three-day meeting. RBI Governor Shaktikanta Das stated that the committee is focused on ensuring inflation aligns with the target of 4% while supporting economic growth. The central bank also retained its GDP growth forecast for the current fiscal year at 7.2%. The decision was widely expected by economists, who believe that a stable policy rate is necessary to manage potential food price inflation and global economic uncertainties before considering any rate cuts later in the year."""
generate_high_quality_summary(article_to_test)

SOURCE ARTICLE:
TThe Reserve Bank of India (RBI) has announced that it will keep the repo rate unchanged at 6.5% for the eighth consecutive time. The decision was made by the Monetary Policy Committee (MPC) following its recent three-day meeting. RBI Governor Shaktikanta Das stated that the committee is focused on ensuring inflation aligns with the target of 4% while supporting economic growth. The central bank also retained its GDP growth forecast for the current fiscal year at 7.2%. The decision was widely expected by economists, who believe that a stable policy rate is necessary to manage potential food price inflation and global economic uncertainties before considering any rate cuts later in the year.

GENERATED HINDI SUMMARY:
इंडिया के रिजर्व बैंक ऑफ इंडिया (RBI) ने घोषणा की है कि वह अपनी रिपोर्ट रेट 6.5% से कम करेंगे। यह निर्णय MPC (MPC) के माध्यम से किया गया था, जिसका उद्देश्य अर्थव्यवस्था की वृद्धि को बढ़ावा देना है। बैंक ने अपनी GDP growth forecast पर 7.2% का लक्ष्य रखा है, ज