In [1]:
!pip install transformers



In [2]:
# Load the welfare scheme write-up from a text file
file_path = "AAY.txt"  # Relative path: the file must be in the notebook's working directory

# Decode explicitly as UTF-8 so the loaded text does not depend on the
# platform's default locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    write_up = file.read()

print("Welfare scheme write-up loaded successfully.")


Welfare scheme write-up loaded successfully.


In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
from tqdm import tqdm  # Import tqdm for progress bar

# Pegasus checkpoint identifier handed to `from_pretrained` for both the tokenizer and the model
model_name = "google/pegasus-xsum"


def _load_with_progress(loader_cls, label):
    """Call ``loader_cls.from_pretrained(model_name)`` inside a one-step tqdm bar titled ``label``."""
    with tqdm(total=1, desc=label, unit="model") as progress:
        loaded = loader_cls.from_pretrained(model_name)
        progress.update(1)  # Single tick, reached only when loading succeeded
    return loaded


# Tokenizer first, then the model weights; each load gets its own progress bar
tokenizer = _load_with_progress(PegasusTokenizer, "Loading Tokenizer")
model = _load_with_progress(PegasusForConditionalGeneration, "Loading Model")

  from .autonotebook import tqdm as notebook_tqdm
Loading Tokenizer: 100%|██████████| 1/1 [00:01<00:00,  1.04s/model]
Loading Model:   0%|          | 0/1 [00:00<?, ?model/s]Error while downloading from https://cdn-lfs.hf.co/google/pegasus-xsum/f19169fbf6d5bf3b3c713cb933e40a5fa22ffdb6e0d7628309e2deabc5978e59?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model.bin%3B+filename%3D%22pytorch_model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1731409095&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMTQwOTA5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9nb29nbGUvcGVnYXN1cy14c3VtL2YxOTE2OWZiZjZkNWJmM2IzYzcxM2NiOTMzZTQwYTVmYTIyZmZkYjZlMGQ3NjI4MzA5ZTJkZWFiYzU5NzhlNTk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qJnJlc3BvbnNlLWNvbnRlbnQtdHlwZT0qIn1dfQ__&Signature=b-sXwOKjmJLolQcwX4KI-WJ3Acx2KncVjL2Y91DKfo-ghKxU0W0YsShujucZQPOyZ19nYtj59n26OaeXQELNYAMs5vhz9EnSJiRaiyEOfGptYNvtuILHOOEbAYAvvWNnB0HWZbfPVWGcyaY9dn

Error while downloading from https://cdn-lfs.hf.co/google/pegasus-xsum/46923d7498c8a594e3f467e2e48f7d82685a1ff19a060a5102dffa2a479ab6a9?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1731409371&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMTQwOTM3MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9nb29nbGUvcGVnYXN1cy14c3VtLzQ2OTIzZDc0OThjOGE1OTRlM2Y0NjdlMmU0OGY3ZDgyNjg1YTFmZjE5YTA2MGE1MTAyZGZmYTJhNDc5YWI2YTk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=PRjUnjV8s8-k0iJJ%7E2szLv2m2MCC6A95hsVu8orglYS6-m-aQXpeK9VtQjdrxBsM--oVF8CicfG3RsAXyAFT7ma3-3H6KTAnHbrz404cGGRlBFpiZ3%7EWBg34nOmB2px-W5HhQ8YvHLQLdMpFChaozMc4OyznF4BVgPM6DZW52tqY0fUN2SDKMtb7sgwSkHq-3cdrLgUldXDdv8aI90dDuD%7E3TiQ5AjGCLim%7EWeMx-Xir4Rmo5JbMk043iqJkD5p1%7EOOtVIEigOobCFBDwcFe90LD4P3k25pJfX9FKJPmkobK3zYrusQBKWcyIakC-rxf4U2q9Dan-u8qlb8IOepy8Q__&Key-Pair-Id=K3RPWS32NSSJCE: HTTPSConnectionPo

In [16]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
from tqdm import tqdm  # Import tqdm for progress tracking

def get_summary_with_pegasus(write_up: str, prompt_section: str, window_size: int = 512, overlap: int = 50, **kwargs) -> str:
    """
    Summarizes the welfare scheme write-up for one section using Pegasus and a sliding token window.

    The prompt (instruction text + write-up + section name) is tokenized once with the
    module-level ``tokenizer`` and cut into windows of ``window_size`` tokens, each sharing
    ``overlap`` tokens with its predecessor. Every window is decoded back to text,
    summarized independently with the module-level ``model``, and the per-window
    summaries are joined with single spaces.

    Args:
    - write_up (str): Full write-up of the welfare scheme.
    - prompt_section (str): The section prompt, e.g., "Beneficiary and Problem Statement".
    - window_size (int): Number of tokens per window. Must be positive.
    - overlap (int): Number of tokens shared by consecutive windows.
      Must satisfy 0 <= overlap < window_size.
    - kwargs: Additional parameters forwarded to ``model.generate``.

    Returns:
    - str: The per-window summaries joined by spaces (empty if every window failed).

    Raises:
    - ValueError: If ``write_up`` or ``prompt_section`` is empty, or if
      ``window_size`` / ``overlap`` would produce a non-advancing or gapped window.
    """
    if not write_up or not prompt_section:
        raise ValueError("Write-up and prompt section must not be empty.")
    if window_size <= 0:
        raise ValueError("window_size must be a positive number of tokens.")
    if not 0 <= overlap < window_size:
        # overlap >= window_size gives a zero/negative stride; a negative overlap
        # would silently skip tokens between windows.
        raise ValueError("overlap must be non-negative and smaller than window_size.")

    # NOTE(review): the section name sits at the very end of the prompt, so only the
    # last window ever contains it; the summaries printed in this notebook are identical
    # for all three sections. Confirm whether section-specific steering is expected.
    prompt = f"Summarize the write up {write_up} in light of this {prompt_section}"

    # Tokenize the whole prompt once; windows are slices of this id list
    token_ids = tokenizer(prompt)["input_ids"]
    total = len(token_ids)
    stride = window_size - overlap

    # Upper bound the original code applied when re-tokenizing a window
    max_input_tokens = 1024

    summaries = []
    for start in range(0, total, stride):
        # The previous window ended at `start + overlap`. If that already reached the
        # end of the text, the remaining tail is pure overlap and summarizing it again
        # would only duplicate content.
        if start > 0 and start + overlap >= total:
            break

        # Convert the window back to text, then tokenize it as a standalone model input
        chunk_text = tokenizer.decode(token_ids[start:start + window_size], skip_special_tokens=True)
        inputs = tokenizer(
            chunk_text,
            max_length=min(window_size, max_input_tokens),  # never feed more than one window
            truncation=True,
            return_tensors="pt",
        )

        try:
            if inputs.input_ids.size(1) == 0:
                continue  # Skip empty inputs

            # Pass the attention mask explicitly; a caller-supplied one in kwargs wins
            generate_kwargs = {"attention_mask": inputs["attention_mask"], **kwargs}
            summary_ids = model.generate(inputs.input_ids, **generate_kwargs)
            summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))

        except Exception as e:
            # Best effort: report the failing window and keep going with the rest
            print(f"Error generating summary for chunk starting at index {start}: {e}")

    # Combine all window summaries into a single summary
    return " ".join(summaries)

def summarize_scheme_with_pegasus(write_up: str) -> dict:
    """
    Builds one Pegasus summary per report section of a welfare scheme write-up.

    The three sections, in order, are 'Beneficiary and Problem Statement',
    'Application Process and Benefits' and 'Outcome and Impact'. Each one is
    produced by ``get_summary_with_pegasus`` with the same fixed generation
    settings, while a tqdm bar tracks progress across the sections.

    Args:
    - write_up (str): Full write-up of the welfare scheme.

    Returns:
    - dict: Section title -> generated summary, in the order listed above.
    """
    section_titles = (
        "Beneficiary and Problem Statement",
        "Application Process and Benefits",
        "Outcome and Impact",
    )

    # Generation settings shared by every section
    generation_options = dict(max_length=60, num_beams=5, length_penalty=1.2, early_stopping=True)

    return {
        title: get_summary_with_pegasus(write_up, title, **generation_options)
        for title in tqdm(section_titles, desc="Summarizing Sections")
    }

In [17]:
# Run the three-section summarization over the loaded write-up
summaries = summarize_scheme_with_pegasus(write_up)

# Print every section title followed by its summary and a blank line
for heading in summaries:
    print(f"{heading}:\n{summaries[heading]}\n")

Summarizing Sections: 100%|██████████| 3/3 [00:20<00:00,  6.85s/it]

Beneficiary and Problem Statement:
The Antyodaya Anna Yojana (AAY) is a flagship food security program initiated by the Government of India on December 25, 2000. The Antyodaya Anna Yojana (AAY) is a flagship scheme of the Indian government aimed at addressing food insecurity among the poorest sections of society in India.

Application Process and Benefits:
The Antyodaya Anna Yojana (AAY) is a flagship food security program initiated by the Government of India on December 25, 2000. The Antyodaya Anna Yojana (AAY) is a flagship scheme of the Indian government aimed at addressing food insecurity among the poorest sections of society in India.

Outcome and Impact:
The Antyodaya Anna Yojana (AAY) is a flagship food security program initiated by the Government of India on December 25, 2000. The Antyodaya Anna Yojana (AAY) is a flagship scheme of the Indian government aimed at addressing food insecurity among the poorest sections of society in India.




