# Common setup

## Install dependencies

In [13]:
!pip install transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable


## Imports

In [14]:
import argparse
import os
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, logging
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

## Set logging

In [15]:
# Silences "Some weights were not initialized from the model checkpoint" warning
logging.set_verbosity_error()

## Input files

In [16]:
input_files=["/home/levi/rhel-8-docs/rhel-9/modules/performance/con_grouping-vdo-threads-across-numa-nodes.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-perf.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-sar.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-top.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_configuring-the-cpu-affinity.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_increasing-block-cache-size.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_optimizing-cpu-frequency-scaling-for-vdo-performance.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/proc_speeding-up-discard-operations.adoc", "/home/levi/rhel-8-docs/rhel-9/modules/performance/ref_vdo-thread-types.adoc"]

## Define main function

In [17]:
def main(input_files):
    # parser = argparse.ArgumentParser(description="Abstractive Text Summarization Script")
    # parser.add_argument("input_files", nargs='+', help="Paths to the input text files")

    # args = parser.parse_args()
    # input_files = args.input_files

    for input_file_path in input_files:
        # Check if the file exists
        if not os.path.exists(input_file_path):
            print(f"Error: File '{input_file_path}' does not exist.")
            continue

        # Read input text from the file
        try:
            with open(input_file_path, "r", encoding="utf-8") as file:
                input_text = file.read()

            # Check if the file is empty
            if not input_text.strip():
                print(f"Error: File '{input_file_path}' is empty.")
                continue
        
        # Check if the file is corrupt
        except Exception as e:
            print(f"Error reading file '{input_file_path}': {e}")
            continue
        
        summary_dict = abstractive_summarization(input_file_path)
        
        for filepath, summary in summary_dict.items():
            print(f"\nFilename: {filepath}\nSummary: {summary}")


# Summarization function BART

In [18]:
def abstractive_summarization(file):
    # Load pre-trained model and tokenizer only once
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-xsum")
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-xsum")

    # Tokenize and generate summary
    with open(file, "r") as f:
        text = f.read()
        
        inputs = tokenizer("summarize: " + text, return_tensors="pt", padding="longest", truncation=True)
        summary_ids = model.generate(inputs["input_ids"], max_length=150, length_penalty=2.0, num_beams=4, early_stopping=True)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return {file: summary}

main(input_files)


Filename: /home/levi/rhel-8-docs/rhel-9/modules/performance/con_grouping-vdo-threads-across-numa-nodes.adoc
Summary: Here is an example of how to improve the performance of VDO kernel threads by grouping them on the same NUMA nodes.

Filename: /home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-perf.adoc
Summary: Here is an example of how to test the performance of the VDO operating system on your computer.

Filename: /home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-sar.adoc
Summary: Here is a guide to analysing VDO performance using the `sar(1)` utility.

Filename: /home/levi/rhel-8-docs/rhel-9/modules/performance/proc_analyzing-vdo-performance-with-top.adoc
Summary: Using the `top` utility, you can examine the performance of VDO threads by using the following commands.

Filename: /home/levi/rhel-8-docs/rhel-9/modules/performance/proc_configuring-the-cpu-affinity.adoc
Summary: Here is an example of how to set the 