In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Loaded (tokenizer, model) pairs keyed by checkpoint name. Filled lazily by
# _load_bart so repeated summarize_character calls reuse one copy of the model
# instead of re-reading the checkpoint on every call.
_BART_CACHE = {}


def _load_bart(checkpoint: str = 'facebook/bart-large-cnn'):
    """Return the ``(tokenizer, model)`` pair for ``checkpoint``, loading it only on first use."""
    if checkpoint not in _BART_CACHE:
        _BART_CACHE[checkpoint] = (
            BartTokenizer.from_pretrained(checkpoint),
            BartForConditionalGeneration.from_pretrained(checkpoint),
        )
    return _BART_CACHE[checkpoint]


def summarize_character(story: str, character: str,
                        max_input_length: int = 1024,
                        summary_max_length: int = 150,
                        summary_min_length: int = 30,
                        num_beams: int = 4) -> str:
    """
    Summarize a character from a story with the 'facebook/bart-large-cnn' checkpoint.

    The tokenizer and model are loaded on the first call and reused afterwards.

    Note: a later cell in this notebook defines another function with the same
    name but a different signature; once that cell has run, the name refers to
    the later definition.

    Parameters:
      story (str): The full story or text from which to extract the character summary.
      character (str): The character to summarize.
      max_input_length (int): Maximum input token length; longer prompts are truncated (default 1024).
      summary_max_length (int): Maximum token length for the generated summary.
      summary_min_length (int): Minimum token length for the generated summary.
      num_beams (int): Number of beams used by beam search.

    Returns:
      str: The decoded summary text with special tokens removed.
    """
    tokenizer, model = _load_bart()

    # The instruction sentence is simply prepended to the story text.
    # NOTE(review): presumably this checkpoint is a plain summarizer rather than an
    # instruction-following model, so the instruction may be treated as ordinary
    # input text -- confirm against real outputs.
    prompt = f"Summarize the character '{character}' in the following story:\n{story}"

    # Truncate so the prompt never exceeds max_input_length tokens.
    inputs = tokenizer(prompt, max_length=max_input_length, truncation=True, return_tensors="pt")

    # Forward every tokenizer output (input_ids and attention_mask) to generate()
    # instead of input_ids alone.
    summary_ids = model.generate(
        **inputs,
        max_length=summary_max_length,
        min_length=summary_min_length,
        length_penalty=2.0,
        num_beams=num_beams,
        early_stopping=True
    )

    # Only the first (best) sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import sys

# Loaded (tokenizer, model) pairs keyed by checkpoint name; filled lazily by
# _load_t5 so the checkpoint is not re-read on every summary request.
_T5_CACHE = {}


def _load_t5(checkpoint: str = 't5-base'):
    """Return the ``(tokenizer, model)`` pair for ``checkpoint``, loading it only on first use."""
    if checkpoint not in _T5_CACHE:
        _T5_CACHE[checkpoint] = (
            T5Tokenizer.from_pretrained(checkpoint),
            T5ForConditionalGeneration.from_pretrained(checkpoint),
        )
    return _T5_CACHE[checkpoint]


def _character_focus(story: str, character: str) -> str:
    """Return the sentences of ``story`` that mention ``character``, or the whole story if none do."""
    import re  # local import: this cell's import lines do not bring in ``re``

    # Any whitespace-separated part of the name counts as a mention, so
    # "Frodo Baggins" also selects sentences that only say "Frodo".
    name_parts = [re.escape(part) for part in character.split()]
    if not name_parts:
        return story
    mention = re.compile(r"(?<!\w)(?:" + "|".join(name_parts) + r")(?!\w)", re.IGNORECASE)

    # Naive sentence split: break after '.', '!' or '?' followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", story.strip())
    matching = [sentence for sentence in sentences if mention.search(sentence)]
    return " ".join(matching) if matching else story


def dynamic_character_summary(story: str, character: str,
                              max_input_length: int = 512,
                              summary_max_length: int = 100,
                              summary_min_length: int = 30,
                              num_beams: int = 4) -> str:
    """
    Generates a summary focusing on a specific character from a given story.

    The previous free-form instruction prompt was echoed back into the output
    (see the recorded cell output in this notebook). The model is now given the
    "summarize: " task prefix, and the focus on the character comes from feeding
    it only the sentences that mention the character. When no sentence mentions
    the character, the whole story is summarized.

    Parameters:
      story (str): Text to summarize.
      character (str): Name of the character to focus on.
      max_input_length (int): Maximum input token length; longer prompts are truncated.
      summary_max_length (int): Maximum token length of the generated summary.
      summary_min_length (int): Minimum token length of the generated summary.
      num_beams (int): Number of beams used by beam search.

    Returns:
      str: The decoded summary text with special tokens removed.
    """
    tokenizer, model = _load_t5()

    prompt = f"summarize: {_character_focus(story, character)}"

    # Call the tokenizer directly (not .encode) so the attention mask is returned too.
    inputs = tokenizer(prompt, return_tensors="pt", max_length=max_input_length, truncation=True)

    summary_ids = model.generate(
        **inputs,
        max_length=summary_max_length,
        min_length=summary_min_length,
        num_beams=num_beams,
        early_stopping=True
    )

    # Only the first (best) sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    # argv is only treated as user input when it carries at least two values and
    # does not start with "-f" (the extra argument Jupyter/Colab pass to the kernel).
    if len(sys.argv) < 3 or sys.argv[1] == "-f":
        # Notebook run, or no usable arguments: fall back to the built-in example.
        print("No valid command-line arguments detected. Using default story and character.")
        story = (
            "Frodo Baggins, a humble hobbit of the Shire, is entrusted with a perilous quest "
            "to destroy a powerful ring that could bring ruin to the world. Throughout his journey, "
            "Frodo faces immense challenges and grows in courage and resilience."
        )
        character = "Frodo Baggins"
    else:
        # Command-line order is: <character> <story>.
        character, story = sys.argv[1], sys.argv[2]

    # Summarize the selected character and show the result.
    character_summary = dynamic_character_summary(story, character)

    print(f"Character Summary of '{character}':\n{character_summary}")


No valid command-line arguments detected. Using default story and character.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Character Summary of 'Frodo Baggins':
Summarize the character 'Frodo Baggins' in the following story: 'Frodo Baggins' in the following story: Frodo Baggins is a humble hobbit of the Shire.


In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Loaded (tokenizer, model) pairs keyed by checkpoint name; filled lazily by
# _load_t5 so the checkpoint is not re-read on every summary request.
_T5_CACHE = {}


def _load_t5(checkpoint: str = 't5-base'):
    """Return the ``(tokenizer, model)`` pair for ``checkpoint``, loading it only on first use."""
    if checkpoint not in _T5_CACHE:
        _T5_CACHE[checkpoint] = (
            T5Tokenizer.from_pretrained(checkpoint),
            T5ForConditionalGeneration.from_pretrained(checkpoint),
        )
    return _T5_CACHE[checkpoint]


def _character_focus(story: str, character: str) -> str:
    """Return the sentences of ``story`` that mention ``character``, or the whole story if none do."""
    import re  # local import: this cell's import line does not bring in ``re``

    # Any whitespace-separated part of the name counts as a mention, so a full
    # name also selects sentences that use only the first or last name.
    name_parts = [re.escape(part) for part in character.split()]
    if not name_parts:
        return story
    mention = re.compile(r"(?<!\w)(?:" + "|".join(name_parts) + r")(?!\w)", re.IGNORECASE)

    # Naive sentence split: break after '.', '!' or '?' followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", story.strip())
    matching = [sentence for sentence in sentences if mention.search(sentence)]
    return " ".join(matching) if matching else story


def dynamic_character_summary(story: str, character: str,
                              max_input_length: int = 512,
                              summary_max_length: int = 100,
                              summary_min_length: int = 30,
                              num_beams: int = 4) -> str:
    """
    Generates a summary focusing on a specific character from a given story.

    The model is given the "summarize: " task prefix rather than a free-form
    instruction, and the focus on the character comes from feeding it only the
    sentences that mention the character. When no sentence mentions the
    character, the whole story is summarized.

    Parameters:
      story (str): Text to summarize.
      character (str): Name of the character to focus on.
      max_input_length (int): Maximum input token length; longer prompts are truncated.
      summary_max_length (int): Maximum token length of the generated summary.
      summary_min_length (int): Minimum token length of the generated summary.
      num_beams (int): Number of beams used by beam search.

    Returns:
      str: The decoded summary text with special tokens removed.
    """
    tokenizer, model = _load_t5()

    prompt = f"summarize: {_character_focus(story, character)}"

    # Call the tokenizer directly (not .encode) so the attention mask is returned too.
    inputs = tokenizer(prompt, return_tensors="pt", max_length=max_input_length, truncation=True)

    summary_ids = model.generate(
        **inputs,
        max_length=summary_max_length,
        min_length=summary_min_length,
        num_beams=num_beams,
        early_stopping=True
    )

    # Only the first (best) sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Prompt for both values (story first, then character) rather than reading sys.argv.
    story, character = (
        input(question)
        for question in ("Enter the story text:\n", "Enter the character name:\n")
    )

    # Summarize the requested character and print the result under a heading.
    character_summary = dynamic_character_summary(story, character)
    print(f"\nCharacter Summary of '{character}':\n{character_summary}")


Enter the story text:
The fact that Henry Armstrong was buried did not seem to him to prove that he was dead: he had always been a hard man to convince. That he really was buried, the testimony of his senses compelled him to admit. His posture -- flat upon his back, with his hands crossed upon his stomach and tied with something that he easily broke without profitably altering the situation -- the strict confinement of his entire person, the black darkness and profound silence, made a body of evidence impossible to controvert and he accepted it without cavil.  But dead -- no; he was only very, very ill. He had, withal, the invalid's apathy and did not greatly concern himself about the uncommon fate that had been allotted to him. No philosopher was he -- just a plain, commonplace person gifted, for the time being, with a pathological indifference: the organ that he feared consequences with was torpid. So, with no particular apprehension for his immediate future, he fell asleep and all w

In [None]:
import spacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Shared resources used by the functions below. They are created once at module
# level so each function call reuses them.

# Checkpoint used for summary generation.
model_name = "google/flan-t5-large"

# spaCy pipeline: provides the entities and sentence boundaries read by the helpers.
nlp = spacy.load("en_core_web_sm")

# Seq2seq tokenizer and model for the checkpoint named above.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def extract_characters(text):
    """
    Return the distinct PERSON entity strings found in ``text``.

    Names are returned in order of first appearance. The earlier set-based
    de-duplication produced an order that could differ from run to run, which
    made the order of generated summaries unstable.

    Parameters:
      text (str): Text to analyse with the module-level spaCy pipeline ``nlp``.

    Returns:
      list[str]: Unique entity texts labelled "PERSON", first occurrence first.
    """
    doc = nlp(text)
    person_names = (ent.text for ent in doc.ents if ent.label_ == "PERSON")
    # dict.fromkeys drops duplicates while keeping insertion order.
    return list(dict.fromkeys(person_names))

def get_character_sentences(text, character):
    """
    Collect the sentences of ``text`` that mention ``character``.

    Matching is case-insensitive and on whole words, so a short name such as
    "Sam" no longer selects sentences that merely contain "same". A blank
    ``character`` returns an empty string instead of matching every sentence.

    Parameters:
      text (str): Text to split into sentences with the module-level spaCy pipeline ``nlp``.
      character (str): Name to look for.

    Returns:
      str: The matching sentences, each stripped and joined by single spaces;
      "" when nothing matches.
    """
    import re  # local import: this cell's import lines do not bring in ``re``

    name = character.strip()
    if not name:
        return ""

    # Lookarounds rather than \b so names that start or end with punctuation still work.
    mention = re.compile(r"(?<!\w)" + re.escape(name) + r"(?!\w)", re.IGNORECASE)

    doc = nlp(text)
    return " ".join(sent.text.strip() for sent in doc.sents if mention.search(sent.text))

def summarize_character(text, character):
    """
    Generate a short description of ``character`` from the sentences that mention them.

    Uses the module-level ``tokenizer`` and ``model``. The prompt is truncated
    to 512 tokens and the output is constrained to 30-100 tokens.

    Parameters:
      text (str): Full story text.
      character (str): Name of the character to describe.

    Returns:
      str: The decoded model output, or "No information found for <character>."
      when no sentence in ``text`` mentions the character.
    """
    character_text = get_character_sentences(text, character)
    if not character_text:
        return f"No information found for {character}."

    prompt = f"Summarize the personality, role, and actions of the character '{character}' based on the following story:\n\n{character_text}"

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        # Forward input_ids together with the attention mask.
        **inputs,
        max_length=100,
        min_length=30,
        do_sample=False,
        # The recorded output for this cell repeats the same sentence; forbidding
        # repeated 3-grams blocks that kind of loop.
        no_repeat_ngram_size=3,
    )
    # Only the first returned sequence is decoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def summarize_all_characters(text):
    """
    Summarize every character that ``extract_characters`` finds in ``text``.

    A progress line is printed before each character is processed.

    Returns:
      dict: Maps each character name to the string returned by
      ``summarize_character`` for that name.
    """
    results = {}
    for person in extract_characters(text):
        print(f"Generating summary for: {person}")
        results[person] = summarize_character(text, person)
    return results

# Demo input: a short story that names several characters.
story = """
In the ancient kingdom of Veridia, there lived a resourceful warrior named Gareth. Amidst political strife and looming invasions, Gareth emerged as a beacon of hope.
His companion, Elara, a wise and intuitive mage, assisted him in deciphering ancient prophecies.
Meanwhile, the cunning Lord Malrec plotted in the shadows to seize power, and the gentle healer, Mirin, devoted herself to caring for the wounded.
As the kingdom fell into chaos, each individual's actions shaped the fate of Veridia.
"""

# Summarize every detected character, then print one labelled block per character.
summaries = summarize_all_characters(story)
for name in summaries:
    summary = summaries[name]
    print(f"\nCharacter Summary for {name}:\n{summary}\n")


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Generating summary for: Gareth
Generating summary for: Mirin

Character Summary for Gareth:
Find out what makes Gareth a beacon of hope. Find out what makes Gareth a resourceful warrior. Find out what makes Gareth a beacon of hope.


Character Summary for Mirin:
The story of the 'healer', Mirin, is a story about a woman who is a healer.

