In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from sentence_transformers import SentenceTransformer, util

# Load the pre-trained GPT-2 model and tokenizer
MODEL_NAME = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Set pad token to avoid attention mask issues
tokenizer.pad_token = tokenizer.eos_token

# Load a lightweight SentenceTransformer model for semantic similarity
semantic_model = SentenceTransformer("paraphrase-MiniLM-L3-v2")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

def generate_story_chunk(prompt, max_new_tokens=150, temperature=0.8, top_p=0.9):
    """
    Generates a chunk of the story based on the given prompt.
    """
    input_data = tokenizer.encode_plus(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        add_special_tokens=True,
    )
    input_ids = input_data["input_ids"].to(device)
    attention_mask = input_data["attention_mask"].to(device)

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=1.8,
        no_repeat_ngram_size=3,
        pad_token_id=tokenizer.pad_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def remove_repetition(text, all_previous_chunks, similarity_threshold=0.75):
    """
    Removes repetitive or semantically similar sentences based on similarity.
    """
    sentences = text.split(". ")
    filtered_sentences = []
    sentence_embeddings = semantic_model.encode(sentences, convert_to_tensor=True)

    for i, sentence_embedding in enumerate(sentence_embeddings):
        is_similar = False

        for prev_chunk in all_previous_chunks:
            chunk_embedding = semantic_model.encode(prev_chunk, convert_to_tensor=True)
            if util.cos_sim(sentence_embedding, chunk_embedding).item() > similarity_threshold:
                is_similar = True
                break

        if not is_similar:
            filtered_sentences.append(sentences[i].strip())

    return ". ".join(filtered_sentences).strip()

def clean_story_ending(text):
    """
    Cleans the story ending by removing incomplete trailing sentences.
    """
    sentences = text.split(". ")
    if sentences[-1] and not sentences[-1].endswith("."):
        return ". ".join(sentences[:-1]).strip() + "."
    return text.strip()

def count_words(text):
    """
    Counts the number of words in a given text.
    """
    return len(text.split())

def build_story(prompt, word_count=500):
    """
    Builds the story in chunks, ensuring reduced repetition and coherence.
    """
    max_new_tokens = 150
    current_story = prompt
    all_previous_chunks = [prompt]
    iteration_limit = 25
    iterations = 0

    while count_words(current_story) < word_count and iterations < iteration_limit:
        iterations += 1
        remaining_words = word_count - count_words(current_story)

        # Expand prompt for richer context
        extended_prompt = f"{current_story}\n\nWhat happens next in the story?"
        chunk = generate_story_chunk(
            extended_prompt, max_new_tokens=min(max_new_tokens, remaining_words * 2)
        )

        # Filter repetitive or semantically similar sentences
        filtered_chunk = remove_repetition(chunk, all_previous_chunks)
        cleaned_chunk = clean_story_ending(filtered_chunk)

        # Append valid chunk to the story
        if count_words(cleaned_chunk) > 5 and cleaned_chunk not in all_previous_chunks:
            current_story += " " + cleaned_chunk
            all_previous_chunks.append(cleaned_chunk)

    return current_story.strip()

def evaluate_story(prompt, story):
    """
    Evaluates the semantic similarity between the prompt and story.
    """
    prompt_embedding = semantic_model.encode(prompt, convert_to_tensor=True)
    story_embedding = semantic_model.encode(story, convert_to_tensor=True)
    similarity = util.cos_sim(prompt_embedding, story_embedding).item()
    return similarity * 100

def main():
    """
    Main function to handle user input and generate a story.
    """
    print("=" * 50)
    print("‚ú® Welcome to the AI Story Generator! ‚ú®")
    print("=" * 50)

    prompt = input("\nWhat should your story be about? (e.g., 'A young wizard finds a magic book'): ").strip()
    if not prompt:
        print("‚ö†Ô∏è Please enter a valid story idea.")
        return

    try:
        word_count = int(input("\nHow many words should the story be (approx.)? (e.g., 100): ").strip())
    except ValueError:
        print("‚ö†Ô∏è Invalid input. Using default length of 150 words.")
        word_count = 150

    print("\nüåü Generating your story... Please wait! üåü")
    story = build_story(prompt, word_count=word_count)
    relevance_score = evaluate_story(prompt, story)

    print("\nHere‚Äôs your story:\n")
    print("=" * 50)
    print(story)
    print("=" * 50)
    print(f"\nüîç Relevance to prompt: {relevance_score:.2f}%")
    print(f"üìä Word Count (Story): {count_words(story)}")
    print("\nüåü Thank you for using the AI Story Generator! üåü")

if __name__ == "__main__":
    main()

‚ú® Welcome to the AI Story Generator! ‚ú®



What should your story be about? (e.g., 'A young wizard finds a magic book'):  An amateur treasure hunter stumbles upon a map that leads to a lost city of gold, but they‚Äôre not the only one searching for it.

How many words should the story be (approx.)? (e.g., 100):  500



üåü Generating your story... Please wait! üåü

Here‚Äôs your story:

An amateur treasure hunter stumbles upon a map that leads to a lost city of gold, but they‚Äôre not the only one searching for it. An amateur treasure hunter stumbles upon a map that leads to a lost city of gold, but they‚Äôre not the only one searching for it.

What happens next in the story? The most interesting and suspenseful part is this chapter: you get your first glimpse at some major characters or events from other areas which make up Chapter 10 ‚Äì What Happens Next!


üîç Relevance to prompt: 86.58%
üìä Word Count (Story): 488

üåü Thank you for using the AI Story Generator! üåü
