In [8]:
import re

import nltk
import torch
from nltk.tokenize import sent_tokenize
from transformers import (
    GPT2Tokenizer,
    GPT2LMHeadModel,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    TopKLogitsWarper,
    TopPLogitsWarper,
    StoppingCriteriaList,
    MaxLengthCriteria
)
# sent_tokenize needs the Punkt sentence-boundary data. Older NLTK releases
# load the pickled "punkt" package, while recent ones (3.9+) look for the
# pickle-free "punkt_tab" package instead, so fetch both to keep this cell
# working regardless of the installed NLTK version. Packages that are already
# present are simply reported as up to date.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)



# Load the pretrained GPT-2 tokenizer and LM-head model; eval() switches the
# model to inference mode and returns the model itself.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name).eval()

# Probe for a GPU once: the model and the prompt tensor must live on the same
# device for generation to work.
gpu_available = torch.cuda.is_available()
if gpu_available:
    model.cuda()

prompt = "In a world where AI governs everything, humanity"

# Encode the prompt as PyTorch tensors and keep only the token ids.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
input_ids = encoded_prompt["input_ids"]
if gpu_available:
    input_ids = input_ids.cuda()


# --- Sampling configuration ---
# Top-K sampling: the next token is drawn only from the 50 most probable tokens.
top_k = 50
# Top-P (nucleus) sampling: of those, keep the smallest set of tokens whose
# cumulative probability reaches 0.95.
top_p = 0.95
# Temperature rescales the logits before sampling: values below 1 make the
# output more deterministic, values near or above 1 make it more diverse.
temperature = 0.8
# Upper bound on the number of tokens generated beyond the input prompt.
max_new_tokens = 100

# Only the minimum-length constraint is supplied as a custom processor.
# MinLengthLogitsProcessor suppresses the EOS token until the sequence is
# 40 tokens long; for a decoder-only model such as GPT-2 that count includes
# the prompt tokens, not just the newly generated ones.
processors = LogitsProcessorList([
    MinLengthLogitsProcessor(min_length=40, eos_token_id=tokenizer.eos_token_id),
])

with torch.no_grad():
    output = model.generate(
        input_ids,
        # A single, unpadded prompt: every position is a real token, so the
        # mask is all ones. Passing it explicitly avoids the ambiguity that
        # arises because the pad token is set to the EOS token below.
        attention_mask=torch.ones_like(input_ids),
        # max_new_tokens alone caps the output at prompt length + 100 tokens;
        # generate() builds the matching stopping criterion itself.
        max_new_tokens=max_new_tokens,
        do_sample=True,
        # Let generate() build the sampling warpers from these arguments so
        # they run in its standard order (temperature, then top-k, then top-p)
        # and are not duplicated by hand-built warpers.
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        logits_processor=processors,
        # GPT-2 has no dedicated pad token; reuse EOS for padding.
        pad_token_id=tokenizer.eos_token_id,
    )


# output[0] is the first (and only) returned sequence: prompt + continuation.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("\n--- Story Generated ---\n")
print(generated_text)

keywords = ["AI", "freedom", "resistance"]
banned = ["blood", "kill", "murder"]


def _compile_terms(terms, *, allow_suffix):
    """Return ``[(term, compiled_regex), ...]`` for case-insensitive matching.

    Every pattern is anchored at the start of a word, so a term is never
    found in the middle of an unrelated word ("AI" inside "said", "kill"
    inside "skill").

    Args:
        terms: Plain-text terms to look for; they are regex-escaped.
        allow_suffix: If False the term must also end at a word boundary
            (whole-word match). If True, trailing letters are accepted so
            inflected forms such as "killed" or "bloody" still match.
    """
    tail = "" if allow_suffix else r"\b"
    return [
        (term, re.compile(rf"\b{re.escape(term)}{tail}", re.IGNORECASE))
        for term in terms
    ]


def _terms_in(sentence, compiled_terms):
    """List the terms, in their original order, whose pattern occurs in *sentence*."""
    return [term for term, pattern in compiled_terms if pattern.search(sentence)]


# Compile once, outside the sentence loop.
# Keywords need whole-word matching (the two-letter "AI" would otherwise match
# countless ordinary words); banned words deliberately also catch inflections.
keyword_patterns = _compile_terms(keywords, allow_suffix=False)
banned_patterns = _compile_terms(banned, allow_suffix=True)

print("\n--- Keyword/Banned Word Analysis ---\n")
sentences = sent_tokenize(generated_text)
for sent in sentences:
    hits = _terms_in(sent, keyword_patterns)
    bans = _terms_in(sent, banned_patterns)
    if not (hits or bans):
        # Nothing of interest in this sentence; report only flagged ones.
        continue
    print(f"Sentence: {sent}")
    if hits:
        print(f"Keywords found: {hits}")
    if bans:
        print(f"Banned words found: {bans}")
    print()



[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



--- Story Generated ---

In a world where AI governs everything, humanity will need to change its own behavior and make more intelligent decisions. In other words, the human population will have to adapt quickly. However, that is the only way that AI will take over.

--- Keyword/Banned Word Analysis ---

Sentence: In a world where AI governs everything, humanity will need to change its own behavior and make more intelligent decisions.
Keywords found: ['AI']

Sentence: However, that is the only way that AI will take over.
Keywords found: ['AI']

