<a href="https://colab.research.google.com/github/Bimpiel/CART498-GENERATIVE-AI/blob/main/A2/Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# prompt: save the output to a file called p+7

from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList, TopKLogitsWarper
import torch
import random
# Import necessary libraries

# Load the tokenizer and the causal language model from the same "gpt2" checkpoint
checkpoint = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Text lines to complete: each line of the literal below becomes one prompt
prompts = """\
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.
""".splitlines()

# Function to generate the next token and replace the last word of the sentence
def complete_sentence(prompt):
    """Replace the last word of ``prompt`` with a token proposed by the model.

    The final whitespace-separated word of ``prompt`` is dropped, the
    remaining text is fed to the module-level ``model``, and one of the 7
    most probable next tokens is picked uniformly at random to take the
    dropped word's place.

    Args:
        prompt: The sentence whose last word should be replaced.

    Returns:
        A ``(results, completed_sentence)`` tuple. ``results`` is a list of
        ``(token_text, probability)`` pairs for the candidate tokens, most
        probable first; probabilities are normalized over the tokens that
        survive the Top-K filter. Candidates that decode to whitespace only
        are left out. ``completed_sentence`` is the truncated prompt followed
        by the chosen token (or just the truncated prompt when no usable
        candidate exists).

    Raises:
        ValueError: If nothing remains after removing the last word and the
            tokenizer defines no beginning-of-sequence token to condition on.
    """
    num_candidates = 7   # size of the pool the replacement word is drawn from
    filter_top_k = 50    # tokens kept by the Top-K filter before normalizing

    # Remove the last word of the prompt
    prompt_without_last_word = ' '.join(prompt.split()[:-1])

    # A prompt of zero or one words leaves no context; an empty input would
    # make the forward pass fail, so condition on the tokenizer's
    # beginning-of-sequence token instead.
    model_input = prompt_without_last_word or tokenizer.bos_token
    if not model_input:
        raise ValueError(
            "prompt has no context left after removing its last word and "
            "the tokenizer defines no BOS token"
        )

    # Tokenize the context and convert it to input IDs
    input_ids = tokenizer(model_input, return_tensors="pt").input_ids

    # Logits for the token that would follow the context
    with torch.no_grad():
        logits = model(input_ids).logits[:, -1, :]

    # Apply a Top-K logits processor to focus on the most probable continuations
    logits_processor = LogitsProcessorList([TopKLogitsWarper(top_k=filter_top_k)])
    processed_logits = logits_processor(input_ids, logits)

    # Convert logits to probabilities; the batch holds exactly one sequence
    probabilities = torch.softmax(processed_logits, dim=-1)[0]

    # Most probable tokens and their probabilities, in descending order
    top_probs, top_indices = torch.topk(probabilities, num_candidates)

    # Decode each token id through the tokenizer rather than stripping the
    # 'Ġ' marker by hand: raw vocabulary entries also encode newlines and
    # non-ASCII bytes with placeholder characters that would otherwise leak
    # into the output text.
    results = []
    for token_id, probability in zip(top_indices.tolist(), top_probs.tolist()):
        token_text = tokenizer.decode([token_id]).strip()
        if token_text:  # skip tokens that are whitespace only
            results.append((token_text, probability))

    if not results:
        return results, prompt_without_last_word

    # Randomly choose one of the candidate words
    chosen_word = random.choice(results)[0]

    # Replace the last word of the original prompt with the chosen word
    # (join skips the empty prefix so a one-word prompt gets no leading space)
    completed_sentence = ' '.join(
        part for part in (prompt_without_last_word, chosen_word) if part
    )

    return results, completed_sentence

# Complete every prompt and write one rewritten line per prompt to p+7.txt.
# UTF-8 is requested explicitly so that a non-ASCII token proposed by the
# model cannot fail on platforms whose default text encoding is narrower.
with open("p+7.txt", "w", encoding="utf-8") as f:
    for prompt in prompts:
        # Only the rewritten sentence is saved; the candidate list is not needed here
        _, completed_sentence = complete_sentence(prompt)
        print(completed_sentence, file=f)

Version 2: Change x for a different outcome

In [3]:
# prompt: A second version of the text processed with P+x. Choose an x value that produces the funniest, wittiest, or most absurd version of the original text. Save this as a .txt file, and include the x value in the filename

from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList, TopKLogitsWarper
import torch
import random

# Load the tokenizer and the causal language model from the same "gpt2" checkpoint
checkpoint = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Text lines to complete: each line of the literal below becomes one prompt
prompts = """\
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.
""".splitlines()

# Function to generate the next token and replace the last word of the sentence
def complete_sentence(prompt, x_value):
    """Replace the last word of ``prompt`` with one of the model's top ``x_value`` tokens.

    The final whitespace-separated word of ``prompt`` is dropped, the
    remaining text is fed to the module-level ``model``, and one of the
    ``x_value`` most probable next tokens is picked uniformly at random to
    take the dropped word's place.

    Args:
        prompt: The sentence whose last word should be replaced.
        x_value: Number of most probable next tokens to choose among. Larger
            values admit less likely replacements.

    Returns:
        A ``(results, completed_sentence)`` tuple. ``results`` is a list of
        ``(token_text, probability)`` pairs for the candidate tokens, most
        probable first; probabilities are normalized over the ``x_value``
        tokens kept by the Top-K filter. Candidates that decode to whitespace
        only are left out. ``completed_sentence`` is the truncated prompt
        followed by the chosen token (or just the truncated prompt when no
        usable candidate exists).

    Raises:
        ValueError: If ``x_value`` is smaller than 1, or if nothing remains
            after removing the last word and the tokenizer defines no
            beginning-of-sequence token to condition on.
    """
    if x_value < 1:
        raise ValueError(f"x_value must be a positive integer, got {x_value!r}")

    # Remove the last word of the prompt
    prompt_without_last_word = ' '.join(prompt.split()[:-1])

    # A prompt of zero or one words leaves no context; an empty input would
    # make the forward pass fail, so condition on the tokenizer's
    # beginning-of-sequence token instead.
    model_input = prompt_without_last_word or tokenizer.bos_token
    if not model_input:
        raise ValueError(
            "prompt has no context left after removing its last word and "
            "the tokenizer defines no BOS token"
        )

    # Tokenize the context and convert it to input IDs
    input_ids = tokenizer(model_input, return_tensors="pt").input_ids

    # Logits for the token that would follow the context
    with torch.no_grad():
        logits = model(input_ids).logits[:, -1, :]

    # Keep only the x_value most probable continuations
    logits_processor = LogitsProcessorList([TopKLogitsWarper(top_k=x_value)])
    processed_logits = logits_processor(input_ids, logits)

    # Convert logits to probabilities; the batch holds exactly one sequence
    probabilities = torch.softmax(processed_logits, dim=-1)[0]

    # The candidate pool must follow x_value. Truncating to a fixed 7 here
    # would give the same candidates for every x_value >= 7, and for smaller
    # values would pull in tokens the filter already zeroed out.
    pool_size = min(x_value, probabilities.numel())
    top_probs, top_indices = torch.topk(probabilities, pool_size)

    # Decode each token id through the tokenizer rather than stripping the
    # 'Ġ' marker by hand: raw vocabulary entries also encode newlines and
    # non-ASCII bytes with placeholder characters that would otherwise leak
    # into the output text.
    results = []
    for token_id, probability in zip(top_indices.tolist(), top_probs.tolist()):
        token_text = tokenizer.decode([token_id]).strip()
        if token_text:  # skip tokens that are whitespace only
            results.append((token_text, probability))

    if not results:
        return results, prompt_without_last_word

    # Randomly choose one of the candidate words
    chosen_word = random.choice(results)[0]

    # Replace the last word of the original prompt with the chosen word
    # (join skips the empty prefix so a one-word prompt gets no leading space)
    completed_sentence = ' '.join(
        part for part in (prompt_without_last_word, chosen_word) if part
    )

    return results, completed_sentence

# Choose an x value: a random integer from 8 to 29 inclusive
# (the upper bound of randrange is exclusive)
x = random.randrange(8, 30)

# Complete every prompt and write one rewritten line per prompt; the chosen
# x is part of the file name. UTF-8 is requested explicitly so that a
# non-ASCII token proposed by the model cannot fail on platforms whose
# default text encoding is narrower.
filename = f"p+{x}.txt"
with open(filename, "w", encoding="utf-8") as f:
    for prompt in prompts:
        # Only the rewritten sentence is saved; the candidate list is not needed here
        _, completed_sentence = complete_sentence(prompt, x)
        print(completed_sentence, file=f)