**LOAD MODELS**

In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load GPT-2 model and tokenizer
# "gpt2" is the checkpoint name handed to both from_pretrained calls below, so
# the tokenizer and the model weights always come from the same checkpoint.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Set the model to evaluation mode
# eval() turns off the Dropout layers (visible in the module repr) for inference.
# As the last expression of the cell, the call's return value (the model itself)
# is what the notebook displays as the cell output.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

**DEFINES THE POEM AND THE P+X FUNCTIONS (TOP 1000 PROBABILITIES)**

In [2]:
# Set the number of probabilities (1000 in this case)
# This is how many top-ranked next-token candidates are requested per prompt,
# so it is also the largest rank x that a P+x substitution can use.
num_predictions = 1000

# Function to get predictions for the next word (with as many as needed)
def get_predictions(prompt, num_predictions):
    """Return the most likely next tokens after ``prompt``, best first.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text whose continuation is predicted. An empty prompt is
            replaced by the tokenizer's BOS token so the model still has one
            position to predict from.
        num_predictions: Maximum number of candidates to return. Values larger
            than the vocabulary are clamped to the vocabulary size.

    Returns:
        A ``(top_tokens_text, top_probabilities)`` tuple of two parallel lists
        ordered from most to least likely. Token text is whitespace-stripped,
        so whitespace-only tokens come back as empty strings.

    Raises:
        ValueError: If ``prompt`` encodes to no tokens and the tokenizer has no
            BOS token to fall back on.
    """
    # Encode the prompt text and get the input IDs
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # An empty prompt encodes to zero tokens, which leaves no "last position"
    # to read logits from; condition on the BOS token instead.
    if input_ids.numel() == 0:
        bos_token_id = getattr(tokenizer, "bos_token_id", None)
        if bos_token_id is None:
            raise ValueError("prompt is empty and the tokenizer defines no BOS token")
        input_ids = torch.tensor([[bos_token_id]], dtype=torch.long)

    # Generate the logits for the next token
    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits

    # Get the logits for the next token (last token in the input sequence)
    next_token_logits = logits[0, -1, :]

    # Apply softmax to convert logits into probabilities
    probabilities = torch.nn.functional.softmax(next_token_logits, dim=-1)

    # topk raises if k exceeds the number of entries, so cap k at the vocabulary size.
    k = min(num_predictions, probabilities.shape[-1])
    top_values, top_indices = torch.topk(probabilities, k)

    # The topk results are already 1-D. Calling squeeze(0) here would collapse a
    # single-element result to a scalar and break the decode loop for k == 1.
    top_tokens = top_indices.tolist()
    top_probabilities = top_values.tolist()

    # Decode the tokens back to text
    top_tokens_text = [tokenizer.decode([index]).strip() for index in top_tokens]

    return top_tokens_text, top_probabilities

# Function to replace the last word in a line using P+x
def process_line_with_p_x(line, x, num_predictions):
    """Replace the last word of ``line`` with the x-th most likely next token.

    The words before the last one are joined with single spaces and used as
    the prompt for ``get_predictions``; the returned line is that prompt
    followed by the chosen token.

    Args:
        line: One line of text. Blank lines and single-word lines are returned
            unchanged, because they give no preceding context to predict from.
        x: 1-based rank of the replacement (1 = most likely token). A rank
            outside the available range falls back to rank 1 with a printed
            warning.
        num_predictions: How many candidates to request from ``get_predictions``.

    Returns:
        The rewritten line, or ``line`` itself when nothing can be replaced.
    """
    if not line.strip():  # Skip empty lines
        return line
    words = line.split()
    if len(words) < 2:
        # A single word leaves an empty prompt, so there is nothing to condition on.
        return line
    prompt = " ".join(words[:-1])  # All words except the last
    top_tokens, _ = get_predictions(prompt, num_predictions)

    if not top_tokens:
        # No candidates were produced (e.g. num_predictions == 0): keep the line.
        return line

    # Check if 'x' is within the range of available predictions
    if x < 1:
        # Without this guard, top_tokens[x - 1] would silently index from the
        # END of the ranking for x <= 0.
        print(f"Warning: x = {x} is below 1. Using the highest probability word instead.")
        replacement_word = top_tokens[0]  # Use the highest probability token
    elif x > len(top_tokens):
        print(f"Warning: x = {x} exceeds available tokens. Using the highest probability word instead.")
        replacement_word = top_tokens[0]  # Use the highest probability token
    else:
        replacement_word = top_tokens[x - 1]  # Find the x-th most likely word

    return f"{prompt} {replacement_word}"

# Apply the P+x substitution to every line of a multi-line text
def process_text_with_p_x(text, x, num_predictions):
    """Run ``process_line_with_p_x`` over each newline-separated line of ``text``.

    Lines are split on "\n", rewritten one at a time with the same ``x`` and
    ``num_predictions``, and joined back together with "\n".
    """
    rewritten = []
    for raw_line in text.split("\n"):
        rewritten.append(process_line_with_p_x(raw_line, x, num_predictions))
    return "\n".join(rewritten)

# The Snow Man poem
# Source text for the P+x substitutions. It is kept as one string with a "\n"
# between verse lines, because process_text_with_p_x splits on "\n" and replaces
# the last word of each resulting line.
original_text = """One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

**DEBUG USING THE FIRST LINE**

In [3]:
# Prompt: the poem's first line with its final word ("winter") left off
first_line = "One must have a mind of"

# Rank the candidate next tokens for that prompt
# (get_predictions and num_predictions come from the earlier cell)
top_tokens, top_probabilities = get_predictions(first_line, num_predictions)

# Show only the 100 best-ranked candidates, one per line, labelled P+1, P+2, ...
print("First 100 probabilities for the word after 'One must have a mind of':")
shown_tokens = top_tokens[:100]
shown_probabilities = top_probabilities[:100]
for i in range(min(len(shown_tokens), len(shown_probabilities))):
    token, prob = shown_tokens[i], shown_probabilities[i]
    print(f"P+{i+1}: {token} with probability {prob:.4f}")

# Swap "winter" for the top-ranked candidate and show the resulting line
replacement_word = top_tokens[0]
modified_first_line = f"One must have a mind of {replacement_word}"

print("\nModified first line:", modified_first_line, sep="\n")

First 100 probabilities for the word after 'One must have a mind of':
P+1: their with probability 0.3277
P+2: its with probability 0.1343
P+3: his with probability 0.1304
P+4: your with probability 0.0248
P+5: a with probability 0.0202
P+6: our with probability 0.0169
P+7: her with probability 0.0162
P+8: this with probability 0.0123
P+9: my with probability 0.0116
P+10: one with probability 0.0109
P+11: the with probability 0.0083
P+12: balance with probability 0.0058
P+13: " with probability 0.0052
P+14: what with probability 0.0045
P+15: it with probability 0.0041
P+16: some with probability 0.0041
P+17: ' with probability 0.0035
P+18: good with probability 0.0034
P+19: how with probability 0.0031
P+20: steel with probability 0.0030
P+21: something with probability 0.0028
P+22: self with probability 0.0026
P+23: order with probability 0.0026
P+24: thy with probability 0.0024
P+25: humility with probability 0.0023
P+26: gold with probability 0.0022
P+27: mystery with probability 0.00

**DISPLAYS P+X / 10 RANDOM TEXTS TO CHOOSE FROM**

In [8]:
import random

# Generate and display 10 random P+x texts
# The ranks are drawn WITHOUT replacement, so the same x cannot come up twice
# and every displayed text is a different variant to choose from. Each header
# prints its x, so any text can be regenerated later from that value.
num_texts = min(10, num_predictions)  # cannot draw more distinct ranks than exist
for random_x in random.sample(range(1, num_predictions + 1), num_texts):
    # Process the poem with the randomly selected P+x
    random_p_text = process_text_with_p_x(original_text, random_x, num_predictions)

    # Display the randomly selected P+x result
    print(f"\nP+{random_x} Processed Poem")
    print(random_p_text)


P+222 Processed Poem
One must have a mind of certain
To regard the frost and the massive
Of the pine-trees crusted with venom
And have been cold a long process
To behold the junipers shagged with five
The spruces rough in the distant ruins
Of the January sun; and not to understand
Of any misery in the sound of the speaker
In the sound of a few har
Which is the sound of the spring
Full of the same technology
That is blowing in the same bare reality
For the listener, who listens in the master
And, nothing himself, outside
Nothing that is not there and the nothing that other

P+209 Processed Poem
One must have a mind of following
To regard the frost and the open
Of the pine-trees crusted with chocolate
And have been cold a long live
To behold the junipers shagged with be
The spruces rough in the distant st
Of the January sun; and not to help
Of any misery in the sound of the end
In the sound of a few hearts
Which is the sound of the roaring
Full of the same 's
That is blowing in the same

**GENERATES AND SAVES P+X TEXT**

In [10]:
# Rank of the replacement word used for every line (1 = most likely token)
x = 569  # or any value you want
p_text = process_text_with_p_x(original_text, x, num_predictions)

# Display the new poem with the last words replaced
print(f"\nP+{x} Processed Poem:")
print(p_text)

# Save results to text files
# The encoding is set explicitly because decoded tokens may contain non-ASCII
# characters, and the platform default encoding is not always UTF-8.
file_name = f"P+{x}.txt"
with open(file_name, "w", encoding="utf-8") as f:
    f.write(p_text)

print(f"\nPoem saved as {file_name}!")


P+569 Processed Poem:
One must have a mind of fucking
To regard the frost and the magical
Of the pine-trees crusted with boiling
And have been cold a long loss
To behold the junipers shagged with saw
The spruces rough in the distant quad
Of the January sun; and not to children
Of any misery in the sound of the market
In the sound of a few her
Which is the sound of the grape
Full of the same human
That is blowing in the same bare cash
For the listener, who listens in the meanwhile
And, nothing himself, am
Nothing that is not there and the nothing that rules

Poem saved as P+569.txt!
