In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re # Import regular expression module

# Load pre-trained GPT-2 model and tokenizer
print("Loading GPT-2 model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

print("Model and tokenizer loaded successfully.")

def get_seventh_highest_probable_word(prompt: str) -> str:
    """
    Generates the seventh highest probable next word for a given prompt
    using the GPT-2 model, ensuring only actual words are considered.
    """
    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Get model predictions (logits) for the next token
    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits

    # Get the logits for the last token in the sequence
    last_token_logits = logits[0, -1, :]

    # Apply softmax to get probabilities
    probabilities = torch.softmax(last_token_logits, dim=-1)

    # Get the sorted probabilities and their indices
    sorted_probs, sorted_indices = torch.sort(probabilities, descending=True)

    seventh_word_found = False
    word_count = 0
    seventh_highest_word = ""

    for token_id in sorted_indices:
        decoded_word = tokenizer.decode(token_id.item()).strip()

        # Filter out tokens that are not purely alphabetic (i.e., not 'words')
        if decoded_word.isalpha(): # Use isalpha() for stricter 'word' definition
            word_count += 1
            if word_count == 7:
                seventh_highest_word = decoded_word
                seventh_word_found = True
                break

    if not seventh_word_found:
        return "Error: Not enough actual words to determine the 7th highest."

    return seventh_highest_word

# Example usage:
# prompt_sentence = "The quick brown fox jumps over the"
# predicted_word = get_seventh_highest_probable_word(prompt_sentence)
# print(f"Prompt: '{prompt_sentence}'")
# print(f"Seventh highest probable word: '{predicted_word}'")

Loading GPT-2 model and tokenizer...
Model and tokenizer loaded successfully.


Please provide your poem in the `poem_lines` variable in the next cell. The code will then process each line as described.

In [7]:
# Example poem. Replace this with your own poem.
poem_text = """
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.
"""

# Split the multiline string into a list of lines
poem_lines = poem_text.strip().splitlines()

modified_poem_lines = []

for line in poem_lines:
    # Split the line into words
    words = line.split()

    if len(words) > 1:
        # Form the prompt by taking all words except the last one
        prompt_for_line = ' '.join(words[:-1])
        predicted_word = get_seventh_highest_probable_word(prompt_for_line)
        # Reconstruct the line with the predicted word
        modified_line = f"{prompt_for_line} {predicted_word}"
    elif len(words) == 1:
        # If there's only one word, the prompt is that word itself
        prompt_for_line = words[0]
        predicted_word = get_seventh_highest_probable_word(prompt_for_line)
        modified_line = f"{prompt_for_line} {predicted_word}"
    else:
        # Handle empty lines
        modified_line = ""

    modified_poem_lines.append(modified_line)

print("Original Poem:")
for line in poem_lines:
    print(line)

print("\nModified Poem (Seventh highest probable word):")
for line in modified_poem_lines:
    print(line)

Original Poem:
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

Modified Poem (Seventh highest probable word):
One must have a mind of her
To regard the frost and the death
Of the pine-trees crusted with oil
And have been cold a long few
To behold the junipers shagged with white
The spruces rough in the distant night
Of the January sun; and not to have
Of any misery in the sound of the sound
In the sound of a few shots
Which is the sound of the voice
Full of the same story
That is blowing in the same bar

### Customizable Nth Probable Word Poem Analysis

Below, you can define the `word_rank` to choose which probable word (e.g., 1 for highest, 7 for seventh, etc.) will be used to modify your poem. The code will then process each line of the `poem_text` using this specified rank.

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re # Import regular expression module

# Load pre-trained GPT-2 model and tokenizer (repeated for self-contained cell)
print("Loading GPT-2 model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
print("Model and tokenizer loaded successfully.")

# Define the rank of the probable word you want to use (e.g., 1 for highest, 7 for seventh)
word_rank = 666 # You can change this number!

def get_nth_highest_probable_word(prompt: str, n: int) -> str:
    """
    Generates the nth highest probable next word for a given prompt
    using the GPT-2 model, ensuring only actual words are considered.
    """
    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Get model predictions (logits) for the last token in the sequence
    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits
    last_token_logits = logits[0, -1, :]

    # Apply softmax to get probabilities
    probabilities = torch.softmax(last_token_logits, dim=-1)

    # Get the sorted probabilities and their indices
    sorted_probs, sorted_indices = torch.sort(probabilities, descending=True)

    nth_word_found = False
    word_count = 0
    nth_highest_word = ""

    for token_id in sorted_indices:
        decoded_word = tokenizer.decode(token_id.item()).strip()

        # Filter out tokens that are not purely alphabetic (i.e., not 'words')
        if decoded_word.isalpha():
            word_count += 1
            if word_count == n:
                nth_highest_word = decoded_word
                nth_word_found = True
                break

    if not nth_word_found:
        return f"Error: Not enough actual words to determine the {n}th highest."

    return nth_highest_word

# Your poem as a multiline string
poem_text = """
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.
"""

# Split the multiline string into a list of lines
poem_lines = poem_text.strip().splitlines()

modified_poem_lines_nth = []

for line in poem_lines:
    # Split the line into words
    words = line.split()

    if len(words) > 1:
        # Form the prompt by taking all words except the last one
        prompt_for_line = ' '.join(words[:-1])
        predicted_word = get_nth_highest_probable_word(prompt_for_line, word_rank)
        # Reconstruct the line with the predicted word
        modified_line = f"{prompt_for_line} {predicted_word}"
    elif len(words) == 1:
        # If there's only one word, the prompt is that word itself
        prompt_for_line = words[0]
        predicted_word = get_nth_highest_probable_word(prompt_for_line, word_rank)
        modified_line = f"{prompt_for_line} {predicted_word}"
    else:
        # Handle empty lines
        modified_line = ""

    modified_poem_lines_nth.append(modified_line)

print("Original Poem:")
for line in poem_lines:
    print(line)

print(f"\nModified Poem ({word_rank}th highest probable word):")
for line in modified_poem_lines_nth:
    print(line)

Loading GPT-2 model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model and tokenizer loaded successfully.
Original Poem:
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

Modified Poem (666th highest probable word):
One must have a mind of considerable
To regard the frost and the cessation
Of the pine-trees crusted with muc
And have been cold a long bus
To behold the junipers shagged with fright
The spruces rough in the distant lunar
Of the January sun; and not to join
Of any misery in the sound of the coach
In the sound of a few I
Which is the sound of the French
Full

In [2]:
prompt_sentence = "One must have a mind of"
predicted_word = get_seventh_highest_probable_word(prompt_sentence)
print(f"Prompt: '{prompt_sentence}'")
print(f"Seventh highest probable word: '{predicted_word}'")

Prompt: 'One must have a mind of'
Seventh highest probable word: 'her'
