In [2]:
# !pip install wget
!pip install torch -q
!pip install transformers -q
!pip install datasets -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/471.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m471.0/471.6 kB[0m [31m13.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [4]:
import torch

if torch.cuda.is_available():
  device = torch.device("cuda")
  device_count = torch.cuda.device_count()
  device_name = torch.cuda.get_device_name(0)

  print(f"There are {device_count} GPU(s) available.")
  print(f"We will use the GPU: {device_name}")


else:
  print("No GPU available, using the CPU instead.")
  device = torch.device("cpu")

No GPU available, using the CPU instead.


In [5]:
import torch
from transformers import DistilBertTokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [6]:
class PoemDataset(Dataset):
    def __init__(self, sentences, poems, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.inputs = []

        for sentence, poem in zip(sentences, poems):
            self.inputs.append(f"{sentence} {poem} {tokenizer.eos_token}")

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_text = self.inputs[idx]
        encodings = self.tokenizer(input_text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encodings['input_ids'].squeeze(),
            'attention_mask': encodings['attention_mask'].squeeze(),
        }


In [7]:

def prepare_poem_dataset(angry_sentences, funny_poems, model_name='gpt2', max_length=128, batch_size=4):
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    train_sentences, test_sentences, train_poems, test_poems = train_test_split(angry_sentences, funny_poems, test_size=0.2, random_state=42)

    train_dataset = PoemDataset(train_sentences, train_poems, tokenizer, max_length)
    test_dataset = PoemDataset(test_sentences, test_poems, tokenizer, max_length)

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    return train_dataloader, test_dataloader, tokenizer

In [8]:
angry_sentences = [
    "I can't believe they forgot my birthday!",
    "This traffic is driving me crazy!",
    "Why is the WiFi so slow today?",
    "I'm so tired of eating the same thing every day!",
    "My phone battery always dies when I need it most!",
    "Why do I always lose my keys right when I'm late?",
    "I hate it when people chew with their mouth open!",
    "How come the line is always longest when I'm in a hurry?",
    "Why does it always rain when I forget my umbrella?",
    "I can't stand it when people don't use their turn signals!"
]

funny_poems = [
    "Forgotten day, oh what a blight! / But who needs cake at midnight? / Perhaps they plan a grand surprise / Or simply can't read calendar's guise.",
    "Cars crawl like snails on hot concrete / A turtle race can't be beat / In this jam, I'll grow a beard / Road rage? Nah, I'm just weird.",
    "Internet crawls, my patience thins / Loading bar becomes my frenemy / I could've trained a pigeon / To deliver emails more speedy.",
    "Monotonous meals, day after day / My taste buds threaten to run away / Perhaps I'll start a food rebellion / And eat my socks for this meal's hellion.",
    "Battery drains, oh cruel device! / Always fails at moments precise / I'll invent a phone powered by sighs / Or just yell my messages to the skies.",
    "Keys play hide and seek, what a game! / As I'm rushing out, they're to blame / I'll tie them to a giant balloon / So finding them won't spell my doom.",
    "Open-mouthed chewers, please beware / Your dinner sounds pollute the air / I'll invent a mute button for mouths / Or dine exclusively down south.",
    "Lines stretch long when time is tight / A cosmic joke, an endless plight / I'll master teleportation soon / Or just camp out since last June.",
    "Raindrops fall as umbrellas hide / Weather forecasts have surely lied / I'll grow a waterproof hairdo / Or just pretend I'm at the zoo.",
    "Turn signals forgotten, cars swerve / Testing each driver's last nerve / I'll invent telepathic cars / Or stick big arrows to their fars."
]

In [10]:
# print("Batch keys:", batch.keys())

In [11]:
train_dataloader, test_dataloader, tokenizer = prepare_poem_dataset(angry_sentences, funny_poems)
for batch in train_dataloader:
    print("Batch keys:", batch.keys())
    print("Input shape:", batch['input_ids'].shape)
    print("Attention mask shape:", batch['attention_mask'].shape)
    # Comment out or remove the following line for now
    # print("Labels shape:", batch['labels'].shape)
    break

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Batch keys: dict_keys(['input_ids', 'attention_mask'])
Input shape: torch.Size([4, 128])
Attention mask shape: torch.Size([4, 128])




In [12]:
batch

{'input_ids': tensor([[   40,   460,   470,  1975,   484, 16453,   616, 10955,     0, 28586,
           1110,    11, 11752,   644,   257, 42514,     0,  1220,   887,   508,
           2476, 12187,   379, 15896,    30,  1220,  8673,   484,  1410,   257,
           4490,  5975,  1220,  1471,  2391,   460,   470,  1100, 11845,   338,
          34731,    13,   220, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 502

In [13]:
def train_model(train_dataloader, model, optimizer, scheduler, device, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in tqdm(train_dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            scheduler.step()

        print(f"Epoch {epoch+1}/{num_epochs}, Average loss: {total_loss/len(train_dataloader)}")


In [14]:
def evaluate_model(test_dataloader, model, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            total_loss += outputs.loss.item()

    avg_loss = total_loss / len(test_dataloader)
    print(f"Average test loss: {avg_loss}")

In [15]:
def generate_poem(sentence, model, tokenizer, device, max_length=128):
    model.eval()
    input_ids = tokenizer.encode(sentence, return_tensors='pt').to(device)
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=device)

    with torch.no_grad():
        output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.8,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
        early_stopping=True
        )


    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    poem = generated_text[len(sentence):].strip()  # Remove the input sentence from the output
    lines = poem.split('.')[:4]  # Get first 4 sentences
    return '\n'.join(line.strip() for line in lines if line.strip())


In [16]:
train_dataloader, test_dataloader, tokenizer = prepare_poem_dataset(angry_sentences, funny_poems)

# Set up the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.to(device)

# Set up optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=5e-5)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_dataloader) * 10)

# Train the model
train_model(train_dataloader, model, optimizer, scheduler, device, num_epochs=10)

# Generate a poem
new_angry_sentence = "I can't believe I missed my bus!"
generated_poem = generate_poem(new_angry_sentence, model, tokenizer, device)
print(f"Input: {new_angry_sentence}")
print(f"Generated poem:\n{generated_poem}")

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

100%|██████████| 2/2 [00:30<00:00, 15.38s/it]


Epoch 1/10, Average loss: 8.071861982345581


100%|██████████| 2/2 [00:19<00:00,  9.84s/it]


Epoch 2/10, Average loss: 3.9073779582977295


100%|██████████| 2/2 [00:21<00:00, 10.65s/it]


Epoch 3/10, Average loss: 2.2098655700683594


100%|██████████| 2/2 [00:19<00:00,  9.82s/it]


Epoch 4/10, Average loss: 1.8592796921730042


100%|██████████| 2/2 [00:20<00:00, 10.48s/it]


Epoch 5/10, Average loss: 1.8160637021064758


100%|██████████| 2/2 [00:20<00:00, 10.02s/it]


Epoch 6/10, Average loss: 1.798812210559845


100%|██████████| 2/2 [00:20<00:00, 10.34s/it]


Epoch 7/10, Average loss: 1.7002122402191162


100%|██████████| 2/2 [00:19<00:00,  9.88s/it]


Epoch 8/10, Average loss: 1.6607421040534973


100%|██████████| 2/2 [00:19<00:00,  9.65s/it]


Epoch 9/10, Average loss: 1.606674075126648


100%|██████████| 2/2 [00:20<00:00, 10.04s/it]


Epoch 10/10, Average loss: 1.614551842212677
Input: I can't believe I missed my bus!
Generated poem:



In [20]:
from transformers import pipeline, set_seed
generator = pipeline('text-generation', model = 'gpt2')
set_seed(42)
generator("Hello, I'm a language model,", max_length=30,truncation = True,  num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language model, but what I'm really doing is making a human-readable document. There are other languages, but those are"},
 {'generated_text': "Hello, I'm a language model, not a syntax model. That's why I like it. I've done a lot of programming projects.\n"},
 {'generated_text': "Hello, I'm a language model, and I'll do it in no time!\n\nOne of the things we learned from talking to my friend"},
 {'generated_text': "Hello, I'm a language model, not a command line tool.\n\nIf my code is simple enough:\n\nif (use (string"},
 {'generated_text': "Hello, I'm a language model, I've been using Language in all my work. Just a small example, let's see a simplified example."}]

In [33]:
from transformers import GPT2Tokenizer, GPT2Model,GPT2LMHeadModel
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)




In [34]:
# print(output)

In [73]:
prompt = """
You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "{user_input}"
Poem:
"""


In [74]:
user_text = input("Enter your text: ")

# Insert user input into the prompt
prompt = prompt.replace("{user_input}", user_text)

Enter your text: I lost my keys


In [75]:
print(prompt)


You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "I lost my keys"
Poem:



In [76]:
input_ids = tokenizer.encode(prompt, return_tensors = 'pt')

In [77]:
text_gen_model =  GPT2LMHeadModel.from_pretrained('gpt2')

In [78]:
outputs = text_gen_model.generate(input_ids, max_length = 300, num_return_sequences = 1, temperature = 0.7)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [79]:
generated_poem = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_poem)


You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "I lost my keys"
Poem:

3. Text: "I'm so sorry, I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so sorry, / I'm so


In [82]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Define the improved prompt
prompt_template = """
You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "{user_input}"
Poem:
"""

# Get user input
user_text = "I lost my keys"  # Replace this with input() for dynamic input

# Insert user input into the prompt
prompt = prompt_template.replace("{user_input}", user_text)

# Tokenize the prompt
input_ids = tokenizer.encode(prompt, return_tensors='pt')

# Generate a response from the model
outputs = model.generate(
    input_ids,
    max_length=300,  # Adjust max length to fit a 4-line poem
    num_return_sequences=1,
    temperature=0.7,
    repetition_penalty=1.2,  # Penalize repetition to reduce looping
    stop_token=None
)

# Decode and print the generated poem
generated_poem = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Extract only the poem part
poem_start = generated_poem.find("Poem:") + len("Poem: ")
generated_poem = generated_poem[poem_start:].strip()
print(generated_poem)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


"Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "I lost my keys"
Poem:
 (This is not an actual joke.)


In [88]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')  # Using a larger model
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

# Set the pad token id to the EOS token id to avoid warnings
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id




Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

Final poem:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"


In [90]:
# Define a more constrained prompt
prompt_template = """
You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "{user_input}"
Poem:
"""



In [94]:
user_text = input("Enter your text:")
prompt = prompt_template.replace("{user_input}", str(user_text))
print(prompt)

Enter your text:I lost my way to home

You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "I lost my way to home"
Poem:



In [99]:
input_ids = tokenizer.encode(prompt, return_tensors = 'pt')
outputs = model.generate(input_ids, max_length = input_ids.shape[1]+100,
                         num_return_sequences = 1, temperature = 0.9, top_k = 50,
                         top_p = 0.95,
                         repetition_penalty = 1.2,
                         do_sample = True,
                         pad_token_id = tokenizer.eos_token_id,
                         eos_token_id = tokenizer.eos_token_id,
                         attention_mask = torch.ones_like(input_ids)
                         )

In [102]:
# Decode and print the generated poem
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generated_text)


You are a witty and sarcastic agent who replies to angry texts with a 4-line funny poem. Here's how you respond:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

2. Text: "My car broke down in the middle of nowhere."
   Poem: "Your car may have quit, it's true, / But now you’ve got a scenic view! / Just sit back, relax, unwind, / Adventure waits, let’s clear your mind."

Now, given the following text, generate a funny 4-line poem in response:

Text: "I lost my way to home"
Poem:
 3) A joke about things that aren�t real has occurred recently. If someone asks where do not mess or something related has happened, write this message using #4 above as well:


In [101]:
# Extract only the poem part
poem_lines = generated_text.split('\n')[2:6]  # Get lines 3-6 (the 4 lines of the poem)
generated_poem = '\n'.join(poem_lines)
print(generated_poem)

# Optional: Verify and adjust the output
def ensure_four_lines(poem):
    lines = poem.split('\n')
    if len(lines) > 4:
        return '\n'.join(lines[:4])
    elif len(lines) < 4:
        return poem + '\n' * (4 - len(lines))
    return poem

final_poem = ensure_four_lines(generated_poem)
print("\nFinal poem:")
print(final_poem)


Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"

Final poem:

Examples:
1. Text: "I can't believe it's raining again!"
   Poem: "Oh rain, you seem to love my way, / You show up every single day. / But I'll outsmart you, just you see, / With my giant umbrella, I'll be free!"
