In [33]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Score a short story excerpt with pretrained GPT-2: tokenize it, run one
# forward pass with the inputs as labels, and print the language-modeling loss.
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Reuse EOS as the padding token so that `padding=True` has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

tokens = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

input_ids = tokens['input_ids']
# Fall back to an all-ones mask (every position is a real token) if the
# tokenizer did not return one.
attention_mask = tokens.get('attention_mask', torch.ones_like(input_ids))

print(f"Tokenized input ids: {input_ids}")

model = GPT2LMHeadModel.from_pretrained('gpt2')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # evaluation only: make sure dropout is disabled

# Move the tensors to the target device once and reuse them.
input_ids_on_device = input_ids.to(device)
attention_mask_on_device = attention_mask.to(device)

# Padding shares its id with EOS, so padded positions must be excluded from the
# loss explicitly; -100 is the label value the loss ignores. For a single
# unpadded sequence this leaves the labels equal to the input ids.
labels = input_ids_on_device.masked_fill(attention_mask_on_device == 0, -100)

# No gradients are needed to report a loss, so skip building the autograd graph.
with torch.no_grad():
    output = model(
        input_ids_on_device,
        attention_mask=attention_mask_on_device,
        labels=labels,
    )
loss = output.loss
logits = output.logits

print(f"Loss: {loss.item()}")




Tokenized input ids: tensor([[ 7454,  2402,   257,   640,    11,   612,   373,   257,  1310,  2576,
          3706,  2297, 36032, 17233,    13,  1375,  6151,   284,  3187,   607,
         18410,    11,   508,  5615,   287,   262, 16479,    13,  1881,  1110,
            11,   607,  2802,  1965,   607,   284,  1011,   257,  7988,   286,
         39863,   284,   607, 18410,    13,  1550,   607,   835,   832,   262,
         16479,    11,   673,  1138,   257,  1263,  2089, 17481,   508,  2227,
           284,  4483,   607,    13]])
Loss: 2.5232996940612793


In [None]:
# Build and compile the Transformer model with the Keras functional API:
# input ids -> token + position embedding -> one Transformer block ->
# softmax Dense layer over the vocabulary -> average over the sequence axis.
inputs = tf.keras.Input(shape=(maxlen,))

embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
embedded = embedding_layer(inputs)

transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
# NOTE(review): `training=True` is fixed here when the graph is built, so the
# block presumably stays in training mode (e.g. dropout active) during
# predict/evaluate as well. Dropping the argument is only safe if
# `TransformerBlock.call` gives `training` a default value - confirm against
# its definition before changing this.
encoded = transformer_block(embedded, training=True)

# Output head: softmax over the vocabulary first, then the mean of those
# outputs across the sequence dimension.
token_probs = Dense(vocab_size, activation="softmax")(encoded)
x = tf.keras.layers.GlobalAveragePooling1D()(token_probs)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Call `model.fit` once for every epoch count in `epochs_list`.
# NOTE(review): `model` is not re-created inside the loop, so each call
# appears to continue from the weights left by the previous one (20, then
# +60, then +70 epochs) rather than starting fresh - confirm this cumulative
# training is intended.
epochs_list = [20, 60, 70]
for epochs in epochs_list:
    print(f"\nTraining for {epochs} epochs")
    model.fit(
        x=input_sequences,
        y=output_words,
        batch_size=32,
        epochs=epochs,
    )



Training for 20 epochs
Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.1182 - loss: 3.8374
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1286 - loss: 3.5047
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1495 - loss: 3.3946
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1535 - loss: 3.4047
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1495 - loss: 3.3305
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.1390 - loss: 3.3515
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.1390 - loss: 3.3368
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1286 - loss: 3.3476
Epoch 9/20
[1m2/2[0m [32m━━━━━━━━━━━━

In [47]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Score a short story excerpt with pretrained GPT-2 (language-modeling loss),
# then sample a continuation of the same prompt.
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Reuse EOS as the padding token so that `padding=True` has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

tokens = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

input_ids = tokens['input_ids']
# Fall back to an all-ones mask (every position is a real token) if the
# tokenizer did not return one.
attention_mask = tokens.get('attention_mask', torch.ones_like(input_ids))

print(f"Tokenized input ids: {input_ids}")

model = GPT2LMHeadModel.from_pretrained('gpt2')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # evaluation and generation only: make sure dropout is disabled

# Move the tensors to the target device once and reuse them below.
input_ids_on_device = input_ids.to(device)
attention_mask_on_device = attention_mask.to(device)

# Padding shares its id with EOS, so padded positions must be excluded from the
# loss explicitly; -100 is the label value the loss ignores. For a single
# unpadded sequence this leaves the labels equal to the input ids.
labels = input_ids_on_device.masked_fill(attention_mask_on_device == 0, -100)

# No gradients are needed to report a loss, so skip building the autograd graph.
with torch.no_grad():
    output = model(
        input_ids_on_device,
        attention_mask=attention_mask_on_device,
        labels=labels,
    )
loss = output.loss
logits = output.logits

print(f"Loss: {loss.item()}")

# Sampling is stochastic; seed the generator so a re-run reproduces the text.
torch.manual_seed(42)

# `max_new_tokens` budgets only the continuation. The previous `max_length=100`
# also counted the prompt, so the continuation shrank as the prompt grew and a
# prompt of 100+ tokens (the tokenizer allows up to 512) left no room at all.
# 36 matches what the old setting allowed for this 64-token prompt; raise it
# for longer continuations.
generated_ids = model.generate(
    input_ids_on_device,
    attention_mask=attention_mask_on_device,
    max_new_tokens=36,
    do_sample=True,
    top_k=50,
    top_p=0.92,
    temperature=0.7,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id
)

# Keep token ids and decoded text under separate names.
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(f"Generated Text: {generated_text}\n")

Tokenized input ids: tensor([[ 7454,  2402,   257,   640,    11,   612,   373,   257,  1310,  2576,
          3706,  2297, 36032, 17233,    13,  1375,  6151,   284,  3187,   607,
         18410,    11,   508,  5615,   287,   262, 16479,    13,  1881,  1110,
            11,   607,  2802,  1965,   607,   284,  1011,   257,  7988,   286,
         39863,   284,   607, 18410,    13,  1550,   607,   835,   832,   262,
         16479,    11,   673,  1138,   257,  1263,  2089, 17481,   508,  2227,
           284,  4483,   607,    13]])
Loss: 2.5232996940612793
Generated Text: Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her. They fought and then fell into a deep, dark river. When the wolf turned around and killed the girl, Red Riding Hood jumped into the river. He grab