In [8]:
  from google.colab import files
  # Upload the training data file from the local machine. The recorded run
  # uploaded quotes_sample.json, the same file name data_path is set to below.
  uploaded = files.upload()  # Upload quotes_sample.json

Saving quotes_sample.json to quotes_sample.json


In [16]:
import os
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import torch

In [17]:
# Directory used both as the Trainer output_dir and as the final save location.
model_output_dir = "gpt2-finetuned"
# Training file handed to load_dataset().
data_path = "quotes_sample.json"

In [18]:
# Optimisation settings (passed to TrainingArguments).
learning_rate = 5e-5
epochs = 3

# Batching / sequence settings.
batch_size = 2   # per-device training batch size
block_size = 64  # block size handed to the dataset loader


In [19]:
# 1. Load GPT-2 tokenizer and model
# Both objects are loaded from the same pretrained checkpoint name ('gpt2').
# `tokenizer` and `model` are notebook-level names reused by the dataset,
# Trainer and save cells below.
print('Loading GPT-2 tokenizer and model...')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

Loading GPT-2 tokenizer and model...


In [20]:
# 2. Prepare dataset
def load_dataset(file_path, tokenizer, block_size=block_size):
    """Build a `TextDataset` for the file at `file_path`.

    Args:
        file_path: Path of the file handed to `TextDataset`.
        tokenizer: Tokenizer handed to `TextDataset`.
        block_size: Block size handed to `TextDataset`. The default is the
            value the notebook-level `block_size` had when this cell was run.

    Returns:
        The constructed `TextDataset`.
    """
    dataset_kwargs = {
        'tokenizer': tokenizer,
        'file_path': file_path,
        'block_size': block_size,
    }
    return TextDataset(**dataset_kwargs)

# Build the training dataset from data_path using the default block_size.
# NOTE(review): data_path points at a .json file; if the dataset reads the file
# as raw text, JSON punctuation and keys become training data -- confirm intended.
dataset = load_dataset(data_path, tokenizer)

# mlm=False turns off masked-language-modeling in the collator.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)



In [21]:
# 3. Set up training arguments
training_args = TrainingArguments(
    # Output location
    output_dir=model_output_dir,
    overwrite_output_dir=True,
    # Optimisation
    learning_rate=learning_rate,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    # Logging and checkpointing
    logging_steps=100,
    save_steps=500,
    save_total_limit=2,
    prediction_loss_only=True,
)

In [22]:
# 4. Trainer
# Wire the model, its training configuration and the data pipeline together.
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=dataset,
    data_collator=data_collator,
)

In [24]:
# 5. Train
print('Starting training...')
# Kept as the cell's final expression so the notebook displays the returned
# training summary (the TrainOutput shown in the recorded output).
trainer.train()

Starting training...


Step,Training Loss
100,0.8563
200,0.7313
300,0.5948


TrainOutput(global_step=327, training_loss=0.7198979438991722, metrics={'train_runtime': 58.5342, 'train_samples_per_second': 11.173, 'train_steps_per_second': 5.586, 'total_flos': 21360623616000.0, 'train_loss': 0.7198979438991722, 'epoch': 3.0})

In [25]:
# 6. Save the model
print(f'Saving model to {model_output_dir}...')
trainer.save_model(model_output_dir)
# The tokenizer goes into the same directory so the inference cell can load
# both the tokenizer and the model from a single path.
tokenizer.save_pretrained(model_output_dir)
print('Training complete!')

Saving model to gpt2-finetuned...
Training complete!


In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the fine-tuned model (gpt2-finetuned must be in the Colab working directory).
# This rebinds the notebook-level `tokenizer` and `model` names assigned by the
# training cells, replacing them with the copies read back from disk.
model_dir = 'gpt2-finetuned'
tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
model = GPT2LMHeadModel.from_pretrained(model_dir)

def generate_quote(prompt=""):
    """Sample one quote from the fine-tuned model, optionally continuing `prompt`.

    Args:
        prompt: Text the output should start with. If empty or None,
            generation starts from the tokenizer's end-of-text token instead,
            because an empty string encodes to zero tokens and leaves
            `generate` with nothing to continue.

    Returns:
        The decoded text of the single sampled sequence (prompt included),
        with special tokens removed.
    """
    if not prompt:
        # "" encodes to an empty (1, 0) tensor; seed with the end-of-text token
        # (already used below as the pad token) so blank prompts work.
        prompt = tokenizer.eos_token
    # Keep the inputs on the model's device in case the model was moved off the CPU.
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    # Single unpadded sequence, so every position is attended to. ones_like
    # gives the mask the same integer dtype and device as input_ids.
    attention_mask = torch.ones_like(input_ids)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=50,  # total length cap: prompt tokens count towards it
        num_return_sequences=1,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage. generate_quote samples (do_sample=True) and no seed is set,
# so the printed text differs from run to run.
print(generate_quote("Believe in yourself."))

In [None]:
!pip install gradio --quiet

import gradio as gr

def gradio_generate(prompt):
    """Gradio callback: forward the textbox value to `generate_quote`.

    Args:
        prompt: The value Gradio passes in from the input textbox.

    Returns:
        Whatever `generate_quote(prompt)` returns.
    """
    quote = generate_quote(prompt)
    return quote

# Single-textbox UI around gradio_generate; the result is shown as plain text.
# The placeholder invites a blank prompt, so the callback may presumably be
# called with an empty string.
iface = gr.Interface(
    fn=gradio_generate,
    inputs=gr.Textbox(lines=2, placeholder="Enter a prompt or leave blank..."),
    outputs="text",
    title="Motivational Quote Generator"
)

# NOTE(review): share=True presumably exposes the app through a public Gradio
# link -- confirm that is intended before sharing the notebook.
iface.launch(share=True)