In [None]:
!pip install transformers datasets accelerate
# accelerate is often needed for Trainer

In [None]:
from datasets import load_dataset
# Load the instruction dataset by its identifier. Later cells read its 'input',
# 'output' and 'reward_score' columns and train on its 'train' split.
dataset = load_dataset("DialogueCharacter/english_general_instruction_with_reward_score_judged_by_13B_llama2")

In [None]:
from transformers import GPT2Tokenizer
# Load the tokenizer for the same "gpt2" checkpoint that is fine-tuned below.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Reuse the end-of-sequence token as the padding token so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token # GPT-2 typically uses the EOS token for padding

In [None]:
def format_and_tokenize(examples, max_length=512):
    """Render a batch of instruction/response pairs as training text and tokenize it.

    Args:
        examples: Batch mapping supplied by ``dataset.map(..., batched=True)``.
            The ``'input'`` and ``'output'`` columns are read as parallel lists.
        max_length: Maximum number of tokens kept per example; longer texts are
            truncated. Adjust based on your resources and data.

    Returns:
        The tokenizer output for the batch. Sequences are deliberately left
        unpadded; padding is applied per batch by the data collator.
    """
    # Customize this template based on your dataset structure. The prompt used
    # at inference time must follow the same "input: ...\noutput: ..." layout.
    # NOTE(review): pad_token is set to eos_token elsewhere in this notebook and
    # the language-modeling collator ignores pad-token positions in the loss, so
    # the EOS appended here is presumably ignored as well -- confirm whether a
    # dedicated pad token is wanted.
    formatted_texts = [
        f"input: {instruction}\noutput: {response}{tokenizer.eos_token}"
        for instruction, response in zip(examples['input'], examples['output'])
    ]

    # No padding at this stage: padding every example to max_length up front
    # stores and trains on mostly pad tokens. DataCollatorForLanguageModeling
    # pads each batch to the length of its longest sequence instead.
    return tokenizer(
        formatted_texts,
        truncation=True,
        max_length=max_length,
    )

# Drop the raw columns so only the tokenizer outputs remain in the dataset.
tokenized_datasets = dataset.map(
    format_and_tokenize,
    batched=True,
    remove_columns=['input', 'output', 'reward_score'],
)

# No explicit 'labels' column is created: DataCollatorForLanguageModeling with
# mlm=False builds the labels from 'input_ids' for every batch, so a stored copy
# would only duplicate the data (and would not be padded alongside 'input_ids').

In [None]:
from transformers import GPT2LMHeadModel
# Load the pretrained "gpt2" checkpoint with its language-modeling head.
model = GPT2LMHeadModel.from_pretrained("gpt2")
# NOTE: 'gpt2' is the smallest of the original GPT-2 checkpoints; 'gpt2-medium' is
# larger, so switching to it will not help with out-of-memory errors. If you run
# out of GPU memory, try 'distilgpt2' or lower per_device_train_batch_size /
# max_length instead.

In [None]:
from transformers import TrainingArguments, DataCollatorForLanguageModeling

# Collator for Causal Language Modeling (GPT-style): mlm=False turns off
# masked-language-model masking.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    # Where checkpoints are written; an existing directory is overwritten.
    output_dir="./gpt2_instruction_finetuned",
    overwrite_output_dir=True,
    # Training schedule -- adjust epochs and batch size to your GPU memory.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    # Logging and checkpoint cadence (in optimizer steps).
    logging_steps=100,
    save_steps=1000,
    save_total_limit=2,
    prediction_loss_only=True,
)

In [None]:
from transformers import Trainer

# Bring together the model, the hyperparameters, the training split and the
# batch collator defined in the previous cells.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],  # Assumes your dataset has a 'train' split
    data_collator=data_collator,
)

# Start fine-tuning.
trainer.train()

In [None]:
# Save the fine-tuned model and the tokenizer to the same directory so both can
# be reloaded from a single path.
trainer.save_model("./final_gpt2_model")
tokenizer.save_pretrained("./final_gpt2_model")

In [None]:
import torch
from transformers import pipeline

# Load both the model and the tokenizer from the directory they were saved to,
# so inference uses exactly the fine-tuned artifacts. Fall back to the CPU
# (device=-1) when no GPU is available instead of failing on device=0.
generator = pipeline(
    'text-generation',
    model='./final_gpt2_model',
    tokenizer='./final_gpt2_model',
    device=0 if torch.cuda.is_available() else -1,
)

# The prompt has to follow the template the model was fine-tuned on
# ("input: ...\noutput: ..."); it stops right after "output:" so the model
# continues with the response.
prompt = "input: Write a short, encouraging poem.\noutput:"
result = generator(prompt, max_length=100, num_return_sequences=1)
print(result[0]['generated_text'])