In [None]:
!pip install ipywidgets

In [None]:
!pip install transformers datasets torch nltk evaluate -q


In [None]:


import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import nltk
import evaluate
import numpy as np

# Toy dialogue corpus kept as (prompt, answer) pairs so each context stays
# next to the reply it belongs to. Every prompt ends with the "User A:" cue
# that the reply is meant to continue.
_DIALOGUE_PAIRS = (
    ("User B: How are you?\nUser A:", "I'm doing well, thanks for asking!"),
    (
        "User B: What are you doing today?\nUser A:",
        "Just finishing some work and relaxing.",
    ),
    (
        "User B: Do you like coffee?\nUser A:",
        "Yes, I love coffee, especially in the morning.",
    ),
)

# Column-oriented view of the pairs: one list per column, rows aligned by index.
data = {
    "context": [prompt for prompt, _ in _DIALOGUE_PAIRS],
    "reply": [answer for _, answer in _DIALOGUE_PAIRS],
}

# Build the dataset from the column dict and hold out 20% of it for evaluation.
# The shuffle is seeded so the same example is held out on every run; without
# a seed the split (and therefore every metric reported from the test split)
# changes from one execution to the next.
SPLIT_SEED = 42
dataset = Dataset.from_dict(data).train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset = dataset["train"]
test_dataset = dataset["test"]

# Name of the pretrained checkpoint whose vocabulary the tokenizer is loaded from.
TOKENIZER_CHECKPOINT = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

# Reuse the end-of-sequence token as the padding token so that the
# padding="max_length" request made in tokenize() has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

def tokenize(batch):
    """Tokenize context/reply pairs and attach causal-LM training labels.

    Each context is encoded together with its reply as one sequence, truncated
    or padded to exactly 128 tokens.  A ``labels`` entry is added so the
    language model can compute a loss during training and evaluation: it is a
    copy of ``input_ids`` in which every padded position (attention mask 0) is
    replaced by -100, the label value the model's loss ignores.  Without
    labels the model's forward pass returns no loss and training cannot run.

    Args:
        batch: Mapping with "context" and "reply" entries; either two lists
            of strings (as passed by ``Dataset.map(..., batched=True)``) or
            two single strings.

    Returns:
        The tokenizer's encoding (``input_ids``, ``attention_mask``) extended
        with ``labels``.
    """

    def _mask_padding(token_ids, attention):
        # Keep real tokens as targets; blank out padding so it adds no loss.
        return [tok if keep else -100 for tok, keep in zip(token_ids, attention)]

    encoded = tokenizer(
        batch["context"],
        batch["reply"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    if isinstance(batch["context"], str):
        # Single example: the encoding holds one flat list of token ids.
        encoded["labels"] = _mask_padding(input_ids, attention_mask)
    else:
        # Batched call: one list of token ids per example.
        encoded["labels"] = [
            _mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    return encoded

# Run the batched tokenize() over both splits, train first and then test.
tokenized_train, tokenized_test = (
    split.map(tokenize, batched=True) for split in (train_dataset, test_dataset)
)

# Pretrained GPT-2 with its language-modeling head; this is the model that
# gets fine-tuned and later used for generation.
model = GPT2LMHeadModel.from_pretrained("gpt2")

import inspect

# The per-epoch evaluation setting was renamed from `evaluation_strategy` to
# `eval_strategy`, and recent Transformers releases reject the old keyword.
# The install at the top of this notebook is unpinned, so pick whichever
# spelling the installed TrainingArguments actually accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# Fine-tuning hyper-parameters: evaluate once per epoch, never write
# checkpoints (save_strategy="no"), log every 10 optimizer steps.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="no",
    logging_steps=10,
    **{_eval_strategy_key: "epoch"},
)

import inspect

# Newer Transformers releases take the tokenizer through `processing_class`
# and deprecate the `tokenizer` keyword; older releases only know `tokenizer`.
# Choose the keyword from the installed Trainer signature so the notebook runs
# on either side of that rename.
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# Wire the model, hyper-parameters and the two tokenized splits together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    **{_tokenizer_key: tokenizer},
)

# Fine-tune the model in place on the training split.
trainer.train()

def generate_reply(context, max_length=50):
    """Generate the model's reply to a dialogue context.

    Args:
        context: Prompt text the model should continue.
        max_length: Upper bound on the total sequence length in tokens,
            prompt included, passed straight to ``model.generate``.

    Returns:
        Only the newly generated continuation, decoded with special tokens
        removed and surrounding whitespace stripped.  The prompt is not echoed
        back, so the result can be compared directly against a reference
        reply.  May be an empty string if the model generates nothing.
    """
    # Tokenize with an attention mask and move the tensors to the model's
    # device: after training the model may live on an accelerator while freshly
    # encoded tensors are created on the CPU.
    encoded = tokenizer(context, return_tensors="pt").to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    # Inference mode (no dropout) so generation does not depend on whatever
    # mode the training loop left the model in.
    model.eval()
    output = model.generate(
        encoded["input_ids"],
        # Passed explicitly because padding reuses the EOS token id, so the
        # mask cannot be inferred reliably from the ids alone.
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; drop them so only
    # the reply is decoded.
    reply_ids = output[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

# Quick manual check: generate from a prompt that is not part of the corpus.
context_test = "User B: What are your hobbies?\nUser A:"
generated_reply = generate_reply(context_test)
print("Generated Reply:", generated_reply)

# Metric implementations loaded by name through the `evaluate` library.
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

# One generated prediction per held-out row, in dataset order. Each reference
# is wrapped in its own single-element list, the shape handed to BLEU below.
predictions = [generate_reply(example["context"]) for example in test_dataset]
references = [[example["reply"]] for example in test_dataset]

# BLEU receives a list of references per prediction; ROUGE receives the single
# reference string for each prediction.
bleu_score = bleu.compute(predictions=predictions, references=references)
flat_references = [refs[0] for refs in references]
rouge_score = rouge.compute(predictions=predictions, references=flat_references)

print("BLEU Score:", bleu_score["bleu"])
print("ROUGE:", rouge_score)

from math import exp

# Perplexity is the exponential of the evaluation loss reported by the
# trainer on the evaluation split.
eval_results = trainer.evaluate()
eval_loss = eval_results["eval_loss"]
print(f"Perplexity: {exp(eval_loss):.2f}")