In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import torch
import tqdm
import json

# Define ChatData class for loading dataset
class ChatData(Dataset):
    """Dataset of dialogs read from a JSON file and tokenized on access.

    The file must contain a JSON sequence in which every element has a
    ``"dialog"`` list whose entries each carry a ``"text"`` value. The texts
    of one dialog are joined with single spaces into one sequence.

    Args:
        filepath: Path of the JSON file to load.
        tokenizer: Callable invoked as ``tokenizer(text, max_length=...,
            padding="max_length", truncation=True, return_tensors="pt")``
            and returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        max_length: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, filepath, tokenizer, max_length=128):
        # Decode explicitly as UTF-8: the platform default encoding may
        # differ and would garble or reject non-ASCII dialog text.
        with open(filepath, "r", encoding="utf-8") as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of dialogs loaded from the file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize dialog ``idx`` and return ``(input_ids, attention_mask)``.

        Both tensors have their leading dimension squeezed away, so a
        tokenizer output of shape ``(1, n)`` becomes shape ``(n,)``.
        """
        item = self.data[idx]
        dialog = " ".join(turn["text"] for turn in item["dialog"])
        tokenized = self.tokenizer(
            dialog,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        input_ids = tokenized["input_ids"].squeeze(0)
        attention_mask = tokenized["attention_mask"].squeeze(0)
        return input_ids, attention_mask

# Training function
def train(chatData, model, optim, epochs=12):
    """Fine-tune ``model`` as a causal language model on batches from ``chatData``.

    After every epoch the model's ``state_dict`` is written to
    ``model_state_epoch_<n>.pt`` (relative to the current working directory)
    and the mean batch loss of that epoch is printed.

    Args:
        chatData: Iterable yielding ``(input_ids, attention_mask)`` tensor
            pairs, e.g. a ``DataLoader``.
        model: Module called as ``model(input_ids, attention_mask=...,
            labels=...)`` whose output exposes a ``loss`` attribute.
        optim: Optimizer over the model's parameters.
        epochs: Number of passes over ``chatData``.
    """
    # Take the device from the model itself instead of relying on a
    # module-level ``device`` variable being defined before the call.
    model_device = next(model.parameters()).device

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        batches_done = 0
        for input_ids, attention_mask in tqdm.tqdm(chatData, desc=f"Epoch {epoch+1}/{epochs}"):
            input_ids = input_ids.to(model_device)
            attention_mask = attention_mask.to(model_device)

            # A batch made only of padding has nothing to learn from, and
            # fully masked labels would yield a NaN loss.
            if not attention_mask.any():
                continue

            # Padding positions must not contribute to the loss: -100 is the
            # label value the loss ignores.
            labels = input_ids.masked_fill(attention_mask == 0, -100)

            # Zero the optimizer
            optim.zero_grad()

            # Forward pass
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            # Backward pass
            loss.backward()
            optim.step()

            epoch_loss += loss.item()
            batches_done += 1

        # Save model state
        torch.save(model.state_dict(), f"model_state_epoch_{epoch+1}.pt")
        # Average over the batches actually processed; an empty loader
        # reports NaN instead of raising ZeroDivisionError.
        mean_loss = epoch_loss / batches_done if batches_done else float("nan")
        print(f"Epoch {epoch+1} Loss: {mean_loss}")

# Inference function
def infer(inp):
    """Generate the bot's reply to one user message.

    The message is wrapped as ``"<startofstring> " + inp + " <bot>: "``,
    tokenized with the module-level ``tokenizer`` and passed to the
    module-level ``model``'s ``generate`` on ``device``.

    Args:
        inp: The user's message as plain text.

    Returns:
        The decoded continuation produced by the model, without the prompt,
        without special tokens, and with surrounding whitespace stripped.
    """
    prompt = "<startofstring> " + inp + " <bot>: "
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # Budget only the newly generated tokens; ``max_length`` would also
        # count the prompt, leaving a long message no room for a reply.
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # The generated sequence starts with the prompt tokens; drop them so the
    # reply does not echo the user's own message back.
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

# Device setup: use CUDA when PyTorch reports it as available, else the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer and model setup
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Register the padding and sequence-boundary markers as special tokens
tokenizer.add_special_tokens({
    "pad_token": "<pad>",
    "bos_token": "<startofstring>",
    "eos_token": "<endofstring>"
})
# "<bot>:" is added as a regular token, so it survives
# decode(..., skip_special_tokens=True)
tokenizer.add_tokens(["<bot>:"])

model = GPT2LMHeadModel.from_pretrained("gpt2")
# The tokenizer gained tokens above, so resize the embeddings to match it
model.resize_token_embeddings(len(tokenizer))
model = model.to(device)

# Load data and prepare DataLoader
chat_data_path = "/content/chatbot_dataset.json"  # Update with the correct path
chatData = ChatData(chat_data_path, tokenizer)
chatDataLoader = DataLoader(chatData, batch_size=8, shuffle=True)

# Training setup
optim = Adam(model.parameters(), lr=1e-4)

# Train the model
print("Training the model...")
train(chatDataLoader, model, optim)

# Interactive inference
print("Chat with the bot (type 'exit' to stop):")
model.eval()
while True:
    try:
        inp = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt ends the chat without a traceback
        print()
        break
    # Ignore surrounding whitespace so "exit " also stops the chat
    if inp.strip().lower() == "exit":
        break
    response = infer(inp)
    print(f"Bot: {response}")
