In [None]:
pip install accelerate

Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.27.2


In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW
from tqdm.auto import tqdm

class CustomDataset(Dataset):
    def __init__(self, file_path, tokenizer, max_length=128):
        self.examples = []
        self.tokenizer = tokenizer
        self.max_length = max_length

        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                parts = line.strip().split("\t")
                if len(parts) != 2:
                    print(f"Skipping line: {line.strip()}. Expected format: question<TAB>response")
                    continue

                question, response = parts
                input_text = f"Question: {question} Answer: {response}"

                # Tokenize and pad input
                tokenized_input = self.tokenizer(input_text, max_length=self.max_length, padding="max_length", truncation=True, return_tensors="pt")
                if "input_ids" not in tokenized_input or "attention_mask" not in tokenized_input:
                    print(f"Skipping line: {line.strip()}. Tokenization failed.")
                    continue

                self.examples.append(tokenized_input)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return self.examples[idx]

# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token
model = GPT2LMHeadModel.from_pretrained(model_name)


# Load your dataset
train_path = "/content/drive/MyDrive/dialogs.txt"
train_dataset = CustomDataset(train_path, tokenizer)

# Define optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

epochs = 30
for epoch in range(epochs):
    total_loss = 0
    for batch in tqdm(DataLoader(train_dataset, batch_size=8, shuffle=True)):
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = input_ids.clone()
        labels[labels == tokenizer.pad_token_id] = -100  # Ignore loss on padding tokens
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")


# Save the fine-tuned model
model.save_pretrained("/content/drive/MyDrive/gpt2-finetuned_m1")
tokenizer.save_pretrained("/content/drive/MyDrive/gpt2-finetuned1")


In [4]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("/content/drive/MyDrive/gpt2-finetuned")
model = GPT2LMHeadModel.from_pretrained("/content/drive/MyDrive/gpt2-finetuned_m")

# Set the model to evaluation mode
model.eval()

def generate_response(question, max_length=40):
    # Encode the user's question to prepare it for the model
    input_ids = tokenizer.encode(question, return_tensors="pt")

    # Generate a response with adjusted parameters for better control
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            num_beams=5,  # Use beam search with 5 beams
            early_stopping=True,  # Enable early stopping
            no_repeat_ngram_size=1,  # Prevent repeating n-grams
            eos_token_id=tokenizer.eos_token_id,  # Specify EOS token for stopping the generation
            pad_token_id=tokenizer.eos_token_id,  # Ensure correct padding token is used
        )

    # Decode the generated response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response




# Start conversation
print("Bot: Hi! I'm a chatbot. You can start by asking me anything.")
while True:
    user_input = input("You: ")
    if user_input.lower() in ['exit', 'quit']:
        print("Bot: Goodbye!")
        break
    response = generate_response(user_input)
    print("Bot:", response)

Bot: Hi! I'm a chatbot. You can start by asking me anything.
You: hi, how are you ?
Bot: hi, how are you? Answer: i've been working a lot lately. what's going on? is something wrong with my computer.? why not just type in the email address and see if it
You: hi, how are you doing?
Bot: hi, how are you doing? Answer: i've been working a lot lately. what's the weather going to be like in about 90 minutes or so.? it depends on your point of view and
You: i know. i think it may rain.
Bot: i know. i think it may rain. Answer: what's the weather going to be like? we'll have hot and sunny days every sunday, except on friday or thirteenth.?
You: exit
Bot: Goodbye!
