In [None]:

!pip install transformers[torch] accelerate datasets pandas

# Import necessary libraries
import pandas as pd
from datasets import Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

# Load the dataset
df = pd.read_csv('personality.csv')

# Drop rows where either column is missing. astype(str) below would otherwise
# turn NaN into the literal string "nan" and the model would be trained on
# "Persona: nan Chat: nan". reset_index gives back a clean 0..n-1 index so the
# filtered frame converts to a Dataset without an extra index column.
df = df.dropna(subset=['Persona', 'chat']).reset_index(drop=True)

# Combine Persona and chat columns into a single training string per row
df['text'] = 'Persona: ' + df['Persona'].astype(str) + ' Chat: ' + df['chat'].astype(str)

# Create a Hugging Face Dataset containing only the combined text column
dataset = Dataset.from_pandas(df[['text']])

# Load the GPT2 tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Reuse the end-of-sequence token as the padding token so that the
# padding="max_length" tokenization used later has a token to pad with
tokenizer.pad_token = tokenizer.eos_token

# Tokenization function
def tokenize_function(examples):
    """Tokenize a batch of texts and build language-modeling labels.

    Args:
        examples: Batch mapping supplied by ``dataset.map(..., batched=True)``;
            ``examples['text']`` holds the strings to encode.

    Returns:
        The tokenizer output, padded/truncated to 512 tokens, with an added
        ``labels`` entry (one list of ints per example).
    """
    tokens = tokenizer(examples['text'], padding="max_length", truncation=True, max_length=512)

    labels = []
    for ids, mask in zip(tokens['input_ids'], tokens['attention_mask']):
        # The pad token is the EOS token, so copying input_ids verbatim would
        # make the loss mostly about predicting padding. -100 is the label
        # value the model's loss ignores.
        row = [tok if keep else -100 for tok, keep in zip(ids, mask)]

        # Keep the first pad (an EOS) as a target so the model still learns
        # where an example ends. Only valid when padding is appended on the
        # right, where index `real_len` is the first pad position.
        real_len = sum(mask)
        if tokenizer.padding_side == "right" and real_len < len(ids):
            row[real_len] = ids[real_len]

        labels.append(row)

    tokens['labels'] = labels
    return tokens

# Tokenize the whole dataset, one batch of rows per tokenize_function call
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Start from the pre-trained GPT-2 weights and record the padding id
# on the model config so it matches the tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.config.pad_token_id = tokenizer.pad_token_id

# Training configuration, collected in one mapping and expanded below
training_kwargs = {
    'output_dir': './results',
    'overwrite_output_dir': True,
    'num_train_epochs': 3,
    'per_device_train_batch_size': 4,
    'save_steps': 10_000,
    'save_total_limit': 2,
}
training_args = TrainingArguments(**training_kwargs)

# Build the Trainer around the model and the tokenized data, then fine-tune
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
trainer.train()

# Persist the fine-tuned model first, then the tokenizer, to the same directory
for component in (model, tokenizer):
    component.save_pretrained('./fine_tuned_model')

# Create an interface to interact with the chatbot
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer

# Reload the tokenizer and model from the directory they were saved to
tokenizer = GPT2Tokenizer.from_pretrained('./fine_tuned_model')
model = GPT2LMHeadModel.from_pretrained('./fine_tuned_model')

# Wrap both in a text-generation pipeline; chat_with_bot() calls this object
chatbot = pipeline(task='text-generation', model=model, tokenizer=tokenizer)

def chat_with_bot(persona="Your persona here.", max_new_tokens=100):
    """Run an interactive console chat loop using the module-level ``chatbot``.

    Reads lines from standard input until the user types ``exit`` (any case)
    or the input stream ends, and prints the generated reply for each line.

    Args:
        persona: Persona description placed in the prompt, using the same
            ``"Persona: ... Chat: ..."`` layout as the training text.
        max_new_tokens: Upper bound on the number of tokens generated per
            reply, not counting the prompt.
    """
    print("Welcome to the chatbot! Type 'exit' to end the conversation.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: end the session instead of crashing.
            print("\nBot: Goodbye!")
            break

        if user_input.lower() == 'exit':
            print("Bot: Goodbye!")
            break
        if not user_input:
            # Nothing to answer; prompt again.
            continue

        prompt = f"Persona: {persona} Chat: {user_input}"
        # max_new_tokens limits only the reply, so a long prompt cannot use up
        # the generation budget the way a total max_length would.
        # return_full_text=False returns just the continuation, so the prompt
        # is not echoed back to the user.
        response = chatbot(
            prompt,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            return_full_text=False,
        )
        print("Bot:", response[0]['generated_text'].strip())


if __name__ == "__main__":
    chat_with_bot()
