In [8]:
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# Load the dataset
dataset = load_dataset("Kaludi/Customer-Support-Responses")

# Combine the query and response into a single text for training
data = [f"Query: {query}\nResponse: {response}" for query, response in zip(dataset['train']['query'], dataset['train']['response'])]

# Split the dataset into training and validation sets
train_data, val_data = train_test_split(data, test_size=0.1)

# Save the processed data to text files
with open('train.txt', 'w') as f:
    for item in train_data:
        f.write("%s\n" % item)

with open('val.txt', 'w') as f:
    for item in val_data:
        f.write("%s\n" % item)


In [14]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset, Dataset

# Load the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Add a padding token to the tokenizer
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

# Load the datasets from text files
train_dataset = load_dataset('text', data_files={'train': 'train.txt'})
val_dataset = load_dataset('text', data_files={'validation': 'val.txt'})

# Tokenize the datasets
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_val_dataset = val_dataset.map(tokenize_function, batched=True)

# Set up data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
    prediction_loss_only=True,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_train_dataset['train'],
    eval_dataset=tokenized_val_dataset['validation']
)

# Train the model
trainer.train()





Step,Training Loss


TrainOutput(global_step=99, training_loss=1.4870066016611427, metrics={'train_runtime': 540.8764, 'train_samples_per_second': 0.732, 'train_steps_per_second': 0.183, 'total_flos': 25867911168000.0, 'train_loss': 1.4870066016611427, 'epoch': 3.0})

In [16]:
def generate_response(query, model, tokenizer, max_length=50):
    input_text = f"Query: {query}\nResponse:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    output = model.generate(input_ids, attention_mask=attention_mask, max_length=max_length, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Example usage
query = "How can I reset my password?"
response = generate_response(query, model, tokenizer)
print(response)



Query: How can I reset my password?
Response: We'd be happy to help. Can you please provide your email address so we can send you instructions on how to reset your password?

We'd be happy to help. Can you


In [17]:
sample_queries = [
    "How can I reset my password?",
    "What is the status of my order?",
    "Can I return a product after 30 days?"
]

for query in sample_queries:
    response = generate_response(query, model, tokenizer)
    print(f"Query: {query}")
    print(f"Response: {response}")
    print("\n")


Query: How can I reset my password?
Response: Query: How can I reset my password?
Response: We'll notify you of any changes. Can you please provide your email address so we can send you instructions on how to reset your password? Can you please provide your email address so we can


Query: What is the status of my order?
Response: Query: What is the status of my order?
Response: We apologize for the inconvenience. Can you please provide your email address so we can check if there's a problem? Can you please provide your email address so we can check if there's


Query: Can I return a product after 30 days?
Response: Query: Can I return a product after 30 days?
Response: We can return an item for a refund or exchange. Can you please provide the product name or SKU and the product name or SKU you'd like to return it to?


