In [None]:
import pandas as pd


# Relative path to the customer-interaction CSV; it is resolved against the
# kernel's current working directory.
file_path = 'CustomerInteractionData.csv'
data = pd.read_csv(file_path)


# Last expression of the cell: preview the first rows as a sanity check of the load.
data.head()


In [None]:

# Fill missing values by assigning the filled column back onto the frame.
# Calling data[col].fillna(..., inplace=True) acts on an intermediate Series:
# pandas 2.x emits a FutureWarning for that chained pattern and, with
# Copy-on-Write enabled, the parent frame is left unchanged.

# Sentinel for rows without a location.
data['LocationID'] = data['LocationID'].fillna(-1)

# Impute missing durations with the median of the observed ones
# (Series.median skips NaN by default).
median_call_duration = data['CallDurationSeconds'].median()
data['CallDurationSeconds'] = data['CallDurationSeconds'].fillna(median_call_duration)

# Placeholder label for unidentified agents / customers.
for id_column in ('AgentID', 'CustomerID'):
    data[id_column] = data[id_column].fillna('Unknown')

# Per-column count of values that are still missing after the fills above;
# left as the last expression so the notebook displays it.
missing_values_after = data.isnull().sum()
missing_values_after


In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
from transformers import DataCollatorForLanguageModeling
from datasets import Dataset

In [None]:
from sklearn.model_selection import train_test_split  
from datasets import Dataset, DatasetDict
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling


# NOTE(review): this re-read rebinds `data` to a fresh frame, so any changes
# made to `data` by earlier cells are not carried into the steps below. It
# also means re-running this cell always starts from the raw text.
file_path = 'CustomerInteractionData.csv'
data = pd.read_csv(file_path)


# Fetch the NLTK resources used by the normalisation step. 'punkt_tab' is the
# tokenizer data that word_tokenize loads on NLTK 3.9+; 'punkt' is kept for
# older NLTK releases. Downloading a resource the installed version does not
# use is harmless.
for nltk_resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(nltk_resource, quiet=True)

# English stopwords as a set for O(1) membership tests.
STOPWORDS = set(stopwords.words('english'))
# Matches every character that is not an ASCII letter, a digit or whitespace.
REGEX = re.compile(r'[^a-zA-Z0-9\s]')

def normalize_text(text):
    """Lower-case ``text``, strip non-alphanumeric characters and drop stopwords.

    Args:
        text: Raw interaction text. Missing values (``None`` or float NaN,
            which is what ``pd.read_csv`` produces for empty cells) are
            accepted; any other non-string value is converted with ``str()``.

    Returns:
        str: The surviving tokens joined by single spaces, or ``''`` when the
        input is missing.
    """
    # NaN is the only float that is not equal to itself; guarding here keeps
    # .apply(normalize_text) from raising AttributeError on empty cells.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = str(text).lower()
    text = REGEX.sub('', text)
    kept_tokens = [word for word in word_tokenize(text) if word not in STOPWORDS]
    return ' '.join(kept_tokens)

# Overwrite the raw text column with its normalised form.
data['CustomerInteractionRawText'] = data['CustomerInteractionRawText'].apply(normalize_text)


# Full corpus as a Hugging Face dataset (text column only).
dataset = Dataset.from_pandas(data[['CustomerInteractionRawText']])


# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
train_data, val_data = train_test_split(data, random_state=42, test_size=0.2)


# Convert each split to a Hugging Face dataset, keeping only the text column.
train_dataset, val_dataset = (
    Dataset.from_pandas(split_frame[['CustomerInteractionRawText']])
    for split_frame in (train_data, val_data)
)

# Reuse the end-of-sequence token as the padding token so the tokenizer can
# pad sequences to a fixed length.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Encode a batch of interaction texts as fixed-length token sequences.

    Args:
        examples: Mapping with a 'CustomerInteractionRawText' entry holding
            the texts to encode (the batch passed in by ``Dataset.map``).

    Returns:
        The tokenizer's output for the batch, with every sequence padded or
        truncated to 512 tokens.
    """
    texts = examples['CustomerInteractionRawText']
    encode_options = {
        'padding': 'max_length',
        'truncation': True,
        'max_length': 512,
    }
    return tokenizer(texts, **encode_options)

# Tokenize both splits in batches.
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_val_dataset = val_dataset.map(tokenize_function, batched=True)

# Carve 10% of the training split off for per-epoch evaluation. The seed makes
# this partition reproducible between runs, matching the seeded
# train/validation split used for the pandas frame.
dataset_split = tokenized_train_dataset.train_test_split(test_size=0.1, seed=42)
dataset_dict = DatasetDict({
    'train': dataset_split['train'],
    'test': dataset_split['test']
})

# Start from the pretrained 'gpt2' checkpoint with its language-modelling head.
model = GPT2LMHeadModel.from_pretrained('gpt2')

# mlm=False: build causal (next-token) language-modelling batches rather than
# masked-LM ones.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# NOTE(review): newer transformers releases name the evaluation schedule
# argument `eval_strategy`; confirm `evaluation_strategy` is still accepted by
# the installed version.
training_args = TrainingArguments(
    output_dir='./results',
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    evaluation_strategy="epoch",
    save_steps=10_000,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['test'],
    data_collator=data_collator,
)


# Kept as the cell's last expression so the notebook shows the training output.
trainer.train()

In [None]:
# Evaluate on the tokenized 20% validation split. This is a different dataset
# from dataset_dict['test'], which the Trainer was given as its eval_dataset.
eval_results = trainer.evaluate(eval_dataset=tokenized_val_dataset)
print(eval_results)

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the tokenizer and model once, before the prompt loop; re-loading them
# on every iteration repeated identical work for each user input.
# NOTE(review): this loads the stock 'gpt2' checkpoint and rebinds `tokenizer`
# and `model`, so responses come from the pretrained weights, not from the
# model trained earlier in the notebook. Confirm that is intended.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer.pad_token_id = tokenizer.eos_token_id

while True:
    customer_interaction = input("Enter customer interaction: ")
    if customer_interaction == "exit":
        break
    # A blank prompt encodes to an empty tensor, which generate() cannot
    # continue from; ask again instead of failing.
    if not customer_interaction.strip():
        continue

    input_ids = tokenizer.encode(customer_interaction, return_tensors='pt')

    # Single unpadded prompt, so every position is a real token.
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=150,  # counts prompt tokens plus generated tokens
        num_return_sequences=1,
        temperature=1,
        top_k=50,
        do_sample=True,
        top_p=0.95,
        # Passed explicitly: setting it on the tokenizer alone does not
        # configure generate().
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    print("Generated Response: ", response)


In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the tokenizer and model once, before the prompt loop; re-loading them
# on every iteration repeated identical work for each customer.
# NOTE(review): this loads the stock 'gpt2' checkpoint and rebinds `tokenizer`
# and `model`, so responses come from the pretrained weights, not from the
# model trained earlier in the notebook. Confirm that is intended.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer.pad_token_id = tokenizer.eos_token_id

while True:

    customer_name = input("Enter the customer's name: ")
    # Check the sentinel straight away so typing "exit" does not force the
    # user through the second prompt first.
    if customer_name == "exit":
        break
    customer_interaction_history = input("Enter the customer's interaction history: ")

    customer_data = {
        "name": customer_name,
        "interaction_history": customer_interaction_history
    }

    personalized_input = f"{customer_data['name']} has the following history: {customer_data['interaction_history']}"

    input_ids = tokenizer.encode(personalized_input, return_tensors='pt')

    # Single unpadded prompt, so every position is a real token.
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=150,  # counts prompt tokens plus generated tokens
        num_return_sequences=1,
        temperature=0.9,
        top_k=50,
        top_p=0.92,
        do_sample=True,
        # early_stopping is omitted: it only affects beam search, and this
        # call samples with a single beam.
        # pad_token_id is passed explicitly: setting it on the tokenizer alone
        # does not configure generate().
        pad_token_id=tokenizer.eos_token_id,
    )
    personalized_response = tokenizer.decode(output[0], skip_special_tokens=True)

    print("Personalized Response: ", personalized_response)
