In [1]:
import pandas as pd
import random

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Generate population prompts, responses and ideal responses

In [2]:
df = pd.read_csv("/Users/leiyang/competitions/opendi/corrected_e_commerce_chatbot_prompts_responses_ideal.csv")
df.head()
prompts = df["User Prompt"].tolist()
responses = df["Response"].tolist()
ideal_responses = df["Ideal Response"].tolist()

In [3]:
prompts

['How do I create an account?',
 'Where can I find electronics on sale?',
 'I need help with my order.',
 'Can I track my shipment?',
 'How do I return a product?',
 'Are there any new user discounts?',
 'What are the payment options available?',
 'How can I update my shipping address?',
 'Can you recommend some gift ideas?',
 'How do I change my password?',
 'Is there a loyalty program?',
 'What are the best-selling books right now?',
 'How do I apply a promo code?',
 'Can you help me find vegan skincare products?',
 'How do I subscribe to the newsletter?',
 "What's the warranty on electronic gadgets?",
 'How do I leave a product review?',
 'Can I buy a gift card?',
 'What are the customer service hours?',
 'How do I delete my account?']

In [4]:
responses

["To create an account, click on the 'Sign Up' button at the top right corner and fill in your details. You'll need to provide your email and create a password.",
 "You can find electronics on sale by clicking on the 'Deals' section. There, choose the 'Electronics' category to see all the discounted items.",
 'Sure, I can help with that. Please provide your order number or the email address used for the order so I can look into it for you.',
 "Yes, you can track your shipment. Once your order is shipped, you'll receive an email with a tracking link. You can also find this link in your order history.",
 "To return a product, go to your order history, select the item, and click 'Return Item'. Follow the instructions to complete the return process.",
 'Yes, as a new user, you get a 10% discount on your first purchase. Use the code WELCOME10 at checkout.',
 'We accept various payment options including credit/debit cards, PayPal, and Apple Pay. You can choose your preferred method at checko

In [5]:
ideal_responses

['To create an account, simply click on the "Create Account" button in the top right corner of the page. You will be prompted to enter your name, email address, and password. Once you have entered this information, click on the "Create Account" button again and you will be logged in to your new account.',
 'To find electronics on sale, you can either browse the "Electronics" category on the main page or use the search bar to search for specific products. Once you have found a product that you are interested in, you can click on the product image to view more information about the product. If the product is on sale, you will see a "Sale" sticker on the product image.',
 'If you need help with your order, you can contact customer service by clicking on the "Contact Us" link in the footer of the page. You can also contact customer service by phone or email.',
 "Absolutely! To track your shipment, go to 'My Orders' in your account and select the order you want to track. You'll see a tracki

In [6]:
len(prompts)

20

### GA

#### Fitness function

In [8]:
# Define a simple tokenizer function
def tokenizer(text):
    return text.split()

# Initialize TF-IDF Vectorizer with the custom tokenizer
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenizer)

# Fit the vectorizer on the ideal responses and transform the responses
tfidf_vectorizer.fit(ideal_responses)
ideal_response_vectors = tfidf_vectorizer.transform(ideal_responses)
response_vectors = tfidf_vectorizer.transform(responses)

# Fitness function components
def relevance_score(response_vector, ideal_response_vector):
    # Calculate the cosine similarity between response and ideal response vectors
    return cosine_similarity(response_vector, ideal_response_vector)[0][0]

def informativeness_score(response, key_information):
    # Check if key information is present in the response
    return 1 if any(key_word in response for key_word in key_information) else 0

def engagement_score(response):
    # Simple heuristic: check if the response asks a question or provides a call-to-action
    if '?' in response or 'click' in response or 'select' in response or 'visit' in response.lower():
        return 1
    return 0

#list of key information words/phrases
key_information=['account','address']

# Calculate overall fitness for each response
fitness_scores = []
key_information = ['']
for i in range(len(responses)):
    # Calculate each component of fitness
    relevance = relevance_score(response_vectors[i], ideal_response_vectors[i])
    informativeness = informativeness_score(responses[i], key_information)  # Example keyword
    engagement = engagement_score(responses[i])
    
    # Weights for each component (example weights)
    weight1, weight2, weight3 = 0.4, 0.4, 0.2
    overall_fitness = weight1 * relevance + weight2 * informativeness + weight3 * engagement
    
    fitness_scores.append(overall_fitness)

fitness_scores


[0.8178121765022421,
 0.3608270887051476,
 0.47768757761823083,
 0.15235244129820474,
 0.3691896119456559,
 0.2538393907906631,
 0.2355860837655421,
 0.5981838263554095,
 0.38756992451507255,
 0.8691864280354631,
 0.2273922938211036,
 0.14607087263191923,
 0.24299915623978416,
 0.4159332198770767,
 0.6675983678867905,
 0.27982807193249093,
 0.4356410625431848,
 0.259209828436252,
 0.24384264736969818,
 0.6388928488559167]

In [117]:
fitness_scores[1]

0.3608270887051476

#### The other GA operators

In [118]:
def select_parents(population, fitness_values):
    # Roulette wheel selection
    total_fitness = sum(fitness_values)
    selection_probabilities = [f / total_fitness for f in fitness_values]
    return random.choices(population, weights=selection_probabilities, k=len(population) // 2)

In [119]:
def crossover(parents):
    children = []
    for _ in range(len(parents)):
        parent1, parent2 = random.sample(parents, 2)
        child = parent1 if random.random() < 0.5 else parent2
        children.append(child)
    return children

In [120]:
def mutate(children, mutation_rate=0.1):
    for i in range(len(children)):
        if random.random() < mutation_rate:
            children[i] = random.randint(0, num_prompts - 1)

In [121]:
num_prompts = len(prompts)
population_size = 20
num_generations = 50

# # Fitness function 
# def calculate_fitness(solution):
#     # Implement the fitness calculation here
#     return random.uniform(0, 1)  # Placeholder

# Initialize population with random solutions
population = [random.randint(0, num_prompts-1) for _ in range(population_size)]
best_prompts = []

for generation in range(num_generations):
    # Calculate fitness for each solution
#     fitness_values = [calculate_fitness(sol) for sol in population]
    fitness_values = [fitness_scores[sol] for sol in population]

    # Selection
    parents = select_parents(population, fitness_values)

    # Crossover
    children = crossover(parents)

    # Mutation
    mutate(children)

    # Create new generation
    population = children + parents

    # Optional: Print best solution in this generation
    best_sol = population[fitness_values.index(max(fitness_values))]
    print(f"Generation {generation}: Best Prompt - {prompts[best_sol]}")
    best_prompts.append((prompts[best_sol],max(fitness_values)))

# Final best solution
best_overall_sol = population[fitness_values.index(max(fitness_values))]
print(f"Best overall prompt: {prompts[best_overall_sol]}")
best_prompts.sort(key=lambda x: x[1],reverse=True)
top_5_prompts = best_prompts[:5]

Generation 0: Best Prompt - Can I buy a gift card?
Generation 1: Best Prompt - How can I update my shipping address?
Generation 2: Best Prompt - How can I update my shipping address?
Generation 3: Best Prompt - How can I update my shipping address?
Generation 4: Best Prompt - How can I update my shipping address?
Generation 5: Best Prompt - How can I update my shipping address?
Generation 6: Best Prompt - How can I update my shipping address?
Generation 7: Best Prompt - How do I delete my account?
Generation 8: Best Prompt - How can I update my shipping address?
Generation 9: Best Prompt - How can I update my shipping address?
Generation 10: Best Prompt - How can I update my shipping address?
Generation 11: Best Prompt - How can I update my shipping address?
Generation 12: Best Prompt - How can I update my shipping address?
Generation 13: Best Prompt - How do I subscribe to the newsletter?
Generation 14: Best Prompt - How can I update my shipping address?
Generation 15: Best Prompt - H

In [122]:
top_5_prompts

[('How can I update my shipping address?', 0.8691864280354631),
 ('How do I create an account?', 0.8691864280354631),
 ('Can I buy a gift card?', 0.8178121765022421),
 ('How can I update my shipping address?', 0.8178121765022421),
 ('How can I update my shipping address?', 0.8178121765022421)]

In [123]:
best_overall_sol

7

In [124]:
responses[9]

"To change your password, go to account settings and click on 'Change Password'. You'll need to enter your current password and then your new one."

In [125]:
ideal_responses[9]

"To change your password, visit 'Account Settings' and select 'Change Password.' You'll need to enter your current password for verification before setting a new one."

### Analyse the best prompts 

#### Analyse patterns and user intent recognition

In [126]:
fitness_values

[0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.8178121765022421,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.8178121765022421,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095,
 0.5981838263554095]

In [129]:
# Example: Identify common themes or keywords in top prompts
from collections import Counter

# Assuming 'top_prompts' is a list of best-performing prompts identified by GA
top_prompts = top_5_prompts
word_counts = Counter(word for prompt, _ in top_prompts for word in prompt.split())

In [136]:
# Identify most common words or themes
common_words = word_counts.most_common(5)
print("Common themes:", common_words)

# Enhance intent recognition based on these themes
def recognize_intent(user_input):
    # Simplified intent recognition logic
    if "password" in user_input:
        return "change_password"
    elif "account" in user_input:
        return "create_account"
    elif "update" in user_input:
        # Additional check to distinguish between different types of updates
        if "address" in user_input:
            return "update_address"
        elif "account" in user_input:
            return "update_account_info"
    # Add more conditions based on common themes
    else:
        return "unknown"


# Test the function
print("Recognized intent:", recognize_intent(top_5_prompts[0][0]))


Common themes: [('I', 5), ('How', 4), ('can', 3), ('update', 3), ('my', 3)]
Recognized intent: update_address


In [135]:
top_5_prompts[0][0]

'How can I update my shipping address?'

#### Enhance Interaction logic

In [139]:
# Example: Prioritize responses based on GA insights and enhanced intent recognition
def generate_response(user_intent):
    if user_intent == "change_password":
        return "To change your password, go to account settings and select 'Change Password'."
    elif user_intent == "create_account":
        return "Creating an account is easy! Just click on 'Sign Up' and follow the instructions."
    elif user_intent == "update_address":
        return "You can update your shipping address in account settings under 'Address Book'."
    elif user_intent == "update_account_info":
        return "To update your account information, navigate to account settings and choose the information you wish to update."
    # Add more responses based on top intents
    else:
        return "Sorry, I didn't understand that. Can you please rephrase your question?"

# Test the function
user_intent = recognize_intent("How can I update my shipping address?")
print("Chatbot response:", generate_response(user_intent))


Chatbot response: You can update your shipping address in account settings under 'Address Book'.


#### Improve question framing

In [140]:
# Example: Frame questions based on user language patterns and intents
def ask_for_details(user_intent):
    if user_intent == "change_password":
        return "Would you like to reset your password, or are you trying to update it?"
    elif user_intent == "create_account":
        return "Do you need help with the steps to create a new account?"
    elif user_intent == "update_address":
        return "Are you looking to add a new address or modify an existing one?"
    elif user_intent == "update_account_info":
        return "What specific account information are you looking to update?"
    else:
        return "Could you provide more details to assist you better?"

# Test the function
user_intent = recognize_intent("I need to change my delivery address")
print("Chatbot question:", ask_for_details(user_intent))


Chatbot question: Could you provide more details to assist you better?


The chatbot asks more specific follow-up questions based on the recognized intent.

#### Optimize user input processing

In [141]:
# Example: Enhanced input processing
def process_input(user_input):
    intent = recognize_intent(user_input)
    response = generate_response(intent)
    return response

# Test the function
print("Processed response:", process_input("How can I update my email address?"))


Processed response: You can update your shipping address in account settings under 'Address Book'.


The chatbot processes user inputs, recognizes their intent, and generates an appropriate response.

#### Structure response effectively

In [142]:
# Example: Structuring responses for clarity and action
def structured_response(user_intent):
    if user_intent == "change_password":
        return {"message": "Click here to reset your password", "action": "open_link", "url": "/reset-password"}
    elif user_intent == "create_account":
        return {"message": "Click here to start creating your account", "action": "open_link", "url": "/signup"}
    elif user_intent == "update_address":
        return {"message": "Click here to update your address", "action": "open_link", "url": "/address-book"}
    elif user_intent == "update_account_info":
        return {"message": "You can update your account info here", "action": "open_link", "url": "/account-settings"}
    else:
        return {"message": "Sorry, I'm not sure how to help with that. Can you try asking in a different way?"}

# Test the function
user_intent = recognize_intent("I forgot my password")
print("Structured response:", structured_response(user_intent))

Structured response: {'message': 'Click here to reset your password', 'action': 'open_link', 'url': '/reset-password'}


Responses are structured with clear messages and actionable steps, like providing links for direct actions.