In [1]:
!pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
import pandas as pd
import json
import random

# Load the new keywords CSV file
file_path = '../data/formatted/SalesKeywords.csv'  # Replace with the actual path to your CSV file
keywords_df = pd.read_csv(file_path)

# Define multiple response templates and varied instructions for each category
response_templates = {
    "Payment Method": [
        "Could you tell me which payment method you would like to use? We support T/T, L/C, D/P, D/A, MoneyGram, and Credit Card.",
        "What payment option works best for you? We accept T/T, L/C, D/P, D/A, MoneyGram, and Credit Card.",
        "Please let me know your preferred payment method. We offer T/T, L/C, D/P, D/A, MoneyGram, and Credit Card.",
        "Could you share your preferred payment option? We can accept T/T, L/C, D/P, D/A, MoneyGram, and Credit Card.",
        "For payment, would T/T, L/C, D/P, D/A, MoneyGram, or Credit Card work best for you?",
        "To proceed with your order, which payment method suits you best? We have options like T/T, L/C, D/P, D/A, MoneyGram, and Credit Card."
    ],
    "Freight Forwarder": [
        "Could you provide your company and forwarder’s contact information to facilitate shipping?",
        "To ensure timely delivery, may we have your forwarder’s contact information?",
        "Please share the contact details for your freight forwarder to assist with coordination.",
        "To help arrange your shipment, could you provide your forwarder’s contact information?",
        "Could you confirm your forwarder’s details so we can coordinate shipping?",
        "For shipment, could you provide us with your forwarding agent’s contact details?"
    ],
    "Delivery Inquiry": [
        "Could you tell us your preferred delivery date or timing? We’ll align production and shipping accordingly.",
        "When would be an ideal time for delivery? This helps us plan our schedule.",
        "Please share the timing you have in mind for delivery, and we’ll coordinate to meet your needs.",
        "Could you specify when you’d like the items to be delivered? We’ll handle the arrangements.",
        "What timeline works best for you for delivery? Let us know so we can coordinate.",
        "For scheduling, could you let us know the delivery timing you have in mind?"
    ],
    "Specification Inquiry": [
        "The model’s dimensions are [dimensions]. Do you need additional specifications?",
        "This model is sized at [dimensions]. Let us know if you’d like further details.",
        "Would you like more information beyond the dimensions ([dimensions])?",
        "The size of this model is [dimensions]. Are there other specifications you need?",
        "Our model measures [dimensions]. Let us know if you’d like more details.",
        "The dimensions for this model are [dimensions]. Would you like further specifics?"
    ]
}

# Define even more varied instructions and inputs for each category
instruction_templates = {
    "Payment Method": [
        "Ask the customer about their payment method preferences if they mention payment.",
        "Respond with our supported payment methods if the customer asks about payment options.",
        "Guide the customer on available payment methods when they bring up payment.",
        "Mention payment options if the customer expresses interest in payment methods.",
        "Provide payment method details if the customer mentions paying or payment methods."
    ],
    "Freight Forwarder": [
        "Request forwarding contact information if the customer brings up shipping.",
        "Ask for the customer’s freight forwarder details when they inquire about shipping.",
        "When freight forwarding is mentioned, prompt the customer for forwarding details.",
        "Inquire about the freight forwarder if the customer discusses shipping needs.",
        "Request the forwarder’s contact info if the customer talks about arranging freight."
    ],
    "Delivery Inquiry": [
        "Ask for the delivery timing if the customer mentions delivery or timing.",
        "Request delivery preferences when the customer discusses timing or delivery.",
        "Inquire about preferred delivery date if the customer brings up timing.",
        "Ask for delivery details when the customer inquires about delivery timing.",
        "When timing is mentioned, ask for the customer’s expected delivery schedule."
    ],
    "Specification Inquiry": [
        "Provide the model’s dimensions when the customer asks about specifications.",
        "Respond with model size if the customer inquires about product specifications.",
        "Mention product dimensions when the customer requests specifications.",
        "When the customer asks about size, provide model dimensions and ask if they need more.",
        "Offer the dimensions of the model if specifications are requested."
    ]
}

# Generate JSON structure with maximum variety and careful exclusion of specific years
training_data = []

# Iterate through each row in the DataFrame to build diverse training examples
for _, row in keywords_df.iterrows():
    category = row.iloc[0]  # First column is the category
    keywords = ', '.join([str(keyword).strip() for keyword in row[1:] if pd.notna(keyword)])  # Combine all keyword columns
    
    # Retrieve multiple response templates and instruction variations for the current category
    responses = response_templates.get(category, ["Please provide additional details."])
    instructions = instruction_templates.get(category, ["Respond appropriately based on customer inquiry."])
    
    # Create multiple entries by pairing instruction variations with response variations
    for response in responses:
        for instruction in instructions:
            # Randomize inputs to add subtle variability in how the customer query is described
            input_variations = [
                f"The customer mentioned {keywords}.",
                f"The customer asked about {category.lower()}.",
                f"The customer inquired regarding {keywords}.",
                f"The customer brought up {category.lower()} options.",
                f"Keywords such as {keywords} were mentioned by the customer."
            ]
            input_text = random.choice(input_variations)

            training_data.append({
                "instruction": instruction,
                "input": input_text,
                "output": response.replace("[dimensions]", "1960×820×1220 mm")  # Replace placeholder if necessary
            })

# Save to JSON file
json_output_path = '../data/training/sales_terms_training_data.json'
with open(json_output_path, 'w') as f:
    json.dump(training_data, f, indent=4)

print("Training data has been saved to 'generalized_sales_bot_training_data.json'")


Training data has been saved to 'generalized_sales_bot_training_data.json'
