# In [None]:
import pandas as pd
import random

# Support-ticket categories; each one is a key into the lookup tables below.
categories = [
    "order_status",
    "refund",
    "technical_support",
    "cancellation",
    "payment_issue",
    "general_query",
    "subscription",
    "feedback",
    "invoice",
    "shipping",
]

# Example (product name, product description) pairs, keyed by category.
products = {
    "order_status": [
        ("iPhone 15 Pro", "Latest Apple smartphone"),
        ("Dell XPS 15", "High-end laptop"),
    ],
    "refund": [
        ("Nike Shoes", "Lightweight running shoes"),
        ("Apple AirPods Pro", "Noise-canceling earbuds"),
    ],
    "technical_support": [
        ("MacBook Pro", "Apple's M2-powered laptop"),
        ("PS5 Console", "Gaming console"),
    ],
    "cancellation": [
        ("Amazon Prime", "Streaming & shopping membership"),
        ("Gym Membership", "Fitness access"),
    ],
    "payment_issue": [
        ("Netflix Subscription", "OTT streaming service"),
        ("Amazon Order", "E-commerce purchase"),
    ],
    "general_query": [
        ("Customer Support", "24/7 service assistance"),
        ("Retail Store", "Physical locations"),
    ],
    "subscription": [
        ("Netflix", "Streaming service"),
        ("Disney+", "OTT with Marvel & Star Wars"),
    ],
    "feedback": [
        ("Mobile App", "User-friendly shopping experience"),
        ("Customer Service", "Support team"),
    ],
    "invoice": [
        ("Microsoft 365", "Subscription-based software"),
        ("AWS Cloud", "Cloud storage solutions"),
    ],
    "shipping": [
        ("Courier Service", "Fast nationwide delivery"),
        ("Express Shipping", "Premium next-day delivery"),
    ],
}

# Example customer questions (the model's input text), keyed by category.
sample_questions = {
    "order_status": [
        "Where is my order?",
        "Has my order been shipped?",
        "Can I track my package?",
        "What is the expected delivery date?",
        "My order is delayed, what should I do?",
    ],
    "refund": [
        "How do I get a refund?",
        "I want to return my product.",
        "Refund status for my order?",
        "How long does a refund take?",
        "Will I get a full refund if I return the item?",
    ],
    "technical_support": [
        "My device is not working.",
        "How do I reset my product?",
        "The software is crashing.",
        "Bluetooth is not connecting, can you help?",
        "How do I fix a frozen screen?",
    ],
    "cancellation": [
        "Can I cancel my order?",
        "I need to cancel my subscription.",
        "How do I stop my service?",
        "Can I pause my subscription instead of canceling?",
        "How do I cancel automatic payments?",
    ],
    "payment_issue": [
        "Why was I double charged?",
        "Payment failed but money deducted.",
        "Do you accept PayPal?",
        "My card was declined, what should I do?",
        "I was charged extra, how do I fix it?",
    ],
    "general_query": [
        "What are your business hours?",
        "Do you have a store location?",
        "Where can I contact support?",
        "How do I create an account?",
        "What payment methods do you accept?",
    ],
    "subscription": [
        "Can I upgrade my plan?",
        "How do I cancel my membership?",
        "What benefits does premium offer?",
        "Is there a student discount?",
        "Can I share my subscription with others?",
    ],
    "feedback": [
        "I want to submit feedback.",
        "Where can I leave a review?",
        "How do I rate your service?",
        "Do you have a feedback survey?",
        "Can I suggest a new feature?",
    ],
    "invoice": [
        "Can I get a copy of my invoice?",
        "Where can I download my receipt?",
        "Billing statement request.",
        "How do I update my billing information?",
        "Can I get an invoice for past months?",
    ],
    "shipping": [
        "What are your shipping options?",
        "How long does delivery take?",
        "Can I change my shipping address?",
        "Do you offer free shipping?",
        "Can I schedule a delivery time?",
    ],
}

# Canned agent replies (the model's output text), keyed by category.
sample_responses = dict(
    order_status=[
        "Your order is on the way! Check tracking for updates.",
        "We have shipped your order. It should arrive soon.",
        "Delivery is expected within 3-5 business days.",
    ],
    refund=[
        "Refunds are processed within 7 business days.",
        "You can request a refund from the order details page.",
        "Your refund request has been submitted for review.",
    ],
    technical_support=[
        "Please restart your device and try again.",
        "Ensure your software is updated to the latest version.",
        "If the issue persists, contact our support team.",
    ],
    cancellation=[
        "Your order has been canceled successfully.",
        "Subscription cancellation is confirmed.",
        "To cancel, visit your account settings or contact support.",
    ],
    payment_issue=[
        "Please verify with your bank for failed transactions.",
        "Try using an alternate payment method.",
        "Your payment was received successfully.",
    ],
    general_query=[
        "Our support team is available 24/7.",
        "Visit our website for more details about our services.",
        "You can reach us via live chat or email.",
    ],
    subscription=[
        "You can upgrade your subscription in the account section.",
        "Membership cancellation will take effect at the end of the billing cycle.",
        "Premium offers exclusive content and features.",
    ],
    feedback=[
        "We appreciate your feedback! It helps us improve.",
        "You can leave a review on our website.",
        "Thank you for your valuable input!",
    ],
    invoice=[
        "Invoices can be downloaded from your account.",
        "We have sent a copy of your invoice to your registered email.",
        "Billing statements are available under the 'My Orders' section.",
    ],
    shipping=[
        "Standard shipping takes 5-7 days.",
        "Express shipping is available for faster delivery.",
        "You can change your shipping address before dispatch.",
    ],
)

# Generate dataset with 100K records, including customer info.
#
# NOTE: values are drawn from the module-level `random` generator, which this
# block does not seed, so each run yields a different dataset. Call
# random.seed(...) before this block if a reproducible file is required.
NUM_RECORDS = 100000

# Pools for the fake customer fields. They never change, so they are built
# once here rather than being re-created on every loop iteration.
CUSTOMER_NAMES = ["Alice Johnson", "Bob Smith", "Charlie Brown", "David Lee", "Emily Davis"]
SATISFACTION_RATINGS = [1, 2, 3, 4, 5]
RESOLUTIONS = ["Resolved via call", "Refund issued", "Pending customer response", "Issue escalated"]

data = []
for _ in range(NUM_RECORDS):
    # Question, response and product are each picked independently from the
    # chosen category's pool; a response is not tied to a specific question.
    category = random.choice(categories)
    question = random.choice(sample_questions[category])
    response = random.choice(sample_responses[category])
    product, description = random.choice(products[category])

    # Fake customer details for personalization. The ID and the name are drawn
    # independently, so the same ID can appear with different names.
    customer_id = random.randint(1000, 9999)
    customer_name = random.choice(CUSTOMER_NAMES)
    # e.g. "Alice Johnson" -> "alice.johnson@example.com"
    customer_email = customer_name.lower().replace(" ", ".") + "@example.com"
    customer_satisfaction = random.choice(SATISFACTION_RATINGS)
    resolution = random.choice(RESOLUTIONS)

    # Field order here must match the column order used when the rows are
    # turned into a DataFrame.
    data.append([
        customer_id, customer_name, customer_email, category, product, description,
        question, response, resolution, customer_satisfaction
    ])

# Build the DataFrame; the labels below are listed in the same order as the
# fields of each generated row.
column_names = [
    "Customer ID",
    "Customer Name",
    "Customer Email",
    "Category",
    "Product Name",
    "Product Description",
    "Input Text",
    "Output Text",
    "Resolution",
    "Customer Satisfaction Rating",
]
df = pd.DataFrame(data, columns=column_names)

# Destination files for the two exports.
full_csv_path = r"C:\Users\siddh\Downloads\Master Thesis\Chatbot 2\DATA\full_dataset_100k.csv"
train_csv_path = r"C:\Users\siddh\Downloads\Master Thesis\Chatbot 2\DATA\train_dataset_100k.csv"

# Full dataset (all columns) for the frontend; the row index is not written.
df.to_csv(full_csv_path, index=False)

# Training dataset: only the question/answer pair columns for the model.
training_pairs = df[["Input Text", "Output Text"]]
training_pairs.to_csv(train_csv_path, index=False)

print("✅ Datasets saved successfully!")
