In [2]:
import os

# Step up to the parent directory; presumably the project root, so that the
# relative "data/..." paths used later in this notebook resolve — TODO confirm.
# NOTE(review): not idempotent — re-running this cell climbs one more level each time.
os.chdir('..')

In [4]:
# Display the current working directory to confirm the chdir took effect.
os.getcwd()

'/media/amritesh/bytesviewhdd1/ai_stuf/netomi/Netomi-Customer-Service-Optimization'

In [5]:
# Closed set of intent labels offered to the classifier; order is preserved
# because the list is rendered verbatim into the system prompt.
TOPICS = [
    # orders and shipping
    "delivery_status", "change_shipping_address", "order_modification",
    # money
    "refund_compensation", "invoice_billing",
    # account lifecycle
    "account_creation", "account_update", "account_deletion",
    # engagement
    "newsletter_subscription", "reviews_feedback",
    # catch-all
    "other",
]


In [6]:
from openai import OpenAI
import json
import pandas as pd
import re
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

# Shared OpenAI client used by the LLM helper functions in this notebook.
# No credentials are passed explicitly here.
client = OpenAI()


In [None]:
def classify_topic_llm(query: str) -> str:
    """Classify a customer query into exactly one label from ``TOPICS``.

    Sends the query to the chat model in JSON mode and reads the "topic"
    field of the reply.

    Args:
        query: Raw customer query text.

    Returns:
        A label from ``TOPICS``. If the reply has no "topic" field, or its
        value is not in ``TOPICS`` (after trimming whitespace and, failing
        that, lower-casing), the catch-all label "other" is returned.

    Raises:
        ValueError: If the model returns no text, or text that is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    fallback_topic = "other"

    system_prompt = (
        "You are an intent classification system for an e-commerce customer support chatbot.\n"
        "Classify the customer query into ONE of the predefined topics.\n"
        "Return ONLY valid JSON.\n"
        f"Allowed topics: {TOPICS}"
    )

    user_prompt = f"""
Customer query:
\"{query}\"

Return response in JSON:
{{
  "topic": "<one_of_allowed_topics>"
}}
"""

    response = client.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0,
        # JSON mode: ask the API for a bare JSON object so the reply is not
        # wrapped in markdown fences or prose that json.loads would reject.
        response_format={"type": "json_object"},
    )

    content = response.choices[0].message.content
    if not content:
        # message.content can be None/empty; fail with a clear message
        # instead of a TypeError from json.loads(None).
        raise ValueError("Topic classification returned an empty response")

    payload = json.loads(content)
    label = payload.get("topic") if isinstance(payload, dict) else None
    if isinstance(label, str):
        # Accept the label as-is first, then tolerate case differences.
        for candidate in (label.strip(), label.strip().lower()):
            if candidate in TOPICS:
                return candidate

    # Never leak an out-of-vocabulary label into downstream results.
    return fallback_topic


In [8]:
# Shape of the entity object the LLM is asked to fill in. Values are
# human-readable type hints shown to the model, not validators. Key order is
# preserved because the mapping is serialised into the prompt.
ENTITY_SCHEMA = dict(
    order_id="string or null",
    invoice_id="string or null",
    account_type="string or null",
    account_category="string or null",
    # Nested object: address parts are extracted individually.
    shipping_address=dict(
        street="string or null",
        city="string or null",
        pincode="string or null",
        country="string or null",
    ),
    refund_reason="string or null",
    payment_method="string or null",
    email="string or null",
)


In [9]:
def extract_ids_regex(text: str):
    """Pull an order ID and an invoice ID out of free text using regexes.

    Args:
        text: Text to scan.

    Returns:
        A dict with keys "order_id" and "invoice_id". Each value is the first
        matching substring, or None when nothing matches:
          * order_id   - a standalone run of five or more digits
          * invoice_id - "INV", an optional hyphen, then digits
    """
    invoice_match = re.search(r"\bINV[-]?\d+\b", text)
    # The lookbehind skips digit runs that are the numeric part of a
    # hyphenated invoice ID ("INV-12345"), which \b alone would accept.
    order_match = re.search(r"(?<!INV-)\b\d{5,}\b", text)

    # Return the matched text rather than re.Match objects so the result is
    # plain, JSON-serialisable data.
    return {
        "order_id": order_match.group(0) if order_match else None,
        "invoice_id": invoice_match.group(0) if invoice_match else None,
    }


In [None]:
def extract_entities_llm(query: str) -> dict:
    """Extract structured entities from a customer query via the chat model.

    The model is shown ``ENTITY_SCHEMA`` (serialised as JSON) and asked, in
    JSON mode, to return an object following it.

    Args:
        query: Raw customer query text.

    Returns:
        The parsed JSON object from the model's reply. The reply is not
        validated against ``ENTITY_SCHEMA``; only its top-level type is checked.

    Raises:
        ValueError: If the model returns no text, text that is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or JSON
            that is not an object.
    """
    system_prompt = (
        "You extract structured entities from customer support queries.\n"
        "Only extract information explicitly present.\n"
        "Return valid JSON following the given schema.\n"
        "If an entity is missing, return null."
    )

    user_prompt = f"""
Extract entities from the query below.

Query:
\"{query}\"

Entity schema:
{json.dumps(ENTITY_SCHEMA, indent=2)}

Return JSON only.
"""

    response = client.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0,
        # JSON mode: ask the API for a bare JSON object so the reply is not
        # wrapped in markdown fences or prose that json.loads would reject.
        response_format={"type": "json_object"},
    )

    content = response.choices[0].message.content
    if not content:
        # message.content can be None/empty; fail with a clear message
        # instead of a TypeError from json.loads(None).
        raise ValueError("Entity extraction returned an empty response")

    entities = json.loads(content)
    if not isinstance(entities, dict):
        # Callers are promised a dict; reject arrays/scalars explicitly.
        raise ValueError(
            f"Entity extraction returned {type(entities).__name__}, expected a JSON object"
        )
    return entities


In [11]:
# Cap on how many rows are enriched in this demo run; every query costs two
# LLM calls (one for the topic, one for the entities).
DEMO_QUERY_LIMIT = 200

queries_df = pd.read_csv("data/SkyRocket Data_GenAI - Queries.csv")

# Take the first DEMO_QUERY_LIMIT rows, then drop missing cells so a NaN is
# never interpolated into a prompt as the literal text "nan".
demo_queries = queries_df["Queries"].head(DEMO_QUERY_LIMIT).dropna()

# Accumulate plain records and build the DataFrame once at the end.
results = []

for query_text in demo_queries:
    topic = classify_topic_llm(query_text)
    entities = extract_entities_llm(query_text)

    results.append({
        "query": query_text,
        "topic": topic,
        "entities": entities
    })

# One record per enriched query: the original text, its topic label, and the
# extracted entity object.
enriched_queries_llm = pd.DataFrame(results)
enriched_queries_llm.to_json(
    "data/SkyRocket_Queries_LLM_Enriched.json",
    orient="records",
    indent=2
)
