In [None]:
import pandas as pd

In [None]:
# Read the Bitext customer-support CSV into a DataFrame
data = pd.read_csv(
    filepath_or_buffer="/content/Bitext_Sample_Customer_Support_Training_Dataset_27K_responses-v11.csv"
)

# Show the first five rows as plain text
print(data.head(n=5))


   flags                                        instruction category  \
0      B   question about cancelling order {{Order Number}}    ORDER   
1    BQZ  i have a question about cancelling oorder {{Or...    ORDER   
2   BLQZ    i need help cancelling puchase {{Order Number}}    ORDER   
3     BL         I need to cancel purchase {{Order Number}}    ORDER   
4  BCELN  I cannot afford this order, cancel purchase {{...    ORDER   

         intent                                           response  
0  cancel_order  I've understood you have a question regarding ...  
1  cancel_order  I've been informed that you have a question ab...  
2  cancel_order  I can sense that you're seeking assistance wit...  
3  cancel_order  I understood that you need assistance with can...  
4  cancel_order  I'm sensitive to the fact that you're facing f...  


In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
data.shape

(26872, 5)

In [None]:
# Column labels of the loaded frame
data.columns

Index(['flags', 'instruction', 'category', 'intent', 'response'], dtype='object')

In [None]:
# Call describe() to get the summary statistics; the bare attribute only
# evaluates to the bound method and shows its repr instead of a summary table.
data.describe()

In [None]:
# Call info() to print column dtypes and non-null counts; the bare attribute
# only evaluates to the bound method and prints no summary.
data.info()

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Note: These import statements execute very quickly. The longer execution time is likely from subsequent data preprocessing steps.

In [None]:
# Fetch the NLTK resources used by the cleaning step: the stopword lists and
# the WordNet data backing WordNetLemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Lemmatizer instance shared by the text-cleaning function
lemmatizer = WordNetLemmatizer()
# English stopwords held in a set so membership checks are cheap
stop_words = {*stopwords.words('english')}

In [None]:
def preprocess_text(text):
    """Normalize a piece of text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase, drop digit runs, drop every character that is
    neither a word character nor whitespace, split on whitespace, discard
    tokens found in ``stop_words``, and lemmatize the rest with ``lemmatizer``.
    Because braces count as punctuation, a template placeholder such as
    ``{{Order Number}}`` contributes its inner words as ordinary tokens.

    Args:
        text: The raw text. Non-string values (for example the NaN that pandas
            uses for an empty CSV cell, or ``None``) are treated as empty text.

    Returns:
        The cleaned text as a single string; ``""`` when nothing survives
        cleaning or when ``text`` is not a string.
    """
    # Missing values have no .lower(); return empty text instead of raising
    # AttributeError in the middle of a column-wide .apply().
    if not isinstance(text, str):
        return ""
    text = text.lower()                          # Lowercase
    text = re.sub(r'\d+', '', text)              # Remove numbers
    text = re.sub(r'[^\w\s]', '', text)          # Remove punctuation (underscore is kept by \w)
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(t) for t in tokens if t not in stop_words]
    return " ".join(tokens)

In [None]:
# Add a normalized copy of every customer instruction as `clean_text`
data['clean_text'] = data['instruction'].map(preprocess_text)

# Compare raw and cleaned text side by side for the first rows
print(data.loc[:, ['instruction', 'clean_text']].head())

                                         instruction  \
0   question about cancelling order {{Order Number}}   
1  i have a question about cancelling oorder {{Or...   
2    i need help cancelling puchase {{Order Number}}   
3         I need to cancel purchase {{Order Number}}   
4  I cannot afford this order, cancel purchase {{...   

                                         clean_text  
0            question cancelling order order number  
1           question cancelling oorder order number  
2         need help cancelling puchase order number  
3                 need cancel purchase order number  
4  cannot afford order cancel purchase order number  


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data['clean_text'],
    data['intent'],
    test_size=0.2,
    random_state=42,
)


In [None]:
# Vectorize text: fit the TF-IDF vocabulary (capped at 5000 features) on the
# training split only, then transform the test split with that fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
# Train classifier: logistic regression with default settings, fitted on the
# TF-IDF training features with the intent labels as targets.
clf = LogisticRegression()
clf.fit(X_train_vec, y_train)

In [None]:
# Predict intents for the held-out split and print the per-class report
y_pred = clf.predict(X_test_vec)
print(classification_report(y_true=y_test, y_pred=y_pred))

                          precision    recall  f1-score   support

            cancel_order       1.00      0.96      0.98       187
            change_order       0.93      0.99      0.96       187
 change_shipping_address       0.99      1.00      0.99       216
  check_cancellation_fee       1.00      1.00      1.00       199
           check_invoice       0.96      0.99      0.98       192
   check_payment_methods       1.00      1.00      1.00       206
     check_refund_policy       0.99      0.99      0.99       200
               complaint       1.00      1.00      1.00       203
contact_customer_service       1.00      0.99      0.99       208
     contact_human_agent       0.99      1.00      0.99       201
          create_account       1.00      0.97      0.98       217
          delete_account       0.96      0.99      0.98       178
        delivery_options       1.00      1.00      1.00       218
         delivery_period       0.99      1.00      1.00       171
         

In [None]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import spacy

# Load the pre-trained English pipeline `en_core_web_sm` (fetched by the
# download step); its named-entity output is read via `doc.ents` below.
nlp = spacy.load("en_core_web_sm")

def extract_entities(text):
    """Run the spaCy pipeline ``nlp`` on ``text`` and list its named entities.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entry in
        ``doc.ents``; an empty list when no entity is found.
    """
    found = []
    for span in nlp(text).ents:
        found.append((span.text, span.label_))
    return found

# Store the spaCy entities found in each raw instruction, then preview the first rows
data['entities'] = data['instruction'].apply(extract_entities)
print(data.loc[:, ['instruction', 'entities']].head())

                                         instruction entities
0   question about cancelling order {{Order Number}}       []
1  i have a question about cancelling oorder {{Or...       []
2    i need help cancelling puchase {{Order Number}}       []
3         I need to cancel purchase {{Order Number}}       []
4  I cannot afford this order, cancel purchase {{...       []


In [None]:
import re
def extract_entities_placeholder(text):
    """Collect every ``{{...}}`` template placeholder that appears in ``text``.

    Returns:
        The placeholders (braces included) in order of appearance, or the
        single-item list ``["NO_ENTITY"]`` when ``text`` contains none.
    """
    found = re.findall(r'\{\{.*?\}\}', text)
    if not found:
        return ["NO_ENTITY"]
    return found
# Store the {{...}} placeholders of each instruction in `entities`
# (the assignment replaces any existing column of that name), then preview.
data['entities'] = data['instruction'].apply(extract_entities_placeholder)
print(data.loc[:, ['instruction', 'entities']].head())

                                         instruction            entities
0   question about cancelling order {{Order Number}}  [{{Order Number}}]
1  i have a question about cancelling oorder {{Or...  [{{Order Number}}]
2    i need help cancelling puchase {{Order Number}}  [{{Order Number}}]
3         I need to cancel purchase {{Order Number}}  [{{Order Number}}]
4  I cannot afford this order, cancel purchase {{...  [{{Order Number}}]


In [None]:
from sentence_transformers import SentenceTransformer, util

# Load the pre-trained sentence-embedding model; `model` is reused by the
# retrieval functions defined in later cells.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every cleaned instruction once; `embeddings` is the corpus that
# later queries are compared against.
embeddings = model.encode(data['clean_text'].tolist(), convert_to_tensor=True)

# Example query
# NOTE(review): the corpus above is embedded from `clean_text`, while this
# query is encoded without going through `preprocess_text` - confirm that
# the mismatch is intended.
query = "Where is my order?"
query_embedding = model.encode([query], convert_to_tensor=True)  # pass a one-item list

# Cosine similarity between the query and every corpus embedding
cos_scores = util.cos_sim(query_embedding, embeddings)

# Position of the highest-scoring corpus entry
top_idx = cos_scores.argmax().item()  # .item() turns the tensor into a plain Python number

# Print the matched instruction and its stored response (same row position in `data`)
print("Most similar query:", data['instruction'].iloc[top_idx])
print("Response:", data['response'].iloc[top_idx])


Most similar query: wanna locate order {{Order Number}} how can i do it
Response: Thank you for reaching out! I'm here to assist you in locating the order with the order number {{Order Number}}. To find the current status of your order, you can visit our website and navigate to the 'Order History' section. There, you should be able to view the details and status of your purchase. If you need any further guidance or have any other questions, please feel free to let me know. I'm here to ensure a seamless experience for you!


In [None]:
def answer_customer(query):
    """Return the stored response whose instruction embedding best matches ``query``.

    The query is encoded as given (no ``preprocess_text`` step), compared by
    cosine similarity against the precomputed ``embeddings``, and the
    ``response`` value at the best-scoring row position of ``data`` is returned.
    """
    encoded_query = model.encode([query], convert_to_tensor=True)
    best_row = util.cos_sim(encoded_query, embeddings).argmax().item()
    return data['response'].iloc[best_row]

# Try the retrieval helper on a sample refund request and print the matched response
example_query = "I want a refund for order 12345"
print(answer_customer(example_query))



I'm following your eagerness to know when you will receive your refund. Refunds are typically processed within {{Refund Processing Time}}. However, please note that the exact time frame may depend on various factors, such as the payment method used and the specific policies of the company from where the purchase was made. To get a more accurate estimate and stay updated on the status of your refund, I recommend reaching out to our customer support team. They will be able to provide you with the most up-to-date information and assist you further. Rest assured, we are committed to ensuring a smooth refund process for you.


In [None]:
# 7️⃣ Final Mini-Agent
def customer_support_agent(query, placeholder_defaults=None):
    """Answer a customer query with a predicted intent and a retrieved template response.

    The query is cleaned once with ``preprocess_text``; the cleaned text feeds
    both the intent classifier (``vectorizer`` + ``clf``) and the semantic
    search over the precomputed ``embeddings``. The best-matching row's
    ``response`` is then personalized by filling template placeholders.

    Args:
        query: The raw customer message.
        placeholder_defaults: Optional mapping of placeholder text (braces
            included) to the value substituted for it in the response. When
            omitted, ``{{Refund Processing Time}}`` is replaced with
            ``"3-5 business days"``.

    Returns:
        A dict with the keys:
            ``intent``        - label predicted by the classifier,
            ``entities``      - ``{{...}}`` placeholders literally present in the query,
            ``order_numbers`` - every run of digits found in the query (as strings),
            ``response``      - the retrieved response after placeholder filling.
    """
    if placeholder_defaults is None:
        placeholder_defaults = {"{{Refund Processing Time}}": "3-5 business days"}  # Example default

    # Clean once and reuse for both the classifier and the embedding model
    clean_q = preprocess_text(query)

    # Intent detection
    intent = clf.predict(vectorizer.transform([clean_q]))[0]

    # re.findall always returns a list (empty when nothing matches), so no fallback is needed
    entities = re.findall(r'\{\{.*?\}\}', query)
    # Any digit run is treated as a candidate order number
    order_numbers = re.findall(r'\d+', query)

    # Response retrieval: nearest corpus entry by cosine similarity
    query_embedding = model.encode([clean_q], convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, embeddings)
    top_idx = cos_scores.argmax().item()

    # Fetch template response from dataset
    response = data['response'].iloc[top_idx]

    # Fill placeholders; str.replace is a no-op when the placeholder is absent.
    # The order-number placeholder is left untouched if the query had no digits.
    if order_numbers:
        response = response.replace("{{Order Number}}", order_numbers[0])
    for placeholder, value in placeholder_defaults.items():
        response = response.replace(placeholder, value)

    return {
        "intent": intent,
        "entities": entities,
        "order_numbers": order_numbers,
        "response": response
    }

# Test the agent on one sample query and print the whole result dict
# (intent, entities, order_numbers, response)
query = "My order 98765 is delayed"
result = customer_support_agent(query)
print(result)


{'intent': 'change_order', 'entities': [], 'order_numbers': ['98765'], 'response': 'We genuinely understand your desire to track the estimated arrival time of your order. To provide you with the most accurate information, could you please provide us with the 98765? With that information, we will be able to update you on the estimated delivery date. Your patience and cooperation are greatly appreciated.'}


In [None]:
# Test your agent live in Colab.
# Type "exit" or "quit" to stop; end-of-input or an interrupt also stops the loop.
while True:
    try:
        query = input("Customer: ").strip()  # Type your customer query here
    except (EOFError, KeyboardInterrupt):
        # Leave the loop cleanly instead of ending the cell with a traceback
        print("Exiting...")
        break
    if not query:
        # Nothing typed: prompt again rather than querying the agent with empty text
        continue
    if query.lower() in ("exit", "quit"):
        print("Exiting...")
        break
    result = customer_support_agent(query)
    print("Agent Response:", result['response'])
    print("Intent:", result['intent'])
    print("Entities:", result['entities'])
    print("Order Numbers:", result['order_numbers'])
    print("-" * 50)


Agent Response: Of course! I'd be happy to provide you with information about canceling orders. Canceling an order can usually be done through your account on our website or by contacting our customer support team. The exact process may vary depending on the platform or service you used to place the order. To cancel an order, follow these general steps:

1. Sign in to Your Account: Access your account on our platform by entering your login credentials.
2. Locate Your Order: Find the order you wish to cancel either in your order history or in the specific section for managing orders.
3. Initiate the Cancellation: Look for the "Cancel" or "Request Cancellation" option associated with the order and click on it.
4. Follow any Prompts or Instructions: Depending on the system, you may need to provide a reason for canceling or confirm your cancellation request.
5. Verify the Cancellation: After completing the cancellation steps, you should receive a confirmation message or email indicating th