In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import re

In [12]:
# Load the dataset
# The path is relative, so it resolves against the notebook's working directory.
# Later cells read the "utterance" and "intent" columns of this frame.
file_path = './data/Bitext_Sample_Customer_Service_Training_Dataset.csv'
data = pd.read_csv(file_path)

# Display the first few rows
# (bare last expression, so the notebook renders the frame as a table)
data.head()

Unnamed: 0,flags,utterance,category,intent
0,BM,I have problems with canceling an order,ORDER,cancel_order
1,BIM,how can I find information about canceling ord...,ORDER,cancel_order
2,B,I need help with canceling the last order,ORDER,cancel_order
3,BIP,could you help me cancelling the last order I ...,ORDER,cancel_order
4,B,problem with cancelling an order I made,ORDER,cancel_order


# Data Preprocessing

In [16]:
# Clean the text data
def clean_text(text):
    """Normalise a raw utterance before it is vectorised.

    The text is lower-cased and every character that is neither a word
    character (letter, digit, underscore) nor whitespace is removed, which
    strips punctuation and symbols.

    Args:
        text: The utterance to clean. Missing values (``None`` or a float
            NaN, e.g. an empty CSV cell loaded by pandas) are treated as an
            empty string; any other non-string value is converted with
            ``str()`` before cleaning.

    Returns:
        str: The cleaned text (possibly empty).
    """
    # A missing value has no ``.lower()``; NaN is the only float that
    # compares unequal to itself, so this detects it without extra imports.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    lowered = str(text).lower()  # convert to lowercase
    return re.sub(r"[^\w\s]", "", lowered)  # remove punctuation

# Overwrite the raw utterance column in place with its cleaned version.
data["utterance"] = data["utterance"].apply(clean_text)

# Encode labels (intent column)
# The fitted encoder is kept: later cells use it to map integer predictions
# back to intent names (classes_ / inverse_transform).
label_encoder = LabelEncoder()
data["intent_label"] = label_encoder.fit_transform(data["intent"])

# Split the dataset into train and test sets
# 20% of the rows are held out; random_state is fixed so the split is repeatable.
# NOTE(review): the split is not stratified by intent, so per-class test counts
# can vary — consider stratify=data["intent_label"] (would change the metrics).
X_train, X_test, y_train, y_test = train_test_split(
    data["utterance"], data["intent_label"], test_size=0.2, random_state=42
)

# Convert text to TF-IDF features
# The vectorizer is fitted on the training text only; the test text is merely
# transformed with what was learned from the training split.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)



# Model Training

In [17]:
# Initialize and train the model
# Fits a logistic-regression classifier on the TF-IDF training matrix against
# the integer-encoded intents.
# NOTE(review): max_iter=1000 is presumably raised to avoid solver convergence
# warnings on this input — confirm.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

# Model Evaluation

In [18]:
# Predict on the test set
y_pred = model.predict(X_test_tfidf)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")

# Show classification report
# `labels` pins the report rows to every encoded class (LabelEncoder assigns
# 0..n_classes-1 in the order of `classes_`). Without it the rows are inferred
# from the labels that happen to occur in y_test/y_pred, and the report fails
# or mislabels rows whenever an intent is missing from the test split.
print(
    classification_report(
        y_test,
        y_pred,
        labels=np.arange(len(label_encoder.classes_)),
        target_names=label_encoder.classes_,
    )
)

Test Accuracy: 0.99
                          precision    recall  f1-score   support

            cancel_order       0.98      0.98      0.98        62
            change_order       1.00      1.00      1.00        70
 change_shipping_address       1.00      0.98      0.99        60
  check_cancellation_fee       0.99      1.00      0.99        66
           check_invoice       1.00      1.00      1.00        63
   check_payment_methods       1.00      0.96      0.98        68
     check_refund_policy       0.98      1.00      0.99        59
               complaint       1.00      1.00      1.00        52
contact_customer_service       0.98      1.00      0.99        61
     contact_human_agent       1.00      1.00      1.00        57
          create_account       0.98      0.95      0.97        62
          delete_account       1.00      1.00      1.00        53
        delivery_options       0.96      1.00      0.98        55
         delivery_period       1.00      1.00      1.00

# Sample Query

In [20]:
# Function to predict intent for a sample query
def predict_intent(query, responses=None,
                   default_response="I'm sorry, I didn't understand that."):
    """Return a canned reply for the intent predicted from ``query``.

    Relies on the notebook-level ``clean_text``, ``vectorizer``, ``model`` and
    ``label_encoder`` objects created in the earlier cells.

    Args:
        query: The user's utterance.
        responses: Optional mapping of intent name -> reply text. When
            omitted, a small built-in table is used.
        default_response: Reply returned when the query is empty or not a
            string, when none of its tokens are known to the vectorizer, or
            when the predicted intent has no entry in ``responses``.

    Returns:
        str: The reply text for the predicted intent, or ``default_response``.
    """
    if responses is None:
        # Define responses based on predicted intent
        responses = {
            "cancel_order": "Your order has been canceled.",
            "track_order": "Your order is on the way and will arrive soon.",
            "contact_customer_service": "You can contact our customer service team at any time.",
            # Add more intents with appropriate responses as needed
        }

    # Nothing to classify: avoid feeding a blank/non-text query to the model.
    if not isinstance(query, str) or not query.strip():
        return default_response

    # Preprocess the input query
    query_cleaned = clean_text(query)
    query_tfidf = vectorizer.transform([query_cleaned])

    # No stored entries means no token of the query is in the fitted
    # vocabulary. The classifier would still return some class for such an
    # all-zero vector, so answer with the fallback instead of a bogus intent.
    if query_tfidf.nnz == 0:
        return default_response

    # Predict intent
    intent_pred = model.predict(query_tfidf)
    intent_label = label_encoder.inverse_transform(intent_pred)[0]

    return responses.get(intent_label, default_response)

# Run one example utterance through the classifier and show the canned reply
sample_query = "Can I cancel my order?"
predicted_response = predict_intent(sample_query)
print(
    f"Query: {sample_query}",
    f"Response: {predicted_response}",
    sep="\n",
)


Query: Can I cancel my order?
Response: Your order has been canceled.
