In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import spacy
import random
from spacy.util import minibatch, compounding
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import joblib


In [3]:
# Mount Google Drive at /content/drive (Colab-only helper); every file path used
# later in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:

# Read data.csv from the mounted Drive folder into `df`; later cells use its
# 'instruction', 'intent' and 'response' columns.
df = pd.read_csv('/content/drive/MyDrive/customerchatboat/data.csv')

In [5]:
# Show the first five rows as plain text to check that the CSV loaded with the expected columns.
print(df.head())

   flags                                        instruction category  \
0      B   question about cancelling order {{Order Number}}    ORDER   
1    BQZ  i have a question about cancelling oorder {{Or...    ORDER   
2   BLQZ    i need help cancelling puchase {{Order Number}}    ORDER   
3     BL         I need to cancel purchase {{Order Number}}    ORDER   
4  BCELN  I cannot afford this order, cancel purchase {{...    ORDER   

         intent                                           response  
0  cancel_order  I've understood you have a question regarding ...  
1  cancel_order  I've been informed that you have a question ab...  
2  cancel_order  I can sense that you're seeking assistance wit...  
3  cancel_order  I understood that you need assistance with can...  
4  cancel_order  I'm sensitive to the fact that you're facing f...  


In [6]:


# Split the data into features and labels for intent recognition
X_intent = df['instruction']  # Features: the raw text of each instruction
y_intent = df['intent']  # Labels: the intent name attached to each instruction

# Hold out 20% of the rows for testing; random_state=42 makes the shuffle repeatable
X_train_intent, X_test_intent, y_train_intent, y_test_intent = train_test_split(X_intent, y_intent, test_size=0.2, random_state=42)

# TF-IDF vectorization for intent recognition.
# The vectorizer is fitted on the training split only and then reused, unchanged,
# to transform the test split.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train_intent)
X_test_tfidf = tfidf_vectorizer.transform(X_test_intent)

# Train a Multinomial Naive Bayes classifier (default hyper-parameters) on the TF-IDF features
intent_classifier = MultinomialNB()
intent_classifier.fit(X_train_tfidf, y_train_intent)

# Predict intents for the held-out test split
y_pred_intent = intent_classifier.predict(X_test_tfidf)

# Per-intent precision, recall, F1 and support on the test split, printed as a text table
classification_report_intent = classification_report(y_test_intent, y_pred_intent)
print("Intent Recognition Model Evaluation:")
print(classification_report_intent)

# Load spaCy's "en_core_web_sm" pipeline into the module-level name `nlp`;
# extract_entities() reads the entities this pipeline attaches to each document.
nlp = spacy.load("en_core_web_sm")

# Entity extraction helper built on the module-level spaCy pipeline `nlp`
def extract_entities(text):
    """Return the entities found in ``text`` as ``(text, label)`` tuples.

    Args:
        text: The string to run through the module-level ``nlp`` pipeline.

    Returns:
        A list with one ``(ent.text, ent.label_)`` tuple per entry in the
        parsed document's ``ents``; empty when no entities are found.
    """
    return [(span.text, span.label_) for span in nlp(text).ents]

# Extract entities from each instruction.
# This adds an 'entities' column to `df` in place and calls extract_entities
# (one spaCy pipeline run) once per row.
df['entities'] = df['instruction'].apply(extract_entities)

# Slot values collected so far, keyed by slot name (shared, module-level state)
conversation_context = {}

# Slot filling driven by the recognized intent and the extracted entities
def fill_slots(intent, entities):
    """Record slot values from ``entities`` into ``conversation_context``.

    Only the 'cancel_order' intent is handled: every entity whose label is
    exactly 'Order Number' is written to ``conversation_context['order_number']``,
    so the last such entity wins. Any other intent leaves the context untouched.

    Args:
        intent: Name of the recognized intent.
        entities: Iterable of ``(entity_text, label)`` pairs.

    Returns:
        None. The module-level ``conversation_context`` dict is mutated in place.

    NOTE(review): 'Order Number' does not look like a label a stock spaCy
    model emits, so this branch presumably never matches the output of
    extract_entities -- confirm which label should be used here.
    """
    if intent != 'cancel_order':
        # Add more logic for other intents and entities as needed
        return
    for value, kind in entities:
        if kind == 'Order Number':
            conversation_context['order_number'] = value

# Destination on the mounted Drive for the DataFrame with entity extraction results
file_path = '/content/drive/MyDrive/customerchatboat/extraction_results.csv'

# Write the whole DataFrame (including the new 'entities' column) to CSV,
# without the row index
df.to_csv(file_path, index=False)

# Show the first five instructions next to the entities extracted from them
print(df[['instruction', 'entities']].head())



Intent Recognition Model Evaluation:
                          precision    recall  f1-score   support

            cancel_order       0.99      1.00      1.00       187
            change_order       1.00      1.00      1.00       187
 change_shipping_address       0.99      0.99      0.99       216
  check_cancellation_fee       1.00      1.00      1.00       199
           check_invoice       0.99      0.99      0.99       192
   check_payment_methods       1.00      1.00      1.00       206
     check_refund_policy       0.98      1.00      0.99       200
               complaint       1.00      1.00      1.00       203
contact_customer_service       1.00      0.99      0.99       208
     contact_human_agent       0.99      1.00      0.99       201
          create_account       1.00      0.98      0.99       217
          delete_account       0.99      0.99      0.99       178
        delivery_options       0.99      1.00      0.99       218
         delivery_period       0.99   

In [8]:
from sklearn.pipeline import Pipeline
import joblib

# Bundle the TF-IDF vectorizer and the intent classifier into one pipeline.
# Both objects were already fitted in the training cell and are reused here as-is
# (no refit), so the pipeline can take raw text in predict().
pipeline = Pipeline([
    ('tfidf', tfidf_vectorizer),
    ('classifier', intent_classifier)
])

# Destination on the mounted Drive for the serialized pipeline.
# Note: this rebinds `file_path`, which an earlier cell used for the CSV export.
file_path = '/content/drive/MyDrive/customerchatboat/intent_classification_model.pkl'

# Serialize the pipeline with joblib; the cell output echoes the list of files written
joblib.dump(pipeline, file_path)


['/content/drive/MyDrive/customerchatboat/intent_classification_model.pkl']

In [20]:
import pandas as pd
import joblib  # Assuming you used joblib to save the model

# Load the saved intent-classification pipeline (TF-IDF + Naive Bayes) written by the
# joblib.dump cell above. It classifies intents; entity extraction is done by spaCy.
# joblib.load unpickles the file, so only load model files you trust.
model_path = '/content/drive/MyDrive/customerchatboat/intent_classification_model.pkl'
model = joblib.load(model_path)

# Specify the file path for the dataset
dataset_path = '/content/drive/MyDrive/customerchatboat/data.csv'

# Load the dataset; its 'intent' and 'response' columns are used to look up replies

dataset = pd.read_csv(dataset_path)

# Function to extract entities from text with the spaCy pipeline `nlp`
def extract_entities(text):
    """Return the entities found in ``text`` as ``(text, label)`` tuples.

    This relies on the module-level ``nlp`` pipeline, which must already have
    been loaded by an earlier cell; the joblib-loaded ``model`` is not used here.

    Args:
        text: The string to analyse.

    Returns:
        A list with one ``(ent.text, ent.label_)`` tuple per entry in the
        parsed document's ``ents``; empty when no entities are found.
    """
    parsed = nlp(text)
    found = []
    for span in parsed.ents:
        found.append((span.text, span.label_))
    return found

# Function to retrieve boat information based on the entity mentioned in the question
def retrieve_boat_information(entities, boat_data=None):
    """Look up the first 'BOAT_ENTITY' entity that has rows in the boat table.

    Args:
        entities: Iterable of ``(entity_text, label)`` pairs.
        boat_data: Optional DataFrame with a 'BoatName' column. When omitted,
            a module-level ``boat_data`` variable is used if one exists.

    Returns:
        The rows of ``boat_data`` whose 'BoatName' equals the first matching
        entity text, or None when there is no boat table, no 'BOAT_ENTITY'
        entity, or no matching row.
    """
    if boat_data is None:
        # No `boat_data` is defined in this notebook; the original raised
        # NameError as soon as a 'BOAT_ENTITY' entity appeared. Fall back to a
        # global of that name if one exists, otherwise report "nothing found".
        boat_data = globals().get('boat_data')
    if boat_data is None:
        return None

    for entity, label in entities:
        if label != 'BOAT_ENTITY':
            continue
        boat_info = boat_data[boat_data['BoatName'] == entity]
        if not boat_info.empty:
            return boat_info
    return None

# Function to fetch the chatbot's response for a given intent
def get_boat_response(intent):
    """Return the first response stored in ``dataset`` for ``intent``.

    Args:
        intent: Value to match against the 'intent' column of the
            module-level ``dataset`` DataFrame.

    Returns:
        The 'response' value of the first row whose 'intent' equals ``intent``.

    Raises:
        IndexError: If no row of ``dataset`` has that intent.
    """
    intent_matches = dataset['intent'] == intent
    candidate_responses = dataset[intent_matches]['response']
    return candidate_responses.values[0]

# Interactive question/answer loop: runs until the user answers anything other
# than "y" (case-insensitive) to the follow-up prompt.
while True:
    # Get user input
    user_question = input("Please ask your question: ")

    # Extract (text, label) entities from the question with the spaCy pipeline
    entities = extract_entities(user_question)

    # Look for a 'BOAT_ENTITY' entity with a matching row in the boat table.
    # NOTE(review): stock spaCy models presumably never emit 'BOAT_ENTITY', in which
    # case this is always None and the else branch always runs -- confirm.
    boat_info = retrieve_boat_information(entities)

    # If boat information is found, provide the answer
    if boat_info is not None:
        print("Here is the information about the boat:")
        print(boat_info)
    else:
        # Classify the raw question with the loaded pipeline; predict() takes a list
        # of texts, so wrap the question and take the single prediction back out
        intent = model.predict([user_question])[0]

        # If the dataset has that intent, print its first stored response.
        # The response is printed verbatim, so template placeholders such as
        # {{Order Number}} are shown unfilled.
        if intent in dataset['intent'].values:
            boat_response = get_boat_response(intent)
            print(boat_response)
        else:
            print("I'm sorry, but I'm not sure how to respond to that.")

    # Option to stop the loop
    stop = input("any other qns? (y/n): ")
    if stop.lower() != "y":
        break


Please ask your question: cancel order
I've understood you have a question regarding canceling order {{Order Number}}, and I'm here to provide you with the information you need. Please go ahead and ask your question, and I'll do my best to assist you.
any other qns? (y/n): y
Please ask your question: ow could I track the compensation?
I understand your eagerness to stay updated on the status of your reimbursement. It's completely natural to want to know if there have been any recent updates. Let me quickly check for any new information regarding your reimbursement. Please bear with me for a moment while I gather the details. Your patience is greatly appreciated as we work towards ensuring your satisfaction and resolving any concerns you may have.
any other qns? (y/n): n
