In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import MultinomialNB

In [2]:
# Read the labelled queries and normalise the text column to lowercase in one
# chained step, so `data` is only ever bound to the preprocessed frame.
data = pd.read_csv('Book1.csv').assign(
    query=lambda frame: frame['query'].str.lower()
)

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data['query'],
    data['intent'],
    test_size=0.2,
    random_state=42,
)

In [4]:
# Feature extraction using TF-IDF (default TfidfVectorizer settings).
tfidf_vectorizer = TfidfVectorizer()
# Fit the vectorizer on the training queries only, then reuse that fitted
# vectorizer to transform the test queries so no test text influences the fit.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [5]:
# Choose a classifier: multinomial Naive Bayes with its default parameters.
classifier = MultinomialNB()
# Train the classifier on the TF-IDF training matrix and the training labels.
# The call is left as the cell's last expression, so the notebook displays
# the fitted estimator as the cell output.
classifier.fit(X_train_tfidf, y_train)

MultinomialNB()

In [6]:
# Predict labels for the held-out TF-IDF matrix.
y_pred = classifier.predict(X_test_tfidf)

# Summarise performance: overall accuracy plus the per-class text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8
Classification Report:
               precision    recall  f1-score   support

          No       0.62      1.00      0.77         5
         Yes       1.00      0.70      0.82        10

    accuracy                           0.80        15
   macro avg       0.81      0.85      0.80        15
weighted avg       0.88      0.80      0.81        15



In [7]:
#Inference Function with "did not understand" Threshold

def predict_intent(input_text, threshold=0.7):
    """Classify a single query, falling back to a fixed message when unsure.

    The text is lowercased, vectorized with the module-level
    ``tfidf_vectorizer`` and scored with the module-level ``classifier``.

    Args:
        input_text: The query string to classify.
        threshold: Minimum probability the top class must reach for its
            label to be returned.

    Returns:
        The entry of ``classifier.classes_`` with the highest predicted
        probability, or the fallback message string when that probability
        is below ``threshold``.
    """
    features = tfidf_vectorizer.transform([input_text.lower()])
    class_probs = classifier.predict_proba(features)[0]

    # Locate the top class once and reuse the index for both the
    # confidence check and the label lookup.
    best_idx = np.argmax(class_probs)
    if class_probs[best_idx] < threshold:
        # NOTE(review): the message is misspelled ("inderstand"); it is kept
        # verbatim because it is a return value callers may compare against.
        return "I did not inderstand"

    return classifier.classes_[best_idx]

In [8]:
# Probe predict_intent with the literal query "yes" at the default threshold.
# In the recorded run this cleared the threshold and printed the "Yes" label.
input_text = "yes"
intent = predict_intent(input_text)
print(f"Intent for '{input_text}': {intent}")

Intent for 'yes': Yes


In [9]:
# Probe predict_intent with the query "food" at the default threshold.
# In the recorded run the top probability fell below the threshold, so the
# fallback message was printed instead of a label.
input_text = "food"
intent = predict_intent(input_text)
print(f"Intent for '{input_text}': {intent}")

Intent for 'food': I did not inderstand


In [10]:
# Probe predict_intent with the literal query "no" at the default threshold.
# In the recorded run this cleared the threshold and printed the "No" label.
input_text = "no"
intent = predict_intent(input_text)
print(f"Intent for '{input_text}': {intent}")

Intent for 'no': No


In [11]:
# Probe predict_intent with the phrase "ye mera desh" at the default threshold.
# In the recorded run the top probability fell below the threshold, so the
# fallback message was printed instead of a label.
input_text = "ye mera desh"
intent = predict_intent(input_text)
print(f"Intent for '{input_text}': {intent}")

Intent for 'ye mera desh': I did not inderstand


In [13]:
### Create a Pickle file using serialization
# Persist the trained classifier so it can be reloaded without retraining.
# NOTE(review): only the classifier is saved here. predict_intent also needs
# the fitted tfidf_vectorizer, which this cell does not write to disk.
import pickle

# The context manager closes the file even if pickle.dump raises, which the
# previous manual open()/close() pair did not guarantee.
with open("classifier.pkl", "wb") as pickle_out:
    pickle.dump(classifier, pickle_out)