<a href="https://colab.research.google.com/github/Madhu2s6361/Creating-an-AI-Powered-Chatbot-for-Customer-Support-using-Natural-Language-Processing/blob/main/Project_3_Creating_an_AI_Powered_Chatbot_for_Customer_Support_using_Natural_Language_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating an AI Powered Chatbot for Customer Support using Natural Language Processing

In [27]:
# Install required packages
!pip install -q scikit-learn nltk joblib gradio

In [28]:
# Create intents.json (corrected responses & a few extra patterns for robustness)
import json

intents = {
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["hi", "hello", "hey", "good morning", "good evening", "hey there"],
            "responses": ["Hello! How can I help you today?", "Hi there — what can I do for you?"]
        },
        {
            "tag": "goodbye",
            "patterns": ["bye", "goodbye", "see you", "talk to you later"],
            "responses": ["Goodbye! Have a nice day.", "See you later — reach out if you need anything else."]
        },
        {
            "tag": "thanks",
            "patterns": ["thanks", "thank you", "thx", "thank you very much"],
            "responses": ["You're welcome!", "Happy to help!"]
        },
        {
            "tag": "order_status",
            "patterns": [
                "where is my order", "track my order", "order status",
                "track order", "order tracking", "what's the status of my order",
                "please share my order status", "where’s my package", "where is my package",
                "share order id", "please share order id"
            ],
            "responses": [
                "Please share your order ID and I’ll look it up.",
                "Sure — please share your order ID so I can check the status."
            ]
        },
        {
            "tag": "product_info",
            "patterns": [
                "tell me about product", "product info", "what is this product",
                "details about product", "product specifications", "product details"
            ],
            "responses": [
                "Which product would you like information about? Please share the product name or SKU.",
                "Tell me the product name and I'll provide details."
            ]
        },
        {
            "tag": "refund_policy",
            "patterns": [
                "refund", "how to return", "return policy",
                "I want a refund", "how do I return a product", "return my order"
            ],
            "responses": [
                "Our refund policy allows returns within 30 days with a receipt. Would you like to start a return?",
                "You can return items within 30 days. Do you want me to create a return request?"
            ]
        },
        {
            "tag": "complaint",
            "patterns": [
                "I have a complaint", "this is broken", "item arrived damaged",
                "not working", "I'm unhappy with my purchase", "damaged item"
            ],
            "responses": [
                "I'm sorry to hear that. Please tell me the order ID and a brief description of the problem.",
                "Apologies for that — can you give me the order ID so we can resolve it?"
            ]
        },
        {
            "tag": "unknown",
            "patterns": [],
            "responses": [
                "Sorry, I didn't understand that. Could you rephrase or ask something else?",
                "I’m not sure I understood. Could you give more details or try different words?"
            ]
        }
    ]
}

with open("intents.json", "w", encoding="utf-8") as f:
    json.dump(intents, f, indent=4, ensure_ascii=False)

print("Saved intents.json (corrected).")


Saved intents.json (corrected).


In [29]:
import nltk

nltk.download("punkt")
nltk.download("punkt_tab")   # prevents LookupError in newer NLTK builds
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [30]:
import json
import joblib
import random
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

STOPWORDS = set(stopwords.words("english"))
STEMMER = PorterStemmer()

def preprocess_text(text: str) -> str:
    tokens = word_tokenize(text.lower())
    tokens = [t for t in tokens if t.isalpha() and t not in STOPWORDS]
    stems = [STEMMER.stem(t) for t in tokens]
    return " ".join(stems)

def load_intents(path="intents.json"):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    X, y, tag_to_responses = [], [], {}
    for intent in data["intents"]:
        tag = intent["tag"]
        tag_to_responses[tag] = intent.get("responses", [])
        for p in intent.get("patterns", []):
            X.append(preprocess_text(p))
            y.append(tag)
    return X, y, tag_to_responses

print("Loading intents...")
X, y, tag_to_responses = load_intents()
print(f"{len(X)} patterns, {len(set(y))} tags")

pipeline = Pipeline([
    ("tfidf", TfidfVectorizer(ngram_range=(1,2), max_features=2000)),
    ("clf", LogisticRegression(max_iter=300))
])

print("Training...")
pipeline.fit(X, y)

bundle = {"pipeline": pipeline, "tag_to_responses": tag_to_responses}
joblib.dump(bundle, "model.pkl")
print("Model trained and saved to model.pkl")


Loading intents...
43 patterns, 7 tags
Training...
Model trained and saved to model.pkl


In [31]:
# This cell replaces manual interactive testing.
# It loads the model and replies to every pattern found in intents.json,
# printing intent and confidence. This uses exactly what you saved in Cell 2.

import joblib, json, random
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# reload same preprocessing (ensures consistency)
STOPWORDS = set(stopwords.words("english"))
STEMMER = PorterStemmer()

def preprocess_text(text: str) -> str:
    tokens = word_tokenize(text.lower())
    tokens = [t for t in tokens if t.isalpha() and t not in STOPWORDS]
    stems = [STEMMER.stem(t) for t in tokens]
    return " ".join(stems)

# load model
bundle = joblib.load("model.pkl")
pipeline = bundle["pipeline"]
tag_to_responses = bundle["tag_to_responses"]

def predict(text, threshold=0.30):
    proc = preprocess_text(text)
    probs = pipeline.predict_proba([proc])[0]
    classes = pipeline.classes_
    best_idx = int(probs.argmax())
    best_prob = float(probs[best_idx])
    best_tag = classes[best_idx]
    if best_prob < threshold:
        return "unknown", best_prob
    return best_tag, best_prob

def bot_reply(message):
    tag, prob = predict(message)
    resp = random.choice(tag_to_responses.get(tag, tag_to_responses.get("unknown", ["Sorry, I didn't understand."])))
    return resp, tag, prob

# Load the intents.json and iterate through each pattern to show bot responses
with open("intents.json", "r", encoding="utf-8") as f:
    data = json.load(f)

print("Automated test: replying to each pattern defined in intents.json\n")
for intent in data["intents"]:
    tag = intent.get("tag", "")
    patterns = intent.get("patterns", [])
    if not patterns:
        continue
    print(f"--- INTENT: {tag} ---")
    for p in patterns:
        resp, predicted_tag, conf = bot_reply(p)
        print(f"User: {p}")
        print(f"Bot: {resp}  (predicted_intent={predicted_tag}, confidence={conf:.2f})")
    print()


Automated test: replying to each pattern defined in intents.json

--- INTENT: greeting ---
User: hi
Bot: I’m not sure I understood. Could you give more details or try different words?  (predicted_intent=unknown, confidence=0.30)
User: hello
Bot: Sorry, I didn't understand that. Could you rephrase or ask something else?  (predicted_intent=unknown, confidence=0.30)
User: hey
Bot: Hello! How can I help you today?  (predicted_intent=greeting, confidence=0.42)
User: good morning
Bot: Hello! How can I help you today?  (predicted_intent=greeting, confidence=0.34)
User: good evening
Bot: Hi there — what can I do for you?  (predicted_intent=greeting, confidence=0.34)
User: hey there
Bot: Hi there — what can I do for you?  (predicted_intent=greeting, confidence=0.42)

--- INTENT: goodbye ---
User: bye
Bot: I’m not sure I understood. Could you give more details or try different words?  (predicted_intent=unknown, confidence=0.23)
User: goodbye
Bot: Sorry, I didn't understand that. Could you rephra

In [32]:
# If you still want a live demo after automated tests, run this cell.
# It provides a small web UI. Remove share=True if you don't want a public link.
import gradio as gr

def respond_gradio(message):
    resp, tag, conf = bot_reply(message)
    return f"{resp}  (intent={tag}, conf={conf:.2f})"

demo = gr.Interface(
    fn=respond_gradio,
    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
    outputs=gr.Textbox(label="Chatbot response"),
    title="NLP Customer Support Chatbot",
    description="Simple TF-IDF + LogisticRegression intent-based chatbot demo."
)

demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f8b8144d7c9cdb6b35.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


