<a href="https://colab.research.google.com/github/Madhu2s6361/Creating-an-AI-Powered-Chatbot-for-Customer-Support-using-Natural-Language-Processing/blob/main/Project_3_Creating_an_AI_Powered_Chatbot_for_Customer_Support_using_Natural_Language_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install dependencies
!pip install -q scikit-learn nltk joblib gradio

In [2]:
# Create intents.json
import json

intents = {
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["hi", "hello", "hey", "good morning", "good evening", "hey there"],
            "responses": ["Hello! How can I help you today?", "Hi there — what can I do for you?"]
        },
        {
            "tag": "goodbye",
            "patterns": ["bye", "goodbye", "see you", "talk to you later"],
            "responses": ["Goodbye! Have a nice day.", "See you later — reach out if you need anything else."]
        },
        {
            "tag": "thanks",
            "patterns": ["thanks", "thank you", "thx", "thank you very much"],
            "responses": ["You're welcome!", "Happy to help!"]
        },
        {
            "tag": "order_status",
            "patterns": [
                "where is my order", "track my order", "order status",
                "track order", "order tracking", "what's the status of my order"
            ],
            "responses": [
                "Can you please provide your order ID? I can check the status for you.",
                "Sure — share your order ID and I’ll look it up."
            ]
        },
        {
            "tag": "product_info",
            "patterns": [
                "tell me about product", "product info", "what is this product",
                "details about product", "product specifications"
            ],
            "responses": [
                "Which product would you like information about? Please share the product name or SKU.",
                "Tell me the product name and I'll provide details."
            ]
        },
        {
            "tag": "refund_policy",
            "patterns": [
                "refund", "how to return", "return policy",
                "I want a refund", "how do I return a product"
            ],
            "responses": [
                "Our refund policy allows returns within 30 days with a receipt. Would you like to start a return?",
                "You can return items within 30 days. Do you want me to create a return request?"
            ]
        },
        {
            "tag": "complaint",
            "patterns": [
                "I have a complaint", "this is broken", "item arrived damaged",
                "not working", "I'm unhappy with my purchase"
            ],
            "responses": [
                "I'm sorry to hear that. Please tell me the order ID and a brief description of the problem.",
                "Apologies for that — can you give me the order ID so we can resolve it?"
            ]
        },
        {
            "tag": "unknown",
            "patterns": [],
            "responses": [
                "Sorry, I didn't understand that. Could you rephrase or ask something else?",
                "I’m not sure I understood. Could you give more details or try different words?"
            ]
        }
    ]
}

with open("intents.json", "w", encoding="utf-8") as f:
    json.dump(intents, f, indent=4, ensure_ascii=False)

print("Saved intents.json")

Saved intents.json


In [3]:
import nltk

# Required downloads (including punkt_tab to avoid the LookupError)
nltk.download("punkt")
nltk.download("punkt_tab")   # Fix for newer NLTK layouts
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
import json
import joblib
import random
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

STOPWORDS = set(stopwords.words("english"))
STEMMER = PorterStemmer()

def preprocess_text(text: str) -> str:
    tokens = word_tokenize(text.lower())
    tokens = [t for t in tokens if t.isalpha() and t not in STOPWORDS]
    stems = [STEMMER.stem(t) for t in tokens]
    return " ".join(stems)

def load_intents(path="intents.json"):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    X, y, tag_to_responses = [], [], {}
    for intent in data["intents"]:
        tag = intent["tag"]
        tag_to_responses[tag] = intent.get("responses", [])
        for p in intent.get("patterns", []):
            X.append(preprocess_text(p))
            y.append(tag)
    return X, y, tag_to_responses

print("Loading intents...")
X, y, tag_to_responses = load_intents()
print(f"{len(X)} patterns, {len(set(y))} tags")

pipeline = Pipeline([
    ("tfidf", TfidfVectorizer(ngram_range=(1,2), max_features=2000)),
    ("clf", LogisticRegression(max_iter=300))
])

print("Training...")
pipeline.fit(X, y)
bundle = {"pipeline": pipeline, "tag_to_responses": tag_to_responses}
joblib.dump(bundle, "model.pkl")
print("Model trained and saved to model.pkl")

Loading intents...
35 patterns, 7 tags
Training...
Model trained and saved to model.pkl


In [5]:
# Simple test loop (works inside a Colab cell)
import joblib, random, sys
bundle = joblib.load("model.pkl")
pipeline = bundle["pipeline"]
tag_to_responses = bundle["tag_to_responses"]

def predict(text, threshold=0.45):
    proc = preprocess_text(text)
    probs = pipeline.predict_proba([proc])[0]
    classes = pipeline.classes_
    best_idx = probs.argmax()
    best_prob = probs[best_idx]
    best_tag = classes[best_idx]
    if best_prob < threshold:
        return "unknown", best_prob
    return best_tag, best_prob

def bot_reply(msg):
    tag, prob = predict(msg)
    resp = random.choice(tag_to_responses.get(tag, tag_to_responses["unknown"]))
    return f"Bot: {resp} (intent={tag}, conf={prob:.2f})"

# Example manual test:
print(bot_reply("hi"))
print(bot_reply("where is my order"))
print(bot_reply("I want a refund"))

Bot: Sorry, I didn't understand that. Could you rephrase or ask something else? (intent=unknown, conf=0.33)
Bot: Sure — share your order ID and I’ll look it up. (intent=order_status, conf=0.53)
Bot: Sorry, I didn't understand that. Could you rephrase or ask something else? (intent=unknown, conf=0.35)


In [6]:
# Run this cell to open a small web UI in Colab (click the external link that appears)
import gradio as gr

def respond_gradio(message):
    return bot_reply(message)

demo = gr.Interface(
    fn=respond_gradio,
    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
    outputs=gr.Textbox(label="Chatbot response"),
    title="NLP Customer Support Chatbot",
    description="Simple TF-IDF + LogisticRegression intent-based chatbot demo."
)

# In Colab, use share=True to get a public link; remove share for local usage only
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d41bdec5923454c542.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


