In [3]:
import nltk

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('omw-1.4')    

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [2]:
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

text = "Hello Hem bhai, how are you?"
tokens = word_tokenize(text)
print("Tokens:", tokens)

lemmatizer = WordNetLemmatizer()
lemma = lemmatizer.lemmatize("running", pos="v")
print("Lemma:", lemma)


Tokens: ['Hello', 'Hem', 'bhai', ',', 'how', 'are', 'you', '?']
Lemma: run


## A) First, Dataset (Intents (Custom Dataset))

In [None]:
{
  "intents": [
    {
      "tag": "greeting",
      "patterns": ["Hi", "Hello", "Hey", "Good morning", "Good evening", "What's up?", "How's it going?"],
      "responses": ["Hello! How can I assist you today?", "Hi there!", "Hey! What's up?"]
    },
    {
      "tag": "goodbye",
      "patterns": ["Bye", "See you later", "Goodbye", "I am leaving", "Catch you later", "Talk to you soon"],
      "responses": ["Goodbye!", "See you soon!", "Have a nice day!"]
    },
    {
      "tag": "thanks",
      "patterns": ["Thanks", "Thank you", "That's helpful", "Thanks a lot", "Thank you so much"],
      "responses": ["You're welcome!", "Glad I could help!", "Anytime!", "No problem!"]
    },
    {
      "tag": "feeling",
      "patterns": ["How are you?", "How's everything?", "How do you feel?"],
      "responses": ["I'm a bot, but I'm doing great! How about you?", "I'm always ready to chat!"]
    },
    {
      "tag": "name",
      "patterns": ["What is your name?", "Who are you?", "Tell me your name"],
      "responses": ["I'm your friendly chatbot!", "You can call me Chatbot!", "I'm Hem's Chatbot."]
    },
    {
      "tag": "age",
      "patterns": ["How old are you?", "What is your age?", "When were you created?"],
      "responses": ["I'm timeless!", "I was created recently to assist you.", "Age is just a number!"]
    },
    {
      "tag": "weather",
      "patterns": ["What's the weather like?", "Is it raining?", "Tell me the weather today"],
      "responses": ["It's always sunny in my world!", "Weather looks nice outside.", "It might rain today, carry an umbrella."]
    },
    {
      "tag": "order_food",
      "patterns": ["I want to order food", "Order pizza", "Can I get a burger?", "I'd like to place a food order"],
      "responses": ["Sure, What would you like to order?", "I can help you order food!"]
    },
    {
      "tag": "noanswer",
      "patterns": [],
      "responses": ["Sorry, I didn't understand that. Can you rephrase?", "I'm not sure I follow."]
    }
  ]
}


## B) Build Model (train_chatbot.py)

In [4]:
# train_chatbot.py

import json
import numpy as np
import random
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import nltk
from nltk.stem import WordNetLemmatizer
import pickle

# Download NLTK data
nltk.download('punkt')
nltk.download('wordnet')

# Initialize
lemmatizer = WordNetLemmatizer()

# Load intents
with open('intents.json') as file:
    data = json.load(file)

# Preparing data
training_sentences = []
training_labels = []
labels = []
responses = {}

for intent in data['intents']:
    for pattern in intent['patterns']:
        training_sentences.append(pattern)
        training_labels.append(intent['tag'])
    
    responses[intent['tag']] = intent['responses']

    if intent['tag'] not in labels:
        labels.append(intent['tag'])

# Encode labels
lbl_encoder = LabelEncoder()
lbl_encoder.fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)

# Tokenize and lemmatize sentences
def tokenize(sentence):
    tokens = nltk.word_tokenize(sentence)
    return [lemmatizer.lemmatize(w.lower()) for w in tokens]

all_words = []
for sentence in training_sentences:
    all_words.extend(tokenize(sentence))

all_words = sorted(set(all_words))

X_train = []
for sentence in training_sentences:
    bag = []
    words = tokenize(sentence)
    for w in all_words:
        bag.append(1) if w in words else bag.append(0)
    X_train.append(bag)

X_train = np.array(X_train)
y_train = np.array(training_labels)

# Build model
model = Sequential()
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(labels), activation='softmax'))

# Compile model
optimizer = Adam(learning_rate=0.01)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=200, batch_size=8, verbose=1)

# Save model in new Keras format
model.save('chatbot_model.keras')

# Save other files
pickle.dump(all_words, open('words.pkl', 'wb'))
pickle.dump(lbl_encoder, open('labels.pkl', 'wb'))

print("Model trained and saved successfully in .keras format!")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.0693 - loss: 2.19962.19
Epoch 2/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2445 - loss: 1.9898
Epoch 3/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4097 - loss: 1.8078  
Epoch 4/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6116 - loss: 1.4509
Epoch 5/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5561 - loss: 1.3416 
Epoch 6/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7653 - loss: 1.1305  
Epoch 7/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6555 - loss: 0.9019
Epoch 8/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7868 - loss: 0.6998
Epoch 9/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

## C) Chatbot Core Logic (chatbot.py)

In [5]:
import random
import pickle
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
import numpy as np

# Download NLTK data
nltk.download('punkt')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

# Load the trained model
model = load_model('chatbot_model.keras')  # Make sure to load the correct model format
# Load the other assets (words and labels)
with open('words.pkl', 'rb') as f:
    all_words = pickle.load(f)

with open('labels.pkl', 'rb') as f:
    lbl_encoder = pickle.load(f)

# Load intents
with open('intents.json') as file:
    intents = json.load(file)

# Function to clean and process text input
def clean_text(text):
    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
    return tokens

# Function to create a bag of words for prediction
def bag_of_words(text):
    tokens = clean_text(text)
    bag = [0] * len(all_words)
    for s in tokens:
        for i, word in enumerate(all_words):
            if word == s:
                bag[i] = 1
    return np.array(bag)

# Predict the intent of the user's input
def predict_intent(text):
    bow = bag_of_words(text)  # Create bag of words
    prediction = model.predict(np.array([bow]))  # Predict using the model
    intent_index = np.argmax(prediction)  # Get the index of the highest probability
    confidence = prediction[0][intent_index]  # Get the confidence of the prediction
    intent = lbl_encoder.inverse_transform([intent_index])[0]  # Map index to intent tag
    return intent, confidence

# Get a response based on the predicted intent
def get_response(intent_tag):
    for intent in intents['intents']:
        if intent['tag'] == intent_tag:
            return random.choice(intent['responses'])
    return "I am not sure how to answer that."

# Main function to get the chatbot's response
def chatbot_response(user_input):
    intent, confidence = predict_intent(user_input)
    if confidence > 0.6:
        return get_response(intent)
    else:
        return get_response("noanswer")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## D) GUI Integration (gui.py)

In [6]:
import random
import json
import pickle
import numpy as np
import tkinter as tk
from tkinter import Scrollbar, Text, Entry, Button, END
from tensorflow.keras.models import load_model
import nltk
from nltk.stem import WordNetLemmatizer

# Initialize
lemmatizer = WordNetLemmatizer()

# Load Files
model = load_model('chatbot_model.keras')  # Correct model file
intents = json.loads(open('intents.json', encoding='utf-8').read())
words = pickle.load(open('words.pkl', 'rb'))
labels = pickle.load(open('labels.pkl', 'rb'))

def tokenize(sentence):
    tokens = nltk.word_tokenize(sentence)
    return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(sentence):
    tokens = tokenize(sentence)
    bag = [0] * len(words)
    for s in tokens:
        for i, word in enumerate(words):
            if word == s:
                bag[i] = 1
    return np.array(bag)

def chatbot_response(text):
    bow = bag_of_words(text)
    res = model.predict(np.array([bow]))[0]
    threshold = 0.4  # Lowered the threshold for more varied responses
    results = [[i, r] for i, r in enumerate(res) if r > threshold]

    if results:
        results.sort(key=lambda x: x[1], reverse=True)
        tag = labels.inverse_transform([results[0][0]])[0]  # Fixed this line

        for intent in intents['intents']:
            if intent['tag'] == tag:
                return random.choice(intent['responses'])

    return "I didn't get that. Can you rephrase?"

# GUI Setup
root = tk.Tk()
root.title("Hem's AI Chatbot")
root.geometry("500x600")
root.configure(bg="#17202A")

# Chat window
chat_log = Text(root, bd=1, bg="#2C3E50", fg="#EAECEE", font=("Helvetica", 14), wrap='word')
chat_log.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)

scrollbar = Scrollbar(chat_log)
scrollbar.place(relheight=1, relx=0.974)
scrollbar.config(command=chat_log.yview)
chat_log['yscrollcommand'] = scrollbar.set
chat_log.config(state=tk.DISABLED)

# Entry box
entry_frame = tk.Frame(root, bg="#17202A")
entry_frame.pack(padx=10, pady=10, fill=tk.X)

entry_box = Entry(entry_frame, bd=1, bg="#2C3E50", fg="#EAECEE", font=("Helvetica", 14))
entry_box.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10))

def send_message(event=None):
    msg = entry_box.get().strip()
    if msg == "":
        return

    chat_log.config(state=tk.NORMAL)
    chat_log.insert(END, "You: " + msg + "\n\n")
    entry_box.delete(0, END)

    res = chatbot_response(msg)
    chat_log.insert(END, "Bot: " + res + "\n\n")

    chat_log.config(state=tk.DISABLED)
    chat_log.yview(END)

send_button = Button(entry_frame, text="Send", font=("Helvetica", 13, "bold"), bg="#ABB2B9", command=send_message)
send_button.pack(side=tk.RIGHT)

# Bind Enter key to send message
root.bind('<Return>', send_message)

root.mainloop()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
