In [27]:
import json
import pickle
import numpy as np
import random
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Load data
with open("intents.json", "r") as file:
    intents = json.load(file)

words = pickle.load(open("words.pkl", "rb"))
classes = pickle.load(open("classes.pkl", "rb"))

# Prepare training data
training = []
output_empty = [0] * len(classes)  # Placeholder for output labels

for intent in intents["intents"]:
    for pattern in intent["examples"]:  # Using 'examples' instead of 'patterns'
        word_list = nltk.word_tokenize(pattern)
        word_list = [lemmatizer.lemmatize(word.lower()) for word in word_list]

        bag = [1 if w in word_list else 0 for w in words]  # Create bag of words
        output_row = list(output_empty)
        output_row[classes.index(intent["intent"])] = 1  # Assign intent

        training.append([bag, output_row])

# Shuffle data and convert to NumPy arrays
random.shuffle(training)
train_x = np.array([entry[0] for entry in training])
train_y = np.array([entry[1] for entry in training])

# Define the neural network model
model = Sequential([
    Dense(128, input_shape=(len(train_x[0]),), activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(len(train_y[0]), activation="softmax")
])

# Compile the model
model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=0.01), metrics=["accuracy"])

# Train the model
print("🟢 Training the chatbot model...")
history = model.fit(train_x, train_y, epochs=200, batch_size=8, verbose=1)

# Save the trained model
model.save("chatbot_model.keras")
print("✅ Model training complete! Saved as chatbot_model.keras")


🟢 Training the chatbot model...
Epoch 1/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.1283 - loss: 2.2156
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.2742 - loss: 2.0511 
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.4092 - loss: 1.8362
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.3983 - loss: 1.7354
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6750 - loss: 1.3861
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.6117 - loss: 1.3460
Epoch 7/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.7433 - loss: 1.1411
Epoch 8/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6458 - loss: 0.9824
Epoch 9/200
[1m4/4[0m

In [None]:
import json
import pickle
import nltk
import numpy as np
import random
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model

# Load the model and necessary files
model = load_model("chatbot_model.keras")

with open("intents.json", "r") as file:
    intents = json.load(file)

# Load words and classes
with open("words.pkl", "rb") as file:
    words = pickle.load(file)

with open("classes.pkl", "rb") as file:
    classes = pickle.load(file)

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to preprocess and tokenize user input
def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)  # Tokenize input
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]  # Lemmatize
    return sentence_words

# Function to create a bag-of-words representation
def bow(sentence, words):
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)  # Ensure correct size
    for s in sentence_words:
        for i, word in enumerate(words):
            if word == s:
                bag[i] = 1
    return np.array(bag).reshape(1, -1)  # Ensure shape (1, len(words))

# Function to predict intent
def predict_class(sentence):
    bow_input = bow(sentence, words)

    # Check if input size matches expected input size
    if bow_input.shape[1] != len(words):
        print(f"Warning: Expected {len(words)} input features but got {bow_input.shape[1]}.")

    prediction = model.predict(bow_input)[0]  # Predict intent
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(prediction) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)

    return [{"intent": classes[r[0]], "probability": str(r[1])} for r in results]

# Function to get a response based on intent
def get_response(intents_list, intents_json):
    if not intents_list:
        return "I'm sorry, I didn't understand that. Can you rephrase?"

    tag = intents_list[0]["intent"]
    
    for intent in intents_json["intents"]:
        if intent["intent"] == tag:  # Ensure correct key name
            return random.choice(intent["responses"])

    return "I'm not sure how to respond to that."

# Main chatbot function
def chat():
    print("Chatbot is ready! Type 'quit' to exit.")
    while True:
        message = input("You: ")
        if message.lower() == "quit":
            print("Goodbye!")
            break
        
        intents_list = predict_class(message)
        response = get_response(intents_list, intents)
        
        print(f"Bot: {response}")

# Run the chatbot
chat()


Chatbot is ready! Type 'quit' to exit.


You:  hi


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Bot: Hi there! What can I do for you?


You:  data science


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Bot: Data science is an interdisciplinary field that uses scientific methods, algorithms, and systems to extract insights and knowledge from structured and unstructured data. It combines elements of statistics, computer science, and domain knowledge to solve real-world problems.


You:  careers


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Bot: Internship opportunities can be found on platforms like LinkedIn, Kaggle, GitHub, and company websites.


You:  salary


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Bot: - **Entry-Level:** $70,000 - $100,000 per year


You:  internship


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
Bot: - **AI Researcher**


You:  data science


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
Bot: Data science is an interdisciplinary field that uses scientific methods, algorithms, and systems to extract insights and knowledge from structured and unstructured data. It combines elements of statistics, computer science, and domain knowledge to solve real-world problems.


You:  tools


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
Bot: - **Database Management:** MySQL, PostgreSQL, MongoDB


You:  careers and internship


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Bot: - **AI Researcher**


You:  careers


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Bot: - **Data Analyst**


You:  careers


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Bot: Career paths in data science include:


You:  go


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
Bot: Hello! How can I assist you today?


You:  ckhkhjk


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Bot: Hi there! What can I do for you?


You:  


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Bot: Hello! How can I assist you today?


You:  


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Bot: Hello! How can I assist you today?


You:  uyu


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Bot: Hello! How can I assist you today?


You:  what is data


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Bot: Data science is an interdisciplinary field that uses scientific methods, algorithms, and systems to extract insights and knowledge from structured and unstructured data. It combines elements of statistics, computer science, and domain knowledge to solve real-world problems.


You:  careers


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
Bot: - **Machine Learning Engineer**


You:  internship


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
Bot: Career paths in data science include:


You:  go on


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Bot: Hello! How can I assist you today?


{
    "intents": [
        {
            "intent": "Greeting",
            "examples": [
                "Hello",
                "Hi",
                "Hey",
                "Good morning",
                "Good evening"
            ],
            "responses": [
                "Hello! How can I assist you today?",
                "Hi there! What can I do for you?"
            ]
        },
        {
            "intent": "Farewell",
            "examples": [
                "Goodbye",
                "Bye",
                "See you later",
                "Take care"
            ],
            "responses": [
                "Goodbye! Have a great day!",
                "See you later! Feel free to ask anytime."
            ]
        },
        {
            "intent": "Introduction_to_Data_Science",
            "examples": [
                "What is data science?",
                "Explain data science in simple terms.",
                "Why is data science important?"
            ],
 