In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import nltk
from nltk.stem.lancaster import LancasterStemmer
import random
import time
import os

# --- 1. Data Setup (Loading intent.json from file) ---
INTENTS_FILE = '/content/intent.json'

# Read and parse the intents file into `data`.
# The file is opened directly rather than probed with os.path.exists first,
# so every failure mode (missing file, unreadable file, malformed JSON) is
# reported by the handler that matches it.
try:
    with open(INTENTS_FILE, encoding='utf-8') as file:
        data = json.load(file)
except FileNotFoundError:
    print(f"Error: The file '{INTENTS_FILE}' was not found.")
    # Name the file exactly as the code expects it; the path is case-sensitive
    # on the Linux filesystem behind '/content'.
    print(
        f"Please upload your '{os.path.basename(INTENTS_FILE)}' file "
        "to the Colab session's file panel."
    )
    # exit() is a helper installed by the `site` module for interactive use
    # and reports success; raising SystemExit stops here with a failing status.
    raise SystemExit(1)
except (OSError, ValueError) as e:
    # OSError: permission/IO problems. ValueError: json.JSONDecodeError and
    # UnicodeDecodeError are both subclasses of it.
    print(f"An error occurred while loading the data: {e}")
    raise SystemExit(1)
else:
    print(f"Successfully loaded data from {INTENTS_FILE}.")


# Create the stemmer shared by training and prediction.
stemmer = LancasterStemmer()

# Make sure both tokenizer resources used by nltk.word_tokenize are available
# locally; fetch whichever one nltk.data.find cannot locate.
for _resource in ('punkt', 'punkt_tab'):
    try:
        nltk.data.find(f'tokenizers/{_resource}')
    except LookupError:
        print(f"Downloading NLTK {_resource}...")
        nltk.download(_resource)


# --- 2. Preprocessing and Feature Extraction ---
#
# Builds, from the loaded `data`:
#   words     - sorted vocabulary of unique stemmed, lower-cased tokens
#   classes   - sorted list of unique intent tags
#   documents - (token list, tag) pair for every example pattern
#   train_x   - one bag-of-words row per document (1 = vocabulary word present)
#   train_y   - one one-hot row per document marking its intent

words = []
classes = []
documents = []
ignore_words = ['?', '!', '.', ',', "'s", "'re", "'m"]

# Tokenize every example pattern and remember which intent it belongs to.
for intent_data in data['intents']:
    tag = intent_data['intent']  # Use 'intent' key for the tag
    classes.append(tag)

    for pattern in intent_data['text']:  # Use 'text' key for patterns
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, tag))

# Vocabulary: drop ignored tokens, stem the rest, de-duplicate and sort so the
# column order of the feature vectors is deterministic.
words = sorted(
    {stemmer.stem(w.lower()) for w in words if w.lower() not in ignore_words}
)

# Remove duplicate classes and sort
classes = sorted(set(classes))

print(f"Total unique stemmed words (Vocabulary size): {len(words)}")
print(f"Total intents/classes: {len(classes)}")
print(f"Number of documents/patterns: {len(documents)}")

# Create training data
training = []
output = []
# Template row for the one-hot output encoding
output_empty = [0] * len(classes)
# Tag -> column position, so each document needs one lookup instead of a scan.
class_index = {tag: position for position, tag in enumerate(classes)}

for pattern_tokens, tag in documents:
    # A set gives constant-time membership tests while filling the bag.
    pattern_stems = {stemmer.stem(token.lower()) for token in pattern_tokens}
    bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in words]

    # Output is a '0' for each class and '1' for the current intent's class
    output_row = list(output_empty)
    output_row[class_index[tag]] = 1

    training.append(bag)
    output.append(output_row)

# Convert training data to numpy arrays
train_x = np.array(training)
train_y = np.array(output)


# --- 3. Model Definition and Training ---

# Training hyper-parameters, named once so the progress message and the
# fit() call cannot drift apart.
EPOCHS = 200
BATCH_SIZE = 5

# Fully connected classifier: bag-of-words vector in, one probability per
# intent out.
model = Sequential([
    # Declare the input explicitly. Passing input_shape= to the first Dense
    # layer instead makes recent Keras versions emit a UserWarning.
    tf.keras.Input(shape=(train_x.shape[1],)),
    # First hidden layer
    Dense(128, activation='relu'),
    Dropout(0.5),  # Regularization to prevent overfitting
    # Second hidden layer
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Output layer (number of neurons equals number of classes/intents)
    Dense(train_y.shape[1], activation='softmax'),
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print(f"\nStarting model training ({EPOCHS} epochs)...")
# Train the model (set verbose=1 to see training progress)
history = model.fit(
    train_x, train_y, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0
)
print("Model training complete!")
print(f"Final training accuracy: {history.history['accuracy'][-1]:.4f}")


# --- 4. Prediction Functions ---

def clean_up_sentence(sentence):
    """Return the stemmed, lower-cased tokens of *sentence*.

    The sentence is split with ``nltk.word_tokenize`` and every token is
    lower-cased and passed through the module-level ``stemmer``.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

def bag_of_words(sentence, words, show_details=False):
    """Encode *sentence* as a 0/1 vector over the vocabulary *words*.

    Position ``i`` of the returned NumPy array is 1 when ``words[i]`` equals
    one of the stemmed tokens of the sentence and 0 otherwise. With
    ``show_details`` set, each match is printed as it is found.
    """
    tokens = clean_up_sentence(sentence)
    vector = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            vector[position] = 1
            if show_details:
                print(f"found in bag: {vocab_word}")
    return np.array(vector)

def classify_intent(sentence, error_threshold=0.25):
    """Predict the likely intents for *sentence*.

    Args:
        sentence: Raw user text.
        error_threshold: Predictions with a probability not greater than
            this value are discarded.

    Returns:
        A list of ``(tag, probability)`` tuples sorted by probability,
        highest first. The list is empty when the sentence contains no
        vocabulary word, when the prediction fails, or when no class
        exceeds the threshold.
    """
    input_data = bag_of_words(sentence, words)

    # An all-zero vector (empty input or only unknown words) carries no
    # information about the sentence; return no intents so the caller can use
    # its fallback reply instead of an arbitrary prediction.
    if not input_data.any():
        return []

    # The model expects a batch, so wrap the single vector in an array.
    try:
        results = model.predict(np.array([input_data]), verbose=0)[0]
    except Exception:
        # NOTE(review): deliberately best-effort - any prediction failure is
        # reported to the caller as "no intent found".
        return []

    # Keep only confident predictions, most probable first.
    ranked = sorted(
        ((index, prob) for index, prob in enumerate(results)
         if prob > error_threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[index], prob) for index, prob in ranked]

def get_response(intents_list, intents_json):
    """Pick a random reply for the highest-confidence intent.

    Args:
        intents_list: ``(tag, probability)`` tuples, best match first; only
            the first entry is used.
        intents_json: Parsed intents data with an ``'intents'`` list whose
            entries carry an ``'intent'`` tag and a ``'responses'`` list.

    Returns:
        A randomly chosen response string for the matching intent, a
        "don't understand" message when *intents_list* is empty, or a generic
        failure message when no intent with usable responses matches the tag.
    """
    if not intents_list:
        return "I'm sorry, I don't understand that. Can you rephrase?"

    tag = intents_list[0][0]

    # Locate the intent object by matching the 'intent' key.
    for intent in intents_json['intents']:
        if intent['intent'] != tag:
            continue
        # An entry with a missing or empty 'responses' list is skipped so
        # random.choice is never called on an empty sequence.
        # Handling extensions (like %%HUMAN%% replacements) is a next step!
        responses = intent.get('responses')
        if responses:
            return random.choice(responses)
    return "Something went wrong fetching the response."


# --- 5. Chat Loop Interface ---
# Reads user input until 'quit', end of input, or an interrupt; each line is
# classified and answered with a response for the best-matching intent.

print("\n--- ChatBot Ready ---")
print(f"Model trained on {len(classes)} intents. Start chatting!")
print("Type 'quit' to exit.")

while True:
    try:
        # Strip surrounding whitespace so e.g. "quit " still ends the session.
        user_input = input("You: ").strip()
        if user_input.lower() == 'quit':
            break

        # 1. Classify the user input (get intent tag)
        intent_results = classify_intent(user_input)

        # 2. Get a random response based on the intent
        bot_response = get_response(intent_results, data)

        print(f"Bot: {bot_response}")

    except EOFError:
        # Handles case where input is closed (common in scripted environments)
        print("\nChatBot session ended (EOF).")
        break
    except KeyboardInterrupt:
        # Ctrl-C / notebook "interrupt" is a normal way to stop chatting;
        # leave the loop cleanly instead of surfacing a traceback.
        print("\nChatBot session interrupted by user.")
        break
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        time.sleep(1)
        break

print("ChatBot session ended.")

Successfully loaded data from /content/intent.json.
Downloading NLTK punkt...


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Downloading NLTK punkt_tab...


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Total unique stemmed words (Vocabulary size): 156
Total intents/classes: 42
Number of documents/patterns: 138

Starting model training (200 epochs)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Model training complete!
Final training accuracy: 0.9855

--- ChatBot Ready ---
Model trained on 42 intents. Start chatting!
Type 'quit' to exit.
You: bangladesh team t20 captain name?
Bot: Captains in 2025: Shanto (Tests), Miraz (ODIs), Litton (T20Is).
You: about shakib
Bot: Career bests: Test 217, ODI 134*, T20I 84; best bowling 7/36 (Test), 5/29 (ODI), 5/20 (T20I).
You: about mushfiq
Bot: Key feats: first Bangladeshi with three Test double hundreds; ODI runs 7,795.
You: tamim iqbal highest score in test
Bot: Career highs: Test 206, ODI 158, T20I 103*.
You: Mahmudullah date of birth
Bot: Ireland tour (Bangladesh) 2025: Tests – Nov 11–15 (Sylhet), Nov 19–23 (Mirpur). T20Is – Nov 27 & 29 (Chattogram), Dec 2 (Mirpur).
You: last test of mahmudullah
Bot: Test squad vs Ireland (Nov 2025): Shanto (C), Shadman, Mahmudul Hasan Joy, Mominul, Mushfiqur, Litton, Jaker Ali, Miraz, Taijul, Syed Khaled, Hasan Mahmud, Nahid Rana, Ebadot, Hasan Murad.
You: last series of bangladesh
Bot: T20Is 2025: 3

KeyboardInterrupt: Interrupted by user