# In [None]:
import json
import numpy as np
import random
import os
import pickle
import nltk
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
import logging

# Configure root logging for the script: INFO and above, written to the
# console through a stream handler, with timestamp / logger name / level
# in front of every message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format=_LOG_FORMAT,
    level=logging.INFO,
)

# Logger shared by everything in this file.
logger = logging.getLogger('chatbot')

class ChatBot:
    """Intent-classification chatbot built on a bag-of-words Keras model.

    On construction the bot either loads a previously saved model together
    with its pickled vocabulary and class list, or tokenizes the intent
    patterns, builds bag-of-words training data and trains a new dense
    network. ``get_response`` then maps a user sentence to one of the intent
    tags and returns a random response configured for that tag.
    """

    def __init__(self, intents_file='intents.json'):
        """Set up NLTK resources, file paths and the model.

        Args:
            intents_file: Path to a JSON file with an ``"intents"`` list. If
                the file does not exist, a small built-in intent set is used.
        """
        self._ensure_nltk_data()

        self.lemmatizer = WordNetLemmatizer()
        self.intents_file = intents_file
        self.ignore_chars = ["?", "!", ".", ",", ";", ":", "-", "_", "(", ")", "[", "]", "{", "}", "'", "\""]
        self.model_path = "chatbot_model.h5"
        self.words_path = "words.pkl"
        self.classes_path = "classes.pkl"
        # Parsed intents, filled lazily so the file is read at most once.
        self._intents = None

        # Load or prepare training data
        self.prepare_training_data()

    @staticmethod
    def _nltk_resource_present(relative_path):
        """Return True if the resource exists under any NLTK search path.

        Both the unpacked directory and a ``.zip`` archive of the same name
        count as present, and every entry of ``nltk.data.path`` is checked
        (not only the first one), so data installed in a different location
        does not trigger a redundant download.
        """
        for base in nltk.data.path:
            candidate = os.path.join(base, relative_path)
            if os.path.exists(candidate) or os.path.exists(candidate + ".zip"):
                return True
        return False

    def _ensure_nltk_data(self):
        """Download the NLTK datasets used by the bot if they are missing."""
        required = (
            ("punkt", "tokenizers/punkt"),
            ("wordnet", "corpora/wordnet"),
        )
        for package, relative_path in required:
            if not self._nltk_resource_present(relative_path):
                logger.info(f"Downloading NLTK {package}...")
                nltk.download(package, quiet=True)

    def _tokenize(self, text):
        """Tokenize ``text`` with NLTK, fetching tokenizer data on demand.

        NOTE(review): some NLTK releases look up the tokenizer tables under
        the "punkt_tab" resource instead of "punkt"; when the first attempt
        fails with LookupError that resource is downloaded and the call is
        retried once. A second failure propagates to the caller.
        """
        try:
            return nltk.word_tokenize(text)
        except LookupError:
            logger.info("Downloading NLTK punkt_tab...")
            nltk.download("punkt_tab", quiet=True)
            return nltk.word_tokenize(text)

    @staticmethod
    def _default_intents():
        """Return the built-in intents used when no intents file exists."""
        return {
            "intents": [
                {
                    "tag": "greeting",
                    "patterns": ["Hello", "Hi", "Hey", "Greetings", "What's up?"],
                    "responses": ["Hello!", "Hey there!", "Hi, how can I assist you?"]
                },
                {
                    "tag": "goodbye",
                    "patterns": ["Bye", "See you later", "Goodbye"],
                    "responses": ["Goodbye!", "Take care!", "See you soon!"]
                },
                {
                    "tag": "thanks",
                    "patterns": ["Thanks", "Thank you", "Appreciate it"],
                    "responses": ["You're welcome!", "No problem!", "Glad to help!"]
                },
                {
                    "tag": "weather",
                    "patterns": ["What's the weather like?", "Is it raining?", "Tell me the weather"],
                    "responses": ["I don't have live weather updates, but you can check a weather website!"]
                },
                {
                    "tag": "help",
                    "patterns": ["I need help", "Can you assist me?", "Support"],
                    "responses": ["Sure! How can I help?", "I'm here to assist you."]
                }
            ]
        }

    def load_intents(self):
        """Load intents from the JSON file, or fall back to the defaults.

        Returns:
            dict: Mapping with an ``"intents"`` list; each entry carries
            ``"tag"``, ``"patterns"`` and ``"responses"``.

        Raises:
            Exception: Any error other than a missing file (unreadable file,
                invalid JSON, ...) is logged and re-raised.
        """
        try:
            # Explicit encoding so parsing does not depend on the platform locale.
            with open(self.intents_file, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError:
            # Use default intents if file doesn't exist
            return self._default_intents()
        except Exception as e:
            logger.error(f"Error loading intents: {e}")
            raise

    def _get_intents(self):
        """Return the parsed intents, reading them from disk only once."""
        intents = getattr(self, "_intents", None)
        if intents is None:
            intents = self.load_intents()
            self._intents = intents
        return intents

    def prepare_training_data(self):
        """Load the saved model and vocabulary, or build and train from intents.

        When the model file and both pickle files exist they are loaded and
        nothing is retrained. Otherwise the intent patterns are tokenized,
        the vocabulary and class list are pickled to disk, and a new model
        is trained via ``build_model``.
        """
        artifacts = (self.model_path, self.words_path, self.classes_path)
        if all(os.path.exists(path) for path in artifacts):
            logger.info("Loading existing model and data...")
            self.model = load_model(self.model_path)
            # SECURITY: pickle.load can execute arbitrary code. These files
            # are expected to be the ones written below by this class; do not
            # point words_path / classes_path at untrusted files.
            with open(self.words_path, 'rb') as file:
                self.words = pickle.load(file)
            with open(self.classes_path, 'rb') as file:
                self.classes = pickle.load(file)
            return

        intents = self._get_intents()

        # Tokenization & lemmatization
        raw_tokens = []
        tags = set()
        documents = []
        ignored = set(self.ignore_chars)

        logger.info("Processing intents data...")
        for intent in intents["intents"]:
            tag = intent["tag"]
            for pattern in intent["patterns"]:
                word_list = self._tokenize(pattern)
                raw_tokens.extend(word_list)
                documents.append((word_list, tag))
                # A tag only becomes a class if it has at least one pattern.
                tags.add(tag)

        # Lemmatize, drop punctuation tokens, and remove duplicates
        words = sorted({
            self.lemmatizer.lemmatize(token.lower())
            for token in raw_tokens
            if token not in ignored
        })
        classes = sorted(tags)

        logger.info(f"Found {len(documents)} documents, {len(classes)} classes, {len(words)} unique lemmatized words")

        # Save words and classes so a later run can reuse the trained model
        with open(self.words_path, 'wb') as file:
            pickle.dump(words, file)
        with open(self.classes_path, 'wb') as file:
            pickle.dump(classes, file)

        self.words = words
        self.classes = classes

        training = self.create_training_data(documents, words, classes)
        self.build_model(training)

    def create_training_data(self, documents, words, classes):
        """Create shuffled bag-of-words training data.

        Args:
            documents: Iterable of ``(tokens, tag)`` pairs.
            words: Vocabulary; position ``i`` of every bag refers to ``words[i]``.
            classes: Class tags; position ``i`` of every label row refers to
                ``classes[i]``.

        Returns:
            numpy.ndarray: Object array of shape ``(len(documents), 2)`` whose
            rows hold ``[bag, one_hot_label]`` as Python lists.

        Raises:
            ValueError: If a document's tag is not in ``classes``.
        """
        training = []
        for tokens, tag in documents:
            pattern_words = {self.lemmatizer.lemmatize(token.lower()) for token in tokens}
            bag = [1 if word in pattern_words else 0 for word in words]

            output_row = [0] * len(classes)
            output_row[classes.index(tag)] = 1
            training.append((bag, output_row))

        random.shuffle(training)

        # Fill an explicit (N, 2) object array. Letting np.array() infer the
        # shape would produce a 3-D array whenever len(words) == len(classes),
        # which changes what each row contains.
        result = np.empty((len(training), 2), dtype=object)
        for row_index, (bag, output_row) in enumerate(training):
            result[row_index, 0] = bag
            result[row_index, 1] = output_row
        return result

    def build_model(self, training):
        """Build and train the neural network model.

        Args:
            training: Sequence of ``[bag, one_hot_label]`` pairs as produced
                by ``create_training_data``.

        Raises:
            ValueError: If ``training`` is empty.

        Side effects:
            Sets ``self.model`` and, through the checkpoint callback, writes
            the best model (by validation accuracy) to ``self.model_path``.
        """
        if len(training) == 0:
            raise ValueError("Cannot train the model: no training samples were provided.")

        # Explicit numeric dtype: the samples come out of an object array and
        # must not reach Keras as object-dtype arrays.
        X = np.array([sample[0] for sample in training], dtype=np.float32)
        y = np.array([sample[1] for sample in training], dtype=np.float32)

        # Split into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        model = Sequential([
            Dense(256, input_shape=(X.shape[1],), activation="relu"),
            Dropout(0.5),
            Dense(128, activation="relu"),
            Dropout(0.5),
            Dense(64, activation="relu"),
            Dropout(0.3),
            Dense(len(self.classes), activation="softmax")
        ])

        model.compile(
            loss="categorical_crossentropy",
            optimizer=Adam(learning_rate=0.001),
            metrics=["accuracy"]
        )

        # NOTE(review): early stopping restores the best weights by val_loss
        # while the checkpoint on disk is selected by val_accuracy, so the
        # in-memory model and the saved file may differ — confirm intended.
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.0001),
            ModelCheckpoint(
                self.model_path,
                monitor='val_accuracy',
                verbose=1,
                save_best_only=True,
                mode='max'
            )
        ]

        logger.info("Training model...")
        model.fit(
            X_train,
            y_train,
            epochs=300,
            batch_size=8,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=1
        )

        logger.info(f"Model accuracy: {model.evaluate(X_val, y_val)[1]:.4f}")
        self.model = model

    def preprocess_input(self, sentence):
        """Convert a sentence into a bag-of-words vector over ``self.words``.

        Args:
            sentence: Raw user text.

        Returns:
            numpy.ndarray: 1-D array of 0/1 flags, one per vocabulary word.
        """
        sentence_words = {
            self.lemmatizer.lemmatize(token.lower())
            for token in self._tokenize(sentence)
        }
        return np.array([1 if word in sentence_words else 0 for word in self.words])

    def get_response(self, user_input, confidence_threshold=0.7):
        """Get chatbot response for user input.

        Args:
            user_input: Raw user text.
            confidence_threshold: The predicted class probability must be
                strictly greater than this value for an intent to be chosen.

        Returns:
            dict: ``{"tag", "response", "confidence"}``. ``tag`` is the
            matched intent tag, ``"unknown"`` when nothing matched with
            enough confidence (or the input contains no vocabulary word), or
            ``"error"`` when processing raised an exception.
        """
        try:
            bag = self.preprocess_input(user_input)

            # With no known word the network output does not depend on the
            # input at all, so skip the prediction instead of trusting it.
            if not np.any(bag):
                return {
                    "tag": "unknown",
                    "response": "I'm not sure how to respond to that.",
                    "confidence": 0.0
                }

            # verbose=0 keeps Keras' progress bar out of the conversation.
            prediction = self.model.predict(np.array([bag]), verbose=0)[0]
            max_index = np.argmax(prediction)
            confidence = prediction[max_index]

            if confidence > confidence_threshold:
                tag = self.classes[max_index]
                # Cached: the intents file is not re-read for every message.
                for intent in self._get_intents()["intents"]:
                    if intent["tag"] == tag:
                        return {
                            "tag": tag,
                            "response": random.choice(intent["responses"]),
                            "confidence": float(confidence)
                        }

            return {
                "tag": "unknown",
                "response": "I'm not sure how to respond to that.",
                "confidence": float(confidence)
            }
        except Exception as e:
            # Top-level boundary for the chat loop: log with traceback and
            # answer with a generic error instead of crashing the session.
            logger.exception(f"Error getting response: {e}")
            return {
                "tag": "error",
                "response": "Sorry, I encountered an error processing your request.",
                "confidence": 0.0
            }

    def run_interactive_chat(self):
        """Run an interactive chat session in the console.

        The loop ends when the user types ``exit``, ``quit`` or ``bye``
        (case-insensitive, surrounding whitespace ignored), or when input is
        closed / interrupted (EOF or Ctrl-C). Blank lines are skipped.
        """
        print("\n🤖 Chatbot: Hello! Type 'exit' to end the conversation.")
        exit_commands = {"exit", "quit", "bye"}

        while True:
            try:
                user_input = input("\nYou: ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\n🤖 Chatbot: Goodbye!")
                break

            if not user_input:
                continue
            if user_input.lower() in exit_commands:
                print("🤖 Chatbot: Goodbye!")
                break

            result = self.get_response(user_input)
            print(f"🤖 Chatbot: {result['response']} (Confidence: {result['confidence']:.2f})")


if __name__ == "__main__":
    # Entry point when run as a script. Constructing ChatBot() calls
    # prepare_training_data(), which loads the saved model or trains a new
    # one; the console chat loop then runs until the user exits.
    chatbot = ChatBot()
    chatbot.run_interactive_chat()