In [None]:
import re
import random
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

class SimpleChatbot:
    """Pattern-matching chatbot backed by TF-IDF vectors and cosine similarity.

    Every intent pattern (and every directly taught question) is preprocessed
    and vectorised. A user message is answered with a response belonging to
    the most similar stored pattern, provided the similarity exceeds
    ``SIMILARITY_THRESHOLD``.
    """

    # A pattern must score strictly above this cosine similarity to match.
    SIMILARITY_THRESHOLD = 0.2

    # Entity patterns are compiled once instead of on every message.
    _DATE_RE = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')
    # The TLD class is [A-Za-z]; the previous [A-Z|a-z] also accepted '|'.
    _EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
    _NUMBER_RE = re.compile(r'\b\d+\b')

    def __init__(self, intents_file=None):
        """Download NLTK data, load intents and fit the TF-IDF model.

        Args:
            intents_file: Optional path to a JSON file holding an
                ``{"intents": [...]}`` document. When it is omitted or the
                file does not exist, the built-in default intents are used.
        """
        # Download necessary NLTK resources
        print("Downloading required NLTK resources...")
        nltk.download('punkt', quiet=True)
        # Recent NLTK releases read tokenizer data from 'punkt_tab' rather
        # than the older pickled 'punkt' models, so fetch both.
        nltk.download('punkt_tab', quiet=True)
        nltk.download('stopwords', quiet=True)
        nltk.download('wordnet', quiet=True)
        print("Download complete!")

        # Initialize the lemmatizer
        self.lemmatizer = WordNetLemmatizer()

        # Build the stop-word set once; preprocess_text runs for every
        # pattern and every user message.
        self.stop_words = set(stopwords.words('english'))

        # Directly taught question -> answer pairs. Created before the first
        # training pass so train_model always finds the attribute.
        self.examples = {}

        # Load intents
        if intents_file:
            try:
                with open(intents_file, 'r', encoding='utf-8') as file:
                    self.intents = json.load(file)
            except FileNotFoundError:
                self.create_default_intents()
        else:
            self.create_default_intents()

        # Prepare training data
        self.prepare_training_data()

        # Train the model
        self.train_model()

    @staticmethod
    def _example_tag(example_text):
        """Return the internal tag under which a taught question is indexed."""
        return f"example_{hash(example_text)}"

    def create_default_intents(self):
        """Create default intents if no file is provided"""
        self.intents = {
            "intents": [
                {
                    "tag": "greeting",
                    "patterns": ["Hi", "Hello", "Hey", "How are you", "What's up"],
                    "responses": ["Hello!", "Hey there!", "Hi! How can I help you today?"]
                },
                {
                    "tag": "goodbye",
                    "patterns": ["Bye", "See you later", "Goodbye", "I'm leaving"],
                    "responses": ["Goodbye!", "See you later!", "Take care!"]
                },
                {
                    "tag": "thanks",
                    "patterns": ["Thank you", "Thanks", "That's helpful"],
                    "responses": ["You're welcome!", "Happy to help!", "Anytime!"]
                },
                {
                    "tag": "help",
                    "patterns": ["Help", "I need help", "Can you help me", "What can you do"],
                    "responses": ["I can answer questions, provide information, or just chat. What do you need help with?"]
                }
            ]
        }
        print("Created default intents as no intents file was provided.")

    def prepare_training_data(self):
        """Rebuild ``self.patterns`` / ``self.tags`` from the current intents.

        The two lists are parallel: ``self.tags[i]`` is the intent tag that
        ``self.patterns[i]`` belongs to. Taught examples are not added here;
        ``train_model`` appends them.
        """
        self.patterns = []
        self.tags = []

        for intent in self.intents["intents"]:
            for pattern in intent["patterns"]:
                self.patterns.append(pattern)
                self.tags.append(intent["tag"])

    def preprocess_text(self, text):
        """Preprocess text: tokenize, lowercase, remove stopwords, and lemmatize.

        Args:
            text: Raw text to normalise.

        Returns:
            The remaining lemmatized tokens joined by single spaces, or ``""``
            when ``text`` is empty or not a string.
        """
        # Check if text is empty or None
        if not text or not isinstance(text, str):
            return ""

        tokens = word_tokenize(text.lower())

        # Remove stopwords and lemmatize
        kept = [
            self.lemmatizer.lemmatize(word)
            for word in tokens
            if word not in self.stop_words
        ]
        return " ".join(kept)

    def train_model(self):
        """Fit the TF-IDF vectorizer on all intent patterns and taught examples.

        Taught examples are appended to ``self.patterns`` / ``self.tags``
        under an ``example_...`` tag. An example whose tag is already present
        is skipped, so calling this method repeatedly does not pile up
        duplicate rows.
        """
        examples = getattr(self, 'examples', None)
        if examples:
            known_tags = set(self.tags)
            for example_text in examples:
                tag = self._example_tag(example_text)
                if tag in known_tags:
                    continue
                self.patterns.append(example_text)
                # The tag lets get_response find the stored answer again.
                self.tags.append(tag)
                known_tags.add(tag)

        processed_patterns = [self.preprocess_text(pattern) for pattern in self.patterns]

        # Initialize and fit TF-IDF vectorizer
        self.vectorizer = TfidfVectorizer()
        self.X_train = self.vectorizer.fit_transform(processed_patterns)

    def predict_intent(self, user_input):
        """Predict the intent of the user input.

        Args:
            user_input: The raw user message.

        Returns:
            A ``(tag, similarity)`` tuple for the best-matching pattern, or
            ``("unknown", 0.0)`` when the best similarity does not exceed
            ``SIMILARITY_THRESHOLD``.
        """
        processed_input = self.preprocess_text(user_input)
        user_vector = self.vectorizer.transform([processed_input])

        # One query row against every training row -> take that single row.
        scores = cosine_similarity(user_vector, self.X_train)[0]
        best_idx = int(np.argmax(scores))
        best_score = scores[best_idx]

        # Only return a match if similarity is above threshold
        if best_score > self.SIMILARITY_THRESHOLD:
            return self.tags[best_idx], best_score
        return "unknown", 0.0

    def get_response(self, tag):
        """Return a reply for ``tag``.

        Args:
            tag: An intent tag or an internal ``example_...`` tag.

        Returns:
            The stored answer for a taught example, otherwise a random
            response of the matching intent, otherwise a fixed fallback
            sentence.
        """
        # Check if this is an example-based response
        if tag.startswith("example_"):
            for example_text, answer in getattr(self, 'examples', {}).items():
                if self._example_tag(example_text) == tag:
                    return answer

        # Otherwise use intent-based response
        for intent in self.intents["intents"]:
            if intent["tag"] == tag:
                return random.choice(intent["responses"])

        return "I'm not sure I understand. Could you rephrase that?"

    def entity_recognition(self, user_input):
        """Simple rule-based entity recognition.

        Args:
            user_input: Text to scan.

        Returns:
            A dict that may contain ``"dates"`` (strings such as
            ``12/05/2024``), ``"emails"`` (strings) and ``"numbers"`` (ints).
            A key is present only when at least one match was found. A
            non-string input yields an empty dict.
        """
        entities = {}
        if not isinstance(user_input, str):
            return entities

        dates = self._DATE_RE.findall(user_input)
        if dates:
            entities["dates"] = dates

        emails = self._EMAIL_RE.findall(user_input)
        if emails:
            entities["emails"] = emails

        numbers = self._NUMBER_RE.findall(user_input)
        if numbers:
            entities["numbers"] = [int(num) for num in numbers]

        return entities

    def chat(self, user_input):
        """Process user input and return a response.

        Messages starting with ``teach:`` or ``learn:`` (case-insensitive) are
        routed to the teaching methods. Any other message is answered from
        the predicted intent, followed by a summary of recognised entities.

        Args:
            user_input: The raw user message.

        Returns:
            The reply text.
        """
        # Reject missing / blank input up front so the string operations
        # below never run on a non-string.
        if not isinstance(user_input, str) or not user_input.strip():
            return "I didn't receive any input. How can I help you?"

        lowered = user_input.lower()

        # Check if this is a teaching command
        if lowered.startswith("teach:"):
            return self.learn_new_intent(user_input[6:])

        # Check if this is a direct example teaching command
        if lowered.startswith("learn:"):
            return self.learn_example(user_input[6:])

        tag, confidence = self.predict_intent(user_input)
        entities = self.entity_recognition(user_input)
        response = self.get_response(tag)

        # Add entity information if available
        if entities:
            entity_info = "\nI noticed the following information: " + ", ".join(
                f"{entity_type}: {', '.join(str(e) for e in entity_values)}"
                for entity_type, entity_values in entities.items()
            )
            response += entity_info

        return response

    def learn_new_intent(self, teaching_input):
        """Parse teaching input and add new intent

        Args:
            teaching_input: Text in the form
                ``tag|pattern1,pattern2|response1,response2``.

        Returns:
            A confirmation message, a usage hint when the format is wrong, or
            an error message when anything raises.
        """
        try:
            # Format should be: tag|pattern1,pattern2|response1,response2
            parts = teaching_input.split('|')
            if len(parts) != 3:
                return "Teaching format should be: tag|pattern1,pattern2|response1,response2"

            tag = parts[0].strip()
            patterns = [p.strip() for p in parts[1].split(',') if p.strip()]
            responses = [r.strip() for r in parts[2].split(',') if r.strip()]

            if not tag or not patterns or not responses:
                return "Please provide tag, patterns, and responses."

            # Check if intent already exists
            for intent in self.intents["intents"]:
                if intent["tag"] == tag:
                    # Update existing intent
                    intent["patterns"].extend(patterns)
                    intent["responses"].extend(responses)
                    break
            else:
                # Add new intent
                self.intents["intents"].append({
                    "tag": tag,
                    "patterns": patterns,
                    "responses": responses
                })

            # Update training data
            self.prepare_training_data()
            self.train_model()

            return f"I've learned the '{tag}' intent with {len(patterns)} patterns and {len(responses)} responses!"

        except Exception as e:
            return f"Error learning new intent: {str(e)}"

    def learn_example(self, example_input):
        """Learn a direct example question and answer pair.

        Args:
            example_input: Text in the form ``question|answer``.

        Returns:
            A confirmation message, a usage hint when the format is wrong, or
            an error message when anything raises.
        """
        try:
            # Format should be: question|answer
            parts = example_input.split('|')
            if len(parts) != 2:
                return "Learning format should be: learn:question|answer"

            question = parts[0].strip()
            answer = parts[1].strip()

            if not question or not answer:
                return "Please provide both a question and an answer."

            # Store the example
            self.examples[question] = answer

            # Update training data
            self.prepare_training_data()
            self.train_model()

            return f"I've learned that when asked '{question}', I should respond with '{answer}'!"

        except Exception as e:
            return f"Error learning example: {str(e)}"

    def batch_learn_examples(self, examples_list):
        """
        Learn multiple examples at once from a list of question-answer tuples.

        Pairs whose question or answer is empty or whitespace-only are
        skipped, matching the validation done by ``learn_example``.

        Args:
            examples_list: List of tuples (question, answer)

        Returns:
            String indicating success or failure
        """
        try:
            count = 0
            for question, answer in examples_list:
                if not question or not answer:
                    continue
                # Strip before validating so "   " is not stored under "".
                question = question.strip()
                answer = answer.strip()
                if question and answer:
                    self.examples[question] = answer
                    count += 1

            if count > 0:
                # Update training data
                self.prepare_training_data()
                self.train_model()
                return f"Successfully learned {count} new examples!"
            else:
                return "No valid examples were provided."

        except Exception as e:
            return f"Error in batch learning: {str(e)}"

    def save_all(self, intents_file="intents.json", examples_file="examples.json"):
        """Save both intents and examples to files

        Args:
            intents_file: Destination path for the intents JSON.
            examples_file: Destination path for the examples JSON.

        Returns:
            Two status lines (intents first, then examples) joined by a
            newline. A failure in one save does not prevent the other.
        """
        # save_intents already produces the intents status line.
        results = [self.save_intents(intents_file)]

        # Save examples
        try:
            with open(examples_file, 'w', encoding='utf-8') as file:
                json.dump(self.examples, file, indent=4)
            results.append(f"Examples saved to {examples_file}")
        except Exception as e:
            results.append(f"Error saving examples: {str(e)}")

        return "\n".join(results)

    def load_examples(self, examples_file="examples.json"):
        """Load examples from a file

        The file must contain a JSON object mapping questions to answers.
        ``self.examples`` is replaced only after the content passed that
        check.

        Args:
            examples_file: Path of the JSON file to read.

        Returns:
            A status message describing success or the reason for failure.
        """
        try:
            with open(examples_file, 'r', encoding='utf-8') as file:
                loaded = json.load(file)

            # Anything but a dict would break .items() lookups later on.
            if not isinstance(loaded, dict):
                return (
                    "Error loading examples: expected a JSON object "
                    "mapping questions to answers."
                )

            self.examples = loaded

            # Update training data
            self.prepare_training_data()
            self.train_model()

            return f"Loaded {len(self.examples)} examples from {examples_file}"
        except FileNotFoundError:
            return f"Examples file {examples_file} not found."
        except Exception as e:
            return f"Error loading examples: {str(e)}"

    def save_intents(self, filename="intents.json"):
        """Save the current intents to a file

        Args:
            filename: Destination path for the intents JSON.

        Returns:
            A status message describing success or the reason for failure.
        """
        try:
            with open(filename, 'w', encoding='utf-8') as file:
                json.dump(self.intents, file, indent=4)
            return f"Intents saved to {filename}"
        except Exception as e:
            return f"Error saving intents: {str(e)}"

def _read_line(prompt):
    """Return one line typed by the user, or None when input has ended."""
    try:
        return input(prompt)
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C / closed stdin: report "no more input" to the
        # caller instead of letting a traceback escape.
        return None


def _collect_batch_examples():
    """Prompt for ``question|answer`` lines until 'done' and return the pairs."""
    print("Chatbot: Entering batch learning mode. Enter question and answer pairs.")
    print("Format each pair as: question|answer")
    print("Type 'done' when finished.")

    examples = []
    while True:
        example_input = _read_line("Example: ")
        # End of input finishes the batch the same way 'done' does.
        if example_input is None or example_input.strip().lower() == 'done':
            break

        parts = example_input.split('|')
        if len(parts) == 2:
            examples.append((parts[0], parts[1]))
        else:
            print("Invalid format. Use: question|answer")
    return examples


def main():
    """Run the interactive console loop for the chatbot.

    Commands (``quit``/``exit``/``bye``, ``save``, ``load-examples``,
    ``list-examples``, ``batch-learn``) are matched case-insensitively and
    ignoring surrounding whitespace. Every other line is passed to
    ``SimpleChatbot.chat``. The loop also ends cleanly when input is closed
    or interrupted.
    """
    print("Starting Enhanced NLP Chatbot...")
    chatbot = SimpleChatbot()
    print("\nChatbot is ready! Type 'quit', 'exit', or 'bye' to end the conversation.")
    print("To teach me a new intent, use: teach:tag|pattern1,pattern2|response1,response2")
    print("To teach me a direct example, use: learn:question|answer")
    print("To save everything, type: save")
    print("For batch learning mode, type: batch-learn")
    print("\nCommands:")
    print("- save: Save intents and examples")
    print("- load-examples: Load examples from examples.json")
    print("- list-examples: Show all learned examples")
    print()

    while True:
        user_input = _read_line("You: ")
        if user_input is None:
            # Start on a fresh line because the prompt was left open.
            print("\nChatbot: Goodbye!")
            break

        command = user_input.strip().lower()

        if command in ('quit', 'exit', 'bye'):
            print("Chatbot: Goodbye!")
            break

        if command == 'save':
            print(f"Chatbot: {chatbot.save_all()}")
            continue

        if command == 'load-examples':
            print(f"Chatbot: {chatbot.load_examples()}")
            continue

        if command == 'list-examples':
            if not chatbot.examples:
                print("Chatbot: No examples have been learned yet.")
            else:
                print("Chatbot: Here are the examples I've learned:")
                for q, a in chatbot.examples.items():
                    print(f"Q: {q}\nA: {a}\n")
            continue

        if command == 'batch-learn':
            result = chatbot.batch_learn_examples(_collect_batch_examples())
            print(f"Chatbot: {result}")
            continue

        response = chatbot.chat(user_input)
        print(f"Chatbot: {response}")


if __name__ == "__main__":
    main()

Starting Enhanced NLP Chatbot...
Downloading required NLTK resources...
Download complete!
Created default intents as no intents file was provided.

Chatbot is ready! Type 'quit', 'exit', or 'bye' to end the conversation.
To teach me a new intent, use: teach:tag|pattern1,pattern2|response1,response2
To teach me a direct example, use: learn:question|answer
To save everything, type: save
For batch learning mode, type: batch-learn

Commands:
- save: Save intents and examples
- load-examples: Load examples from examples.json
- list-examples: Show all learned examples

Chatbot: I didn't receive any input. How can I help you?
Chatbot: Hi! How can I help you today?
Chatbot: Hey there!
Chatbot: I'm not sure I understand. Could you rephrase that?
