In [None]:
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import spacy
import pandas as pd 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import json 

# Ensure NLTK data is downloaded (run once)
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet')
try:
    nltk.data.find('taggers/averaged_perceptron_tagger')
except LookupError:
    nltk.download('averaged_perceptron_tagger')


# Load SpaCy model for NER
try:
    nlp_ner = spacy.load("en_core_web_sm")
except OSError:
    print("SpaCy model 'en_core_web_sm' not found. Please run: python -m spacy download en_core_web_sm")
    # Fallback or exit if model is crucial
    nlp_ner = None

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rahma\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# --- 1. Preprocessing and Data Handling ---

class TextPreprocessor:
    """
    Handles text preprocessing steps: tokenization, stopword elimination, and lemmatization.
    """
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def preprocess(self, text):
        """
        Applies a series of preprocessing steps to the input text.
        """
        text = text.lower() # Convert to lowercase
        text = re.sub(f"[{re.escape(string.punctuation)}]", "", text) # Remove punctuation
        tokens = nltk.word_tokenize(text) # Tokenization
        tokens = [word for word in tokens if word.isalpha()] # Remove non-alphabetic tokens
        tokens = [word for word in tokens if word not in self.stop_words] # Stopword elimination
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens] # Lemmatization
        return " ".join(tokens)

In [None]:
# --- 2. Intent Recognition (Simplified) ---

class IntentRecognizer:

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.model = LogisticRegression(max_iter=1000)
        self.intents = []

    def train(self, training_data):
       
        texts = [item[0] for item in training_data]
        self.intents = [item[1] for item in training_data]

        X = self.vectorizer.fit_transform(texts)
        y = self.intents

        # Splitting data for evaluation (optional, for real-world training, use a dedicated dataset)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        self.model.fit(X_train, y_train)

        # Evaluate the model (for demonstration)
        y_pred = self.model.predict(X_test)
        print("\n--- Intent Recognition Model Evaluation ---")
        print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
        print(f"Precision: {precision_score(y_test, y_pred, average='weighted', zero_division=0):.2f}")
        print(f"Recall: {recall_score(y_test, y_pred, average='weighted', zero_division=0):.2f}")
        print(f"F1-Score: {f1_score(y_test, y_pred, average='weighted', zero_division=0):.2f}")


    def predict_intent(self, text):
        """
        Predicts the intent of the given text.
        """
        if not self.intents:
            return "unknown_intent"

        vectorized_text = self.vectorizer.transform([text])
        if vectorized_text.shape[1] == 0: # Handle empty vocabulary case
            return "unknown_intent"

        prediction = self.model.predict(vectorized_text)
        return prediction[0]

In [6]:
# --- 3. Named Entity Recognition (NER) ---

class NERExtractor:
    """
    Extracts named entities from text using SpaCy.
    Entities like 'DATE', 'MONEY', 'ORG' could be relevant for banking.
    """
    def __init__(self, nlp_model):
        self.nlp = nlp_model

    def extract_entities(self, text):
        """
        Processes text to extract entities and their labels.
        Returns a dictionary of entities.
        """
        if not self.nlp:
            print("SpaCy model not loaded. Cannot perform NER.")
            return {}

        doc = self.nlp(text)
        entities = {}
        for ent in doc.ents:
            # Example: Store entities by their label, could be more structured
            if ent.label_ not in entities:
                entities[ent.label_] = []
            entities[ent.label_].append(ent.text)
        return entities

In [7]:
# --- 4. Dialogue Management (Placeholder) ---

class DialogueManager:

    def __init__(self):
        self.context = {} # Stores conversational context (e.g., last intent, entities)
        # Store responses from the dataset for direct lookup
        self.responses_map = {}

    def load_responses(self, data_path="Dataset_Banking_chatbot.csv"):
       
        try:
            df = pd.read_csv(data_path)
           
            for index, row in df.iterrows():
                query = row['Query'].lower().strip()
                response = row['Response'].strip()
                # Use the query itself as a key for direct lookup if intent is not precise
                self.responses_map[query] = response
               
    
        except FileNotFoundError:
            print(f"Error: Dataset not found at {data_path}. Dialogue manager will use fallback responses.")
        except KeyError:
            print("Error: 'Query' or 'Response' columns not found in the dataset. Check CSV header.")


    def manage_dialogue(self, intent, entities, original_query):
        """
        Determines the appropriate response or action based on intent and entities.
        Now tries to find a direct match from the loaded responses first.
        """
        response = ""

        
        
        matched_response = self.responses_map.get(original_query.lower().strip())
        if matched_response:
            response = matched_response
        else:
            # Fallback to generic responses if no direct match found
            if intent == "account_balance":
                if "CARDINAL" in entities: # Simple check for numerical entities
                    response = f"To check your account balance, please provide your account number. (Detected number: {entities['CARDINAL'][0]})"
                else:
                    response = "To check your account balance, please provide your account number."
            elif intent == "transaction_history":
                if "DATE" in entities:
                    response = f"Sure, I can help with your transaction history for {entities['DATE'][0]}. Please confirm your account details."
                else:
                    response = "I can help you with your transaction history. What period are you interested in?"
            elif intent == "fraud_report":
                response = "I'm sorry to hear that. Please provide details about the suspicious activity. I will connect you to a fraud specialist."
            elif intent == "greeting":
                response = "Hello! How can I assist you with your banking needs today?"
            elif intent == "goodbye":
                response = "Goodbye! Have a great day."
            elif intent == "new_account_inquiry":
                response = "To open a new bank account, visit our website and click on 'Open Account' or visit any of our branches with your identification documents."
            elif intent == "loan_inquiry":
                response = "To apply for a loan, visit our website and fill out the online application form or visit one of our branches to speak with a loan officer."
            elif intent == "card_issue":
                response = "Report a lost or stolen debit card immediately by calling our customer service at 1-800-123-4567 or through our mobile app."
            elif intent == "password_reset":
                response = "Click on 'Forgot Password' on the login page and follow the instructions to reset your password using your registered email address."
            else:
                response = "I'm not sure how to help with that. Could you please rephrase or ask about a different banking topic?"

        # Update context 
        self.context['last_intent'] = intent
        self.context['last_entities'] = entities
        return response


In [8]:
# --- 5. Sentiment Analysis (Placeholder) ---

class SentimentAnalyzer:
    """
    Classifies client inquiries to determine frustration or urgency.
    This is a placeholder. A real implementation would use a trained
    sentiment model (e.g., fine-tuned BERT for sentiment classification).
    """
    def analyze_sentiment(self, text):
        """
        Returns a dummy sentiment for demonstration.
        In reality, this would return 'positive', 'negative', 'neutral',
        or an urgency level.
        """
        if "urgent" in text.lower() or "immediately" in text.lower() or "now" in text.lower():
            return "urgent"
        elif "problem" in text.lower() or "issue" in text.lower() or "frustrated" in text.lower():
            return "negative"
        else:
            return "neutral" # Default sentiment

In [9]:
# --- 6. Response Generation (Placeholder) ---

class ResponseGenerator:
    """
    Generates human-like responses.
    This is a placeholder. In a real system, this would involve
    fine-tuning BERT/GPT-based models on banking conversation data.
    """
    def generate_response(self, intent, entities, sentiment, dialogue_response):
        """
        Combines information to generate a final response.
        For this example, it primarily uses the dialogue manager's response.
        """
        # In a real scenario, the LLM would take intent, entities, sentiment,
        # and dialogue state as input to generate a nuanced response.
        # Example of how sentiment *could* influence response:
        if sentiment == "urgent":
            return f"Urgent: {dialogue_response} Please hold while I connect you to a human agent."
        elif sentiment == "negative":
            return f"I understand your concern. {dialogue_response}"
        else:
            return dialogue_response

In [10]:

# --- Main Chatbot Class ---

class BankingChatbot:
    """
    Integrates all components to create the banking chatbot.
    """
    def __init__(self, dataset_path="Dataset_Banking_chatbot.csv"):
        self.preprocessor = TextPreprocessor()
        self.intent_recognizer = IntentRecognizer()
        self.ner_extractor = NERExtractor(nlp_ner)
        self.dialogue_manager = DialogueManager()
        self.sentiment_analyzer = SentimentAnalyzer()
        self.response_generator = ResponseGenerator()
        self.dataset_path = "C:/Users/rahma/Downloads/Dataset_Banking_chatbot.csv" 
        self._load_and_train_models()

    def _load_and_train_models(self):
        """
        Loads data from the CSV dataset and trains the models.
        """
        print("Loading and training models from CSV...")
        try:
            df = pd.read_csv(self.dataset_path)
          
            df['Intent'] = df['Query'].apply(self._derive_intent_from_query)

            training_data = [(row['Query'], row['Intent']) for index, row in df.iterrows()]

            # Preprocess training data for intent recognition
            preprocessed_training_data = [
                (self.preprocessor.preprocess(text), intent)
                for text, intent in training_data
            ]
            self.intent_recognizer.train(preprocessed_training_data)

            # Load responses into dialogue manager
            self.dialogue_manager.load_responses(self.dataset_path)

            print("Models loaded and trained successfully using CSV data.")

        except FileNotFoundError:
            print(f"Error: Dataset not found at {self.dataset_path}. Please ensure the CSV file is in the same directory.")
            print("Chatbot will operate with limited functionality (generic fallbacks).")
        except KeyError as e:
            print(f"Error: Missing expected column in CSV: {e}. Please ensure 'Query' and 'Response' columns exist.")
            print("Chatbot will operate with limited functionality (generic fallbacks).")
        except Exception as e:
            print(f"An unexpected error occurred during model loading: {e}")
            print("Chatbot will operate with limited functionality (generic fallbacks).")


    def _derive_intent_from_query(self, query):
        """
        A heuristic function to derive intents from queries in the dataset.
        This is a *very* simplified approach for demonstration.
        In a real dataset, you would ideally have a dedicated 'Intent' column.
        """
        query_lower = query.lower()
        if "open account" in query_lower or "new bank account" in query_lower:
            return "new_account_inquiry"
        elif "check my account balance" in query_lower or "current balance" in query_lower:
            return "account_balance"
        elif "transfer money" in query_lower:
            return "money_transfer"
        elif "direct deposit" in query_lower:
            return "direct_deposit"
        elif "lost or stolen debit card" in query_lower or "card issue" in query_lower:
            return "card_issue"
        elif "forgot my online banking password" in query_lower or "reset my password" in query_lower:
            return "password_reset"
        elif "apply for a loan" in query_lower or "loan options" in query_lower or "loan application" in query_lower:
            return "loan_inquiry"
        elif "interest rates" in query_lower or "savings accounts" in query_lower:
            return "interest_rates_inquiry"
        elif "close my bank account" in query_lower:
            return "account_closure"
        elif "update my personal information" in query_lower or "change my contact information" in query_lower:
            return "update_personal_info"
        elif "dispute a transaction" in query_lower or "unauthorized transaction" in query_lower:
            return "dispute_transaction"
        elif "order a new checkbook" in query_lower:
            return "order_checkbook"
        elif "fees for overdraft protection" in query_lower or "overdraft policy" in query_lower:
            return "overdraft_fees"
        elif "increase my credit limit" in query_lower:
            return "credit_limit_increase"
        elif "activate my new credit card" in query_lower:
            return "activate_credit_card"
        elif "schedule recurring payments" in query_lower or "auto-pay" in query_lower:
            return "recurring_payments"
        elif "foreign transactions" in query_lower or "international" in query_lower:
            return "foreign_transactions"
        elif "request a bank statement" in query_lower or "account statements" in query_lower:
            return "bank_statement_request"
        elif "financial planning services" in query_lower or "financial advice" in query_lower:
            return "financial_planning"
        elif "suspect fraudulent activity" in query_lower or "report fraud" in query_lower:
            return "fraud_report"
        elif "set up alerts" in query_lower or "account notifications" in query_lower:
            return "account_alerts"
        elif "maximum amount i can withdraw" in query_lower or "atm withdrawal limit" in query_lower:
            return "atm_withdrawal_limit"
        elif "use my debit card internationally" in query_lower:
            return "international_debit_card_use"
        elif "branch hours" in query_lower or "nearest branch" in query_lower:
            return "branch_info"
        elif "account is frozen" in query_lower:
            return "account_frozen"
        elif "view my account history" in query_lower or "account activity" in query_lower:
            return "account_history"
        elif "mobile check deposit" in query_lower:
            return "mobile_check_deposit"
        elif "stop payment on a check" in query_lower:
            return "stop_check_payment"
        elif "account minimum balances" in query_lower:
            return "minimum_balance_policy"
        elif "add an authorized user" in query_lower or "joint account holder" in query_lower:
            return "add_authorized_user"
        elif "set up online banking" in query_lower:
            return "setup_online_banking"
        elif "link multiple accounts" in query_lower:
            return "link_accounts"
        elif "update my account preferences" in query_lower or "change my account settings" in query_lower:
            return "update_account_preferences"
        elif "temporary credit limit increase" in query_lower:
            return "temp_credit_limit_increase"
        elif "check if my application is approved" in query_lower:
            return "application_status"
        elif "change my pin" in query_lower:
            return "change_pin"
        elif "investment services" in query_lower:
            return "investment_services"
        elif "retirement accounts" in query_lower or "retirement planning" in query_lower:
            return "retirement_planning"
        elif "budgeting assistance" in query_lower or "budgeting and financial planning" in query_lower:
            return "budgeting_assistance"
        elif "enroll in e-statements" in query_lower:
            return "enroll_e_statements"
        elif "suspicious email" in query_lower or "phishing" in query_lower:
            return "suspicious_email"
        elif "copy of my credit report" in query_lower or "check my credit score" in query_lower:
            return "credit_report_inquiry"
        elif "student accounts" in query_lower or "student loan services" in query_lower:
            return "student_services"
        elif "account closure fees" in query_lower:
            return "account_closure_fees"
        elif "transfer money internationally" in query_lower:
            return "international_money_transfer"
        elif "replacement for a damaged card" in query_lower:
            return "damaged_card_replacement"
        elif "business banking services" in query_lower or "business loan" in query_lower or "business credit card" in query_lower:
            return "business_banking"
        elif "tax documents" in query_lower or "tax filing" in query_lower:
            return "tax_documents"
        elif "personal line of credit" in query_lower:
            return "personal_line_of_credit"
        elif "link my bank account to my mobile wallet" in query_lower:
            return "mobile_wallet_link"
        elif "account maintenance fees" in query_lower:
            return "account_maintenance_fees"
        elif "insurance products" in query_lower or "travel insurance" in query_lower:
            return "insurance_products"
        elif "financial emergencies" in query_lower:
            return "financial_emergencies"
        elif "checking account" in query_lower:
            return "checking_account_info"
        elif "update my mailing address" in query_lower:
            return "update_mailing_address"
        elif "returned checks" in query_lower:
            return "returned_checks_policy"
        elif "automatic transfer between accounts" in query_lower:
            return "automatic_transfer"
        elif "access my account through a web browser" in query_lower:
            return "web_access_info"
        elif "temporary increase in my withdrawal limit" in query_lower:
            return "temp_withdrawal_limit_increase"
        elif "account protection services" in query_lower:
            return "account_protection"
        elif "mortgage application" in query_lower:
            return "mortgage_application"
        elif "unauthorized transaction" in query_lower:
            return "unauthorized_transaction_report"
        elif "personal loan" in query_lower:
            return "personal_loan_inquiry"
        elif "account reconciliation" in query_lower:
            return "account_reconciliation"
        elif "loan statement" in query_lower:
            return "loan_statement_request"
        elif "account type" in query_lower:
            return "change_account_type"
        elif "suspicious call" in query_lower:
            return "suspicious_call"
        elif "temporary overdraft limit increase" in query_lower:
            return "temp_overdraft_limit_increase"
        elif "copy of my account contract" in query_lower:
            return "account_contract_request"
        elif "hello" in query_lower or "hi" in query_lower or "good morning" in query_lower:
            return "greeting"
        elif "bye" in query_lower or "goodbye" in query_lower or "see you later" in query_lower:
            return "goodbye"
        elif "tell me about your services" in query_lower:
            return "about_services"
        else:
            return "general_inquiry" 


    def chat(self, user_input):
        """
        Processes user input and generates a chatbot response.
        """
        print(f"\nUser: {user_input}")

        # 1. Preprocessing
        processed_input = self.preprocessor.preprocess(user_input)
        print(f"Processed Input: '{processed_input}'")

        # 2. Intent Recognition
        intent = self.intent_recognizer.predict_intent(processed_input)
        print(f"Detected Intent: {intent}")

        # 3. Named Entity Recognition
        entities = self.ner_extractor.extract_entities(user_input)
        print(f"Extracted Entities: {entities}")

        # 4. Sentiment Analysis
        sentiment = self.sentiment_analyzer.analyze_sentiment(user_input)
        print(f"Detected Sentiment: {sentiment}")

        # 5. Dialogue Management
        # Pass the original user_input to dialogue manager for direct response lookup
        dialogue_response = self.dialogue_manager.manage_dialogue(intent, entities, user_input)
        print(f"Dialogue Manager Response: {dialogue_response}")

        # 6. Response Generation
        final_response = self.response_generator.generate_response(
            intent, entities, sentiment, dialogue_response
        )
        print(f"Chatbot: {final_response}")
        return final_response

In [11]:
# --- Main execution loop for demonstration ---
if __name__ == "__main__":
    print("Initializing Banking Chatbot...")
    # Pass the dataset path to the chatbot constructor
    chatbot = BankingChatbot(dataset_path="C:/Users/rahma/Downloads/Dataset_Banking_chatbot.csv")

    print("\n--- Start Chatting (type 'quit' to exit) ---")
    while True:
        user_message = input("You: ")
        if user_message.lower() == 'quit':
            print("Chatbot: Goodbye!")
            break
        chatbot.chat(user_message)


Initializing Banking Chatbot...
Loading and training models from CSV...
An unexpected error occurred during model loading: 'utf-8' codec can't decode byte 0x92 in position 3083: invalid start byte
Chatbot will operate with limited functionality (generic fallbacks).

--- Start Chatting (type 'quit' to exit) ---

User: view my account history
Processed Input: 'view account history'
Detected Intent: unknown_intent
Extracted Entities: {}
Detected Sentiment: neutral
Dialogue Manager Response: I'm not sure how to help with that. Could you please rephrase or ask about a different banking topic?
Chatbot: I'm not sure how to help with that. Could you please rephrase or ask about a different banking topic?

User: forgot my online banking password
Processed Input: 'forgot online banking password'
Detected Intent: unknown_intent
Extracted Entities: {}
Detected Sentiment: neutral
Dialogue Manager Response: I'm not sure how to help with that. Could you please rephrase or ask about a different bankin