<a href="https://colab.research.google.com/github/Omar-Elhadidi/icthub-chatbot/blob/main/Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
   !pip install gradio pandas numpy scikit-learn nltk openpyxl



In [2]:
# ICT Hub Chatbot - Google Colab Version
# This version is optimized for running in Google Colab

import gradio as gr
import pandas as pd
import numpy as np
import random
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import warnings
warnings.filterwarnings('ignore')

# Fetch every NLTK resource this script asks for, one after another
print("Downloading NLTK data...")
for _nltk_resource in ('punkt', 'stopwords', 'wordnet', 'omw-1.4', 'punkt_tab'):
    # quiet=True keeps the downloader from printing its own progress output
    nltk.download(_nltk_resource, quiet=True)

print("Loading dataset...")
# Load the dataset.
# 'icthub_dataset.xlsx' is resolved relative to the current working directory;
# supply a full path to pd.read_excel if the file lives somewhere else.
try:
    df = pd.read_excel('icthub_dataset.xlsx')
except FileNotFoundError:
    print("Error: 'icthub_dataset.xlsx' not found. Please ensure the dataset file is in the correct location.")
    # Bind an empty frame with the expected columns before stopping, so `df`
    # is still defined for anything that inspects it after the abort.
    df = pd.DataFrame({'User Input': [], 'Category': [], 'Chatbot Response': []})
    # `exit` is a helper the `site` module adds for interactive sessions and is
    # not guaranteed to exist in every interpreter start-up mode; raising
    # SystemExit directly is the portable way to stop with a message.
    raise SystemExit("Exiting: Dataset file not found.")
else:
    # Only reached when the file was read successfully.
    print(f"Dataset loaded with {len(df)} entries")


# Initialize preprocessing components
# Shared WordNet lemmatizer; preprocessing() applies it to every token it keeps.
lemmatizer = WordNetLemmatizer()
# English stopwords held in a set; preprocessing() drops any token found here.
stop_words = set(stopwords.words('english'))

def preprocessing(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps: lowercase, strip every character that is not an ASCII letter or
    whitespace, tokenize, drop English stopwords, lemmatize what remains.

    Args:
        text: The raw text. Anything that is not a ``str`` (for example
            ``None`` or the NaN pandas produces for an empty spreadsheet
            cell) is treated as empty text instead of raising.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` when nothing
        usable is left.
    """
    # Non-string values have no .lower(); treat them as "no text" so one
    # blank cell in the dataset cannot abort the whole preprocessing pass.
    if not isinstance(text, str):
        return ''

    # Convert to lowercase
    text = text.lower()

    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Nothing but whitespace left: tokenizing would yield no tokens anyway.
    if not text.strip():
        return ''

    # Tokenize
    tokens = word_tokenize(text)

    # Remove stopwords and lemmatize
    kept_tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

    # Join tokens back
    return ' '.join(kept_tokens)

print("Preprocessing data...")
# Train the classifier only when the dataset has rows. With an empty dataset
# every model component is left unset, which chatbot_response() checks for
# before answering.
if df.empty:
    print("No data to preprocess or train model. Please check 'icthub_dataset.xlsx'.")
    tfidf_vec = None
    model = None
    label_encoder = None
    known_words = []
else:
    # Cleaned version of each training utterance
    df['Processed_Text'] = df['User Input'].apply(preprocessing)

    # TF-IDF features over unigrams and bigrams, capped at 1000 terms
    tfidf_vec = TfidfVectorizer(max_features=1000, ngram_range=(1, 2))
    X = tfidf_vec.fit_transform(df['Processed_Text'])

    # Integer-encode the category labels
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(df['Category'])

    print("Training model...")
    # Multinomial Naive Bayes fitted on the TF-IDF matrix
    model = MultinomialNB()
    model.fit(X, y)

    # Every term the vectorizer learned; used later for fuzzy matching
    known_words = list(tfidf_vec.vocabulary_)
    print(f"Model trained successfully! Vocabulary size: {len(known_words)}")


def is_similar_to_known_words(user_input, min_matches=1):
    """Tell whether enough words of ``user_input`` resemble vocabulary terms.

    Each whitespace-separated word is fuzzy-matched against ``known_words``
    with ``difflib.get_close_matches`` (cutoff 0.75). The result is True when
    the number of words that found a match reaches ``min_matches``. An empty
    vocabulary always gives False.
    """
    # An empty vocabulary means the model was never trained.
    if not known_words:
        return False

    # Count the words that have at least one close vocabulary match.
    hits = sum(
        1
        for candidate in user_input.split()
        if difflib.get_close_matches(candidate, known_words, n=1, cutoff=0.75)
    )
    return hits >= min_matches

def chatbot_response(user_input):
    """Produce the chatbot's reply to one user message.

    The message is preprocessed, checked against the trained vocabulary,
    classified into a category, and answered with a randomly chosen
    non-empty response from that category in the dataset.

    Args:
        user_input: The raw message typed by the user.

    Returns:
        A response string. A fixed apology/notice string is returned when the
        model is not initialized, the message matches no known word, the
        predicted category has no rows, or the category has no usable
        (non-blank text) responses.
    """
    if model is None or tfidf_vec is None or label_encoder is None:
        return "Chatbot is not fully initialized. Please ensure the dataset is loaded correctly."

    # Clean the message the same way the training data was cleaned
    processed_input = preprocessing(user_input)

    # Reject messages that share nothing with the training vocabulary
    if not is_similar_to_known_words(processed_input):
        return "Sorry, I didn't understand that. Could you please rephrase your question?"

    # Vectorize the message with the fitted TF-IDF vocabulary
    user_input_vectorized = tfidf_vec.transform([processed_input])

    # Predict the class and map the encoded label back to its category name
    predicted_label = model.predict(user_input_vectorized)
    predicted_category = label_encoder.inverse_transform(predicted_label)[0]

    # Look at the predicted category's rows only, instead of regrouping the
    # whole dataset on every message.
    category_responses = df.loc[df['Category'] == predicted_category, 'Chatbot Response']
    if category_responses.empty:
        return "Sorry, I couldn't find a suitable response for that."

    # Blank cells arrive as NaN (not str) and whitespace-only cells carry no
    # answer; skip both so they can neither crash .strip() nor be picked.
    candidates = [
        response.strip()
        for response in category_responses
        if isinstance(response, str) and response.strip()
    ]
    if not candidates:
        return "Sorry, I didn't understand that."

    return random.choice(candidates)

# Create the Gradio interface
with gr.Blocks(
    title="ICT Hub Chatbot",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray",
    ),
    css="""
    /* Ensure the body and html take full height and width, removing default margins/padding */
    html, body {
        height: 100%;
        margin: 0;
        padding: 0;
    }

    /* Main container for Gradio app. Removed max-width to allow full horizontal stretch. */
    .gradio-container {
        max-width: none !important; /* This is the key change: remove max-width */
        width: 100% !important; /* Ensure it takes full available width */
        margin: 0 !important; /* Remove auto margin to align to edges */
        padding: 20px; /* Add some padding around the content for aesthetics */
        box-sizing: border-box; /* Include padding in the element's total width */
        min-height: 100vh; /* Ensure it takes at least the full viewport height */
        display: flex;
        flex-direction: column;
        align-items: center; /* Center content horizontally within the full width */
    }

    /* Adjust specific components to take full width within their parent */
    .gradio-html, .gradio-textbox, .gradio-button, .gradio-chatbot {
        width: 100% !important;
    }

    /* Adjust the chat message input row to align correctly */
    .gradio-row {
        width: 100%; /* Ensure rows take full width */
        justify-content: center; /* Center items within the row */
        align-items: center;
        flex-wrap: wrap; /* Allow items to wrap on smaller screens */
    }

    /* Specific adjustment for the logo and title row to align elements */
    .gradio-row:first-child { /* Targets the first row which contains logo and title */
        justify-content: space-between; /* Distribute logo and title with space between */
        padding: 0 10px; /* Add slight padding if needed */
    }

    /* General styling for message input and send button area */
    .gradio-row > *:not(.gradio-button) { /* Apply to textbox, but not the button */
        flex-grow: 1; /* Allow textbox to grow */
    }

    .gradio-chatbot {
        flex-grow: 1; /* Allow chatbot to take available vertical space */
    }

    /* Adjust welcome message */
    .gr-html.welcome-message {
        width: 100%;
        text-align: center;
    }
    """
) as demo:

    # Header with logo and title
    with gr.Row():
        try:
            # Opening the file is only an existence probe: a missing logo
            # raises FileNotFoundError and the placeholder below is used.
            with open("icthub_logo.png", "rb") as f:
                pass # Just trying to open to check existence
            gr.Image("icthub_logo.png", show_label=False, container=False, height=50, interactive=False)
        except FileNotFoundError:
            # If logo not found, create a placeholder
            gr.HTML("""
            <div style="width: 50px; height: 50px; background: #1976d2; border-radius: 50%; display: flex; align-items: center; justify-content: center; color: white; font-weight: bold;">
                ICT
            </div>
            """)

        gr.HTML("""
        <div style="text-align: center; padding: 15px;">
            <h2 style="color: #1976d2; margin: 0;">ICT Hub Chatbot</h2>
            <p style="color: #666; margin: 5px 0; font-size: 14px;">Your AI Assistant</p>
        </div>
        """)

    # Welcome message
    gr.HTML("""
    <div class="gr-html welcome-message" style="text-align: center; padding: 8px; background: #e8f5e8; border-radius: 8px; margin: 8px 0;">
        <p style="margin: 0; color: #2e7d32; font-size: 14px;">Welcome! Ask me about ICT Hub services!</p>
    </div>
    """)

    # Chat interface
    chatbot = gr.Chatbot(
        label="Chat", # Gradio might show "Chat History" by default, depending on version/theme
        height=300,
        show_label=True,
        container=True,
        bubble_full_width=False
    )

    # Input area
    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message...",
            scale=3,
            show_label=False
        )
        send_btn = gr.Button("Send", variant="primary", scale=1, size="sm")

    # Footer with information
    gr.HTML("""
    <div style="text-align: center; padding: 10px; color: #666; font-size: 11px;">
        <p>Powered by AI • ICT Hub Services</p>
    </div>
    """)

    # State to disable input after exit. Both handlers return its new value,
    # so once it turns False it stays False for the rest of the session.
    input_enabled = gr.State(True)

    # Farewell shown when the chat is closed
    _GOODBYE_MESSAGE = "Goodbye! Chat ended. Refresh to start again."

    # Event handlers
    def user_input(message, history, input_enabled):
        """Record the user's message in the chat history.

        Args:
            message: Text currently in the message box.
            history: Chat history as a list of [user, bot] pairs (may be
                empty or None before the first message).
            input_enabled: False once the chat has been ended with "exit".

        Returns:
            A tuple of (textbox update, new history, new input_enabled).
            The textbox is always cleared; it is made non-interactive when
            the chat has ended.
        """
        history = history or []
        if not input_enabled:
            return gr.update(value="", interactive=False), history, False
        if message.strip().lower() == 'exit':
            # Ending the chat: answer right away and switch the state off so
            # the chained bot_response step does not re-enable the textbox.
            ended_history = history + [[message, _GOODBYE_MESSAGE]]
            return gr.update(value="", interactive=False), ended_history, False
        # Bot reply is filled in by bot_response; None marks it as pending.
        return gr.update(value="", interactive=True), history + [[message, None]], True

    def bot_response(history, input_enabled):
        """Fill in the pending bot reply for the latest user message.

        Args:
            history: Chat history as a list of [user, bot] pairs.
            input_enabled: False once the chat has been ended.

        Returns:
            A tuple of (history, textbox update, new input_enabled).
        """
        if not input_enabled:
            return history, gr.update(interactive=False), False
        # Guard against an empty history before indexing the last entry.
        if history and history[-1][1] is None:
            response = chatbot_response(history[-1][0])
            if response == '__EXIT__': # Placeholder for potential future exit logic from chatbot_response
                history[-1][1] = _GOODBYE_MESSAGE
                return history, gr.update(interactive=False), False
            history[-1][1] = response
        return history, gr.update(interactive=True), True

    # Link events to components. Each output component is listed once; the
    # state is an output of both steps so the follow-up step sees an exit.
    msg.submit(user_input, [msg, chatbot, input_enabled], [msg, chatbot, input_enabled], queue=False).then(
        bot_response, [chatbot, input_enabled], [chatbot, msg, input_enabled]
    )

    send_btn.click(user_input, [msg, chatbot, input_enabled], [msg, chatbot, input_enabled], queue=False).then(
        bot_response, [chatbot, input_enabled], [chatbot, msg, input_enabled]
    )

print("Starting the chatbot interface...")
# Launch settings collected in one mapping, then expanded into demo.launch()
_launch_options = {
    "share": True,             # For Colab compatibility, generates a public sharable link
    "debug": True,             # For Colab error display
    "show_error": True,
    "server_name": "0.0.0.0",  # Required for external access in Colab
    "server_port": 7860,       # Default Gradio port
}
# Launch the app
demo.launch(**_launch_options)

Downloading NLTK data...
Loading dataset...
Dataset loaded with 715 entries
Preprocessing data...
Training model...
Model trained successfully! Vocabulary size: 957
Starting the chatbot interface...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://26f2134b671407669d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://26f2134b671407669d.gradio.live
Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://26f2134b671407669d.gradio.live




