In [38]:
pip install requests beautifulsoup4 dash pandas plotly



In [39]:
import requests
from bs4 import BeautifulSoup
import re
import json

In [40]:
# Base URL template
base_url = "https://raw.githubusercontent.com/TRiZKy/CloudAccessBear/main/pages/page_{}.html"

In [41]:
!pip install firebase
from firebase import firebase

# Initialize Firebase connection using the URL
FBconn = firebase.FirebaseApplication('https://cloud-computing-course-8815f-default-rtdb.europe-west1.firebasedatabase.app/', None)



In [42]:
# Function to fetch pages
def fetch_pages(start=1, end=33):
    pages_content = {}  # Dictionary to store fetched page contents
    for i in range(start, end + 1):
        url = base_url.format(i)
        try:
            response = requests.get(url)
            if response.status_code == 200:
                pages_content[f"page_{i}"] = BeautifulSoup(response.text, "html.parser")  # Parse HTML content
            else:
                print(f"Failed to fetch page {i}: Status code {response.status_code}")
        except Exception as e:
            print(f"Error fetching page {i}: {str(e)}")

    return pages_content

In [43]:
# Function to extract data from a single page
def extract_data(page):
    if page is None:
        return []

    products = []
    for product in page.select(".fy23-search-card.m-gallery-product-item-v2.J-search-card-wrapper.fy23-list-card.searchx-offer-item"):
        title_element = product.select_one(".search-card-e-title a") #URL LINK
        span_element = product.select_one(".search-card-e-title span") # NAME OF PRODUCT
        price_element = product.select_one(".search-card-e-price-main") # PRICE
        image_element = product.select_one(".search-card-e-slider div a img") #IMAGE
        rating_element = product.select_one(".search-card-e-review") #RATING

        if title_element and span_element and price_element and image_element:
            product_data = {
                "title": span_element.get_text(strip=False),
                "url": "https://" + title_element.get('href')[2:],  # Ensure 'url' format matches
                "price": price_element.get_text().split("-")[0].strip(),  # Strip extra whitespace
                "image": "https://" + image_element.get('src')[2:],  # Ensure 'image' format matches
                "rating": rating_element.get_text(strip=False) if rating_element else "NA"
            }
            products.append(product_data)

    return products


In [44]:
def remove_exact_duplicates(products):
    seen = set()
    unique_products = []
    for product in products:
        identifier = (product['title'], product['url'])
        if identifier not in seen:
            seen.add(identifier)
            unique_products.append(product)
    return unique_products


In [45]:
# Synonym dictionary
synonyms = {
    "mobile": ["phone", "cellphone", "smartphone", "handset"],
    "screen": ["display", "monitor", "panel"],
    "battery": ["power source", "energy cell", "power pack"],
    "charger": ["adapter", "power brick", "power adapter"],
    "case": ["cover", "shell", "protector"],
    "tools": ["kit", "toolkit", "equipment"],
    "repair": ["fix", "maintenance", "service"],
    "glass": ["screen protector", "tempered glass", "shield"],
    "cable": ["wire", "connector", "cord"],
    "headphones": ["earphones", "headset", "audio device"],
    "speaker": ["sound system", "audio output", "woofer"]
}

In [46]:
def remove_stop_words(index):
    stop_words = {'a', 'an', 'the', 'and', 'or', 'in', 'on', 'at', 'to'}
    # Create a new index excluding stop words
    filtered_index = {word: data for word, data in index.items() if word not in stop_words}
    return filtered_index

In [47]:
from nltk.stem import PorterStemmer

def apply_stemming(index):
    stemmer = PorterStemmer()
    stemmed_index = {}
    for word, data in index.items():
        # Skip stemming for alphanumeric tokens like s22
        if re.match(r'\w*\d+\w*', word):  # Matches alphanumeric tokens
            stemmed_word = word
        else:
            stemmed_word = stemmer.stem(word)

        if stemmed_word not in stemmed_index:
            stemmed_index[stemmed_word] = {"DocIDs": [], "details": []}
        # Merge the data for the stemmed word
        stemmed_index[stemmed_word]["DocIDs"].extend(data["DocIDs"])
        stemmed_index[stemmed_word]["details"].extend(data["details"])
    return stemmed_index

In [48]:
def build_index(database):
    transformed_index = []
    temp_index = {}

    for doc_id, product in enumerate(database):
        # Check if the product has a valid title
        if 'title' not in product or not product['title']:
            print(f"Skipping product at index {doc_id} due to missing title.")
            continue

        # Use title for indexing
        content = product['title']
        words = set(re.findall(r'\w+', content.lower()))

        for word in words:
            if word not in temp_index:
                temp_index[word] = {"DocIDs": [], "details": []}
            temp_index[word]["DocIDs"].append(doc_id)
            temp_index[word]["details"].append({
                "title": product["title"],
                "url": product["url"],
                "price": product["price"],
                "image": product["image"],
                "rating": product["rating"],
                "DocID": doc_id
            })



    # Remove stop words
    temp_index = remove_stop_words(temp_index)

    # Apply stemming
    temp_index = apply_stemming(temp_index)

    # Transform the temporary index into the required format
    for term, data in temp_index.items():
        transformed_index.append({
            "term": term,
            "DocIDs": data["DocIDs"],
            "details": data["details"]
        })


    return transformed_index


In [49]:
def expand_query_with_synonyms(query):
    query_words = re.findall(r'\w+', query.lower())
    expanded_words = set(query_words)

    for word in query_words:
        for key, synonym_list in synonyms.items():
            if word == key or word in synonym_list:
                expanded_words.update([key] + synonym_list)
    return list(expanded_words)

In [50]:
def search_index(index, query):
    # Expand the query using synonyms
    query_words = expand_query_with_synonyms(query)

    # Apply stemming to the query words
    stemmer = PorterStemmer()
    stemmed_query_words = [stemmer.stem(word) for word in query_words]

    results = {}

    for word in stemmed_query_words:
        # Search the index for each word
        for entry in index:
            if entry["term"] == word:
                # Iterate through titles in entry["details"]
                for detail in entry["details"]:
                    title = detail["title"].lower()
                    if word in title:  # Check if the word is in the title
                        unique_key = (detail["title"], detail["url"])  # Unique identifier
                        if unique_key not in results:
                            results[unique_key] = {
                                "title": detail["title"],
                                "url": detail["url"],
                                "price": detail["price"],
                                "image": detail["image"],
                                "rating": detail["rating"],
                                "match_count": 0  # Initialize match count
                            }
                        results[unique_key]["match_count"] += 1
                break  # Exit the loop since we found the term

    # Sort results by match count in descending order
    sorted_results = dict(sorted(results.items(), key=lambda item: item[1]["match_count"], reverse=True))
    return sorted_results

In [51]:
import difflib

def autocorrect_query(query, index_terms):
    query_words = re.findall(r'\w+', query.lower())  # Tokenize query
    corrected_words = []

    for word in query_words:
        # Find the closest match in the index
        closest_matches = difflib.get_close_matches(word, index_terms, n=1, cutoff=0.8)
        if closest_matches:
            corrected_words.append(closest_matches[0])  # Use the closest match
        else:
            corrected_words.append(word)  # If no match, keep the original word

    # Reconstruct the corrected query
    corrected_query = ' '.join(corrected_words)
    return corrected_query


In [52]:
all_pages_content = fetch_pages()

uses firebase realtime database:

In [53]:
all_products = []
for page_id, page_content in all_pages_content.items():
  products = extract_data(page_content)
  all_products.extend(products)
all_products = remove_exact_duplicates(all_products)

print(f"\nExtracted {len(all_products)} products in total.\n")

result = FBconn.post('/product_data/', all_products)
print(result)

print("Building index...")
index = build_index(all_products)

result = FBconn.post('/index/', index)
print(result)
"""# Upload all_products data
ref.document('all_products').set({'data': all_products})
print("All products uploaded to Firebase Realtime Database successfully!")

print(f"Index size: {len(json.dumps(index))} bytes")

# Chunk size in bytes (1 MB)
chunk_size = 1048576  # 1 MB limit per chunk
chunk_id = 0

def upload_chunks(index, chunk_size):
    current_chunk = []  # To hold the current chunk's items
    current_size = 0  # To track the current chunk's size

    # Go through each item in the index
    for item in index:
        # Calculate the size of the item when serialized to JSON
        item_size = len(json.dumps(item))  # Get the size in bytes

        # Check if adding this item would exceed the chunk size
        if current_size + item_size > chunk_size:
            # If so, upload the current chunk and reset
            upload_chunk(current_chunk)
            current_chunk = [item]  # Start new chunk with the current item
            current_size = item_size  # Reset the current size
        else:
            # Otherwise, add this item to the current chunk
            current_chunk.append(item)
            current_size += item_size  # Increase the chunk size

    # Upload the final chunk if it contains any items
    if current_chunk:
        upload_chunk(current_chunk)

def upload_chunk(chunk):
    global chunk_id
    # For simplicity, upload to Firestore under a unique document
    doc_ref = db.collection('product_data').document(f'index_part_{chunk_id}')
    doc_ref.set({'data': chunk})
    print(f"Uploaded a chunk with {len(chunk)} items.")
    chunk_id += 1

# Upload index data
upload_chunks(index, chunk_size)"""



Extracted 1451 products in total.

{'name': '-OGFciIqzBUHxzHvT4ig'}
Building index...
{'name': '-OGFcilJF3FOUTJD0Ww3'}


'# Upload all_products data\nref.document(\'all_products\').set({\'data\': all_products})\nprint("All products uploaded to Firebase Realtime Database successfully!")\n\nprint(f"Index size: {len(json.dumps(index))} bytes")\n\n# Chunk size in bytes (1 MB)\nchunk_size = 1048576  # 1 MB limit per chunk\nchunk_id = 0\n\ndef upload_chunks(index, chunk_size):\n    current_chunk = []  # To hold the current chunk\'s items\n    current_size = 0  # To track the current chunk\'s size\n\n    # Go through each item in the index\n    for item in index:\n        # Calculate the size of the item when serialized to JSON\n        item_size = len(json.dumps(item))  # Get the size in bytes\n\n        # Check if adding this item would exceed the chunk size\n        if current_size + item_size > chunk_size:\n            # If so, upload the current chunk and reset\n            upload_chunk(current_chunk)\n            current_chunk = [item]  # Start new chunk with the current item\n            current_size = i

#Patterns for Chatbot and initialization

In [54]:
patterns = [
    # General greetings and inquiries
    (r'(?i)(hi|hello|hey)', ['Hello! How can I assist you?', 'Hi there! Looking for something specific?']),
    (r'(?i)how are you\??', ['I\'m great, thank you! How can I help you today?']),
    (r'(?i)(.*)\bname\b(.*)', ['You can call me ChatGPT.', 'I go by the name ChatGPT.']),
    (r'(?i)(bye|goodbye)', ['Goodbye! Come back if you need more help.', 'See you later! Take care!']),
    (r'(?i)what can you do\??', ['I can chat with you and answer simple questions!', 'I\'m a chatbot here to assist you.']),
    (r'(?i)(thank you|thanks)', ['You\'re welcome!', 'No problem!', 'My pleasure!']),
    (r'(?i)how old are you\??', ['I\'m a computer program, so I don\'t have an age!', 'Age is just a number for chatbots!']),
    (r'(?i)(.*) weather (.*)', ['I cannot check the weather, but I hope it\'s nice where you are!']),
    (r'(?i)(.*) help (.*)', ['I can assist you with basic queries. How can I help you today?']),
    (r'(?i)(.*) created you\??', ['I was created by OpenAI.', 'I am a product of OpenAI.']),
    (r'(?i)(.*) your favorite (.*)', ['I don\'t have preferences, but I can help you with information about many things!']),

    # Alibaba general queries
    (r'(?i)\bwhat is Alibaba\b', [
        'Alibaba is a global e-commerce platform where you can find a variety of products, including repair tools, electronics, and more.',
        'Alibaba is a marketplace for businesses and consumers to find and purchase a wide range of products.']),
    (r'(?i)(.*)\bAlibaba products\b(.*)', [
        'Alibaba offers a wide range of products, including tools for mobile phone repair, industrial equipment, and more.',
        'You can find almost anything on Alibaba, from small items like screwdrivers to large machinery.']),
    (r'(?i)(.*)\bAlibaba services\b(.*)', [
        'Alibaba provides services like wholesale product sourcing, logistics, and secure payment solutions.',
        'Alibaba specializes in connecting buyers and suppliers globally, offering tools for efficient trade.']),
    (r'(?i)(.*)\bAlibaba help\b(.*)', [
        'You can visit Alibaba\'s support page for assistance with orders and account issues.',
        'For help with Alibaba, you can explore their FAQ section or contact customer support.']),

    # Phone-related queries
    (r'(?i)(.*)\bcommon phone searches\b(.*)', [
        'The most common phone-related searches include repair tools, screen savers, and soldering equipment.',
        'Popular searches include screen separators, precision screwdrivers, and multi-functional repair kits.']),
    (r'(?i)(.*)\bphone tools\b(.*)', [
        'For phone tools, you can explore items like [Precision Screwdriver Sets](https://www.alibaba.com/product-detail/117-in-1-Cell-Phone-Repair_1601141576453.html).',
        'You might find [LCD Screen Separators](https://www.alibaba.com/) and soldering kits helpful for phone repair.']),
    (r'(?i)(.*)\bmost popular product\b(.*)', [
        'Some of the most popular products include the 117-in-1 repair kit and LCD screen separators. You can check them on Alibaba.',
        'Popular products for phone repair include screen separators and magnetic screwdriver kits.']),
    (r'(?i)(.*)\bphone repair\b(.*)', [
        'For phone repair, you may need tools like soldering irons, screen protectors, and screen separators.',
        'You can find comprehensive phone repair kits on Alibaba. Check out options [here](https://www.alibaba.com).']),

    # General suggestions about products from index
    (r'(?i)(.*)\bscreen savers\b(.*)', [
        'Screen savers are easy to install. Clean your screen, align the saver, and gently apply it.',
        'Screen protectors can prevent scratches and cracks. Make sure to use a dust-free environment when applying.']),
    (r'(?i)(.*)\bsoldering station\b(.*)', [
        'Soldering stations are essential for phone repairs. Check out this [popular option](https://www.alibaba.com/product-detail/Electric-Soldering-Iron-With-Temperature-Control_1600891820879.html).',
        'For soldering stations, look for models with temperature control for precise work.']),
    (r'(?i)(.*)\bseparator machine\b(.*)', [
        'Screen separator machines help in repairing LCDs and removing broken screens. Here\'s an option: [Screen Separator](https://www.alibaba.com/).',
        'A separator machine can make screen replacement easier. Explore options on Alibaba.']),
]


In [55]:
import nltk
from nltk.chat.util import Chat, reflections

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('wordnet')
# Create a chatbot
chatbot = Chat(patterns, reflections)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#Imports for UI

In [78]:
from dash import Dash, dcc, html, Input, Output, State, ctx
import json
from nltk.stem import PorterStemmer
import re
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import numpy as np
from wordcloud import WordCloud

#Initialize the Dash app

In [79]:
app = Dash(__name__, suppress_callback_exceptions=True)

# Layout for the Search Page

In [80]:
search_page = html.Div(
    style={
        "min-height": "800px",
        "height": "auto",
        "width": "100%",
        "display": "flex",
        "flexDirection": "column",
        "alignItems": "center",
        "background": "#f8f9fa",
        "fontFamily": "Arial, sans-serif",
    },
    children=[
        # Header
        html.Div(
            html.H1(
                "Search Engine",
                style={
                    "fontSize": "2.5rem",
                    "color": "#343a40",
                    "margin": "20px 0",
                    "textShadow": "1px 1px 2px rgba(0, 0, 0, 0.2)",
                },
            ),
        ),
        # Navigation Links
        html.Div(
            children=[
                dcc.Link("Search Page", href="/", style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Manager Page", href="/manager", style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Chat Bot", href="/chatbot", style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Statistics Page", href="/statistics", style={"textDecoration": "none", "color": "#007bff"}),
            ],
            style={"marginBottom": "20px"},
        ),
        # Search Bar
        html.Div(
            style={"position": "relative", "width": "60%"},
            children=[
                dcc.Input(
                    id="search-query",
                    type="text",
                    placeholder="Type your query here...",
                    style={
                        "width": "100%",
                        "padding": "10px",
                        "fontSize": "1.1rem",
                        "borderRadius": "25px",
                        "border": "1px solid #ddd",
                        "boxShadow": "0px 2px 5px rgba(0,0,0,0.1)",
                    },
                ),
            ],
        ),
        # Search Button
        html.Button(
            "Search",
            id="search-button",
            style={
                "padding": "10px 30px",
                "fontSize": "1.1rem",
                "color": "white",
                "backgroundColor": "#007bff",
                "border": "none",
                "borderRadius": "25px",
                "marginTop": "15px",
                "cursor": "pointer",
                "boxShadow": "0px 2px 5px rgba(0,0,0,0.2)",
                "transition": "background-color 0.3s ease",
            },
        ),
        # Results Section with Loading Spinner
        dcc.Loading(
            id="loading-results",
            type="circle",
            fullscreen=False,
            children=[
                html.Div(
                    id="search-results",
                    style={
                        "width": "60%",
                        "marginTop": "20px",
                        "justify-self":"center",
                        "min-height":"400px",
                    },
                ),
            ],
        ),
        # Load More Button
        html.Button(
            "Load More",
            id="load-more-button",
            n_clicks=0,
            style={
                "padding": "10px 30px",
                "fontSize": "1.1rem",
                "color": "white",
                "backgroundColor": "#007bff",
                "border": "none",
                "borderRadius": "25px",
                "marginTop": "15px",
                "cursor": "pointer",
                "boxShadow": "0px 2px 5px rgba(0,0,0,0.2)",
                "transition": "background-color 0.3s ease",
            }
        ),
        dcc.Store(id="results-store", data={"results": [], "displayed": 0}),
    ],
)

# Layout for the Manager Page

In [81]:
manager_page = html.Div(
    style={
        "display": "flex",
        "flexDirection": "row",
        "justifyContent": "space-between",
        "alignItems": "flex-start",
        "background": "#f8f9fa",
        "fontFamily": "Arial, sans-serif",
        "padding": "20px",
        "height": "100vh",
        "overflow": "hidden",
    },
    children=[
        html.Div(
            style={"width": "50%", "max-width": "400px"},
            children=[
                html.Div(
                    html.H1(
                        "Index Manager",
                        style={
                            "fontSize": "2.5rem",
                            "color": "#343a40",
                            "marginBottom": "20px",
                            "textShadow": "1px 1px 2px rgba(0, 0, 0, 0.2)",
                        },
                    ),
                ),
                dcc.Link(
                    "Back to Search Page",
                    href="/",
                    style={"textDecoration": "none", "color": "#007bff"},
                ),
                # Term dropdown (existing)
                dcc.Dropdown(
                    id="term-dropdown",
                    options=[],
                    placeholder="Select a term",
                    style={"width": "100%", "marginBottom": "20px"},
                ),
                # Synonym management section
                html.Div(
                    style={"marginBottom": "20px", "padding": "15px", "border": "1px solid #dee2e6", "borderRadius": "5px"},
                    children=[
                        html.H3("Synonym Management", style={"marginBottom": "10px"}),
                        # Current synonyms dropdown
                        dcc.Dropdown(
                            id="synonym-dropdown",
                            multi=True,
                            placeholder="Current synonyms",
                            style={"width": "100%", "marginBottom": "10px"},
                        ),
                        # New synonym input
                        dcc.Input(
                            id="new-synonym-input",
                            type="text",
                            placeholder="Enter new synonym",
                            style={
                                "width": "94.5%",
                                "marginBottom": "10px",
                                "padding": "8px",
                                "borderRadius": "4px",
                                "border": "1px solid #ced4da",
                            },
                        ),
                        # Add/Remove synonym buttons
                        html.Div(
                            style={"display": "flex", "gap": "10px"},
                            children=[
                                html.Button(
                                    "Add Synonym",
                                    id="add-synonym-button",
                                    style={
                                        "padding": "8px 15px",
                                        "fontSize": "1rem",
                                        "color": "white",
                                        "backgroundColor": "#28a745",
                                        "border": "none",
                                        "borderRadius": "4px",
                                        "cursor": "pointer",
                                    },
                                ),
                                html.Button(
                                    "Remove Selected",
                                    id="remove-synonym-button",
                                    style={
                                        "padding": "8px 15px",
                                        "fontSize": "1rem",
                                        "color": "white",
                                        "backgroundColor": "#dc3545",
                                        "border": "none",
                                        "borderRadius": "4px",
                                        "cursor": "pointer",
                                    },
                                ),
                            ],
                        ),
                    ],
                ),
                # DocID management section (existing)
                dcc.Dropdown(
                    id="docid-dropdown",
                    multi=True,
                    placeholder="Select DocIDs to remove",
                    style={"width": "100%", "marginBottom": "20px"},
                ),
                html.Button(
                    "Remove DocIDs",
                    id="remove-docid-button",
                    style={
                        "padding": "10px 30px",
                        "fontSize": "1.1rem",
                        "color": "white",
                        "backgroundColor": "#dc3545",
                        "border": "none",
                        "borderRadius": "25px",
                        "marginBottom": "20px",
                        "cursor": "pointer",
                        "boxShadow": "0px 2px 5px rgba(0,0,0,0.2)",
                        "transition": "background-color 0.3s ease",
                    },
                ),
                html.Div(
                    id="manager-output",
                    style={"width": "100%", "marginTop": "20px"},
                ),
            ],
        ),
        dcc.Loading(
            id="loading-json-viewer",
            type="circle",
            fullscreen=False,
            style={"margin-right": "30vw"},
            children=[
                html.Div(
                    id="json-viewer",
                    style={
                        "width": "auto",
                        "margin-left": "40px",
                        "height": "90vh",
                        "overflowY": "scroll",
                        "padding": "10px",
                        "background": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0px 4px 10px rgba(0, 0, 0, 0.1)",
                        "fontFamily": "Courier New, monospace",
                        "fontSize": "0.9rem",
                        "color": "#212529",
                        "border": "1px solid #ddd",
                    },
                ),
            ],
        ),
    ],
)

# Layout for the Statistics Page


In [82]:
statistics_page = html.Div(
    style={
        "min-height": "1200px",
        "height": "auto",
        "width": "100%",
        "display": "flex",
        "flexDirection": "column",
        "alignItems": "center",
        "background": "#f8f9fa",
        "fontFamily": "Arial, sans-serif",
    },
    children=[
        # Header
        html.Div(
            html.H1(
                "Statistics",
                style={
                    "fontSize": "2.5rem",
                    "color": "#343a40",
                    "margin": "20px 0",
                    "textShadow": "1px 1px 2px rgba(0, 0, 0, 0.2)",
                },
            ),
        ),
        # Navigation Links
        html.Div(
            children=[
                dcc.Link("Back to Search Page", href="/", style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Back to Manager Page", href="/manager", style={"textDecoration": "none", "color": "#007bff"}),
            ],
            style={"marginBottom": "20px"},
        ),
        # Loading Spinner for Statistics Content
        dcc.Loading(
            id="loading-statistics",
            type="circle",
            children=[
                html.Div(id="statistics-content", style={"width": "60%", "marginTop": "20px", "justify-self":"center"}),
            ],
        ),
    ],
)

#Layout for Chatbot Page

In [83]:
chatbot_page = html.Div(
    style={
        "min-height": "800px",
        "height": "auto",
        "width": "100%",
        "display": "flex",
        "flexDirection": "column",
        "alignItems": "center",
        "background": "#f8f9fa",
        "fontFamily": "Arial, sans-serif",
        "position": "relative",
        "paddingBottom": "100px",  # Space for input at bottom
    },
    children=[
        html.Div(
            html.H1(
                "Chatbot",
                style={
                    "fontSize": "2.5rem",
                    "color": "#343a40",
                    "margin": "20px 0",
                    "textShadow": "1px 1px 2px rgba(0, 0, 0, 0.2)",
                },
            )
        ),
        # Navigation Links
        html.Div(
            children=[
                dcc.Link("Search Page", href="/",
                         style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Manager Page", href="/manager",
                         style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Chat Bot", href="/chatbot",
                         style={"marginRight": "20px", "textDecoration": "none", "color": "#007bff"}),
                dcc.Link("Statistics Page", href="/statistics", style={"textDecoration": "none", "color": "#007bff"}),
            ],
            style={"marginBottom": "20px"},
        ),
        html.Div(
            id="chat-history",
            style={
                "width": "60%",
                "marginTop": "20px",
                "background": "#ffffff",
                "borderRadius": "10px",
                "padding": "15px",
                "height": "auto",
                # "overflowY": "scroll",
                "boxShadow": "0px 4px 10px rgba(0,0,0,0.1)",
                "display": "flex",
                "flexDirection": "column",
            },
            children=[

            ],
        ),
        html.Div(
            style={
                "position": "fixed",
                "bottom": "0",
                "width": "100%",
                "backgroundColor": "#f8f9fa",
                "padding": "10px 0",
                "boxShadow": "0px -2px 5px rgba(0,0,0,0.1)",
                "display": "flex",
                "justifyContent": "center",
            },
            children=[
                html.Div(
                    style={"width": "60%", "display": "flex", "alignItems": "center"},
                    children=[
                        dcc.Input(
                            id="chat-input",
                            type="text",
                            placeholder="Type your message here...",
                            style={
                                "flex": "1",
                                "padding": "10px",
                                "fontSize": "1.1rem",
                                "borderRadius": "25px",
                                "border": "1px solid #007bff",
                                "boxShadow": "0px 2px 5px rgba(0,0,0,0.1)",
                                "marginRight": "10px",
                            },
                        ),
                        html.Button(
                            "Send",
                            id="send-button",
                            style={
                                "padding": "10px 20px",
                                "fontSize": "1.1rem",
                                "color": "white",
                                "backgroundColor": "#007bff",
                                "border": "none",
                                "borderRadius": "25px",
                                "cursor": "pointer",
                                "boxShadow": "0px 2px 5px rgba(0,0,0,0.2)",
                                "transition": "background-color 0.3s ease",
                            },
                        ),
                    ],
                ),
            ],
        ),
    ],
)


# App Layout

In [84]:
app.layout = html.Div(
    [
        dcc.Location(id="url", refresh=False),
        html.Div(id="page-content"),
    ]
)

#Unifed Callback for Redirecting

In [85]:
@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")]
)
def display_page(pathname):
    if pathname == "/manager":
        return manager_page
    elif pathname == "/statistics":
        return statistics_page
    elif pathname == "/chatbot":
        return chatbot_page
    else:
        return search_page

# Unified Callback for Search and Load More

In [86]:
from dash import ctx

@app.callback(
    [Output("search-results", "children"), Output("results-store", "data"), Output("load-more-button", "style")],
    [Input("search-button", "n_clicks"), Input("load-more-button", "n_clicks")],
    [State("search-query", "value"), State("results-store", "data")],
)
def update_results(search_clicks, load_more_clicks, query, results_store):
    ITEMS_PER_PAGE = 5
    triggered_id = ctx.triggered_id

    # Original "Load More" button style
    original_button_style = {
        "padding": "10px 30px",
        "fontSize": "1.1rem",
        "color": "white",
        "backgroundColor": "#007bff",
        "border": "none",
        "borderRadius": "25px",
        "margin": "15px",
        "cursor": "pointer",
        "boxShadow": "0px 2px 5px rgba(0,0,0,0.2)",
        "transition": "background-color 0.3s ease",
    }

    if triggered_id == "search-button":
        if not query:
            return html.Div("Please enter a search query.", style={"color": "red"}), {"results": [], "displayed": 0}, {
                **original_button_style,
                "display": "none",
            }

        results = search_index(index, query)
        suggestion_message = ""

        if not results:
            # Autocorrect the query if no results found
            corrected_query = autocorrect_query(query, [entry["term"] for entry in index])
            if corrected_query != query:
                suggestion_message = f"Didn't find '{query}', did you mean '{corrected_query}'?"
                results = search_index(index, corrected_query)

        if not results:
            return html.Div("No results found.", style={"color": "red"}), {"results": [], "displayed": 0}, {
                **original_button_style,
                "display": "none",
            }

        displayed = ITEMS_PER_PAGE
        result_cards = generate_result_cards(list(results.values())[:displayed])

        results_store = {"results": list(results.values()), "displayed": displayed}

        load_more_style = {
            **original_button_style,
            "display": "block" if len(results) > displayed else "none",
        }

        return html.Div([html.P(suggestion_message, style={"color": "blue"}), *result_cards]), results_store, load_more_style

    elif triggered_id == "load-more-button":
        results = results_store["results"]
        displayed = results_store["displayed"]
        new_displayed = displayed + ITEMS_PER_PAGE

        additional_results = results[displayed:new_displayed]
        existing_cards = generate_result_cards(results[:displayed])
        new_cards = generate_result_cards(additional_results)

        results_store["displayed"] = new_displayed

        load_more_style = {
            **original_button_style,
            "display": "block" if new_displayed < len(results) else "none",
        }

        return existing_cards + new_cards, results_store, load_more_style

    return html.Div(), results_store, {**original_button_style, "display": "none"}
def generate_result_cards(results):
    return [
        html.Div(
            style={
                "border": "1px solid #ddd",
                "borderRadius": "10px",
                "padding": "15px",
                "marginBottom": "15px",
                "boxShadow": "0px 2px 5px rgba(0,0,0,0.1)",
                "display": "flex",
                "alignItems": "center",
            },
            children=[
                html.Img(
                    src=detail.get("image", ""),
                    style={"width": "80px", "height": "80px", "borderRadius": "5px", "marginRight": "15px"},
                ),
                html.Div(
                    children=[
                        html.H3(detail.get("title", ""), style={"margin": "0 0 10px 0"}),
                        html.P(f"Price: {detail.get('price', 'N/A')}", style={"margin": "0 0 5px 0"}),
                        html.P(f"Rating: {detail.get('rating', 'N/A')}", style={"margin": "0 0 5px 0"}),
                        html.A("View Product", href=detail.get("url", "#"), target="_blank", style={"color": "#007bff"}),
                    ],
                ),
            ],
        )
        for detail in results
    ]


# Unified Callback to manage index actions and refresh DocIDs dropdown

In [87]:
@app.callback(
    Output("json-viewer", "children",allow_duplicate=True),
    [Input("url", "pathname")],
    prevent_initial_call=True
)
def load_json_viewer_initial(pathname):
    if pathname == "/manager":
        # Simulate delay for loading large JSON data
        import time
        time.sleep(20)  # Simulate data processing time

        # Load and format the large JSON index
        return html.Pre(
            json.dumps(index, indent=2),
            style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
        )
    return ""
@app.callback(
    Output("term-dropdown", "options"),
    [Input("url", "pathname")],
)
def populate_term_dropdown(pathname):
    if pathname == "/manager":
        return [{"label": term["term"], "value": term["term"]} for term in index]
    return []

# New callback for synonym management
@app.callback(
    [
        Output("synonym-dropdown", "options"),
        Output("synonym-dropdown", "value"),
        Output("new-synonym-input", "value"),
        Output("manager-output", "children"),
    ],
    [
        Input("term-dropdown", "value"),
        Input("add-synonym-button", "n_clicks"),
        Input("remove-synonym-button", "n_clicks"),
    ],
    [
        State("new-synonym-input", "value"),
        State("synonym-dropdown", "value"),
    ],
)
def manage_synonyms(term, add_clicks, remove_clicks, new_synonym, selected_synonyms):
    if not term:
        return [], [], "", "Please select a term."

    # Initialize outputs
    synonym_options = []
    output_message = ""
    new_synonym_value = ""

    # Find all related synonyms (including the key term)
    expanded_words = set()
    related_key = None

    # First, check if the term is a key in synonyms
    if term in synonyms:
        expanded_words.update([term] + synonyms[term])
        related_key = term
    else:
        # If not a key, check if it's a value in any synonym list
        for key, synonym_list in synonyms.items():
            if term.lower() in [s.lower() for s in synonym_list]:
                expanded_words.update([key] + synonym_list)
                expanded_words.remove(term)
                related_key = key
                break

    if ctx.triggered_id == "add-synonym-button" and new_synonym:
        if not related_key:
            # If no existing group found, create new entry with term as key
            synonyms[term] = [new_synonym]
            expanded_words.update([term, new_synonym])
            related_key = term
            output_message = f"Created new synonym group with '{term}' as key and added '{new_synonym}'"
        else:
            # Add to existing group
            if new_synonym not in synonyms[related_key]:
                synonyms[related_key].append(new_synonym)
                expanded_words.add(new_synonym)
                output_message = f"Added synonym '{new_synonym}' to group with key '{related_key}'"
            else:
                output_message = f"Synonym '{new_synonym}' already exists in this group"
        new_synonym_value = ""  # Clear input field

    elif ctx.triggered_id == "remove-synonym-button" and selected_synonyms:
        if related_key:
            removed_synonyms = []
            for syn in selected_synonyms:
                if syn in synonyms[related_key]:
                    synonyms[related_key].remove(syn)
                    expanded_words.remove(syn)
                    removed_synonyms.append(syn)

            if removed_synonyms:
                output_message = f"Removed synonyms: {', '.join(removed_synonyms)}"
                # If all synonyms removed, consider removing the key itself
                if not synonyms[related_key]:
                    del synonyms[related_key]
                    output_message += f"\nRemoved empty synonym group for '{related_key}'"
            else:
                output_message = "No synonyms were removed"
        else:
            output_message = "No synonym group found for this term"

    # Create options for dropdown, marking the key term
    synonym_options = []
    if related_key:
        # Add the key term first, marked as (KEY)
        synonym_options.append({"label": f"{related_key} (KEY)", "value": related_key})
        # Add all synonyms
        synonym_options.extend([
            {"label": syn, "value": syn}
            for syn in sorted(expanded_words - {related_key})  # Exclude key term as it's already added
        ])

    return synonym_options, None, new_synonym_value, output_message

# Modified existing callback to include synonym updates
@app.callback(
    [
        Output("manager-output", "children", allow_duplicate=True),
        Output("docid-dropdown", "options"),
        Output("docid-dropdown", "value"),
        Output("json-viewer", "children"),
    ],
    [Input("term-dropdown", "value"), Input("remove-docid-button", "n_clicks")],
    [State("docid-dropdown", "value")],
    prevent_initial_call=True,
)
def manage_index_and_update_viewer(term, remove_clicks, docids):
    if not term:
        return (
            "Please select a term.",
            [],
            [],
            html.Pre(
                json.dumps(index, indent=2),
                style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
            ),
        )

    if ctx.triggered_id == "term-dropdown":
        for entry in index:
            if entry["term"] == term:
                return (
                    "",
                    [{"label": docid, "value": docid} for docid in entry["DocIDs"]],
                    [],
                    html.Pre(
                        json.dumps(index, indent=2),
                        style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
                    ),
                )

    if ctx.triggered_id == "remove-docid-button":
        if not docids:
            return (
                "Please select DocIDs to remove.",
                [],
                [],
                html.Pre(
                    json.dumps(index, indent=2),
                    style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
                ),
            )

        docid_list = docids
        for entry in index:
            if entry["term"] == term:
                entry["DocIDs"] = [docid for docid in entry["DocIDs"] if docid not in docid_list]
                entry["details"] = [detail for detail in entry["details"] if detail["DocID"] not in docid_list]
                break

        updated_options = [{"label": docid, "value": docid} for docid in entry["DocIDs"]]
        return (
            f"Removed DocIDs {docid_list} from term '{term}'.",
            updated_options,
            [],
            html.Pre(
                json.dumps(index, indent=2),
                style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
            ),
        )

    return (
        "No action performed.",
        [],
        [],
        html.Pre(
            json.dumps(index, indent=2),
            style={"whiteSpace": "pre-wrap", "wordBreak": "break-word"},
        ),
    )

#Unifed Callback for Statistics Page

In [88]:
@app.callback(
    Output("statistics-content", "children"),
    [Input("url", "pathname")]
)
def generate_statistics(pathname):
    if pathname == "/statistics":
        word_counts = {entry["term"]: len(entry["DocIDs"]) for entry in index}
        sorted_word_counts = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
        top10=sorted_word_counts[:10]

        if sorted_word_counts:
            words, counts = zip(*top10)
            fig, ax = plt.subplots(2, 1, figsize=(12, 24))

            # Top 10 Indexed Words
            ax[0].bar(words, counts, alpha=0.7)
            ax[0].set_title("Top 10 Indexed Words", fontsize=13)
            ax[0].set_xlabel("Words", fontsize=12)
            ax[0].set_ylabel("Frequency", fontsize=12)
            ax[0].grid(alpha=0.3)
            ax[0].tick_params(axis='x', rotation=45)

            # Word Cloud
            wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(dict(sorted_word_counts))
            ax[1].imshow(wordcloud, interpolation='bilinear')
            ax[1].axis('off')
            ax[1].set_title("Word Cloud of Indexed Words", fontsize=13)

            plt.tight_layout()
            buf = BytesIO()
            plt.savefig(buf, format="png")
            plt.close(fig)
            buf.seek(0)
            encoded_image = base64.b64encode(buf.read()).decode("utf-8")
            return html.Div([
                html.Img(src=f"data:image/png;base64,{encoded_image}", style={"width": "100%"}),
            ])
    return html.Div()


#Unifed Callback for Chatbot Actions

In [89]:
from dash.dependencies import Input, Output, State
@app.callback(
    [Output("chat-history", "children"),  # Update chat history
     Output("chat-input", "value")],  # Clear the input field
    [Input("send-button", "n_clicks")],
    [State("chat-input", "value"), State("chat-history", "children")]
)
def update_chat(n_clicks, user_input, chat_history):
    if n_clicks and user_input:
        # Format the user's message as a right-aligned chat bubble
        user_bubble = html.Div(
            children=[
                html.Span("You", style={"fontSize": "0.8rem", "color": "#6c757d", "marginLeft": "10px"}),
                html.Div(
                    user_input,
                    style={
                        "alignSelf": "flex-end",
                        "maxWidth": "70%",
                        "margin": "5px 0",
                        "padding": "10px",
                        "backgroundColor": "#007bff",
                        "color": "white",
                        "borderRadius": "15px 15px 0 15px",
                        "boxShadow": "0px 2px 5px rgba(0,0,0,0.1)",
                        "fontSize": "1rem",
                    },
                ),
            ],
            style={"display": "flex", "flexDirection": "column", "alignItems": "flex-end"},
        )

        # Get chatbot's response and format it as a left-aligned chat bubble
        bot_response_text = chatbot.respond(user_input.lower())
        if not bot_response_text:
            bot_response_text = "I'm sorry, I don't understand that."

        bot_bubble = html.Div(
            children=[
                html.Span("Chatto", style={"fontSize": "0.8rem", "color": "#6c757d", "marginLeft": "10px"}),
                html.Div(
                    bot_response_text,
                    style={
                        "alignSelf": "flex-start",
                        "maxWidth": "70%",
                        "margin": "5px 0",
                        "padding": "10px",
                        "backgroundColor": "#5FED75",
                        "color": "#343a40",
                        "borderRadius": "15px 15px 15px 0",
                        "boxShadow": "0px 2px 5px rgba(0,0,0,0.1)",
                        "fontSize": "1rem",
                    },
                ),
            ],
            style={"display": "flex", "flexDirection": "column", "alignItems": "flex-start"},
        )

        # Update chat history with both user and chatbot messages
        chat_history = chat_history or []
        chat_history.append(user_bubble)
        chat_history.append(bot_bubble)

        # Return updated chat history and clear the input field
        return chat_history, ""

    # Return current chat history and keep the input field unchanged
    return chat_history, ""




# Run the app

In [90]:
app.run_server(debug=True, port=8050)

<IPython.core.display.Javascript object>