# Backend: Crawler

---



### Imports


In [None]:
%%capture
# installations and imports
import requests
from bs4 import BeautifulSoup
import re
from collections import defaultdict
import nltk
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')
from nltk.corpus import stopwords
from collections import deque

!pip install firebase
from firebase import firebase

### **Upload Object Service**



In [None]:
class UploadObjectService:
  """Accumulates per-term page data during a crawl and pushes it to Firebase."""

  def __init__(self, postprocess_service):
    self.postprocess_service = postprocess_service
    # Maps each term to {"links": [{"link", "count", "score"}, ...]}.
    self.OBJECT_TO_UPLOAD = {}

  def update_upload_object(self,dictionary,url):
    """
    Merges one page's term dictionary into the upload object.

    Args:
        dictionary (dict): {term: [count, score]} for a single page
        url (str): URL of the page the terms were extracted from
    """
    for term, count_and_score in dictionary.items():
      entry = {
          "link": url,
          "count": count_and_score[0],
          "score": count_and_score[1],
      }
      # First sighting of a term creates its bucket; later pages extend it.
      bucket = self.OBJECT_TO_UPLOAD.setdefault(term, {"links": []})
      bucket['links'].append(entry)

  def upload_object(self):
    """
    Post-processes the accumulated data and writes it to Firebase.

    Three nodes are written in order: "statistics", "metadata" and "dbDocs".
    Writing stops at the first falsy result. The upload object is reset only
    when every write succeeds. Errors are reported with print(), not raised.
    """
    try:
      processed, statistics_of_crawler, metadata = self.postprocess_service.postprocess(self.OBJECT_TO_UPLOAD)
      self.OBJECT_TO_UPLOAD = processed

      payloads = (
          ("statistics", statistics_of_crawler),
          ("metadata", metadata),
          ("dbDocs", self.OBJECT_TO_UPLOAD),
      )
      # all() over a generator short-circuits exactly like a chained `and`.
      uploaded = all(FBconn.put('/', node, payload) for node, payload in payloads)

      if not uploaded:
        print("Upload failed - likely a permissions error")
        return
      print("Upload Successful")
      self.OBJECT_TO_UPLOAD = {}
    except requests.exceptions.RequestException as e:
      print(f"Network error during upload attempt: {e}")
    except Exception as e:
      print(f"Firebase error during upload attempt: {e}")

### **Dictionary Creation Service**

In [None]:
class DictionaryCreationService:
  """Builds the per-page {word: [count, score]} dictionary used for indexing."""

  def create_dictionary(self, words, word_count):
      """
      Creates a dictionary of words with their counts and position-based scores.

      Args:
          words (list): List of preprocessed words from the page
          word_count (int): Total number of words on the page

      Returns:
          dict: Dictionary with words as keys and [count, score] as values
      """
      count_dictionary = self.create_dictionary_with_indexes(words)
      return self.transform_indexes_in_dict_to_scores(count_dictionary, word_count)

  def create_dictionary_with_indexes(self, words):
      """
      Creates initial dictionary tracking word counts and positions.

      Args:
          words (list): List of preprocessed words

      Returns:
          dict: Plain dictionary with format {word: [count, [positions]]}
      """
      # A plain dict is used on purpose: the values are lists, so a
      # defaultdict(int) would insert a bogus 0 on any missing-key lookup.
      dictionary = {}
      for index, word in enumerate(words):
        entry = dictionary.get(word)
        if entry is None:
          # First occurrence: start the count and the position list.
          dictionary[word] = [1, [index]]
        else:
          entry[0] += 1
          entry[1].append(index)
      return dictionary

  def transform_indexes_in_dict_to_scores(self, dictionary_of_page, total_words):
      """
      Converts word position indexes into relevance scores.
      Earlier positions receive higher scores.

      The dictionary is modified in place and also returned.

      Args:
          dictionary_of_page (dict): Dictionary with word counts and positions
          total_words (int): Total word count in the page; must be non-zero
              whenever the dictionary contains at least one position

      Returns:
          dict: Dictionary with format {word: [count, score]}
      """
      for entry in dictionary_of_page.values():
          positions = entry[1]
          # Each occurrence contributes 1 - (index / total_words), so a word
          # at the top of the page scores close to 1 and one at the end close to 0.
          total_score = 0
          for index in positions:
              total_score += 1 - (index / total_words)

          # Replace the index list with the rounded total score
          entry[1] = round(total_score, 3)

      return dictionary_of_page

### **Pre Process Service**

In [None]:
class PreprocessService:
  """Turns raw page text into a filtered list of lowercase index terms."""

  def preprocess_text(self, text):
    """
    Cleans, tokenizes and filters the text of one page.

    Args:
      text: string
    Returns:
      list of preprocessed words
    """
    text = self.clean_text_string(text)

    # Tokenize the cleaned text with nltk
    words = nltk.word_tokenize(text)
    # Remove irrelevant strings
    words = self.remove_stop_words(words)
    words = self.remove_single_chars(words)
    words = self.remove_number_strings(words)

    return words

  def clean_text_string(self, text):
    """
    Normalizes raw text through several regex-based cleaning steps.

    Args:
        text (str): Raw text content from webpage
    Returns:
        str: Lowercased text with punctuation replaced by spaces, camelCase /
             PascalCase words split, and digit runs separated from letters
    """
    # Replace punctuation with spaces
    # EXAMPLE: "hi, myName 0is0" --> "hi  myName 0is0"
    text = re.sub(r'[^\w\s]', ' ', text)

    # Split camelCase and PascalCase at word boundaries only, so acronyms stay
    # whole ("HTMLParser" --> "HTML Parser"). Splitting before every capital
    # would turn "API" into single letters that are later filtered out.
    # EXAMPLE: "hi  myName 0is0" --> "hi  my Name 0is0"
    text = re.sub(r'(?<=[^\W\d_A-Z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', ' ', text)

    # Convert to lowercase
    # EXAMPLE: "hi  my Name 0is0" --> "hi  my name 0is0"
    text = text.lower()

    # Split between digits and letters (both directions)
    # EXAMPLE: "hi  my name 0is0" --> "hi  my name 0 is 0"
    text = re.sub(r'(\d+)([a-z])', r'\1 \2', text)  # digits followed by letters
    text = re.sub(r'([a-z])(\d+)', r'\1 \2', text)  # letters followed by digits

    return text


  def remove_stop_words(self, words):
    """Returns the words that are not in nltk's English stop-word list."""
    STOP_WORDS = set(stopwords.words('english')) # Set of english stop words
    return [word for word in words if word not in STOP_WORDS]

  def remove_single_chars(self, words):
    """Returns the words that are longer than one character."""
    return [word for word in words if len(word) > 1]

  def remove_number_strings(self, words):
    """Returns the words that are not made up entirely of digits."""
    return [word for word in words if not word.isdigit()]

### **Post Process Service**

In [None]:
class PostprocessService:
  """Finalizes the crawled term index and derives crawl-wide statistics."""

  def __init__(self):
    self.action_terms = [
          "build", "deploy", "create", "configure", "integrate", "manage", "scale",
          "test", "update", "connect", "provision", "launch", "clone", "allocate",
          "enable", "query", "retrieve", "replicate", "execute", "optimize", "monitor",
          "share", "collaborate", "contribute", "invite", "access"
      ]
    self.restrictive_terms = [
          "prohibit", "terminate", "restrict", "limit", "revoke", "suspend", "forbid",
          "deny", "disallow", "cease", "throttle", "cap", "exceed", "disable", "block",
          "prevent", "restrictapi", "limitbandwidth", "resourcecap", "liable",
          "nontransferable", "breach", "violation", "infringement", "indemnify"
      ]


  def postprocess(self, OBJECT_TO_UPLOAD):
    """
    Processes final data and generates crawler statistics.

    Each term entry is modified in place: its links are sorted by score
    (highest first) and 'pageCount' / 'wordCount' are added. An empty
    input is valid and yields all-zero statistics.

    Args:
        OBJECT_TO_UPLOAD (dict): Collected crawler data

    Returns:
        tuple: (Processed data, Statistics dictionary, Metadata for statistics)
    """
    for item in OBJECT_TO_UPLOAD.values():
        # Sort links by score
        item['links'] = sorted(item['links'], key=lambda x: x['score'], reverse=True)
        item['pageCount'] = len(item['links'])
        item['wordCount'] = sum(link_data['count'] for link_data in item['links'])

    metadata = {
        "wordCount": sum(data['wordCount'] for data in OBJECT_TO_UPLOAD.values()),
        "termCount": len(OBJECT_TO_UPLOAD)
    }
    top_ten = self.get_top_terms(OBJECT_TO_UPLOAD)
    avg = self.calculate_avg(OBJECT_TO_UPLOAD, metadata["wordCount"], metadata["termCount"])
    act_vs_rest, actionCount, restrictionCount = self.calculate_action_restriction_percentages(OBJECT_TO_UPLOAD, metadata["wordCount"])
    metadata["actionCount"] = actionCount
    metadata["restrictionCount"] = restrictionCount
    # NOTE: the 'top_ten' key holds get_top_terms' default of 100 entries.
    statistics_of_crawler = {'top_ten': top_ten, 'avg': avg, 'action_vs_restriction': act_vs_rest}
    return OBJECT_TO_UPLOAD, statistics_of_crawler, metadata

  def get_top_terms(self, OBJECT_TO_UPLOAD, n=100):
      """
      Gets most frequent terms from crawled content.

      Args:
          OBJECT_TO_UPLOAD (dict): Processed crawler data
          n (int): Number of top terms to return

      Returns:
          list: Top terms with their counts [(term, count),...]
      """
      # Sort dictionary items by wordCount in descending order
      sorted_terms = sorted(OBJECT_TO_UPLOAD.items(), key=lambda x: x[1]['wordCount'], reverse=True)

      # Take first n items and format them as (term, count) tuples
      return [(term, data['wordCount']) for term, data in sorted_terms[:n]]

  def calculate_avg(self, OBJECT_TO_UPLOAD, wordCount, termCount):
    """
    Calculates average word repetitions across all terms.

    Args:
        OBJECT_TO_UPLOAD (dict): Processed crawler data (unused)
        wordCount (num): number of overall words found
        termCount (num): number of overall terms found

    Returns:
        float: Average repetitions per word, or 0.0 when there are no terms
    """
    # An empty crawl has no terms; report 0.0 instead of dividing by zero.
    if termCount == 0:
      return 0.0
    return round(wordCount/termCount,2)

  def calculate_action_restriction_percentages(self, OBJECT_TO_UPLOAD, wordCount):
      """
      Calculates percentages of action vs restrictive terms.

      Args:
          OBJECT_TO_UPLOAD (dict): Processed crawler data
          wordCount (num): number of overall words found

      Returns:
          tuple: ([action_percentage, restriction_percentage],
                  action_count, restriction_count)
      """
      # The zero-word case returns the same 3-part shape as the normal path,
      # because postprocess() unpacks three values.
      if wordCount == 0:
          return [0.0, 0.0], 0, 0

      # Convert lists to sets for faster lookup
      action_set = set(self.action_terms)
      restrictive_set = set(self.restrictive_terms)

      action_count = 0
      restriction_count = 0

      for term, value in OBJECT_TO_UPLOAD.items():
          if term in action_set:
              action_count += value['wordCount']
          if term in restrictive_set:
              restriction_count += value['wordCount']

      action_per = round((action_count/wordCount) * 100, 2)
      restriction_per = round((restriction_count/wordCount) * 100, 2)

      return [action_per, restriction_per], action_count, restriction_count

### **Crawler**

In [None]:
def crawl(base_url):
  """
  Crawls vercel.com breadth-first from base_url and uploads the term index.

  Every page that returns HTTP 200 is preprocessed into a term dictionary and
  merged into the upload object. Site-relative links are queued for later
  visits. The crawl stops when the queue is empty or LIMIT_PAGES URLs have
  been visited, and the collected data is then uploaded.

  Reads the module-level LIMIT_PAGES global. Errors for a single page are
  printed and the crawl moves on to the next link.

  Args:
      base_url (str): URL of the first page to visit
  """
  from urllib.parse import urldefrag  # function-scope import keeps the cell self-contained

  print("Web Crawler Deployed")

  #         Initialize constant variables:
  SITE_ROOT = "https://vercel.com"
  REQUEST_TIMEOUT_SECONDS = 10  # without a timeout one stalled server hangs the whole crawl
  VISITED_LINKS = set()  # saving a set of already visited links
  DISALLOWED_LINKS = [
      'https://vercel.com/api', 'https://vercel.com/oauth', 'https://vercel.com/confirm', 'https://vercel.com//notifications',
      'https://vercel.com/old-browser.html', 'https://vercel.com/docs/concepts/payments-and-billing/usage-based-pro-plan',
      '/docs/concepts/projects/sensitive-environment-variables'
  ] # provided from https://vercel.com/robots.txt
  # NOTE(review): the '//notifications' entry has a double slash; confirm against robots.txt.
  # Queued links are always absolute, so host-less entries are anchored to the
  # site root; otherwise they could never match.
  DISALLOWED_PREFIXES = tuple(
      entry if entry.startswith("http") else SITE_ROOT + entry
      for entry in DISALLOWED_LINKS
  )
  NEXT_LINKS = deque()    # a queue of next links to check
  NEXT_LINKS.appendleft(base_url)
  QUEUED_LINKS = {base_url}  # every link ever queued, so each URL is enqueued once

  #         Initialize Services:
  preprocess_service = PreprocessService()
  dictionary_creation_service = DictionaryCreationService()
  postprocess_service = PostprocessService()
  upload_object_service = UploadObjectService(postprocess_service)

  while NEXT_LINKS and len(VISITED_LINKS) < LIMIT_PAGES:
    url = NEXT_LINKS.pop()
    if url in VISITED_LINKS:
      continue
    VISITED_LINKS.add(url)
    print(f"{len(VISITED_LINKS)}) Current url: {url}")    # debug logs

    try:
      response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
      if response.status_code != 200:
        print(f"Error got response code: {response.status_code}")
        continue    # if a page doesn't yield a successful response continue to the next link

      #             process response and add to the upload object
      soup = BeautifulSoup(response.text, "html.parser")
      link_text = soup.get_text()
      parsed_text = preprocess_service.preprocess_text(link_text)
      dictionary = dictionary_creation_service.create_dictionary(parsed_text, len(parsed_text))
      upload_object_service.update_upload_object(dictionary,url)

      #              add new links to link queue
      for link in soup.find_all("a", href=True):
        href = link['href']
        if not href.startswith('/'):
          continue
        # Drop "#fragment" so anchors into one page are not indexed as separate pages.
        new_link = urldefrag(SITE_ROOT + href).url
        if new_link in QUEUED_LINKS:
          continue
        # robots.txt rules are path prefixes, so everything below a disallowed path is skipped too.
        if new_link.startswith(DISALLOWED_PREFIXES):
          continue
        QUEUED_LINKS.add(new_link)
        NEXT_LINKS.appendleft(new_link)
    except Exception as e:
      print(f"Failed to crawl: {e}")

  #         postprocess and upload data
  upload_object_service.upload_object()
  print("Finish!")

### **Crawler main code: Crawl main page and upload to Firebase**

In [None]:
# main

# Global variable initialization
# LIMIT_PAGES: crawl() stops once this many URLs have been visited.
LIMIT_PAGES = 300
# BASE_URL: intended seed page for the crawl (see the commented-out call below).
BASE_URL = "https://vercel.com/home"
# FBconn: Firebase connection that UploadObjectService.upload_object writes through.
# NOTE(review): the second argument (None) presumably means "no authentication" - confirm.
FBconn = firebase.FirebaseApplication('https://task3db-c8039-default-rtdb.firebaseio.com/',None)

# start crawler in first page
# The calls below are disabled on purpose: uncomment crawl(...) to rebuild the
# index, or the delete(...) calls to wipe the three nodes the crawler writes.
#crawl(BASE_URL)
#FBconn.delete('/', 'dbDocs')
#FBconn.delete('/', 'statistics')
#FBconn.delete('/', 'metadata')

# Frontend: CSS + Imports + Firebase connection

---



**Installations**

In [26]:
%%capture
!pip install firebase

**Imports**

In [27]:
from IPython.display import display, HTML, clear_output, Javascript
import json
from firebase import firebase
from time import sleep
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from google.colab import output
import google.generativeai as genai
import base64
import html

import requests
from bs4 import BeautifulSoup
import re
from collections import defaultdict
import nltk
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')
from nltk.corpus import stopwords
from collections import deque


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


**Firebase**

In [28]:
# Firebase URL
FIREBASE_URL = 'https://task3db-c8039-default-rtdb.firebaseio.com/'
# Connection object used by the front end to reach the database.
FBconn = firebase.FirebaseApplication(FIREBASE_URL,None)

# Mock users database
# validate_login() looks a user up by "email", compares "password", and uses
# "name" and "role" for the welcome message and the dashboard.
# NOTE(review): passwords are stored and compared in plain text; acceptable
# only as mock data, never for real accounts.
mock_users_db = [
    {"email": "123", "name": "Admin User", "password": "123", "role": "admin"},
    {"email": "234", "name": "Customer User", "password": "234", "role": "customer"},
    {"email": "BillDing76@gmail.com", "name": "Bill Ding", "password": "billding76", "role": "customer"},
    {"email": "admin", "name": "Administrator", "password": "admin", "role": "admin"},
    {"email": "user", "name": "Customer", "password": "user", "role": "customer"}
]

### **CSS**

In [29]:
# URL to the tiger image - using URL
tiger_image_url = "https://t3.ftcdn.net/jpg/02/84/87/88/240_F_284878821_E2nyf8wkMPLWnnn0D7DdgYAv7ngrCQTH.jpg"

# Shared page header. Render it with TEAM_HEADER.format(tiger_image_url=...).
# This must be a raw string: the onerror handler relies on \' reaching the
# browser as an escaped quote inside its JavaScript string. In a normal string
# Python collapses \' to ', which ends the JS string early and breaks the
# fallback image.
TEAM_HEADER = r"""
<div class="team-header">
    <h1>Team Tiger</h1>
    <img src="{tiger_image_url}"
         alt="Tiger Logo"
         onerror="this.src='data:image/svg+xml;charset=UTF-8,<svg xmlns=\'http://www.w3.org/2000/svg\' width=\'80\' height=\'80\'><rect width=\'80\' height=\'80\' fill=\'%23f0f0f0\'/><text x=\'50%\' y=\'50%\' font-size=\'12\' text-anchor=\'middle\' alignment-baseline=\'middle\' fill=\'%23999\'>Tiger Image</text></svg>';">
    <p>Vercel Crawler Project</p>
</div>
"""

def get_css_styles():
    """Return the shared <style> block embedded in the <head> of each generated page.

    The stylesheet covers the base layout, the team header, the page
    containers, form inputs, the colour-coded action buttons, search results,
    the chat and AI-response areas, error messages, and a small-screen media
    query.

    Returns:
        str: An HTML <style>...</style> element as a string.
    """
    return """
    <style>
/* Base styles - Strive for consistency (Rule #1) */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    font-family: 'Arial', sans-serif;
}

body {
    background: #f5f5f5;
    min-height: 100vh;
    display: flex;
    justify-content: center;
    align-items: center;
    color: #333; /* Ensure readable text contrast */
}

/* Headings and Text - Reduce short-term memory load (Rule #6) */
h2 {
    color: #2c3e50;
    margin-bottom: 25px;
    text-align: center;
    font-size: 24px;
    font-weight: 600;
}

h3 {
    color: #34495e;
    margin-bottom: 15px;
    font-size: 20px;
    font-weight: 500;
    padding-bottom: 8px;
    border-bottom: 2px solid #f0f2f5;
}

p {
    color: #2c3e50;
    line-height: 1.6;
    margin-bottom: 15px;
}

/* Team Header - Offer informative feedback (Rule #2) */
.team-header {
    text-align: center;
    padding: 25px;
    background: linear-gradient(135deg, #2c3e50, #3498db);
    color: white;
    margin-bottom: 30px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.team-header h1 {
    font-size: 32px;
    margin-bottom: 15px;
    color: white;
}

.team-header img {
    width: 80px;
    height: 80px;
    border-radius: 50%;
    margin: 15px 0;
    border: 3px solid white;
}

.team-header p {
    font-size: 18px;
    color: white;
    opacity: 0.9;
}

/* Containers - Design dialog to yield closure (Rule #3) */
.login-container, .dashboard-container, .search-container, .statistics-container {
    background: white;
    max-width: 800px;
    width: 90%;
    margin: 20px;
    padding: 30px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

/* Forms and Inputs - Support internal locus of control (Rule #4) */
input {
    width: 100%;
    padding: 12px;
    margin: 12px 0;
    border: 2px solid #e0e0e0;
    border-radius: 6px;
    font-size: 16px;
    transition: all 0.3s ease;
}

input:focus {
    outline: none;
    border-color: #3498db;
    box-shadow: 0 0 5px rgba(52, 152, 219, 0.3);
}

/* Buttons - Prevent errors and offer simple error handling (Rules #5 & #7) */
.button-container {
    display: grid;
    grid-gap: 10px;
    margin: 20px 0;
}

button {
    width: 100%;
    padding: 12px;
    border: none;
    border-radius: 6px;
    cursor: pointer;
    font-size: 16px;
    font-weight: 600;
    transition: all 0.3s ease;
    color: white;
    background-color: #3498db; /* Default button color */
}

button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

/* Action-specific buttons with meaningful colors */
.login-button {
    background-color: #2ecc71; /* Green for positive actions */
}

.login-button:hover {
    background-color: #27ae60;
}

.logout-button {
    background-color: #e74c3c; /* Red for terminating actions */
}

.logout-button:hover {
    background-color: #c0392b;
}

.search-button {
    background-color: #3498db; /* Blue for navigation */
}

.search-button:hover {
    background-color: #2980b9;
}

.statistics-button {
    background-color: #9b59b6; /* Purple for analysis */
}

.statistics-button:hover {
    background-color: #8e44ad;
}

.back-button {
    background-color: #95a5a6; /* Grey for secondary actions */
}

.back-button:hover {
    background-color: #7f8c8d;
}

.delete-button {
    background-color: #e74c3c; /* Red for destructive actions */
}

.delete-button:hover {
    background-color: #c0392b;
}

.update-button {
    background-color: #f39c12; /* Orange for modifications */
}

.update-button:hover {
    background-color: #d35400;
}

/* Search Results - Permit easy reversal of actions (Rule #8) */
.search-box {
    display: flex;
    gap: 10px;
    margin-bottom: 20px;
}

.search-results {
    margin-top: 20px;
    max-height: 500px;
    overflow-y: auto;
    padding-right: 10px;
}

.search-result-item {
    background: #f8f9fa;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 6px;
    border-left: 4px solid #3498db;
    transition: all 0.3s ease;
    position: relative;
}

.search-result-item:hover {
    transform: translateX(5px);
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.search-result-item .delete-button,
.search-result-item .update-button {
    width: auto;
    padding: 8px 16px;
    margin-top: 10px;
}

.search-result-item .button-group {
    display: flex;
    justify-content: space-between;
    margin-top: 15px;
    gap: 20px;
}

/* Chat Interface */
.chat-container {
    max-height: 400px;
    overflow-y: auto;
    border: 1px solid #ddd;
    border-radius: 8px;
    padding: 15px;
    margin: 20px 0;
    background-color: white;
}

.user-message, .assistant-message {
    padding: 12px;
    margin: 8px 0;
    border-radius: 8px;
    max-width: 80%;
    position: relative;
}

.user-message {
    background-color: #3498db;
    color: white;
    margin-left: auto;
}

.assistant-message {
    background-color: #f0f2f5;
    color: #2c3e50;
    margin-right: auto;
}

/* AI Response Container */
.ai-response-container {
    margin: 20px;
    padding: 15px;
    border: 1px solid #ddd;
    border-radius: 8px;
    background-color: #f8f9fa;
    max-width: 800px;
    margin-left: auto;
    margin-right: auto;
}

.ai-response {
    padding: 15px;
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

/* Error Messages */
.error {
    color: #e74c3c;
    font-weight: bold;
    padding: 10px;
    margin: 10px 0;
    background-color: #fde8e8;
    border-radius: 4px;
    border-left: 4px solid #e74c3c;
}

/* Responsive Design */
@media (max-width: 480px) {
    .login-container, .dashboard-container, .search-container {
        margin: 10px;
        padding: 20px;
    }

    h2 {
        font-size: 20px;
    }

    input, button {
        padding: 10px;
        font-size: 14px;
    }

    .search-box {
        flex-direction: column;
    }

    .search-submit-button {
        width: 100%;
    }
}
    </style>
    """

# Interesting Feature: AI bot

---



In [30]:
import os

# SECURITY: the Gemini API key must not live in the notebook source. It is read
# from the GOOGLE_API_KEY environment variable instead. Any key that was
# previously committed here should be treated as leaked and rotated.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    print("Warning: GOOGLE_API_KEY is not set; AI bot requests will fail until it is configured.")

genai.configure(api_key=_gemini_api_key)
# Model handle used by setup_chatbot_interface to generate replies.
model = genai.GenerativeModel("gemini-1.5-flash")

## Chat AI

In [31]:
from typing import List, Dict

# Add this class to manage chat history
class ChatHistory:
    """Rolling log of chat messages stored as {"role", "content"} dicts."""

    def __init__(self):
        self.history = []

    def add_message(self, role: str, content: str):
        """Append a message, first dropping the oldest one if six are already stored."""
        entry = {"role": role, "content": content}
        if len(self.history) >= 6:
            del self.history[0]
        self.history.append(entry)

    def get_history(self) -> List[Dict[str, str]]:
        """Return the live message list, oldest first (not a copy)."""
        return self.history

    def clear_history(self):
        """Forget all messages by rebinding to a fresh list."""
        self.history = []

# Single shared history used by the chatbot functions in this notebook
chat_history = ChatHistory()

def setup_chatbot_interface(user_message: str) -> str:
    """Send a user message to the model and return the chat log as HTML.

    The message is appended to the global chat history. The contents of all
    stored messages are joined into one prompt, and the model's reply is
    appended to the history as well.

    Args:
        user_message: Text typed by the user.

    Returns:
        One <div> per stored message (class "user-message" or
        "assistant-message"), or a single <div class='error'> describing the
        failure if anything raises.
    """
    # Function-scope import so a notebook variable named `html` cannot shadow it.
    from html import escape

    try:
        # Add user message to history
        chat_history.add_message("user", user_message)

        # Create context from chat history
        context = " ".join(msg["content"] for msg in chat_history.get_history())

        # Generate response using context
        response = model.generate_content(context)

        # Add AI response to history
        chat_history.add_message("assistant", response.text)

        # Render the full chat history. Message text comes from the user and
        # the model, so &, < and > are escaped to stop it being read as markup.
        # Quotes are left alone (quote=False) to keep the text readable.
        chat_parts = []
        for msg in chat_history.get_history():
            role_class = "user-message" if msg["role"] == "user" else "assistant-message"
            safe_content = escape(msg["content"], quote=False)
            chat_parts.append(
                f'<div class="{role_class}"><strong>{msg["role"].title()}:</strong> {safe_content}</div>'
            )
        return "".join(chat_parts)

    except Exception as e:
        return f"<div class='error'>Error in chat: {escape(str(e), quote=False)}</div>"

def clear_chat_history():
    """Empty the global chat history and return a confirmation message."""
    confirmation = "Chat history cleared."
    chat_history.clear_history()
    return confirmation

def clean_chatBot_response(response: str) -> str:
    """
    Strips escape residue and markdown markers from a chatbot response.

    Steps, in order:
    - Literal backslash-n sequences (two characters) are removed.
    - Every remaining backslash, single quote and asterisk is removed.
    - HTML entities are decoded (e.g. &amp; -> &).

    Real newline characters are left untouched.

    Args:
        response (str): The raw response string.

    Returns:
        str: The cleaned response.
    """
    # Drop escaped newlines first, while their backslash is still present
    without_newline_escapes = response.replace('\\n', '')

    # One pass deletes all backslashes, apostrophes and asterisks
    unwanted_chars = str.maketrans('', '', "\\'*")
    stripped = without_newline_escapes.translate(unwanted_chars)

    # Decode any HTML entities last
    return html.unescape(stripped)



# Frontend: Login Page

---



In [32]:

# Define the Python function to handle login
def validate_login(email, password):
    """
    Validate login credentials against the mock users database.

    On success the dashboard for the user's role is shown. In both cases a
    status message is pushed to the page's 'output' element.

    Args:
        email (str): Value typed into the email field
        password (str): Value typed into the password field
    """
    # Search for the user in the mock database
    user = next((u for u in mock_users_db if u['email'] == email), None)

    if user and user["password"] == password:
        message = f"<span>Login successful! Welcome, {user['name']} ({user['role']}).</span>"

        # Show dashboard after successful login
        create_dashboard_page(user['role'])

    else:
        message = "<span>Invalid email or password. Please try again.</span>"

    # Send the message to JavaScript for display. The element is looked up
    # defensively because create_dashboard_page (defined elsewhere) may have
    # replaced the login page by now. The arrow function keeps the local name
    # out of the page's global scope across repeated calls.
    display(Javascript(f"""
        (() => {{
            const outputElement = document.getElementById('output');
            if (outputElement) {{
                outputElement.innerHTML = `{message}`;
            }}
        }})();
        """))

# Register the function explicitly for JavaScript invocation
output.register_callback('validate_login', validate_login)


# HTML + JavaScript code for the login page
def create_login_page():
    """Clear the cell output and render the login page.

    The chatbot history is reset first. The page's Login button passes the
    entered email and password to the registered 'validate_login' callback.
    """
    #To clear the history of chatbot
    if chat_history:
      chat_history.clear_history()
    login_page_html = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        {get_css_styles()}
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Login</title>
    </head>
    <body>
        {TEAM_HEADER.format(tiger_image_url=tiger_image_url)}
        <div class="login-container">
            <h2>Login</h2>
            <form id="loginForm" onsubmit="return false;">
                <input type="text" id="email" placeholder="Email" required />
                <input type="password" id="password" placeholder="Password" required />
                <button type="button" onclick="submitLogin()">Login</button>
            </form>
            <div id="output" class="result"></div>
        </div>
        <script>
        function submitLogin() {{
          const email = document.getElementById('email').value;
          const password = document.getElementById('password').value;

          // Send the data to the Python backend using Colab's method.
          // The credentials are deliberately not written to the console.
          google.colab.kernel.invokeFunction('validate_login', [email, password], {{}});
          }}
        </script>
    </body>
    </html>
    """

    clear_output()
    display(HTML(login_page_html))

# Let page JavaScript return to the login page by name
output.register_callback('create_login_page', create_login_page)

---

# Frontend: Search page

---



## search html

In [33]:
# Register the chatbot helpers under these names so that page JavaScript can
# call them through google.colab.kernel.invokeFunction.
output.register_callback('setup_chatbot_interface', setup_chatbot_interface)
output.register_callback('clean_chatBot_response', clean_chatBot_response)

def create_search_page(role):
    """Render the search page in the notebook output.

    The page offers a term search (backed by the ``fetch_search_results``
    callback), an AI question box (``setup_chatbot_interface`` followed by
    ``clean_chatBot_response``) and a Back button that reloads the dashboard.

    Args:
        role: Role string baked into the page's JavaScript. When it equals
            ``'admin'`` and the query is a single word, every result gets
            Delete/Update buttons and the result list gets a Delete Term
            button.
    """
    search_page_html = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        {get_css_styles()}
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Search</title>
    </head>
    <body>
        {TEAM_HEADER.format(tiger_image_url=tiger_image_url)}
        <div class="search-container">
            <h2>Search Page</h2>
            <div>
              <p>
                The crawler analyzes vercel.com's content by mapping terms across its pages. It then processes this information to provide statistical insights and powers a search feature that returns ranked, relevant results for any term query.
                </p>
                </div>
            <div class="search-box">
                <input type="text" id="search-input" placeholder="Enter search term...">
                <button class="search-submit-button" onclick="performSearch('{role}')">Search</button>
            </div>
            <div id="search-results" class="search-results"></div>
            <div id="ai-response-container" class="ai-response-container"></div>
            <div class="button-container">
                <input type="text" id="ai-input" placeholder="How can I help you today">
                <button class="bot-button" onclick="ai_bot()">Ai response</button>
            </div>
            <div class="button-container">
                <button class="back-button" onclick="goBack('{role}')">Back</button>
            </div>
        </div>
        <script>

        //SEARCH
        async function performSearch(role) {{
            const query = document.getElementById('search-input').value.toLowerCase();
            try {{
                const response = await google.colab.kernel.invokeFunction(
                    'fetch_search_results',
                    [query]
                );

                // The reply is the Python repr of a JSON string: strip the surrounding quotes
                var jsonString = response.data['text/plain'];
                if (jsonString.startsWith("'") && jsonString.endsWith("'")) {{
                    jsonString = jsonString.slice(1, -1);
                }}

                const list_result = JSON.parse(jsonString);
                console.log("JSON Object:", list_result);

                const resultsContainer = document.getElementById('search-results');
                resultsContainer.innerHTML = '';

                //FOR ADMIN ONLY
                //Add Delete Term button at the top (only for admin)
                if ('{role}' === 'admin' && query.split(' ').length <= 1) {{  // Check if the user is an admin
                    const deleteTermButton = document.createElement('button');
                    deleteTermButton.textContent = 'Delete Term';
                    deleteTermButton.classList.add('styled-button', 'delete-button');
                    deleteTermButton.addEventListener('click', async () => {{
                        try {{
                            await google.colab.kernel.invokeFunction('delete_term', [query], {{}});
                            console.log('Deleting term:', query);
                            resultsContainer.innerHTML = '<p>All results for this term have been deleted.</p>';
                        }} catch (error) {{
                            console.error('Error deleting term:', error);
                        }}
                    }});

                    // Append the Delete Term button to the results container
                    resultsContainer.appendChild(deleteTermButton);
                }}

                // Reset and populate the results container
                if (list_result.links && list_result.links.length > 0) {{
                    list_result.links.forEach((linkData, index) => {{
                        const resultDiv = document.createElement('div');
                        resultDiv.classList.add('search-result-item');
                        resultDiv.innerHTML = `
                            <p style="display: none;"><strong>Index:</strong> ${{index}}</p>
                            <p><strong>Link:</strong> <a href="${{linkData.link}}" target="_blank">${{linkData.link}}</a></p>
                            <p><strong>Count:</strong> ${{linkData.count}}</p>
                            <p><strong>Score:</strong> ${{linkData.score}}</p>
                        `;

                        // Add buttons for admin only
                        if ('{role}' === 'admin' && query.split(' ').length <= 1) {{
                            const deleteButton = document.createElement('button');
                            deleteButton.textContent = 'Delete';
                            deleteButton.classList.add('styled-button', 'delete-button');
                            deleteButton.addEventListener('click', () => {{
                                // Resolve the position at click time: earlier deletions shift
                                // the stored list, so the render-time index can be stale.
                                const position = list_result.links.indexOf(linkData);
                                console.log("Index of the current link:", position);
                                google.colab.kernel.invokeFunction('delete_data', [query, position], {{}}).then(() => {{
                                    // Keep the local list in step with the database so the
                                    // remaining rows keep resolving to the right position.
                                    const current = list_result.links.indexOf(linkData);
                                    if (current !== -1) {{
                                        list_result.links.splice(current, 1);
                                    }}
                                    resultsContainer.removeChild(resultDiv);
                                }});
                            }});

                            const updateButton = document.createElement('button');
                            updateButton.textContent = 'Update';
                            updateButton.classList.add('update-button');
                            updateButton.addEventListener('click', async () => {{
                                let user_input;
                                do {{
                                    user_input = prompt("Please enter a new count value (must be greater than 0):");
                                    if (user_input === null) return; // Cancel input
                                    user_input = parseInt(user_input, 10);
                                }} while (isNaN(user_input) || user_input <= 0);
                                // Same click-time lookup as Delete, for the same reason
                                const position = list_result.links.indexOf(linkData);
                                await google.colab.kernel.invokeFunction('update_data', [query, position, "count", user_input], {{}});
                                performSearch(role);
                            }});

                            const buttonGroup = document.createElement('div');
                            buttonGroup.className = 'button-group';
                            buttonGroup.appendChild(deleteButton);
                            buttonGroup.appendChild(updateButton);
                            resultDiv.appendChild(buttonGroup);
                        }}

                        resultsContainer.appendChild(resultDiv);
                    }});
                }} else {{
                    resultsContainer.innerHTML = '<p>No results found.</p>';
                }}

            }} catch (error) {{
                console.error('Error fetching search results:', error);
            }}
        }}

        //AI BOT
        async function ai_bot() {{
        const query = document.getElementById('ai-input').value;

        try {{
            const response = await google.colab.kernel.invokeFunction(
                'setup_chatbot_interface',
                [query]
            );

            // Clear the input field after sending the question
            document.getElementById('ai-input').value = '';

            // Get the response text from the Python function
            let aiResponse = response.data['text/plain'];

            const cleanResponse = await google.colab.kernel.invokeFunction(
            'clean_chatBot_response',
            [aiResponse]
            );

            aiResponse = cleanResponse.data['text/plain'];
            console.log(aiResponse)
            // Display the response in the dedicated div
            const responseContainer = document.getElementById('ai-response-container');
            responseContainer.innerHTML = `<div class="ai-response"><h3>AI Response:</h3><p>${{aiResponse}}</p></div>`;

            console.log('AI chat completed');

        }} catch (error) {{
            console.error('Error in AI chat:', error);
            const responseContainer = document.getElementById('ai-response-container');
            responseContainer.innerHTML = '<p class="error">Error getting AI response</p>';
        }}
      }}

      //GO BACK
        function goBack(role) {{
        google.colab.kernel.invokeFunction('create_dashboard_page', ['{role}']);
        }}
        </script>
      </body>
    </html>
    """
    clear_output()
    display(HTML(search_page_html))

## Search functions

In [34]:

def fetch_search_results(query):
    """
    Look up a search query in Firebase and return the outcome as a JSON string.

    A one-word query returns the stored ``dbDocs/<word>`` node unchanged. A
    multi-word query keeps only the links shared by every word, sums the
    per-word scores and counts, orders the links by descending score and adds
    the resulting ``wordCount`` and ``pageCount``. An empty query yields
    ``{"error": "multiple_words"}``; an unknown word or any exception yields
    ``{"error": "no_results"}``.
    """
    def as_json(payload):
        return json.dumps(payload)

    try:
        tokens = query.lower().strip().split()
        if not tokens:
            return as_json({"error": "multiple_words"})

        # One word: hand back the stored node as-is
        if len(tokens) == 1:
            node = FBconn.get(f'dbDocs/{tokens[0]}', None)
            return as_json({"error": "no_results"} if node is None else node)

        # Several words: every one of them must exist in the index
        nodes_by_token = {}
        for token in tokens:
            node = FBconn.get(f'dbDocs/{token}', None)
            if node is None:
                return as_json({"error": "no_results"})
            nodes_by_token[token] = node

        first_node, *other_nodes = nodes_by_token.values()

        # Seed the running totals from the first word's links
        shared = {
            entry['link']: {'score': entry['score'], 'count': entry['count']}
            for entry in first_node['links']
        }

        # Intersect with each further word, accumulating score and count
        for node in other_nodes:
            if not (node and 'links' in node):
                continue
            candidate = {
                entry['link']: (entry['score'], entry['count'])
                for entry in node['links']
            }
            kept = {}
            for link, totals in shared.items():
                if link not in candidate:
                    continue
                extra_score, extra_count = candidate[link]
                totals['score'] += extra_score
                totals['count'] += extra_count
                kept[link] = totals
            shared = kept

        ranked = [
            {'link': link, 'score': totals['score'], 'count': totals['count']}
            for link, totals in shared.items()
        ]
        ranked.sort(key=lambda item: item['score'], reverse=True)  # best score first

        return as_json({
            'links': ranked,
            'wordCount': sum(totals['count'] for totals in shared.values()),
            'pageCount': len(shared),
        })

    except Exception as e:
        print(f"Error in fetch_search_results: {str(e)}")
        return json.dumps({"error": "no_results"})


def delete_term(term):
    """Delete a term's whole ``dbDocs`` entry and adjust the aggregates.

    The term node is removed first; only when that succeeds are the global
    metadata (word count reduced by the term's ``wordCount``, term count by
    one) and the statistics updated, so a failed deletion leaves the
    aggregates untouched.

    Args:
        term: Key of the term under ``dbDocs``.

    Returns:
        dict: ``{"success": True, "message": ...}`` on success, otherwise
        ``{"success": False, "error": ...}``.
    """
    try:
        # An empty key would address the whole dbDocs node rather than one term
        if not term:
            return {"success": False, "error": "No term given."}

        term_data = FBconn.get(f"/dbDocs/{term}", None)
        if term_data is None:
            return {"success": False, "error": f"Term '{term}' not found."}

        removed_word_count = term_data["wordCount"]

        delete_result = FBconn.delete("/dbDocs/", term)
        if delete_result is not None:  # Firebase returns None on successful operation
            return {
                "success": False,
                "error": "Database deletion failed."
            }

        update_metadata(-removed_word_count, -1, term)
        update_db_statistics(term, -removed_word_count)
        return {
            "success": True,
            "message": f"Successfully deleted all data for term: {term}"
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }

def delete_data(term, index):
    """Remove one link from a term's entry and adjust the aggregates.

    When the removed link was the term's last one, the whole term is deleted
    through ``delete_term`` and that call's result is returned.

    Args:
        term: Key of the term under ``dbDocs``.
        index: Position of the link inside the term's ``links`` list
            (anything ``int()`` accepts).

    Returns:
        dict: ``{"success": True, ...}`` with the removed link and the new
        counts, the result of ``delete_term`` for a last-link removal, or
        ``{"success": False, "error": ...}``.
    """
    path = f"dbDocs/{term}"  # Path for the specific term in the database

    try:
        try:
            index = int(index)
        except (TypeError, ValueError):
            return {"success": False, "error": "Invalid index."}

        current_data = FBconn.get(f"/{path}", None)
        if not current_data or "links" not in current_data:
            return {"success": False, "error": "No links found."}

        links = current_data["links"]
        if not 0 <= index < len(links):
            return {"success": False, "error": "Invalid index."}

        removed_link = links.pop(index)
        current_data["pageCount"] -= 1
        current_data["wordCount"] -= removed_link["count"]

        # Nothing left for this term: drop the node (delete_term also handles
        # the metadata/statistics bookkeeping) and report its outcome.
        if current_data["pageCount"] == 0:
            return delete_term(term)

        # Persist the term first so the aggregates only move once the link is gone
        update_result = FBconn.patch(f"/{path}", {
            "links": links,
            "pageCount": current_data["pageCount"],
            "wordCount": current_data["wordCount"]
        })
        if not update_result:
            return {"success": False, "error": "Database update failed."}

        update_metadata(-removed_link["count"], 0, term)
        update_db_statistics(term, -removed_link["count"])
        return {
            "success": True,
            "removed_link": removed_link,
            "updated_page_count": current_data["pageCount"],
            "updated_word_count": current_data["wordCount"]
        }
    except Exception as e:
        return {"success": False, "error": str(e)}




def update_data(term, id, updated_field, value):
    """Set a new count on one link of a term and adjust the aggregates.

    Args:
        term: Key of the term under ``dbDocs``.
        id: Position of the link inside the term's ``links`` list.
        updated_field: Accepted for the JavaScript caller's signature; the
            function always updates the link's ``count``.
        value: New count (anything ``int()`` accepts).

    Returns:
        dict: ``{"success": True, "word_count_change": ...}`` on success,
        otherwise ``{"success": False, "error": ...}``.
    """
    try:
        value = int(value)
        link_index = int(id)
        path = f"dbDocs/{term}"
        current_data = FBconn.get(f"/{path}", None)
        if not current_data or "links" not in current_data:
            return {"success": False, "error": "No links found."}

        links = current_data["links"]
        # Reject out-of-range positions; a negative one would otherwise
        # silently edit a link counted from the end of the list.
        if not 0 <= link_index < len(links):
            return {"success": False, "error": "Invalid index."}

        word_count_diff = value - int(links[link_index]["count"])
        links[link_index]["count"] = value
        current_data["wordCount"] += word_count_diff

        # Persist the term first so the aggregates only move once the link changed
        FBconn.patch(path, {
            "links": links,
            "pageCount": current_data["pageCount"],
            "wordCount": current_data["wordCount"]
        })

        update_metadata(word_count_diff, 0, term)
        update_db_statistics(term, word_count_diff)
        return {"success": True, "word_count_change": word_count_diff}
    except Exception as e:
        print(f"Error updating data for ID {id}: {e}")
        return {"success": False, "error": str(e)}

def update_metadata(word_count_change, term_count_change, term):
    """Apply word/term count deltas to the global ``metadata`` node.

    ``actionCount`` / ``restrictionCount`` move by ``word_count_change`` as
    well when ``term`` belongs to the respective word list below.

    Args:
        word_count_change: Delta added to ``wordCount``.
        term_count_change: Delta added to ``termCount``.
        term: The term whose occurrences changed.

    Returns:
        dict: ``{"success": True, "message": ..., "updated_metadata": ...}``
        where ``updated_metadata`` reflects all four written counters, or
        ``{"success": False, "error": ...}``.
    """
    metadata_path = "metadata"  # Path for the metadata in the database
    action_terms = frozenset((
      "build", "deploy", "create", "configure", "integrate", "manage", "scale",
      "test", "update", "connect", "provision", "launch", "clone", "allocate",
      "enable", "query", "retrieve", "replicate", "execute", "optimize", "monitor",
      "share", "collaborate", "contribute", "invite", "access"
    ))
    restrictive_terms = frozenset((
      "prohibit", "terminate", "restrict", "limit", "revoke", "suspend", "forbid",
      "deny", "disallow", "cease", "throttle", "cap", "exceed", "disable", "block",
      "prevent", "restrictapi", "limitbandwidth", "resourcecap", "liable",
      "nontransferable", "breach", "violation", "infringement", "indemnify"
    ))

    try:
        # Fetch the current metadata
        metadata = FBconn.get(f"/{metadata_path}", None)
        if metadata is None:
            return {"success": False, "error": "Metadata not found."}

        # Adjust every counter on the fetched copy, so the snapshot returned
        # to the caller matches what is written to Firebase.
        metadata["wordCount"] += word_count_change
        metadata["termCount"] += term_count_change
        if term in action_terms:
            metadata["actionCount"] += word_count_change
        if term in restrictive_terms:
            metadata["restrictionCount"] += word_count_change

        # Update only the four counters in Firebase
        update_result = FBconn.patch(f"/{metadata_path}", {
            "wordCount": metadata["wordCount"],
            "termCount": metadata["termCount"],
            "actionCount": metadata["actionCount"],
            "restrictionCount": metadata["restrictionCount"]
        })

        if not update_result:
            return {"success": False, "error": "Metadata update failed."}

        return {
            "success": True,
            "message": "Metadata updated successfully.",
            "updated_metadata": metadata
        }

    except Exception as e:
        return {"success": False, "error": str(e)}


def update_db_statistics(term, word_count_change):
    """Refresh the ``statistics`` node after a term's word count changed.

    Updates the term's entry in ``statistics/top_ten`` when the term is
    listed there, then recomputes the ``avg`` and ``action_vs_restriction``
    fields. The derived fields are recomputed even when the term is not part
    of ``top_ten``, because they depend on the metadata, not on that list.

    Args:
        term: The term whose occurrences changed.
        word_count_change: Delta applied to the term's count.

    Returns:
        dict: ``{"success": True, "message": ...}``, or the first failing
        step's ``{"success": False, "error": ...}``.
    """
    statistics_path = "statistics"  # Path to the statistics in Firebase
    try:
        # Fetch the current top-terms list from the statistics node
        top_ten_data = FBconn.get(f"/{statistics_path}/top_ten", None)

        # A term missing from the list is not an error here: there is simply
        # nothing to rewrite in top_ten.
        top_ten_result = update_top_ten(top_ten_data or [], term, word_count_change)
        if top_ten_result["success"]:
            FBconn.patch(f"/{statistics_path}", {"top_ten": top_ten_result["top_ten_data"]})

        for derived_result in (
            update_avg_in_statistics(),
            update_action_vs_restriction_in_statistics(),
        ):
            if not derived_result["success"]:
                return derived_result

        return {"success": True, "message": "Statistics updated successfully."}

    except Exception as e:
        return {"success": False, "error": str(e)}



def update_top_ten(top_ten_data, term, word_count_change):
    """Apply a count delta to ``term`` inside the top-terms list, in place.

    Each entry is a two-item ``[term, count]`` list. The first entry matching
    ``term`` gets ``word_count_change`` added to its count and is dropped when
    the count falls to zero or below; the list is then re-sorted by count,
    highest first.

    Returns:
        dict: ``{"success": True, "top_ten_data": <the same list>}``, or
        ``{"success": False, "error": ...}`` when the term is not listed
        (the list is left untouched in that case).
    """
    position = next(
        (i for i, entry in enumerate(top_ten_data) if entry[0] == term),
        None,
    )
    if position is None:
        return {"success": False, "error": f"Term '{term}' not found in top_ten."}

    matched = top_ten_data[position]
    matched[1] += word_count_change
    if matched[1] <= 0:
        # The term no longer occurs: take it out of the ranking
        top_ten_data.pop(position)

    top_ten_data.sort(key=lambda entry: entry[1], reverse=True)
    return {"success": True, "top_ten_data": top_ten_data}

def update_avg_in_statistics():
    """Recompute ``statistics['avg']`` from the metadata counters.

    The average is ``wordCount / termCount`` rounded to two decimals (0 when
    ``termCount`` is not positive). The whole statistics node is read, its
    ``avg`` field replaced, and the node patched back.

    Returns:
        dict: ``{"success": True, "message": ..., "updated_statistics": ...}``
        or ``{"success": False, "error": ...}``.
    """
    try:
        meta = FBconn.get("/metadata", None)

        term_total = meta["termCount"]
        # Guard against division by zero when no terms are indexed
        mean = round(meta["wordCount"] / term_total if term_total > 0 else 0, 2)

        stats = FBconn.get("/statistics", None)
        stats["avg"] = mean

        if not FBconn.patch("/statistics", stats):
            return {"success": False, "error": "Failed to update average in statistics."}

        return {
            "success": True,
            "message": "Average (avg) successfully updated in statistics.",
            "updated_statistics": stats
        }

    except Exception as e:
        return {"success": False, "error": str(e)}

def update_action_vs_restriction_in_statistics():
    """Recompute ``statistics['action_vs_restriction']`` from the metadata.

    The field is a two-item list: ``actionCount`` and ``restrictionCount``,
    each as a percentage of ``wordCount`` rounded to two decimals. Both are 0
    when ``wordCount`` is not positive.

    Returns:
        dict: ``{"success": True, "message": ..., "updated_statistics": ...}``
        or ``{"success": False, "error": ...}``.
    """
    metadata_path = "metadata"  # Path to the metadata in Firebase
    statistics_path = "statistics"  # Path to the statistics in Firebase
    try:
        # Fetch the current metadata (actionCount, restrictionCount, wordCount)
        metadata = FBconn.get(f"/{metadata_path}", None)

        word_total = metadata["wordCount"]
        if word_total > 0:
            action_per = round((metadata["actionCount"] / word_total) * 100, 2)
            restriction_per = round((metadata["restrictionCount"] / word_total) * 100, 2)
        else:
            # Prevent division by zero once every word has been removed
            action_per = 0
            restriction_per = 0

        # Fetch the current statistics data
        statistics_data = FBconn.get(f"/{statistics_path}", None)

        # Update the action_vs_restriction field in statistics
        statistics_data["action_vs_restriction"] = [action_per, restriction_per]

        # Upload the updated statistics with the new action_vs_restriction
        update_result = FBconn.patch(f"/{statistics_path}", statistics_data)

        if not update_result:
            return {"success": False, "error": "Failed to update action_vs_restriction in statistics."}

        return {
            "success": True,
            "message": "action_vs_restriction successfully updated in statistics.",
            "updated_statistics": statistics_data
        }

    except Exception as e:
        return {"success": False, "error": str(e)}


# Register the handlers below under string names so page JavaScript can call
# them through google.colab.kernel.invokeFunction.
output.register_callback('fetch_search_results', fetch_search_results)
output.register_callback('delete_term', delete_term)
output.register_callback('delete_data', delete_data)
output.register_callback('update_data', update_data)
output.register_callback('clear_chat_history', clear_chat_history)

# Frontend: Dashboard Page

---



## dashboard html

In [35]:
def create_dashboard_page(role):
   """Render the dashboard page in the notebook output.

   The page shows a welcome heading containing ``role`` and three buttons:
   Logout (invokes ``create_login_page``), Search (invokes
   ``create_search_page`` with the role) and Statistics (invokes
   ``on_statistics_button_click`` with the role).

   Args:
       role: Role string interpolated into the heading and into the
           buttons' JavaScript calls.
   """
   dashboard_page_html = f"""
   <!DOCTYPE html>
   <html lang="en">
   <head>
       {get_css_styles()}
       <meta charset="UTF-8">
       <meta name="viewport" content="width=device-width, initial-scale=1.0">
       <title>Dashboard</title>
   </head>
   <body>
      {TEAM_HEADER.format(tiger_image_url=tiger_image_url)}
       <div class="dashboard-container">
           <h2>Welcome, {role} Dashboard</h2>
            <div class="button-container">
                <button class="logout-button" onclick="logout()">Logout</button>
                <button class="search-button" onclick="search('{role}')">Search</button>
                <button class="statistics-button" onclick="viewStatistics('{role}')">Statistics</button>
            </div>
           <div id="output" class="output"></div>
       </div>

       <script>
       function search(role) {{
          console.log('role:', role);
          // Call Python function to create the search page with the current role
          google.colab.kernel.invokeFunction('create_search_page', [role], {{}});
          document.getElementById('output').innerHTML = 'Loading search page...';
        }}

        function viewStatistics(role) {{
          google.colab.kernel.invokeFunction('on_statistics_button_click', [role], {{}});
          document.getElementById('output').innerHTML = 'Loading statistics...';
        }}
        function logout() {{
          // Redirect to the login page by reloading the notebook with the login form
          google.colab.kernel.invokeFunction('create_login_page', []);
          document.getElementById('output').innerHTML = 'Logging out...';
        }}
       </script>
   </body>
   </html>
   """
   # Replace whatever the cell currently shows with the dashboard
   clear_output()
   display(HTML(dashboard_page_html))

# Expose the page builders to JavaScript: the Back buttons invoke
# 'create_dashboard_page' and the dashboard's Search button invokes 'create_search_page'.
output.register_callback('create_dashboard_page', create_dashboard_page)
output.register_callback('create_search_page', create_search_page)

# Frontend: Statistics Page

---



## statistics html

In [36]:
def create_statistics_page(image_path_1, image_path_2, image_path_3, role):
    """Build the statistics page HTML with three plot images embedded inline.

    Each image file is read and embedded as a base64 ``data:`` URI, so the
    page does not depend on the files after this call returns.

    Args:
        image_path_1: Path of the first plot image.
        image_path_2: Path of the second plot image.
        image_path_3: Path of the third plot image.
        role: Role string passed back to ``create_dashboard_page`` by the
            page's Back button.

    Returns:
        str: The complete HTML document.

    Raises:
        OSError: If one of the image files cannot be opened.
    """
    # Placeholder shown when an embedded image fails to load. It sits inside a
    # single-quoted JavaScript string within a double-quoted HTML attribute, so
    # the SVG's own quotes are percent-encoded (%27), as are '#' (%23) and the
    # literal '%' (%25).
    fallback_svg = (
        "data:image/svg+xml;charset=UTF-8,"
        "<svg xmlns=%27http://www.w3.org/2000/svg%27 width=%2780%27 height=%2780%27>"
        "<rect width=%2780%27 height=%2780%27 fill=%27%23f0f0f0%27/>"
        "<text x=%2750%25%27 y=%2750%27 font-size=%2712%27 text-anchor=%27middle%27 "
        "alignment-baseline=%27middle%27 fill=%27%23999%27>Image Not Found</text></svg>"
    )

    def generate_image_tag(image_path):
        """Return an <img> tag embedding the file at ``image_path`` as base64."""
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
        # onerror is cleared first so a failing placeholder cannot re-trigger itself
        return f"""
            <img src="data:image/png;base64,{encoded_string}"
                 alt="Plot"
                 onerror="this.onerror=null;this.src='{fallback_svg}';"
                 style="max-width: 100%; margin-bottom: 20px;">
        """

    image_tag_1, image_tag_2, image_tag_3 = (
        generate_image_tag(path)
        for path in (image_path_1, image_path_2, image_path_3)
    )

    statistics_page_html = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        {get_css_styles()}
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Statistics</title>
    </head>
    <body>
        {TEAM_HEADER.format(tiger_image_url=tiger_image_url)}
        <div class="statistics-container">
            <h2>Statistics Page</h2>
            <div id="statistics-output" class="output">
                {image_tag_1}
                {image_tag_2}
                {image_tag_3}
            </div>
            <div class="button-container">
                <button class="back-button" onclick="goBack()">Back</button>
            </div>
        </div>

        <script>
            function goBack() {{
                // Call the Python function to load the dashboard with the current role
                google.colab.kernel.invokeFunction('create_dashboard_page', ['{role}']);
            }}
        </script>
    </body>
    </html>
    """
    return statistics_page_html

## statistics functions

In [37]:
def fetch_statistics():
  """Return whatever Firebase holds under the ``statistics/`` path."""
  return FBconn.get('statistics/', None)

# Update on_statistics_button_click to include new graphs
def on_statistics_button_click(role):
    """Plot the stored statistics and show the statistics page.

    Fetches the statistics node, renders three plots to PNG files in the
    working directory, and replaces the cell output with the page that
    embeds them.

    Args:
        role: Role string forwarded to ``create_statistics_page``.
    """
    # A missing statistics node comes back as None; fall back to empty values
    data = fetch_statistics() or {}
    # top_ten is sliced by the plotting helper, so its fallback must be a list
    greatest10 = data.get('top_ten') or []
    action_vs_restriction = data.get('action_vs_restriction') or [0, 0]
    avg_term_appearance = data.get('avg') or 0

    top_terms_image = "top_10_terms_plot.png"
    action_image = "action_vs_restriction_plot.png"
    average_image = "average_term_appearance_plot.png"

    # Create the plots and save them as images (each helper closes its figure)
    fig, ax = plt.subplots(figsize=(10, 6))
    plot_top_10_terms(ax, greatest10, top_terms_image)

    fig, ax = plt.subplots(figsize=(10, 6))
    plot_action_vs_restriction(ax, action_vs_restriction, action_image)

    fig, ax = plt.subplots(figsize=(10, 6))
    plot_average_term_appearance(ax, avg_term_appearance, average_image)

    # Generate the statistics page with all three plots
    statistics_page_html = create_statistics_page(
        top_terms_image,
        action_image,
        average_image,
        role
    )

    # Clear current output and display the statistics page
    clear_output(wait=True)
    display(HTML(statistics_page_html))

def plot_action_vs_restriction(ax, _action_vs_restriction, image_path="action_vs_restriction.png"):
    """Draw a two-bar Action/Restriction chart on ``ax`` and save it.

    Args:
        ax: Matplotlib axes to draw on.
        _action_vs_restriction: Sequence whose first item is the Action value
            and second item the Restriction value.
        image_path: File the figure is saved to.

    The figure owning ``ax`` is closed after saving.
    """
    action_value = _action_vs_restriction[0]
    restriction_value = _action_vs_restriction[1]
    frame = pd.DataFrame({
        'Type': ['Action', 'Restriction'],
        'Count': [action_value, restriction_value],
    })

    # hue mirrors x to colour each bar; the redundant legend is suppressed
    sns.barplot(data=frame, x='Type', y='Count', hue='Type', legend=False, ax=ax)
    ax.set_title('Action vs Restriction Distribution', pad=20)
    ax.set_ylabel('Count')

    figure = ax.get_figure()
    figure.savefig(image_path)
    plt.close(figure)

def plot_average_term_appearance(ax, _avg_term_appearance, image_path="average_term_appearance.png"):
    """
    Draw the average term appearance as a labelled horizontal line and save it.

    Args:
        ax: Matplotlib axes to draw on.
        _avg_term_appearance: The average value to mark.
        image_path: File the figure is saved to.

    The figure owning ``ax`` is closed after saving.
    """
    # Add grid for better readability
    ax.grid(True, linestyle='--', alpha=0.7)

    # Plot average line with improved style
    ax.axhline(y=_avg_term_appearance,
               color='#FF4B4B',  # Brighter red
               linestyle='--',
               linewidth=2)

    # Add text label with improved formatting
    ax.text(0.02, _avg_term_appearance,
            f'Average: {_avg_term_appearance:.2f}',
            verticalalignment='bottom',
            fontsize=12,
            fontweight='bold',
            bbox=dict(facecolor='white',
                     edgecolor='none',
                     alpha=0.7,
                     pad=3))

    # Style the plot
    ax.set_title('Average Term Appearance',
                 pad=20,
                 fontsize=14,
                 fontweight='bold')
    ax.set_ylabel('Frequency',
                 fontsize=12)
    # Centre the line vertically; an average of 0 would give identical lower
    # and upper limits, so fall back to a unit range in that case.
    upper_limit = _avg_term_appearance * 2 if _avg_term_appearance > 0 else 1
    ax.set_ylim(0, upper_limit)

    # Set background color
    ax.set_facecolor('#f8f9fa')

    # Remove top and right spines of this axes (not of whichever figure is current)
    sns.despine(ax=ax)

    # Save the figure
    fig = ax.get_figure()
    fig.savefig(image_path)
    plt.close(fig)

# Function to plot the top 10 terms
def plot_top_10_terms(ax, _greatest10, image_path="top_10_terms_plot.png"):
    """
    Creates a horizontal bar plot showing the top 10 most frequent terms and saves it as an image.

    Args:
        ax: Matplotlib axes to draw on.
        _greatest10: Sequence of ``(term, count)`` pairs; only the first ten
            entries are plotted.
        image_path: File the figure is saved to.

    The figure owning ``ax`` is closed after saving.
    """
    top10_df = pd.DataFrame(_greatest10[:10], columns=['Term', 'Count'])

    # Use custom color palette
    colors = sns.color_palette("husl", n_colors=len(top10_df))

    # Create the plot. The palette is applied through hue (with the redundant
    # legend switched off): passing palette without hue is deprecated in the
    # seaborn releases that accept legend= on barplot.
    sns.barplot(x='Count',
                y='Term',
                data=top10_df,
                hue='Term',
                palette=colors,
                legend=False,
                ax=ax)

    # Add value labels on the bars
    for i, v in enumerate(top10_df['Count']):
        ax.text(v, i, f' {v:,}', va='center')

    # Style titles and add grid
    ax.set_title('Top 10 Most Frequent Terms', pad=20, fontsize=14, fontweight='bold')
    ax.set_xlabel('Appearance Count', fontsize=12)
    ax.set_ylabel('Terms', fontsize=12)
    ax.grid(True, axis='x', linestyle='--', alpha=0.7)

    # Remove top and right borders of this axes (not of whichever figure is current)
    sns.despine(ax=ax)

    # Save the figure
    fig = ax.get_figure()
    fig.savefig(image_path)
    plt.close(fig)


# Expose the statistics handler to JavaScript; the dashboard's viewStatistics() invokes it.
output.register_callback('on_statistics_button_click', on_statistics_button_click)

# Main

---



**Login as User :**


1.   **Username : user**
2.   **Password : user**




**Login as Admin:**


1.   **Username : admin**
2.   **Password : admin**





In [38]:
# Entry point: render the login page; the other pages are reached from it
# through the registered callbacks.
create_login_page()