<a href="https://colab.research.google.com/github/Cloud-Course-Group-Phoenix/Project-Pheonix/blob/main/HW2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Package installations

In [1]:
# pip installs
!pip install firebase
!pip install gradio
!pip install paho-mqtt

#================================= make sure all pip installs are above this line ============================================

# import to clear the installation code output
from IPython.display import clear_output
clear_output()

In [2]:
#imports
import gradio as gr
import json
import time
from firebase import firebase
import paho.mqtt.client as mqtt
import requests
import re
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from datetime import datetime
import plotly.graph_objects as go
import pandas as pd
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [5]:
# Root URL of the Firebase Realtime Database that stores the word index
# (the 'terms' node) and the index statistics used by the admin panel.
# The commented-out line is an alternative database URL.
#DBLink = "https://wordbank-c75f1-default-rtdb.firebaseio.com/"
DBLink = "https://alexaanddaniel-c1413-default-rtdb.firebaseio.com/"


# Index

In [7]:
class DbService:
    """Reads and writes the word index stored under the 'terms' node in Firebase."""

    def __init__(self,Dblink):
        # Base URL of the Firebase database every call connects to.
        self.dbLink = Dblink

    def _open(self):
        """Create a fresh, unauthenticated Firebase connection to the database."""
        return firebase.FirebaseApplication(self.dbLink, None)

    def insert_to_db(self,results):
        """Overwrite the 'terms' node with `results`."""
        self._open().put('/', 'terms', results)

    def get_from_db(self):
        """Return whatever is currently stored in the 'terms' node."""
        return self._open().get('/', 'terms')


class QueryService:
    """Downloads a single page and maintains the word index built from page text.

    The index maps each lower-cased word to
    ``{"Appearences": <total count>, "DocIDs": [<urls containing the word>]}``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    def __init__(self,url):
        self.url = url

    def fetch_page(self):
        """Download ``self.url`` and return it parsed as BeautifulSoup.

        Returns None when the server answers with any status other than 200.
        Network errors and timeouts raised by ``requests`` propagate to the caller.
        """
        # A timeout keeps an unresponsive server from hanging the whole crawl.
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        return BeautifulSoup(response.text, 'html.parser')

    def index_words(self, soup, index = None, url = ''):
        """Add every word of `soup`'s text to `index` and return the index.

        Args:
            soup: parsed page exposing ``get_text()``; None (a page that could
                not be fetched) leaves the index unchanged.
            index: index to update in place; a new empty dict is created when omitted.
            url: page address recorded in each word's "DocIDs"; skipped when empty.
        """
        # A `{}` default would be shared by every call that omits `index`,
        # so words from unrelated calls would leak into each other.
        if index is None:
            index = {}
        if soup is None:
            return index

        for word in re.findall(r'\w+', soup.get_text()):
            word = word.lower()
            entry = index.get(word)
            if entry is None:
                # First sighting: start the count and the list of pages.
                index[word] = {"Appearences": 1, "DocIDs": [url] if url else []}
                continue
            entry["Appearences"] += 1
            # Record the page only once per word.
            if url and url not in entry["DocIDs"]:
                entry["DocIDs"].append(url)

        return index

    def remove_stop_words(self,index):
        """Delete all English stop words from `index` (in place) and return it."""
        for stop_word in set(stopwords.words('english')):
            index.pop(stop_word, None)
        return index

class Crawler:
  """Collects the pages reachable below a starting URL by following <a href> links."""

  def __init__(self, url):
    self.url = url

  #Fetches all sub urls from a given url
  def get_sub_urls(self, url):
    """Return every distinct URL found below `url`, in discovery order.

    Each fetched page contributes the links that start with that page's own
    address. Fragments (``#section``) are stripped first, because they point
    into a page that is already covered and would otherwise be fetched and
    indexed again as if it were a separate document. The starting `url`
    itself is not part of the result.

    Raises whatever ``requests`` raises for network errors, timeouts and
    non-success status codes (via ``raise_for_status``).
    """
    from urllib.parse import urldefrag

    sub_urls = []
    seen = set()  # O(1) duplicate check; sub_urls keeps the discovery order
    stack = [url]
    while stack:
      page_url = stack.pop()
      response = requests.get(page_url, timeout=10)
      response.raise_for_status()  # Raise an exception for bad responses
      soup = BeautifulSoup(response.content, 'html.parser')
      for link in soup.find_all('a', href=True):
        # Make the link absolute and drop its fragment.
        absolute_url, _fragment = urldefrag(urljoin(page_url, link['href']))

        if absolute_url == page_url or absolute_url in seen:
          continue
        if not absolute_url.startswith(page_url):
          continue
        seen.add(absolute_url)
        sub_urls.append(absolute_url)
        stack.append(absolute_url)

    return sub_urls



def main():
  """Crawl https://mqtt.org/, build the word index and store it in Firebase.

  Returns None on success, or an error message string when any step raises.
  """
  try:
    dbService = DbService(DBLink)
    url = "https://mqtt.org/"
    crawler = Crawler(url)
    sub_urls = crawler.get_sub_urls(url)
    index = {}
    for sub_url in sub_urls:
      queryService = QueryService(sub_url)
      soup = queryService.fetch_page()
      # fetch_page returns None for non-200 answers; skip that page
      # instead of aborting the whole run.
      if soup is None:
        continue
      index = queryService.index_words(soup, index, sub_url)
    # Stop words only need to be stripped once, after all pages are indexed.
    index = QueryService(url).remove_stop_words(index)
    dbService.insert_to_db(index)
    return
  except Exception as e:
    return f"Error during indexing: {str(e)}"


if __name__ == '__main__':
  # Surface a failure instead of silently discarding the returned message.
  indexing_error = main()
  if indexing_error:
    print(indexing_error)

# Admin panel functions

In [8]:
# Index Management class for handling index-related functionality
class IndexManager:
    """Manages the search index and its usage statistics in Firebase.

    Statistics are kept under the node named by ``stats_path`` and hold the
    word count, page count, time of the last indexing run and a
    ``search_counts`` mapping of search term -> number of searches.
    """

    def __init__(self, db_link):
        self.db_link = db_link
        self.db_service = DbService(db_link)
        self.stats_path = "/indexStats"
        self._init_stats_if_needed()

    def _connect(self):
        """Open a new, unauthenticated Firebase connection to ``db_link``."""
        return firebase.FirebaseApplication(self.db_link, None)

    def _stats_node(self):
        """Name of the stats node, i.e. ``stats_path`` without its leading slash."""
        return self.stats_path[1:]

    # Initialize stats in the database if they don't exist
    def _init_stats_if_needed(self):
        """Write default statistics when the stats node is missing or empty."""
        conn = self._connect()
        if conn.get('/', self._stats_node()):
            return
        default_stats = {
            "word_count": 0,
            "page_count": 0,
            "last_indexed": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "search_counts": {"placeholder": 0}  # Start with a placeholder entry
        }
        conn.put('/', self._stats_node(), default_stats)

    # Get the top 10 most searched terms from the index
    def get_top_search_terms(self):
        """Return up to 10 ``(term, count)`` pairs, most searched first.

        Returns an empty list when no statistics or search counts exist.
        """
        stats = self._connect().get('/', self._stats_node())
        if not stats or 'search_counts' not in stats:
            return []
        # Sort by the count; a lambda avoids depending on the `operator`
        # module, which this notebook never imports.
        ranked = sorted(
            stats['search_counts'].items(),
            key=lambda term_and_count: term_and_count[1],
            reverse=True
        )
        return ranked[:10]

    # Record that a term was searched
    def record_search_term(self, term):
        """Increment the stored search counter of `term` (creating it at 1)."""
        conn = self._connect()
        stats = conn.get('/', self._stats_node())
        if not stats:
            stats = {"search_counts": {"placeholder": 0}}
        elif 'search_counts' not in stats:
            stats['search_counts'] = {"placeholder": 0}

        counts = stats['search_counts']

        # Remove placeholder if it exists and this isn't a placeholder term
        if "placeholder" in counts and term != "placeholder":
            if len(counts) > 1:  # Only remove if there are other terms
                del counts["placeholder"]

        # Increment search count for this term
        counts[term] = counts.get(term, 0) + 1

        # Save back to DB
        conn.put('/', self._stats_node(), stats)

    # Get the current status of the index
    def update_index_status(self):
        """Return a dict with "word_count", "page_count" and "last_indexed".

        Missing statistics yield zero counts and "Never"; a missing single
        field falls back to 0 (counts) or "Unknown" (timestamp).
        """
        stats = self._connect().get('/', self._stats_node())
        if not stats:
            return {
                "word_count": 0,
                "page_count": 0,
                "last_indexed": "Never"
            }
        return {
            "word_count": stats.get("word_count", 0),
            "page_count": stats.get("page_count", 0),
            "last_indexed": stats.get("last_indexed", "Unknown")
        }

    # Re-index content from the target website
    def reindex_content(self):
        """Crawl https://mqtt.org/, rebuild the index and refresh the statistics.

        Returns a human-readable summary, or an error message when a step raises.
        """
        try:
            url = "https://mqtt.org/"
            sub_urls = Crawler(url).get_sub_urls(url)
            index = {}
            indexed_pages = 0
            for sub_url in sub_urls:
                queryService = QueryService(sub_url)
                soup = queryService.fetch_page()
                # fetch_page returns None for non-200 answers; skip the page
                # rather than failing the whole re-index.
                if soup is None:
                    continue
                index = queryService.index_words(soup, index, sub_url)
                indexed_pages += 1
            # Stop words only need to be stripped once, after all pages are indexed.
            index = QueryService(url).remove_stop_words(index)
            self.db_service.insert_to_db(index)

            # Update stats
            conn = self._connect()
            stats = conn.get('/', self._stats_node()) or {}
            stats["word_count"] = len(index)
            stats["page_count"] = indexed_pages
            stats["last_indexed"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            conn.put('/', self._stats_node(), stats)

            return f"Re-indexing complete: {len(index)} words from {indexed_pages} pages"
        except Exception as e:
            return f"Error during re-indexing: {str(e)}"

def get_index_status():
  """Return the index status dict reported by the shared index manager."""
  status = index_manager.update_index_status()
  return status

def reindex_content():
  """Trigger a re-index through the shared index manager and return its message."""
  outcome = index_manager.reindex_content()
  return outcome

# Track search terms for stats
def track_search_terms(query):
    """Record one search for every word of `query` (lower-cased); empty queries are ignored."""
    if not query:
        return
    for term in re.findall(r'\w+', query.lower()):
        index_manager.record_search_term(term)


# Check if MQTT broker is connected
def get_mqtt_connection_status():
    """Report whether the MQTT broker is connected.

    Placeholder: no broker is queried here, so the answer is always False.
    """
    is_connected = False
    return is_connected


# Reconnect to the MQTT broker
def reconnect_mqtt():
    """Attempt to reconnect to the MQTT broker.

    Placeholder: no reconnection is performed, so the result is always False.
    """
    reconnected = False
    return reconnected


# Shared IndexManager used by the helper functions above and by the dashboard callbacks.
# Creating it calls _init_stats_if_needed(), which reads (and may write) the stats node in Firebase.
index_manager = IndexManager(DBLink)

# Admin Panel UI

In [9]:

# Create the admin dashboard UI
def create_admin_dashboard():
    """Build the Gradio admin dashboard and return it without launching.

    The dashboard has an "Index Management" tab (top search terms, index
    status, re-index button) and an "MQTT Connection" tab (status, reconnect).
    """

    # --- callbacks that produce the displayed data ---
    def update_top_terms():
        """Top search terms as a Rank / Term / Searches table."""
        ranked = index_manager.get_top_search_terms()
        rows = [
            [rank, term, f"{count} Searches"]
            for rank, (term, count) in enumerate(ranked, 1)
        ]
        return pd.DataFrame(rows, columns=["Rank", "Term", "Searches"])

    def update_index_status():
        """Index statistics rendered as one markdown string."""
        status = get_index_status()
        lines = (
            f"**Word Count:** {status['word_count']}",
            f"**Page Count:** {status['page_count']}",
            f"**Last Indexed:** {status['last_indexed']}",
        )
        return "<br>".join(lines)

    def update_connection_status():
        """Markdown line describing the MQTT connection state."""
        return (
            "✅ **Connected** to MQTT Broker"
            if get_mqtt_connection_status()
            else "❌ **Disconnected** from MQTT Broker"
        )

    # --- layout ---
    with gr.Blocks(title="Admin Dashboard") as dashboard:
        gr.Markdown("# Admin Dashboard")

        with gr.Tab("Index Management"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### Top 10 Most Searched Terms")
                    top_terms_output = gr.Dataframe(
                        headers=["Rank", "Term", "Searches"],
                        row_count=10,
                        interactive=False
                    )

                    refresh_top_terms = gr.Button("Refresh Top Terms")

                with gr.Column(scale=3):
                    gr.Markdown("### Index Status")
                    index_status_md = gr.Markdown("")

                    with gr.Row():
                        reindex_button = gr.Button("Re-index Content", variant="primary")
                        refresh_index_status = gr.Button("Refresh Status")

                    index_action_output = gr.Textbox(label="Action Output", lines=2)

        with gr.Tab("MQTT Connection"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### MQTT Broker Connection Status")
                    connection_status_md = gr.Markdown("")

                    reconnect_button = gr.Button("Reconnect to MQTT Broker", variant="primary")
                    connection_output = gr.Textbox(label="Connection Output", lines=2)

        # Button handlers
        refresh_top_terms.click(update_top_terms, outputs=top_terms_output)
        refresh_index_status.click(update_index_status, outputs=index_status_md)
        reindex_button.click(reindex_content, outputs=index_action_output)
        reconnect_button.click(reconnect_mqtt, outputs=connection_output)

        # Fill every panel once when the page loads
        dashboard.load(update_top_terms, outputs=top_terms_output)
        dashboard.load(update_index_status, outputs=index_status_md)
        dashboard.load(update_connection_status, outputs=connection_status_md)

    return dashboard

# Build the admin dashboard, then launch it inline in the notebook output.
admin_dashboard = create_admin_dashboard()
admin_dashboard.launch(inline=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://214070d46bb9741e5c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Search Engine UI

In [None]:
def search_word(query):
    """Look up every word of `query` in the stored index and describe the matches.

    Args:
        query: free text; it is lower-cased and split into words. Repeated
            words are searched (and reported) only once.

    Returns:
        A text report listing the matching pages, ordered by how many of the
        query words each page contains, followed by the words that were not
        found; or a short message when the query or the index is empty or
        nothing matches.
    """
    if not query:
        return "Please enter a search term"

    # Get the index from the database
    dbService = DbService(DBLink)
    index = dbService.get_from_db()

    if not index:
        return "No index found in the database. Please run the indexing process first."

    # Track search terms for analytics (every word of the query is recorded)
    track_search_terms(query)

    # Split the query into words and drop repeats while keeping their order;
    # otherwise "mqtt mqtt" would count as two terms and list each page twice.
    words = list(dict.fromkeys(re.findall(r'\w+', query.lower())))

    if not words:
        return "Please enter valid search terms"

    # url -> query words found on that page
    all_results = {}
    # word -> total number of appearances across the index
    word_appearances = {}
    # Keep track of words not found
    words_not_found = []

    # Search for each word in the index
    for word in words:
        entry = index.get(word)
        if entry is None:
            words_not_found.append(word)
            continue

        # .get: tolerate entries stored without one of the two fields
        word_appearances[word] = entry.get("Appearences", 0)
        for url in entry.get("DocIDs", []):
            all_results.setdefault(url, []).append(word)

    # Format the results
    if not all_results:
        return f"No results found for any of the search terms: {', '.join(words)}"

    # Count the total number of URLs found and appearances
    total_urls = len(all_results)
    total_appearances = sum(word_appearances.values())

    # Start building the result string
    result = f"Found {len(words) - len(words_not_found)} of {len(words)} search terms in {total_urls} pages with {total_appearances} total appearances:\n\n"

    # Sort results by number of matching words (most matches first)
    sorted_results = sorted(all_results.items(), key=lambda x: len(x[1]), reverse=True)

    for i, (url, found_words) in enumerate(sorted_results, 1):
        result += f"{i}. {url} \n Contains words: \t {', '.join(found_words)}\n\n"

    # Add information about words not found
    if words_not_found:
        result += f"\nTerms not found: {', '.join(words_not_found)}"

    return result

# Create the Gradio interface for the search engine:
# a single text box feeds search_word, whose report is shown in a 10-line text box.
search_interface = gr.Interface(
    fn=search_word,
    inputs=gr.Textbox(placeholder="Enter words to search..."),
    outputs=gr.Textbox(label="Search Results", lines=10),
    title="Multi-Word Search Engine",
    description="Search for multiple words and find the URLs where they appear.",
    allow_flagging='never',
)

# Launch the search interface inline in the notebook output
search_interface.launch(inline=True)



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e70f31cee436a1c677.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Sensor data caching and processing

In [None]:
# Data processing
#def process_data(data):

# Firebase Realtime Database used for the sensor readings; FBconn is the
# module-level connection shared by send_to_db and the plotting cell.
db_url = "https://project-pheonix-39eef-default-rtdb.europe-west1.firebasedatabase.app/"
FBconn = firebase.FirebaseApplication(db_url,None)

# Data saving in DB
def send_to_db(path, data):
  """Store `data` in Firebase under the node '/fake/<path>'."""
  target_node = f'/fake/{path}'
  FBconn.put('/', target_node, data)

# create an mqtt connection
def on_connect(client, userdata, flags, rc):
  """Connection callback: on success (rc == 0) subscribe to both sensor topics."""
  if rc != 0:
    print(f"Failed to connect, return code {rc}")
    return

  print("Connected to MQTT Broker!\nSubscribing to topics")

  # Subscribe to the relevant topics
  for topic in ("braude/D106/indoor", "braude/D106/outdoor"):
    client.subscribe(topic)

  print("Successfully subscribed to topics!")

def on_disconnect(client, userdata, rc):
  """Disconnect callback: after an unexpected disconnect, retry up to 5 times.

  A return code of 0 is a requested disconnect and is ignored. Otherwise each
  attempt waits 5 seconds and calls ``client.reconnect()``; retrying stops as
  soon as one attempt does not raise.
  """
  if rc == 0:
    return

  for i in range(5):
    print(f"Unexpected disconnection (error code: {rc}). Attempting to reconnect number {i + 1} in 5 seconds...")
    time.sleep(5)
    try:
      client.reconnect()
    except Exception as e:
      print(f"Reconnection attempt failed: {e}")
    else:
      # Reconnected: stop here instead of reconnecting again on every remaining round.
      break

def on_message(client, userdata, msg):
  """Message callback: parse a JSON sensor reading and store it in the database.

  Readings from the indoor/outdoor topics are saved under
  'indoor/<unix time>' or 'outdoor/<unix time>'. Messages on other topics are
  parsed but not stored. Payloads that are not valid UTF-8 or not valid JSON
  are reported and dropped.
  """
  # Topic -> database branch the reading is stored under.
  branches = {
    "braude/D106/indoor": "indoor",
    "braude/D106/outdoor": "outdoor",
  }

  topic = msg.topic
  try:
    payload = msg.payload.decode('utf-8')  # Decode the byte string to a string
  except UnicodeDecodeError as e:
    # Broken bytes must not raise out of the callback.
    print(f"Error decoding payload on topic '{topic}': {e}")
    return

  print(f"Received JSON message on topic '{topic}': {payload}")
  try:
    sensor_data = json.loads(payload)
  except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    print(f"Problematic payload: {payload}")
    return

  print(f"Parsed JSON data: {sensor_data}")

  branch = branches.get(topic)
  if branch is not None:
    send_to_db(f"{branch}/{int(time.time())}", sensor_data)

# Create the MQTT client, attach the callbacks defined above and connect to the broker
client = mqtt.Client()
client.on_connect = on_connect
client.on_disconnect = on_disconnect
client.on_message = on_message
client.connect("broker.hivemq.com", 1883, keepalive = 600)
client.loop_start()

# Let the client run for 5 seconds, then stop the loop and disconnect
time.sleep(5)
client.loop_stop()
client.disconnect()

  client = mqtt.Client()


Connected to MQTT Broker!
Subscribing to topics
Successfully subscribed to topics!


<MQTTErrorCode.MQTT_ERR_SUCCESS: 0>

# Screens

In [None]:
import plotly.graph_objects as go
from datetime import datetime
def _load_sensor_series(node):
  """Fetch the readings stored under `node` and prepare them for plotting.

  Returns a tuple ``(readings, keys, values, readable_times)`` where the keys
  are the unix-timestamp node names, the values are the stored readings and
  readable_times are the keys formatted as UTC 'HH:MM:SS'. A node without
  data yields an empty dict and empty lists instead of failing on None.
  """
  readings = FBconn.get(node, None) or {}
  keys = list(readings.keys())
  values = list(readings.values())
  readable_times = [datetime.utcfromtimestamp(int(ts)).strftime('%H:%M:%S') for ts in keys]
  return readings, keys, values, readable_times

# Sensor data pulling from DB for indoors
data_indoor, data_keys_indoor, data_values_indoor, readable_times_indoor = _load_sensor_series('/fake/indoor')

# Sensor data pulling from DB for outdoors
data_outdoor, data_keys_outdoor, data_values_outdoor, readable_times_outdoor = _load_sensor_series('/fake/outdoor')

# Environments offered in the first dropdown, and for each one the sensor
# fields offered in the second dropdown.
enviorment = ['indoor','outdoor']
sensors = {'indoor':['Distance','Temperature','Humidity','Pressure'] , 'outdoor':['DLIGHT']}
# NOTE(review): dropdown_env and dropdown_sensor are not referenced anywhere else in the visible cells.
dropdown_env = None
dropdown_sensor = None
# Updates the sensor dropdown's values
def rs_change(rs):
  """Return a sensor dropdown for environment `rs`, preselecting its first sensor."""
  available = sensors[rs]
  default_sensor = sensors[rs][0]
  return gr.Dropdown(choices=available, value=default_sensor)

# Data visualization
def plot_graph(place,name):
    """Plot the readings of sensor `name` for the given environment over time.

    Args:
        place: 'indoor' or 'outdoor'; any other value yields a figure without data.
        name: field to plot from each reading. Readings that lack the field
            contribute None for that point instead of raising.

    Returns:
        A plotly Figure titled 'Sensor <name> Over Time'.
    """
    # (time labels, readings) for each environment, loaded earlier in the notebook.
    series_by_place = {
        'indoor': (readable_times_indoor, data_values_indoor),
        'outdoor': (readable_times_outdoor, data_values_outdoor),
    }

    fig = go.Figure()
    if place in series_by_place:
        times, readings = series_by_place[place]
        val_arr = [reading.get(name) for reading in readings]
        fig.add_trace(go.Scatter(x=times, y=val_arr, mode='lines+markers', name=name))
    fig.update_layout(title='Sensor {} Over Time'.format(name), xaxis_title='Time', yaxis_title=name)
    return fig

# Sensor graph UI: pick an environment and one of its sensors, then plot it.
with gr.Blocks() as app:

  # Environment selector and the sensor selector that depends on it
  rs = gr.Dropdown(choices = enviorment, value='indoor')
  rs_sensors = gr.Dropdown(choices = sensors['indoor'],interactive = True)

  # Changing the environment replaces the sensor dropdown via rs_change
  rs.change(fn=rs_change, inputs=rs, outputs=rs_sensors)
  #chk = gr.Dropdown(choices = readable_times_outdoor,interactive = True) for time
  # plot_graph receives the two dropdown values and its figure is shown in a Plot
  pl = gr.Interface(fn=plot_graph, inputs=[rs ,rs_sensors], outputs=gr.Plot(label="Graph"))


# Gradio interface
#gr.Interface(fn=plot_graph, inputs=[rs ,rs_sensors], outputs=gr.Plot(label="Graph"))

# Launch the app inline in the notebook output
app.launch(inline=True)

In [90]:
import gradio as gr

# User's initial profile: display name and starting coin balance
name = "Bob"
coins = 2500

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus()) as demo:
    gr.Markdown("## 🛒 Shop")
    # Coin balance kept in gr.State; checkout reads it and writes the new value back
    Total_coins_state = gr.State(coins)

    # User greeting and current coin display
    with gr.Row():
        gr.Markdown(f"Welcome **{name}**")
        current_coins=gr.Markdown(f"Coins: **{coins}** 💰")
    # Cart contents (list of item labels) and the checkbox group for selecting rewards.
    # The labels below are matched verbatim by checkout().
    cart = gr.State([])
    items_to_add = gr.CheckboxGroup(
        ["Free Coffee ☕️ :50 coins", "Free Meal 🍔 :100 coins", "Pizza Party 🍕 :200 coins", "Water Park 💧 :300 coins", "Day Off 😄 :400 coins"],
        label="Choose Items to Add"
    )

    with gr.Row():
        add_button = gr.Button("➕ Add Items to Cart", variant="primary", size="lg")
        delete_button = gr.Button("❌ Clear Cart", variant="secondary")
    # Cart summary text, number of items in the cart, and the checkout message
    cart_display = gr.Markdown("🛒 **Cart is empty**")
    cart_size = gr.Number(label="Cart Size", interactive=False)
    checkout_result = gr.Markdown("")

    # Renders the cart as markdown: one bullet per item, or an "empty" notice
    def format_cart(cart_list):
        """Return the markdown text shown for `cart_list`."""
        if cart_list:
            bullet_lines = [f"- {item}" for item in cart_list]
            return "🛒 **Your Cart:**\n" + "\n".join(bullet_lines)
        return "🛒 **Cart is empty**"
    # Appends the selected items to the cart
    def add_items(new_items, previous_cart):
        """Return (updated cart, cart markdown, cart size) after adding `new_items`."""
        updated_cart = previous_cart + new_items
        cart_text = format_cart(updated_cart)
        return updated_cart, cart_text, len(updated_cart)

    # Initiates checkout: sums the total cost of all items, reports what was bought
    # and subtracts the cost from the user's coins
    def checkout(cart,total_coins):
        """Buy everything in `cart` if `total_coins` covers the total cost.

        Always returns six values, in the order of the checkout button's outputs:
        (message, new cart, cart markdown, cart size, new coin balance,
        update for the coin display). The cart is emptied in every case; the
        balance only changes when the purchase succeeds.
        """
        # Item label (exactly as offered in the checkbox group) -> (summary line, price)
        catalog = {
            "Free Coffee ☕️ :50 coins": ("☕️ Coffee - 50 coins", 50),
            "Free Meal 🍔 :100 coins": ("🍔 Meal - 100 coins", 100),
            "Pizza Party 🍕 :200 coins": ("🍕 Pizza - 200 coins", 200),
            "Water Park 💧 :300 coins": ("💧 Water Park - 300 coins", 300),
            "Day Off 😄 :400 coins": ("😄 Day Off - 400 coins", 400),
        }

        def outcome(message, balance):
            # One place builds the 6-value result so every path matches the outputs list.
            return message,[],format_cart([]),0,balance,gr.update(value=f"Coins: **{balance}** 💰")

        if not cart:
            # Returning only the message here would not match the six declared
            # outputs of the click handler.
            return outcome("❌ Your cart is empty!", total_coins)

        messages = ["🧾 **Checkout Summary:**"]
        total_cost = 0
        for item in cart:
            if item in catalog:
                summary_line, price = catalog[item]
                messages.append(summary_line)
                total_cost += price
            else:
                messages.append(f"❓ Unknown item: {item}")

        # Refuse the purchase when the cart costs more than the user's current coins
        if total_cost > total_coins:
            return outcome("❌ Not enough coins to complete the purchase!", total_coins)
        messages.append(f"\n💰 **Total Cost:** {total_cost} coins")
        return outcome("\n".join(messages), total_coins-total_cost)

    # Empties the cart
    def delete_cart(cart):
        """Return (empty cart, "empty" markdown, size 0); the current `cart` is ignored."""
        emptied_cart = []
        empty_notice = "🛒 **Cart is empty**"
        return emptied_cart, empty_notice, 0

    # Checkout button, created in its own row after the cart widgets
    with gr.Row():
      checkout_button = gr.Button("✅ Checkout", variant="secondary")

    # Add the ticked items to the cart and refresh the cart text and size
    add_button.click(
        fn=add_items,
        inputs=[items_to_add, cart],
        outputs=[cart, cart_display, cart_size]
    )

    # Checkout reads the cart and coin balance and updates six components
    checkout_button.click(
        fn=checkout,
        inputs=[cart,Total_coins_state],
        outputs=[checkout_result,cart,cart_display,cart_size,Total_coins_state,current_coins]
    )

    # Clear the cart and reset the cart text and size
    delete_button.click(
        fn=delete_cart,
        inputs=[cart],
        outputs=[cart, cart_display, cart_size]
    )

# Launch the shop demo
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3b0d06628630b55e73.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


