<a href="https://colab.research.google.com/github/Cloud-Course-Group-Phoenix/Project-Pheonix/blob/main/HW2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Package installations

In [1]:
# pip installs
!pip install firebase
!pip install gradio
!pip install paho-mqtt

#================================= make sure all pip installs are above this line ============================================

# import to clear the installation code output
from IPython.display import clear_output
clear_output()

In [2]:
#imports
import gradio as gr
import json
import time
from firebase import firebase
import paho.mqtt.client as mqtt
import requests
import re
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
DBLink = "https://wordbank-c75f1-default-rtdb.firebaseio.com/"


# Admin panel functions

# Index

In [None]:
class DbService:
    def __init__(self,Dblink):
        self.dbLink= Dblink

    def insert_to_db(self,results):
        FBconn = firebase.FirebaseApplication(self.dbLink,None)
        FBconn.put('/','terms',results)

    def get_from_db(self):
        FBconn = firebase.FirebaseApplication(self.dbLink,None)
        results = FBconn.get('/','terms')
        return results


class QueryService:
    def __init__(self,url):
        self.url = url

    def fetch_page(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            return soup
        else:
            return None

    def index_words(self, soup, index = {}, url = ''):
        words = re.findall(r'\w+', soup.get_text())
        for word in words:
            word = word.lower()
            if word in index:
                index[word]["Appearences"] += 1
                # Add URL to docid if it's not already there
                if url and url not in index[word]["DocIDs"]:
                    index[word]["DocIDs"].append(url)
            else:
                # Initialize with count and docid list containing the current URL
                index[word] = {"Appearences": 1, "DocIDs": [url] if url else []}

        return index

    def remove_stop_words(self,index):
      stop_words = set(stopwords.words('english'))
      for stop_word in stop_words:
        if stop_word in index:
          del index[stop_word]
      return index

class Crawler:
  def __init__(self, url):
    self.url = url

  #Fetches all sub urls from a given url
  def get_sub_urls(self, url):
    sub_urls = []
    stack = [url]
    while stack:
      url = stack.pop()
      response = requests.get(url)
      response.raise_for_status()  # Raise an exception for bad responses
      soup = BeautifulSoup(response.content, 'html.parser')
      for link in soup.find_all('a', href=True):
          href = link['href']
          absolute_url = urljoin(url, href)  # Make URL absolute

          if (absolute_url.startswith(url)) and (absolute_url != url) and (absolute_url not in sub_urls):
              sub_urls.append(absolute_url)
              stack.append(absolute_url)

    return sub_urls



def main():
  dbService = DbService(DBLink)
  url = "https://mqtt.org/"
  crawler = Crawler(url)
  rankService = RankService()
  sub_urls = crawler.get_sub_urls(url)
  index = {}
  for sub_url in sub_urls:
    queryService = QueryService(sub_url)
    soup = queryService.fetch_page()
    index = queryService.index_words(soup, index, sub_url)
    index = queryService.remove_stop_words(index)
  dbService.insert_to_db(index)


if __name__ == '__main__':
  main()

#Search Engine UI

In [None]:
from typing_extensions import Never
def search_word(query):
    if not query:
        return "Please enter a search term"

    # Get the index from the database
    dbService = DbService(DBLink)
    index = dbService.get_from_db()

    if not index:
        return "No index found in the database. Please run the indexing process first."

    # Process the query
    query = query.lower().strip()

    # Search for the word in the index
    if query in index:
        # Get the DocIDs (URLs) where this word appears
        urls = index[query]["DocIDs"]
        appearances = index[query]["Appearences"]

        # Format the results
        result = f"Found '{query}' in {len(urls)} pages with {appearances} total appearances:\n\n"
        for i, url in enumerate(urls, 1):
            result += f"{i}. {url}\n"

        return result
    else:
        return f"No results found for '{query}'."

# Create the Gradio interface for the search engine
search_interface = gr.Interface(
    fn=search_word,
    inputs=gr.Textbox(placeholder="Enter a word to search..."),
    outputs=gr.Textbox(label="Search Results", lines=10),
    title="Index Search Engine",
    description="Search for words and find the URLs where they appear.",
    allow_flagging='never',
)

# Launch the search interface
search_interface.launch(inline=True)


The `allow_flagging` parameter in `Interface` is deprecated.Use `flagging_mode` instead.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0be240a963dfcaf369.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Sensor data caching and processing

In [4]:
# Data proccessing
#def process_data(data):

db_url = "https://project-pheonix-39eef-default-rtdb.europe-west1.firebasedatabase.app/"
FBconn = firebase.FirebaseApplication(db_url,None)

# Data saving in DB
def send_to_db(path, data):
  FBconn.put('/',f'/fake/{path}',data)
  return

# create an mqtt connection
def on_connect(client, userdata, flags, rc):
  if rc == 0:
    print("Connected to MQTT Broker!\nSubscribing to topics")

    # Subscribe to the relevant topics
    client.subscribe("braude/D106/indoor")
    client.subscribe("braude/D106/outdoor")

    print("Successfully subscribed to topics!")
  else:
    print(f"Failed to connect, return code {rc}")

def on_disconnect(client, userdata, rc):
  if rc != 0:
    for i in range(5):
      print(f"Unexpected disconnection (error code: {rc}). Attempting to reconnect number {i + 1} in 5 seconds...")
      time.sleep(5)
      try:
        client.reconnect()
      except Exception as e:
        print(f"Reconnection attempt failed: {e}")

def on_message(client, userdata, msg):

  topic = msg.topic
  payload = msg.payload.decode('utf-8')  # Decode the byte string to a string

  print(f"Received JSON message on topic '{topic}': {payload}")
  entry = None
  try:
    sensor_data = json.loads(payload)
    print(f"Parsed JSON data: {sensor_data}")

    if topic == "braude/D106/indoor":
      send_to_db(f"indoor/{int(time.time())}", sensor_data)

    elif topic == "braude/D106/outdoor":
      send_to_db(f"outdoor/{int(time.time())}", sensor_data)

  except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    print(f"Problematic payload: {payload}")

# connect to the MQTT publisher
client = mqtt.Client()
client.on_connect = on_connect
client.on_disconnect = on_disconnect
client.on_message = on_message
client.connect("broker.hivemq.com", 1883, keepalive = 600)
client.loop_start()

time.sleep(5)
client.loop_stop()
client.disconnect()

  client = mqtt.Client()


Connected to MQTT Broker!
Subscribing to topics
Successfully subscribed to topics!


<MQTTErrorCode.MQTT_ERR_SUCCESS: 0>

# Screens

In [20]:
import plotly.graph_objects as go
from datetime import datetime
# Sensor data pulling from DB for indoors
data_indoor = FBconn.get('/fake/indoor',None)
data_keys_indoor = list(data_indoor.keys())
data_values_indoor = list(data_indoor.values())
readable_times_indoor = [datetime.utcfromtimestamp(int(ts)).strftime('%H:%M:%S') for ts in data_keys_indoor]

#Sensor data pulling form DB for outdoors
data_outdoor = FBconn.get('/fake/outdoor',None)
data_keys_outdoor = list(data_outdoor.keys())
data_values_outdoor = list(data_outdoor.values())
readable_times_outdoor = [datetime.utcfromtimestamp(int(ts)).strftime('%H:%M:%S') for ts in data_keys_outdoor]

enviorment = ['indoor','outdoor']
sensors = {'indoor':['Distance','Temperature','Humidity','Pressure'] , 'outdoor':['DLIGHT']}
dropdown_env = None
dropdown_sensor = None
#updates Dropbox values
def rs_change(rs):
  return gr.Dropdown(choices = sensors[rs],value=sensors[rs][0])

# Data visualization
def plot_graph(place,name):
    if place == 'indoor':
      val_arr = [value[name] for value in data_values_indoor]
      fig = go.Figure()
      fig.add_trace(go.Scatter(x=readable_times_indoor, y=val_arr, mode='lines+markers', name=name))
      fig.update_layout(title='Sensor {} Over Time'.format(name), xaxis_title='Time', yaxis_title=name)
    elif place == 'outdoor':
      val_arr = [value[name] for value in data_values_outdoor]
      fig = go.Figure()
      fig.add_trace(go.Scatter(x=readable_times_outdoor, y=val_arr, mode='lines+markers', name=name))
      fig.update_layout(title='Sensor {} Over Time'.format(name), xaxis_title='Time', yaxis_title=name)
    return fig

with gr.Blocks() as app:

  rs = gr.Dropdown(choices = enviorment, value='indoor')
  rs_sensors = gr.Dropdown(choices = sensors['indoor'],interactive = True)

  rs.change(fn=rs_change, inputs=rs, outputs=rs_sensors)

  pl = gr.Interface(fn=plot_graph, inputs=[rs ,rs_sensors], outputs=gr.Plot(label="Graph"))


# Gradio interface
#gr.Interface(fn=plot_graph, inputs=[rs ,rs_sensors], outputs=gr.Plot(label="Graph"))

app.launch(inline=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://22bfd835b8f957e757.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


