<a href="https://colab.research.google.com/github/RamiAmasha31/CloudCourse/blob/main/TencentSearchAPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Cloud Course
---
___
Tencent API
---
___
This notebook holds the functions that query and search through the indexed pages.

Initialize Libraries and Firebase connection

In [None]:
!pip install firebase



Collecting firebase
  Downloading firebase-4.0.1-py3-none-any.whl (12 kB)
Installing collected packages: firebase
Successfully installed firebase-4.0.1


In [None]:

import hashlib
from datetime import datetime


# Prepare the Firebase connection object (`fbConn`) that the functions in the cells below read from and write to.
from firebase import firebase
fbConn = firebase.FirebaseApplication('https://braudecloud-18-02-2024-default-rtdb.europe-west1.firebasedatabase.app/',None)
# Alternative database URL, currently disabled:
#fbConn = firebase.FirebaseApplication('https://smart-howl-250311.firebaseio.com/',None)


In [None]:
# @title Search Words

def get_indexed_pages(query):
  """Return the hashed page keys indexed under `query`, highest count first.

  Reads `/index/<query>` and orders its keys by each entry's `count` value,
  descending.

  Args:
    query: Index word to look up; appended as-is to the `/index/` path.

  Returns:
    A list of page keys sorted by descending `count`, or an empty list when
    the lookup returns no data for `query`.
  """
  indexed_pages = fbConn.get('/index/' + query, None)
  # The lookup can come back empty (other cells in this notebook check
  # fbConn.get results for None); without this guard the sort below would
  # fail on a value that is not a mapping.
  if not indexed_pages:
    return []
  return sorted(indexed_pages, key=lambda page_key: indexed_pages[page_key]['count'], reverse=True)

def get_pages_info(hashed_urls):
  """Fetch the stored `/pages/<hash>` record for each hashed URL.

  `None` entries in `hashed_urls` are skipped, and so are hashes whose lookup
  returns a falsy value. Records come back in input order; the result is an
  empty list when nothing was found.
  """
  found_pages = []
  for url_hash in hashed_urls:
    if url_hash is None:
      continue
    record = fbConn.get('/pages/' + url_hash, None)
    if record:
      found_pages.append(record)
  return found_pages


# Word to search for; exposed as a Colab form field through the @param annotation.
search_word_from_input = "cloud" # @param {type:"string"}


# Rank the pages indexed under the word, then fetch page records for the first five hashes only.
hashed_pages = get_indexed_pages(search_word_from_input)
links = get_pages_info(hashed_pages[:5])


# Print one page record per line.
for link in links:
    print(link)



{'title': 'Tencent - Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/about.html'}


In [None]:
# Example usage
# @title Add Index Word
# Word for the single-word add_index_word(new_index) call further down, which is currently commented out.
new_index = "business" # @param {type:"string"}


def hash_word(word_to_hash):
  """Return the hexadecimal MD5 digest of `word_to_hash`, encoded with `str.encode()` defaults."""
  encoded_word = word_to_hash.encode()
  digest = hashlib.md5(encoded_word)
  return digest.hexdigest()

def add_index_word(word):
  """Store `word` under `/words/<hash_word(word)>` as `{"term": word}`.

  Args:
    word: The index word to register.

  Returns:
    Whatever `fbConn.put` returns for the write, so callers can inspect it.
    (The response used to be assigned to an unused local and dropped.)
  """
  return fbConn.put('/words', hash_word(word), data={"term": word})

#add_index_word(new_index)

# Register each of these words through add_index_word, which writes it under /words.
queries = ["tencent","cloud","games","studio","holdings","business","china","nvidia","graphics","development"]
for word in queries:
  add_index_word(word)

In [None]:
# @title Word Frequency Data

def get_index_data():
    """Fetch everything stored under the `index` node and return it unmodified."""
    return fbConn.get('index', None)

def getAverageFrequencyData():
  """Build chart points holding the average per-page count of every indexed word.

  Returns:
    A list with one dict per word:
      'x': number of page entries stored under the word,
      'y': the word's summed `count` divided by that number, rounded with
           the built-in `round`,
      'label': the word itself.
    An empty list when the index holds no data.
  """
  # get_index_data() can return nothing when the index is empty; fall back to
  # an empty mapping instead of failing on `.items()`.
  word_data = get_index_data() or {}

  # One pass is enough: each chart point needs only the word's own entries.
  chart_data = []
  for word, links in word_data.items():
    num_links = len(links)
    total_count = sum(entry['count'] for entry in links.values())
    chart_data.append({'x': num_links, 'y': round(total_count / num_links), 'label': word})

  return chart_data

def getTotalFrequencyData():
  """Build chart points holding the total count of every indexed word.

  Returns:
    A list with one dict per word:
      'x': number of page entries stored under the word,
      'y': sum of the `count` values of those entries,
      'label': the word itself.
    An empty list when the index holds no data.
  """
  # get_index_data() can return nothing when the index is empty; fall back to
  # an empty mapping instead of failing on `.items()`.
  word_data = get_index_data() or {}

  return [
    {
      'x': len(links),
      'y': sum(entry['count'] for entry in links.values()),
      'label': word,
    }
    for word, links in word_data.items()
  ]

# Show both chart datasets: first the average count per page, then the total count, for each indexed word.
print(getAverageFrequencyData())
print(getTotalFrequencyData())


[{'x': 16, 'y': 2, 'label': 'china'}, {'x': 13, 'y': 7, 'label': 'cloud'}, {'x': 16, 'y': 26, 'label': 'tencent'}]
[{'x': 16, 'y': 30, 'label': 'china'}, {'x': 13, 'y': 88, 'label': 'cloud'}, {'x': 16, 'y': 415, 'label': 'tencent'}]


Users
---
___


In [None]:
# @title Sign up
def hash_user(user,password):
  """Return the MD5 hex digest of `user` immediately followed by `password`.

  The two strings are joined with no separator, so different (user, password)
  splits of the same combined text produce the same digest.
  """
  return hashlib.md5((user + password).encode()).hexdigest()

def sign_up(username,password):
  """Create a user record under `/users/<hash_user(username, password)>`.

  Args:
    username: Name stored in the record.
    password: Password stored in the record.

  Prints the write response with its `password` field left out, so the
  plain-text password does not end up in the saved notebook output.
  Returns None.

  NOTE(review): the record written to the database still contains the
  password in plain text. Changing that would alter the stored schema, so it
  is only flagged here.
  """
  hashed_user = hash_user(username,password)
  data_to_upload = {
    "username": username,
    "password": password
  }
  result = fbConn.put('/users',hashed_user,data=data_to_upload)
  # Echo everything except the password field.
  if isinstance(result, dict):
    print({field: value for field, value in result.items() if field != "password"})
  else:
    print(result)



# Demo account; the sign-in cell further down uses the same username and password.
sign_up("ward","pass")

{'password': 'pass', 'username': 'ward'}


In [None]:
# @title Sign in
def hash_user(user,password):
  """Compute a user's record key: the MD5 hex digest of user + password.

  Same computation as the `hash_user` defined in the "Sign up" cell.
  """
  combined = "".join((user, password))
  return hashlib.md5(combined.encode()).hexdigest()

def sign_in(username,password):
  """Start a session for an existing user and return the new session's key.

  Looks up `/users/<hash_user(username, password)>`. When that lookup returns
  None, this returns None. Otherwise it posts a record with the user hash and
  the current `datetime.now()` to `sessions` and returns the `name` field of
  the post response.
  """
  user_key = hash_user(username,password)
  if fbConn.get('/users/' + user_key, None) is None:
    return None

  new_session = {
      "user": user_key,
      "start_time": datetime.now()
  }
  response = fbConn.post("sessions", data=new_session)
  return response['name']



# Sign in with the demo account; the cell's displayed value is sign_in's return value.
sign_in("ward","pass")

'-Nu0_JAk68P1myeefJZ-'

In [None]:
# @title New Word Search. Also adds Session

def sort_pages(pages_to_sort):
  """Rank page hashes by their count weighted by the number of pointing pages.

  For each key, the stored `count` is multiplied by the length of
  `pointed/<key>/pointed_from` when that node has content; otherwise the
  count is used unchanged.

  Args:
    pages_to_sort: Mapping of page hash -> dict with a numeric 'count', or
      None/empty when there is nothing to rank.

  Returns:
    List of page hashes ordered by descending weighted count; [] for None or
    empty input.
  """
  # Guard before iterating: looping over None would raise TypeError.
  if not pages_to_sort:
    return []

  # Weighted scores live in their own dict so the caller's data is left
  # untouched and repeated calls on the same mapping give the same ranking.
  weighted_counts = {}
  for key, page in pages_to_sort.items():
    pointed_from = fbConn.get('pointed/' + key + '/pointed_from', None)
    weight = len(pointed_from) if pointed_from else 1
    weighted_counts[key] = weight * page['count']

  return sorted(weighted_counts, key=weighted_counts.get, reverse=True)

def get_indexed_pages(query,session):
  """Search the index for `query`, log the search to a session, return ranked hashes.

  Once this cell runs, this two-argument version replaces the one-argument
  `get_indexed_pages` defined in the "Search Words" cell.

  Args:
    query: Index word; appended as-is to the `/index/` path.
    session: Key of the node under `/sessions/` that receives the log entry.

  Returns:
    Page hashes ranked by sort_pages(), or [] when the lookup returns no data
    for `query`.
  """
  indexed_pages = fbConn.get('/index/' + query, None)

  # The lookup can come back empty; skip ranking in that case rather than
  # handing a non-mapping value to sort_pages().
  link_hashes = sort_pages(indexed_pages) if indexed_pages else []

  # Record the query and its result under the session.
  session_data={
      "action": "search",
      "datetime": datetime.now(),
      "input": query,
      "output": link_hashes
  }
  fbConn.post("/sessions/"+session, data=session_data)

  return link_hashes

def get_pages_info(hashed_urls):
  """Return the `/pages/<hash>` records for the given hashed URLs, in input order.

  Entries that are `None` are not looked up, and lookups that return a falsy
  value are left out. The result is an empty list when nothing was found.
  """
  # Lazy generator: lookups still happen one at a time, in input order.
  lookups = (
    fbConn.get('/pages/' + url_hash, None)
    for url_hash in hashed_urls
    if url_hash is not None
  )
  return [record for record in lookups if record]


# NOTE(review): the session key is hardcoded; it equals the value shown as the output of the
# sign_in("ward","pass") cell above. A fresh run creates a different key — consider passing
# sign_in's return value instead.
hashed_pages = get_indexed_pages("china","-Nu0_JAk68P1myeefJZ-")
# Fetch page records for the first five ranked hashes only.
links = get_pages_info(hashed_pages[:5])


# Print one page record per line.
for link in links:
    print(link)


{'title': 'Media - Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/media/news.html?type=financial'}
{'title': 'Media - Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/media/news.html?type=media'}
{'title': 'Investors - Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/investors/board-members.html'}
{'title': 'Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/index.html'}
{'title': 'Employees - Tencent 腾讯', 'url': 'https://www.tencent.com/en-us/employees.html#staff-con-4'}
