In [72]:
#Coding Exercise: Decoding a Secret Msg
#Python
#Aubay Azzarouk 

#Problem: 
#Decode Google Doc containing Unicode Characters, and their positions in a 2-d Grid.

#Your task is to write a function that takes in a URL
#for such a Google Doc as an argument,
#retrieves and parses the data in the doc,
#and prints the grid of characters.

#When printed in a fixed-width font, the characters in the grid will form a graphic showing a 
#sequence of uppercase letters, which is the secret message.

''' Key points: 

- The document specifies the Unicode characters in the grid, along with x, y coordinates of each character. 
- The min possible value of these coordinates is 0, 
there is no max possible values, 
- so the grid can be arbitrarily large. 
- Any positions in the grid that do not have a specified character, 
- should be filled with a space character. 
- You can assume the doc will always have the same format 
- as the example doc linked above. 

- Helper functions , w / at least one func. 
- takes in one arg, string. url, w / input data. 
- When called, prints the grid of characters specified by input data, displaying 
- graphic of correctly oriented uppercase letters. 

'''


' Key points: \n\n- The document specifies the Unicode characters in the grid, along with x, y coordinates of each charcter. \n- The min possible value of these coordinates is 0, \nthere is no max possible values, \n- so the grid can be arbitrarily large. \n- Any positions in the grid that do not have a specified character, \n- should be filled with a space character. \n- You can assume the doc will always have the same format \n- as the example doc linked above. \n\n- Helper functions , w / at least one func. \n- takes in one arg, string. url, w / input data. \n- When called, prints the grid of characters specified by input data, displaying \n- graphic of correctly oriented up case leters. \n\n'

In [73]:
pip install bs4


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [74]:
#Using BeautifulSoup class to extract/parse the HTML google doc: 
from bs4 import BeautifulSoup # version 4 for update. 
import httpx #import http client lib for data extraction. 

#URL of the published Google Doc whose character/coordinate table is decoded below.
google_doc = "https://docs.google.com/document/d/e/2PACX-1vRMx5YQlZNa3ra8dYYxmv-QIQ3YJe8tbI3kqcuC7lQiZm-CSEznKfN_HYNSpoXcZIV3Y_O3YoUB1ecq/pub"
#read_google_doc(google_doc) 

#Modularized Code to decode a secret msg.

#Helper functions: 
def fetch_html(url: str) -> str:
    '''Download the page at `url` and hand back its body as text.

    Args:
        url: address to request with an HTTP GET.

    Returns:
        The response body decoded to a str.

    raise_for_status() is called on the response before returning, so an
    HTTP error status surfaces as an exception instead of being parsed as data.
    '''
    response = httpx.get(url)
    response.raise_for_status()
    page_text = response.text
    return page_text

def parse_html_table(html:str) -> list:
    '''Extract (x, y, char) tuples from the table rows of an HTML document.

    Every <tr> in the document is inspected. A row is treated as data only if
    it has exactly three <td> cells laid out as (x, character, y) and both
    coordinate cells parse as integers. Header rows and any other non-data
    rows are skipped instead of being dropped by position, so the function
    works whether or not the table starts with a header and does not crash
    on a non-numeric row further down.

    Args:
        html: raw HTML text of the document.

    Returns:
        A list of (x, y, char) tuples in document order; empty if no data
        rows were found.
    '''
    #build the parse tree with the parser that ships with Python
    parsed_html_tree = BeautifulSoup(html, 'html.parser')

    coordinates = []
    for row in parsed_html_tree.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 3: #not a data row
            continue

        x_text = cells[0].text.strip()
        char = cells[1].text.strip()
        y_text = cells[2].text.strip()

        try:
            x = int(x_text)
            y = int(y_text)
        except ValueError:
            #non-numeric coordinates -> header (or other non-data) row
            continue

        #a whitespace-only cell strips to ''; keep one space so the grid
        #row keeps its width when the characters are joined for printing
        coordinates.append((x, y, char if char else ' '))

    return coordinates

def build_grid(coords: list) -> list:
    '''Build a 2-D grid (list of rows) from (x, y, char) tuples.

    The grid is indexed as grid[y][x]. Its width and height are one more than
    the largest x and y seen; cells with no character are filled with a
    single space. If the same (x, y) appears more than once, the last
    occurrence wins.

    Args:
        coords: iterable of (x, y, char) tuples. Coordinates are assumed to be
            non-negative integers, per the problem statement.

    Returns:
        A list of rows, each a list of single-character strings. An empty
        input yields an empty list instead of raising.
    '''
    #materialise once: the data is walked three times (max x, max y, fill),
    #which would exhaust a generator or other one-shot iterable
    coords = list(coords)

    #max() raises ValueError on an empty sequence, so handle "no data" up front
    if not coords:
        return []

    width = max(x for x, _, _ in coords) + 1
    height = max(y for _, y, _ in coords) + 1

    #each row is built separately so the rows do not alias one another
    grid = [[' '] * width for _ in range(height)]

    #place each char at its x,y coordinate
    for x, y, char in coords:
        grid[y][x] = char

    return grid

#reversed grid print (decoded msg (letter 'F'))          
def print_grid_as_msg(grid:list):
    '''Print the grid with the highest row first, so y = 0 ends up at the bottom.

    Each printed line starts with the row's y index, zero-padded to two
    digits, followed by a space and the row's characters joined together.
    '''
    #walk the row indices from the last row down to row 0
    for row_index in range(len(grid) - 1, -1, -1):
        line = ''.join(grid[row_index])
        print(f"{row_index:02d} {line}")
    
#Main
def read_google_doc(url_id: str):
    '''Fetch the document at `url_id`, parse its table, and print the decoded grid.

    Runs the helpers in order: fetch_html -> parse_html_table -> build_grid
    -> print_grid_as_msg. Nothing is returned; the result is printed.
    '''
    page_html = fetch_html(url_id)
    char_positions = parse_html_table(page_html)
    print_grid_as_msg(build_grid(char_positions))

#Second published document, used as an additional test input.
new_msg = 'https://docs.google.com/document/d/e/2PACX-1vTER-wL5E8YC9pxDx43gk8eIds59GtUUk4nJo_ZWagbnrH0NFvMXIw6VWFLpf5tWTZIT9P9oLIoFJ6A/pub'

#Decode the example document first, then the test document.
for doc_url in (google_doc, new_msg):
    read_google_doc(doc_url)


    

02 █▀▀▀
01 █▀▀ 
00 █   
06 ██░    ███░ ██████░    ███████░  ██░           ███░ ██████████░ ████████░    ████████░  
05 ██░  ███░     ██░    ███░    ██░ ███░   ███░   ██░  ██░         ██░     ██░  ██░     ██░
04 ██░███░       ██░   ███░          ██░  █████░ ███░  ██░         ██░      ██░ ██░     ██░
03 ████░         ██░   ██░           ███░ ██░██░ ██░   ████████░   ██░      ██░ ████████░  
02 ██░███░       ██░   ███░           ██░██░ ██░██░    ██░         ██░      ██░ ██░     ██░
01 ██░  ███░     ██░    ███░    ██░   ████░   ████░    ██░         ██░     ██░  ██░     ██░
00 ██░    ███░ ██████░    ███████░     ██░     ██░     ██████████░ ████████░    ████████░  
