<a href="https://colab.research.google.com/github/Avina20/Steganography/blob/main/steganography.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import re
from typing import List, Tuple, Dict

In [3]:
def parse_character_data(html_content: str) -> List[Tuple[int, str, int]]:
    """
    Parse character data from the HTML content of the Google Doc.

    The first ``<table>`` in the document is tried first: every row that has
    at least three ``<td>`` cells, whose first and third cells parse as
    integers and whose second cell is non-empty, yields one entry. Rows that
    do not parse (for example a textual header row) are skipped.

    If the table path produces nothing, all tags are replaced by line breaks
    and the resulting non-empty lines are scanned for consecutive
    "number, single character, number" triples.

    HTML entities (``&amp;``, ``&#9608;``, ...) are decoded in both paths so
    the returned character is the real character, not its escaped form.

    Args:
        html_content: Raw HTML text of the document.

    Returns:
        List of tuples containing (x_coord, character, y_coord)
    """
    # Function-scope import keeps this block independent of the import cell.
    import html

    tag_re = re.compile(r'<[^>]+>')
    flags = re.DOTALL | re.IGNORECASE

    def cell_text(fragment: str) -> str:
        # Strip tags first, then decode entities, so an escaped "&lt;b&gt;"
        # is kept as literal text instead of being removed as a tag.
        return html.unescape(tag_re.sub('', fragment)).strip()

    def looks_like_int(text: str) -> bool:
        return text.lstrip('-').isdigit()

    characters: List[Tuple[int, str, int]] = []

    # Try to parse as HTML table first
    table_match = re.search(r'<table[^>]*>(.*?)</table>', html_content, flags)
    if table_match:
        rows = re.findall(r'<tr[^>]*>(.*?)</tr>', table_match.group(1), flags)
        for row in rows:
            cells = re.findall(r'<td[^>]*>(.*?)</td>', row, flags)
            if len(cells) < 3:
                continue

            char_str = cell_text(cells[1])
            try:
                x_coord = int(cell_text(cells[0]))
                y_coord = int(cell_text(cells[2]))
            except ValueError:
                # Non-numeric coordinates: a header row or malformed data.
                # No positional header skip is needed, and a table without
                # a header keeps its first data row.
                continue

            if char_str:  # Make sure character is not empty
                characters.append((x_coord, char_str, y_coord))

    # If no table found or no data extracted, try plain text parsing
    if not characters:
        # Replace tags by newlines so each text node lands on its own line
        clean_text = tag_re.sub('\n', html_content)
        lines = [
            text
            for text in (html.unescape(raw).strip() for raw in clean_text.split('\n'))
            if text
        ]

        # Look for data in sequence: number, character, number
        i = 0
        while i < len(lines) - 2:
            x_str, char, y_str = lines[i:i + 3]
            if looks_like_int(x_str) and len(char) == 1 and looks_like_int(y_str):
                try:
                    entry = (int(x_str), char, int(y_str))
                except ValueError:
                    # e.g. "--5" passes the digit check but is not an int
                    i += 1
                    continue
                characters.append(entry)
                i += 3
            else:
                i += 1

    return characters


In [4]:
def parse_google_doc_grid(url: str, debug: bool = False, timeout: float = 30.0) -> None:
    """
    Reads a Google Doc containing Unicode characters and their 2D grid positions,
    then prints the grid with characters forming a graphic message.

    Args:
        url (str): URL to the Google Doc (should be a /pub URL for public access)
        debug (bool): If True, print debug information
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    The document format should contain:
    - x-coordinate: horizontal position (0 = leftmost)
    - Character: Unicode character to place
    - y-coordinate: vertical position (0 = bottom)

    Coordinate system: (0,0) is bottom-left, x increases left-to-right, y increases bottom-to-top

    Returns:
        None. Results and errors are reported on standard output; request
        failures (including timeouts and HTTP error statuses) are caught and
        printed rather than raised.
    """

    # Fetch the document content; only the network calls sit inside the try
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching document: {e}")
        return

    html_content = response.text

    if debug:
        print("Raw HTML content (first 1000 chars):")
        print(html_content[:1000])
        print("\n" + "="*50 + "\n")

    # Parse the character data from the HTML
    characters = parse_character_data(html_content)

    if debug:
        print(f"Parsed characters: {characters}")
        print("\n" + "="*50 + "\n")

    if not characters:
        print("No character data found in the document")

        # Try to show what we did find for debugging
        if debug:
            clean_text = re.sub(r'<[^>]+>', '\n', html_content)
            lines = [line.strip() for line in clean_text.split('\n') if line.strip()]
            print("Clean text lines:")
            for i, line in enumerate(lines[:20]):  # Show first 20 lines
                print(f"{i}: '{line}'")
        return

    # Create and print the grid
    print_grid(characters)

In [5]:
def print_grid(characters: List[Tuple[int, str, int]]) -> None:
    """
    Render the characters on a 2D grid and print it row by row.

    The grid spans the bounding box of all given coordinates. Rows are
    printed from the largest y down to the smallest, so (min_x, min_y) ends
    up at the bottom-left. Cells without a character are shown as a space,
    and trailing whitespace is stripped from each printed row. A summary of
    the grid size and the number of input entries follows.

    Args:
        characters: List of (x_coord, character, y_coord) tuples
    """
    if not characters:
        return

    # Bounding box of the drawing
    xs = [entry[0] for entry in characters]
    ys = [entry[2] for entry in characters]
    left, right = min(xs), max(xs)
    bottom, top = min(ys), max(ys)

    # Position -> character; a later entry at the same position wins
    cells: Dict[Tuple[int, int], str] = {
        (x, y): glyph for x, glyph, y in characters
    }

    # Top row first, because y grows upwards in the document's coordinates
    for y in range(top, bottom - 1, -1):
        line = "".join(cells.get((x, y), " ") for x in range(left, right + 1))
        print(line.rstrip())

    width = right - left + 1
    height = top - bottom + 1
    print(f"\nGrid dimensions: {width} x {height}")
    print(f"Total characters: {len(characters)}")

In [6]:
# Placeholder: enter your google doc link here before running this cell
example_url = ""

# Banner line followed by a 50-character rule
print("Parsing Google Doc and rendering grid...", "=" * 50, sep="\n")

# Debug mode is on so the fetched HTML and parsed entries are shown too
parse_google_doc_grid(example_url, debug=True)

Parsing Google Doc and rendering grid...
Raw HTML content (first 1000 chars):
<!DOCTYPE html><html><head><title>Coding assessment input data</title><link rel="shortcut icon" href="https://ssl.gstatic.com/docs/documents/images/kix-favicon-2023q4.ico"><meta name="referrer" content="origin"><script nonce="O3353z-Oq1dd7kHWJJKFQw">
        var DOCS_timing = {};
        var DOCS_drawing_load = {};
        var DOCS_drawing_decode = {};

        function _DOCS_record_fn(id) {
          if (window.performance && window.performance.now) {
            DOCS_drawing_load[id] = performance.now();
            const imgEl = document.getElementById(id);
            if (imgEl && imgEl.decode) {
              imgEl.decode().then(() => {
                DOCS_drawing_decode[id] = performance.now();
                if (typeof DOCS_notifyDrawingDecode === 'function') {
                  DOCS_notifyDrawingDecode(id);
                }
              });
            }
          }

          if (typeof DOCS_noti