# Markdown to Google Docs Converter

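This notebook converts Markdown meeting notes into a formatted Google Doc with proper heading styles, nested bullet points, checkboxes, styled @mentions, and a de-emphasized footer. It must be run in Google Colab, because it relies on `google.colab` for authentication and file upload.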

In [27]:
# Import libraries for Google Docs API, file upload, and retry handling
import os
import re
import time
import socket

from google.colab import auth, files
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [28]:
# Sign in to Google so that later API calls run with this Colab user's credentials.
# A failure is reported on stdout instead of being raised.
try:
    auth.authenticate_user()
    print("✅ Authentication successful!")
except Exception as e:
    failure_report = (
        "❌ Authentication failed. "
        "Ensure you are logged into a Google account in this Colab session.\n"
        f"Error: {e}"
    )
    print(failure_report)

✅ Authentication successful!


In [29]:
def parse_markdown_line(line: str, spaces_per_level: int = 2):
    """
    Parse a single markdown line to detect style, indentation, and list type.

    Args:
        line: One line of markdown, with or without its trailing line ending.
        spaces_per_level: Number of leading spaces that make up one nesting
            level (default 2). Each leading tab counts as one full level.

    Returns:
        None for a blank or whitespace-only line, otherwise a tuple
        (text, style, indent_level, is_bullet, is_checkbox):
            text         -- the line content with its markdown marker removed
            style        -- 'HEADING_1'..'HEADING_6' or 'NORMAL_TEXT'
            indent_level -- nesting depth for list items (always 0 for
                            headings and plain text)
            is_bullet    -- True for '-' / '*' list items
            is_checkbox  -- True for '- [ ]' / '- [x]' style task items
                            (is_bullet is False for these)

    Raises:
        ValueError: If spaces_per_level is less than 1.
    """
    if spaces_per_level < 1:
        raise ValueError("spaces_per_level must be at least 1")

    # Splitting CRLF content on "\n" leaves a trailing "\r" on every line;
    # drop line terminators so they never end up in the returned text.
    line = line.rstrip("\r\n")

    stripped_line = line.lstrip()
    if not stripped_line:
        return None

    # Calculate indentation depth. A tab is treated as one whole level so that
    # tab-indented lists nest the same way as space-indented ones.
    leading_whitespace = line[:len(line) - len(stripped_line)]
    tab_count = leading_whitespace.count("\t")
    indent_width = tab_count * spaces_per_level + (len(leading_whitespace) - tab_count)
    indent_level = indent_width // spaces_per_level

    # 1. Heading detection (# to ######)
    heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped_line)
    if heading_match:
        return (
            heading_match.group(2),
            f'HEADING_{len(heading_match.group(1))}',
            0,
            False,
            False,
        )

    # 2. Checkbox detection. One regex both detects and strips the marker so
    #    the two can never disagree: accepts '-' or '*', any of [ ], [x], [X],
    #    and a task with no text at all. The marker must be followed by
    #    whitespace or end-of-line, so a link such as "- [x](url)" stays a bullet.
    checkbox_match = re.match(r'^[-*]\s+\[[ xX]\](?:\s+|$)(.*)', stripped_line)
    if checkbox_match:
        return checkbox_match.group(1), 'NORMAL_TEXT', indent_level, False, True

    # 3. Bullet detection (* or -)
    bullet_match = re.match(r'^[-*]\s+(.*)', stripped_line)
    if bullet_match:
        return bullet_match.group(1), 'NORMAL_TEXT', indent_level, True, False

    # Default case: plain paragraph text
    return stripped_line, 'NORMAL_TEXT', 0, False, False

In [30]:
def execute_with_backoff(request_func, max_attempts: int = 3, base_delay: float = 1.0):
    """
    Execute a Google API request, retrying with exponential backoff on
    rate limits, transient server errors, and network errors.

    Args:
        request_func: A request object exposing an ``execute()`` method
            (for example the result of ``service.documents().batchUpdate(...)``).
        max_attempts: Total number of times to call ``execute()`` (default 3).
        base_delay: Seconds to wait after the first failure; the wait doubles
            after each subsequent failure (default 1.0 -> 1s, 2s, ...).

    Returns:
        Whatever ``request_func.execute()`` returns on the first success.

    Raises:
        ValueError: If max_attempts is less than 1.
        HttpError: Immediately, for any non-retriable HTTP status.
        RuntimeError: When every attempt failed with a retriable error. The
            last underlying error is attached as ``__cause__`` and included
            in the message so the real reason is not lost.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # Rate limiting plus the transient 5xx family.
    retriable_statuses = {429, 500, 502, 503, 504}
    last_error = None

    # NOTE(review): a request that timed out client-side may still have been
    # applied server-side, so retrying a non-idempotent call could apply it
    # twice -- confirm this is acceptable for the callers.
    for attempt in range(max_attempts):
        try:
            return request_func.execute()
        except OSError as e:
            # Transient network problem. socket.timeout is an OSError
            # subclass, so it is covered here as well.
            last_error = e
        except HttpError as e:
            if e.resp.status not in retriable_statuses:
                # Non-retriable API error
                raise
            last_error = e

        # Back off only when another attempt will follow; sleeping after the
        # final failure would just delay the error report.
        if attempt < max_attempts - 1:
            time.sleep(base_delay * (2 ** attempt))

    raise RuntimeError(
        f"Request failed after multiple retries. Last error: {last_error}"
    ) from last_error

In [31]:
# Characters the Docs API removes from inserted text (per the InsertTextRequest
# reference): control characters U+0000-U+0008 and U+000C-U+001F, and the BMP
# Private Use Area. They are removed up front so the locally computed indices
# match what actually lands in the document.
_DOCS_STRIPPED_CHARS = re.compile(r"[\x00-\x08\x0c-\x1f\ue000-\uf8ff]")


def _utf16_len(text: str) -> int:
    """Return the length of text in UTF-16 code units, the unit Docs indices use."""
    return len(text.encode("utf-16-le")) // 2


def _build_doc_requests(content: str) -> list:
    """Translate markdown text into an ordered list of Docs batchUpdate requests."""
    # Normalize CRLF / CR line endings first, then drop characters the API
    # would strip anyway (this order matters: "\r" is one of those characters).
    normalized = content.replace("\r\n", "\n").replace("\r", "\n")
    normalized = _DOCS_STRIPPED_CHARS.sub("", normalized)

    lines = normalized.split("\n")
    # A file ending in a newline yields one trailing empty item that is not a
    # real blank line; keeping it would append a spurious empty paragraph.
    if lines and lines[-1] == "":
        lines.pop()

    footer_keywords = ("recorded by", "duration", "---")
    requests = []

    # Body content of a new document starts at index 1.
    current_index = 1

    for line in lines:
        parsed = parse_markdown_line(line)

        # Blank line: insert an empty paragraph for spacing.
        if not parsed:
            requests.append({
                "insertText": {
                    "location": {"index": current_index},
                    "text": "\n"
                }
            })
            current_index += 1
            continue

        clean_text, style, indent, is_bullet, is_checkbox = parsed
        full_text = clean_text + "\n"

        # Docs indices count UTF-16 code units, not Python code points; using
        # len() would shift every later range after an emoji or other
        # astral-plane character.
        end_index = current_index + _utf16_len(full_text)

        # 1. Insert text
        requests.append({
            "insertText": {
                "location": {"index": current_index},
                "text": full_text
            }
        })

        # 2. Heading styles
        if style.startswith("HEADING"):
            requests.append({
                "updateParagraphStyle": {
                    "range": {"startIndex": current_index, "endIndex": end_index},
                    "paragraphStyle": {"namedStyleType": style},
                    "fields": "namedStyleType"
                }
            })

        # 3. Lists and nested indentation
        if is_bullet or is_checkbox:
            preset = "BULLET_CHECKBOX" if is_checkbox else "BULLET_DISC_CIRCLE_SQUARE"
            requests.append({
                "createParagraphBullets": {
                    "range": {"startIndex": current_index, "endIndex": end_index},
                    "bulletPreset": preset
                }
            })

            if indent > 0:
                # 36pt of extra indentation per nesting level.
                indent_size = {"magnitude": indent * 36, "unit": "PT"}
                requests.append({
                    "updateParagraphStyle": {
                        "range": {"startIndex": current_index, "endIndex": end_index},
                        "paragraphStyle": {
                            "indentFirstLine": dict(indent_size),
                            "indentStart": dict(indent_size)
                        },
                        "fields": "indentFirstLine,indentStart"
                    }
                })

        # 4. Stylize mentions (@name). Regex offsets are in code points, so
        #    convert them to UTF-16 units before adding to the document index.
        for mention in re.finditer(r"@\w+", full_text):
            mention_start = current_index + _utf16_len(full_text[:mention.start()])
            mention_end = mention_start + _utf16_len(mention.group())
            requests.append({
                "updateTextStyle": {
                    "range": {"startIndex": mention_start, "endIndex": mention_end},
                    "textStyle": {
                        "bold": True,
                        "foregroundColor": {
                            "color": {
                                "rgbColor": {"red": 0.1, "green": 0.2, "blue": 0.7}
                            }
                        }
                    },
                    "fields": "bold,foregroundColor"
                }
            })

        # 5. Footer styling: any line containing one of the keywords is
        #    rendered small, grey and italic.
        lowered_line = line.lower()
        if any(key in lowered_line for key in footer_keywords):
            requests.append({
                "updateTextStyle": {
                    "range": {"startIndex": current_index, "endIndex": end_index},
                    "textStyle": {
                        "italic": True,
                        "fontSize": {"magnitude": 9, "unit": "PT"},
                        "foregroundColor": {
                            "color": {
                                "rgbColor": {"red": 0.4, "green": 0.4, "blue": 0.4}
                            }
                        }
                    },
                    "fields": "italic,fontSize,foregroundColor"
                }
            })

        current_index = end_index

    return requests


def convert_md_to_google_doc():
    """
    Upload a markdown (.md) file, convert it into a formatted Google Doc,
    and apply headings, nested bullet lists, checkboxes, mentions, and footer styling.

    Only the first uploaded file is converted. Progress and errors are
    reported with print(); the function returns None in every case.

    Handles:
        - Missing file selection
        - Files that are not valid UTF-8 (a leading BOM is tolerated)
        - Empty files (the document is created but left without content)
        - Google Docs API errors
        - Transient network errors via execute_with_backoff
    """
    # --- Step 1: File input ---
    print("Upload your .md file:")
    uploaded_file = files.upload()
    if not uploaded_file:
        print("Error: No file selected.")
        return

    file_name = next(iter(uploaded_file))
    try:
        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # sit in front of the first "#" and hide a first-line heading.
        content = uploaded_file[file_name].decode("utf-8-sig")
    except UnicodeDecodeError as e:
        print(f"Error: {file_name} is not valid UTF-8 text: {e}")
        return

    # Remove only a trailing ".md" extension; str.replace would also delete
    # ".md" occurring in the middle of the name.
    root, extension = os.path.splitext(file_name)
    title = root if extension.lower() == ".md" and root else file_name

    # --- Step 2: Initialize API ---
    try:
        service = build("docs", "v1")
    except Exception as e:
        print(f"Error initializing Google Docs API client: {e}")
        return

    try:
        # Same retry policy as the content upload below.
        doc = execute_with_backoff(
            service.documents().create(body={"title": title})
        )
        doc_id = doc.get("documentId")
    except Exception as e:
        print(f"Error creating Google Doc: {e}")
        return

    try:
        # --- Step 3: Build requests from markdown ---
        requests = _build_doc_requests(content)

        if requests:
            # Execute all formatting requests at once
            execute_with_backoff(
                service.documents().batchUpdate(
                    documentId=doc_id, body={"requests": requests}
                )
            )
        else:
            # Skip the batchUpdate call entirely when there is nothing to send.
            print("⚠️ The uploaded file is empty; the document was created without content.")

        print("\n✅ SUCCESS!")
        print(f"Link: https://docs.google.com/document/d/{doc_id}/edit")

    except HttpError as err:
        print(f"❌ API ERROR: {err.resp.status} - {err._get_reason()}")
    except socket.timeout:
        print("❌ ERROR: Connection timed out.")
    except Exception as e:
        print(f"❌ UNEXPECTED ERROR: {e}")

In [32]:
# Run the full markdown → Google Doc conversion.
# Prompts for a .md upload and, on success, prints a link to the created document.
convert_md_to_google_doc()

Upload your .md file:




Saving meeting.md to meeting (7).md





✅ SUCCESS!
Link: https://docs.google.com/document/d/1Ov4OwQ9LNRNA6uWvVz_o7E2FL7O-wMBKzmejZ_foChA/edit
