In [16]:
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib --quiet

from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials

import re

In [17]:
# Authenticate the Colab user; this must run before credentials are requested below.
auth.authenticate_user()

# If you have previously downloaded credentials, you could load them instead.
# For simplicity, we let `auth.authenticate_user()` handle the login in Colab.

import google.auth
# google.auth.default() yields a (credentials, project id) pair; only the
# credentials are used by this cell.
creds, project_id = google.auth.default()
from googleapiclient.discovery import build
# Client for version 'v1' of the 'docs' service; later cells call it via the
# module-level name `docs_service`.
docs_service = build('docs', 'v1', credentials=creds)

In [18]:
# Sample meeting notes used as the conversion input. The text exercises the
# constructs the parser recognises: headings (#, ##, ###), nested '*' bullets,
# '-' bullets, '- [ ]' checkboxes with @mentions, a '---' rule and plain lines.
markdown_text = """
# Product Team Sync - May 15, 2023

## Attendees
- Sarah Chen (Product Lead)
- Mike Johnson (Engineering)
- Anna Smith (Design)
- David Park (QA)

## Agenda

### 1. Sprint Review
* Completed Features
  * User authentication flow
  * Dashboard redesign
  * Performance optimization
    * Reduced load time by 40%
    * Implemented caching solution
* Pending Items
  * Mobile responsive fixes
  * Beta testing feedback integration

### 2. Current Challenges
* Resource constraints in QA team
* Third-party API integration delays
* User feedback on new UI
  * Navigation confusion
  * Color contrast issues

### 3. Next Sprint Planning
* Priority Features
  * Payment gateway integration
  * User profile enhancement
  * Analytics dashboard
* Technical Debt
  * Code refactoring
  * Documentation updates

## Action Items
- [ ] @sarah: Finalize Q3 roadmap by Friday
- [ ] @mike: Schedule technical review for payment integration
- [ ] @anna: Share updated design system documentation
- [ ] @david: Prepare QA resource allocation proposal

## Next Steps
* Schedule individual team reviews
* Update sprint board
* Share meeting summary with stakeholders

## Notes
* Next sync scheduled for May 22, 2023
* Platform demo for stakeholders on May 25
* Remember to update JIRA tickets

---
Meeting recorded by: Sarah Chen
Duration: 45 minutes
"""

In [19]:
# =======================================
# Step 4: Utility - Parse Markdown Lines
# =======================================
# Pre-compiled patterns shared by parse_markdown_lines.
# @mention: '@' followed by letters, digits, '_' or '-'.
_MENTION_RE = re.compile(r'@[A-Za-z0-9_-]+')
# ATX heading: 1-6 hashes at column 0, then whitespace, then the title.
_HEADING_RE = re.compile(r'^(#{1,6})\s+(.*)')
# Bullet: optional indent, '-' or '*', then either whitespace + text or nothing.
# Requiring whitespace keeps '**bold** text' and '-5 degrees' as paragraphs.
_BULLET_RE = re.compile(r'^(\s*)[-*](?:\s+(.*))?$')
# Task-list marker at the start of a bullet body: '[ ]', '[x]' or '[X]'.
_CHECKBOX_RE = re.compile(r'^\[([ xX])\]\s*(.*)')
# Lines that are treated as horizontal rules and dropped.
_RULE_LINES = ('---', '--')
# Width used to convert leading tabs into an equivalent number of spaces.
_TAB_WIDTH = 4


def _make_line(line_type, content, indent=0, checkbox=False, checked=False):
    """Build one parsed-line record, extracting @mentions from ``content``."""
    return {
        'type': line_type,
        'content': content,
        'indent': indent,
        'checkbox': checkbox,
        'checked': checked,
        'mentions': _MENTION_RE.findall(content),
    }


def parse_markdown_lines(markdown_text):
    """
    Very basic line-oriented Markdown parser.

    Recognises:
      - Headings with 1-6 leading hashes (``#`` .. ``######``)
      - Bulleted items (``*`` or ``-`` followed by whitespace)
      - Checkboxes (``- [ ]``, ``- [x]``)
      - Nesting of bullets via leading indentation
      - Special 'mentions' like ``@name``

    Blank lines and rule lines (``---`` / ``--``) are skipped. Anything else,
    including table rows and lines starting with emphasis such as
    ``**bold**``, is returned as a paragraph.

    Args:
        markdown_text: The Markdown source as a single string.

    Returns:
        A list of dicts, one per non-skipped line, each with the keys:
          - ``type``: ``'heading1'``..``'heading6'``, ``'bullet'``,
            ``'checkbox'`` or ``'paragraph'``
          - ``content``: the line text without Markdown markers
          - ``indent``: leading indentation in columns (tabs count as 4);
            always 0 for headings and paragraphs
          - ``checkbox``: True only for checkbox lines
          - ``checked``: True only for checkbox lines marked ``[x]``/``[X]``
          - ``mentions``: list of ``@name`` strings found in ``content``
    """
    parsed_lines = []

    for line in markdown_text.split('\n'):
        raw_line = line.rstrip()
        # Skip blank lines and horizontal rules.
        if not raw_line or raw_line.strip() in _RULE_LINES:
            continue

        heading_match = _HEADING_RE.match(raw_line)
        if heading_match:
            level = len(heading_match.group(1))  # 1 -> H1, ..., 6 -> H6
            parsed_lines.append(
                _make_line(f'heading{level}', heading_match.group(2).strip())
            )
            continue

        bullet_match = _BULLET_RE.match(raw_line)
        if bullet_match:
            # Expand tabs so tab-indented sub-bullets get a comparable indent.
            indent_columns = len(bullet_match.group(1).expandtabs(_TAB_WIDTH))
            # group(2) is None for a bare marker such as '-' (an empty bullet).
            body = bullet_match.group(2) or ''

            checkbox_match = _CHECKBOX_RE.match(body)
            if checkbox_match:
                parsed_lines.append(_make_line(
                    'checkbox',
                    checkbox_match.group(2).strip(),
                    indent=indent_columns,
                    checkbox=True,
                    checked=checkbox_match.group(1) in 'xX',
                ))
            else:
                parsed_lines.append(
                    _make_line('bullet', body.strip(), indent=indent_columns)
                )
            continue

        # If none of the above, treat as normal text.
        parsed_lines.append(_make_line('paragraph', raw_line.strip()))

    return parsed_lines

In [20]:
# ===================================================
# Step 5: Building the Requests for the Docs API
# ===================================================
def _utf16_len(text):
    """Return the length of ``text`` in UTF-16 code units."""
    # 'surrogatepass' keeps lone surrogates from raising; each is one unit.
    return len(text.encode('utf-16-le', 'surrogatepass')) // 2


def _index_range(start_index, end_index):
    """Build a fresh Docs API range dict (end index is exclusive)."""
    return {'startIndex': start_index, 'endIndex': end_index}


def build_requests(parsed_lines, spaces_per_level=2, indent_points=36):
    """
    Builds a list of requests compatible with the Docs API batchUpdate.

    For every parsed line, in order, this emits:
      1. an ``insertText`` request for the content plus a trailing newline,
      2. a paragraph-level request depending on the line type (named heading
         style for headings; ``createParagraphBullets`` and, for indented
         items, an ``indentStart`` update for bullets and checkboxes),
      3. one ``updateTextStyle`` request (bold, red) per @mention.

    Document indexes are tracked in UTF-16 code units, which is the unit the
    Docs API uses, so characters outside the BMP (e.g. emoji) advance the
    index by two.

    Args:
        parsed_lines: Iterable of dicts with the keys ``type``, ``content``,
            ``indent`` and ``mentions`` (as produced by parse_markdown_lines).
        spaces_per_level: Number of indent columns that make one nesting level.
        indent_points: Extra ``indentStart`` in points per nesting level
            (36pt ~ 0.5 inches).

    Returns:
        A list of request dicts, ready to be sent as ``{'requests': ...}``.

    Raises:
        ValueError: If ``spaces_per_level`` is less than 1.
    """
    if spaces_per_level < 1:
        raise ValueError('spaces_per_level must be >= 1')

    batch_requests = []

    # Index where the next insertion happens; body content starts at index 1.
    current_index = 1

    for line_info in parsed_lines:
        content = line_info['content']
        line_type = line_info['type']
        indent = line_info['indent']
        mentions = line_info['mentions']

        # 1. Insert the text (with a trailing newline)
        batch_requests.append({
            'insertText': {
                'location': {
                    'index': current_index
                },
                'text': content + '\n'
            }
        })

        start_index = current_index
        end_index = start_index + _utf16_len(content) + 1  # +1 for newline; exclusive
        current_index = end_index

        # 2. Depending on line type, apply text/paragraph style
        if line_type.startswith('heading'):
            # Parse the full numeric suffix and clamp it to the named styles
            # that exist (HEADING_1 .. HEADING_6).
            level_text = line_type[len('heading'):]
            level = int(level_text) if level_text.isdigit() else 1
            level = max(1, min(level, 6))
            batch_requests.append({
                'updateParagraphStyle': {
                    'range': _index_range(start_index, end_index),
                    'paragraphStyle': {
                        'namedStyleType': f'HEADING_{level}'
                    },
                    'fields': 'namedStyleType'
                }
            })
        elif line_type in ('bullet', 'checkbox'):
            # Checkboxes use the checkbox glyph preset; other items use discs.
            if line_type == 'checkbox':
                glyph_type = 'BULLET_CHECKBOX'
            else:
                glyph_type = 'BULLET_DISC_CIRCLE_SQUARE'

            batch_requests.append({
                'createParagraphBullets': {
                    'range': _index_range(start_index, end_index),
                    'bulletPreset': glyph_type
                }
            })

            # Approximate nesting by shifting the paragraph's start indent.
            # NOTE(review): this is a visual indent only; the Docs API is
            # documented to derive true list nesting from leading tabs at
            # bullet-creation time - confirm before relying on nested glyphs.
            nest_level = indent // spaces_per_level
            if nest_level > 0:
                batch_requests.append({
                    'updateParagraphStyle': {
                        'range': _index_range(start_index, end_index),
                        'paragraphStyle': {
                            'indentStart': {
                                'magnitude': nest_level * indent_points,
                                'unit': 'PT'
                            }
                        },
                        'fields': 'indentStart'
                    }
                })
        # Paragraphs (and unknown types) get no paragraph-level styling.

        # 3. Style the mention(s): bold and red.
        # Search left-to-right from the end of the previous hit so repeated
        # mentions, or a mention that is a prefix of an earlier one, each map
        # to their own occurrence instead of always the first match.
        search_from = 0
        for mention in mentions:
            position = content.find(mention, search_from)
            if position == -1:
                # Caller supplied mentions out of order; fall back to any match.
                position = content.find(mention)
            if position == -1:
                continue
            search_from = position + len(mention)

            # Convert the code-point offset to UTF-16 units for the API.
            mention_range_start = start_index + _utf16_len(content[:position])
            mention_range_end = mention_range_start + _utf16_len(mention)
            batch_requests.append({
                'updateTextStyle': {
                    'range': _index_range(mention_range_start, mention_range_end),
                    'textStyle': {
                        'bold': True,
                        'foregroundColor': {
                            'color': {
                                'rgbColor': {
                                    'red': 1.0,
                                    'green': 0.0,
                                    'blue': 0.0
                                }
                            }
                        }
                    },
                    'fields': 'bold,foregroundColor'
                }
            })

    return batch_requests


In [21]:
# ====================================================
# Step 6: Create the Document & Apply Batch Updates
# ====================================================
def create_document_from_markdown(title, markdown_text):
    """
    Creates a new Google Doc with the given title and inserts
    content based on the parsed Markdown text.

    Uses the module-level ``docs_service`` client.

    Args:
        title: Title for the new document.
        markdown_text: Markdown source to convert.

    Returns:
        The new document's ID, or None if a Docs API call raised HttpError
        (the error is printed). Note that when only the update step fails,
        the already-created document still exists and its ID was printed.
    """
    # Parse and build first: both steps are local, so a problem with the input
    # surfaces before an otherwise empty document is created.
    parsed_lines = parse_markdown_lines(markdown_text)
    requests = build_requests(parsed_lines)

    try:
        # 1. Create an empty document
        doc = docs_service.documents().create(body={'title': title}).execute()
        doc_id = doc.get('documentId')

        print(f"Created document with ID: {doc_id}")

        # 2. Send the batchUpdate request, but only when there is something to
        #    send. NOTE(review): the API is expected to reject an empty request
        #    list - confirm; skipping is at worst a saved round trip.
        if requests:
            docs_service.documents().batchUpdate(
                documentId=doc_id,
                body={'requests': requests}
            ).execute()
            print("Document updated successfully.")
        else:
            print("No content to insert; document left empty.")

        return doc_id

    except HttpError as error:
        print(f"An error occurred: {error}")
        return None


In [22]:
# ===========================
# Step 7: Run the Conversion
# ===========================
document_id = create_document_from_markdown(
    title="Product Team Sync - Automated",
    markdown_text=markdown_text
)

# create_document_from_markdown returns None when a Docs API call failed, so
# only report success when an ID actually came back.
if document_id:
    print("All done!")
    print("Check your Google Drive for the new document.")
else:
    print("Document creation failed; see the error above.")

Created document with ID: 1a4gExdOYOh0ZhkydBgY2Kx3jLqfcwIWRZfpf5Ta0OF0
Document updated successfully.
All done!
Check your Google Drive for the new document.
