Begin by installing the dependencies for the environment

In [1]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib



In [2]:
#Import the library used to parse the text
import re

In [3]:
#Import libraries for the google api
from google.colab import auth
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [4]:
# Authenticate the current user with Colab's built-in Google authentication helper.
# create_document() later builds the Docs client without passing explicit credentials,
# so it presumably relies on this step having run first -- TODO confirm.
auth.authenticate_user()

In [5]:
# Read the unformatted input data file and return its contents as one string
def readFile(filename):
    """Read a text file and return its full contents.

    Args:
        filename: Path of the file to read.

    Returns:
        The file contents as a string, or None when the file is missing or
        cannot be read (an error message is printed in that case).
    """
    try:
        # Use the function's own argument, not a notebook-level global, so the
        # function works for any path and on a fresh kernel.
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: The file '{filename}' was not found. Please ensure it's uploaded and the path is correct.")
    except Exception as e:
        # Best-effort by design: report the problem and fall through to None.
        print(f"An error occurred: {e}")
    return None

In [6]:
#Function used to transform the unformatted string into a dictionary that's easier to use
def parse_meeting_markdown(text: str):
    """Parse meeting-notes markdown into a nested dictionary.

    The result has three keys:
      * "title"    - text of the first "# " heading ("" if none was found)
      * "sections" - one dict per "## " heading, each with "heading", "level"
                     and a container list: "items" for "Attendees",
                     "checkboxes" for "Action Items", "bullets" otherwise.
                     "### " headings add entries to a "subsections" list.
      * "footer"   - every non-empty line after a "---" separator, stripped.

    Bullets stored under "bullets" keep their indentation as a prefix of
    spaces so callers can recover the nesting depth.
    """
    parsed = {
        "title": "",
        "sections": [],
        "footer": [],
    }

    # Which container an "## " section gets depends only on its heading text.
    container_for = {"Attendees": "items", "Action Items": "checkboxes"}

    section = None
    subsection = None
    footer_started = False

    for entry in text.splitlines():
        trimmed = entry.strip()

        # A "---" line switches to footer mode for the rest of the input.
        if trimmed == "---":
            footer_started = True
            continue

        if footer_started:
            if trimmed:
                parsed["footer"].append(trimmed)
            continue

        # Blank lines carry no information outside the footer.
        if not trimmed:
            continue

        # Only the first H1 is used as the title.
        if not parsed["title"] and entry.startswith("# "):
            parsed["title"] = entry[2:].strip()
            continue

        # "## ..." opens a new section and closes any open subsection.
        if entry.startswith("## "):
            name = entry[3:].strip()
            section = {
                "heading": name,
                "level": 2,
                container_for.get(name, "bullets"): [],
            }
            subsection = None
            parsed["sections"].append(section)
            continue

        # "### ..." opens a subsection; ignored when no section is open yet.
        if entry.startswith("### "):
            if section is not None:
                subsection = {
                    "heading": entry[4:].strip(),
                    "level": 3,
                    "bullets": [],
                }
                section.setdefault("subsections", []).append(subsection)
            continue

        # Anything else is content, which needs an open section to live in.
        if section is None:
            continue

        body = entry.lstrip()
        pad = " " * (len(entry) - len(body))

        # Unchecked checkbox ("- [ ] text") inside a checkbox section.
        if "checkboxes" in section:
            box = re.match(r"-\s*\[\s*\]\s*(.+)", body)
            if box:
                section["checkboxes"].append(box.group(1).strip())
                continue

        # Ordinary bullet introduced by "-" or "*".
        if body.startswith(("-", "*")):
            item_text = body[1:].lstrip()
            if section.get("heading") == "Attendees":
                section["items"].append(item_text)
            elif subsection is not None:
                subsection["bullets"].append(pad + item_text)
            else:
                section.setdefault("bullets", []).append(pad + item_text)
            continue

        # Fallback: a plain text line is stored verbatim as a bullet of the
        # innermost open container.
        holder = section if subsection is None else subsection
        holder.setdefault("bullets", []).append(entry)

    return parsed

In [7]:
#Create a new, blank document on the authenticated account's drive
def create_document(title):
  """Create an empty Google Doc and return the client used plus the new ID.

  Args:
      title: Title given to the new document.

  Returns:
      A (docs_service, document_id) tuple: the Docs API client built here and
      the "documentId" field of the create response.

  Raises:
      HttpError: re-raised after being printed when the API call fails.
  """
  try:
    service = build('docs', 'v1')
    created = service.documents().create(body={'title': title}).execute()
    new_id = created.get('documentId')
    print(f"Document created with ID: {new_id}")
    return service, new_id
  except HttpError as err:
    # Report the failure in the notebook output, then let the caller see it.
    print(f"An HTTP error occurred: {err}")
    raise

In [8]:
#Build the batchUpdate request list for the new document from the parsed notes content.
#Both the text inserts and their styling (headings, bullets, mentions, footer) are assembled here.
def build_requests_from_content(content):
    """Translate parsed meeting notes into Google Docs ``batchUpdate`` requests.

    Args:
        content: Dict with a "title" string, a "sections" list and a "footer"
            list of strings. Each section has a "heading" and may carry
            "items" (plain bullets), "subsections" (dicts with "heading" and
            "bullets"), "checkboxes" (checklist items, optionally containing
            an "@name" mention) and "bullets" (plain bullets, rendered only
            when the section has no subsections).

    Returns:
        A list of request dicts, in the order they must be executed. Text is
        always appended at the running end-of-content index, starting at 1.

    Notes:
        * Docs API indexes count UTF-16 code units, so all offsets are
          computed in that unit rather than with ``len()``.
        * For checklist items the first "@name" mention is shown without the
          "@", in bold blue. The name ends at the first colon or whitespace.
        * Requests with an empty range are never emitted.
    """
    requests = []
    index = 1  # start after beginning of document

    def utf16_len(value):
        # Characters outside the Basic Multilingual Plane (e.g. most emoji)
        # occupy two UTF-16 code units and therefore two document indexes.
        return len(value.encode("utf-16-le", "surrogatepass")) // 2

    def text_style_request(start, end, text_style, fields):
        return {
            "updateTextStyle": {
                "range": {"startIndex": start, "endIndex": end},
                "textStyle": text_style,
                "fields": fields,
            }
        }

    def insert_paragraph(text, heading=None, bold_spans=None, color_spans=None):
        """Append ``text`` as its own paragraph and style it.

        ``bold_spans`` holds (start, end) pairs and ``color_spans`` holds
        (start, end, rgb) triples, all as character offsets into ``text``.
        """
        nonlocal index

        start = index
        requests.append({
            "insertText": {
                "location": {"index": start},
                "text": text + "\n"
            }
        })
        end = start + utf16_len(text)

        # Paragraph style (heading)
        if heading:
            requests.append({
                "updateParagraphStyle": {
                    "range": {
                        "startIndex": start,
                        "endIndex": end + 1  # include newline
                    },
                    "paragraphStyle": {
                        "namedStyleType": heading
                    },
                    "fields": "namedStyleType"
                }
            })

        def doc_offset(char_offset):
            # Convert a Python character offset within ``text`` to a document index.
            return start + utf16_len(text[:char_offset])

        # Text styles (bold, color); empty spans are skipped
        for s, e in bold_spans or ():
            if s < e:
                requests.append(text_style_request(
                    doc_offset(s), doc_offset(e), {"bold": True}, "bold"))
        for s, e, color in color_spans or ():
            if s < e:
                requests.append(text_style_request(
                    doc_offset(s), doc_offset(e),
                    {"foregroundColor": {"color": {"rgbColor": color}}},
                    "foregroundColor"))

        index = end + 1  # move past newline

    def apply_bullets(start_index, end_index, checkbox=False):
        # Nothing was inserted (e.g. a section with an empty list): skip,
        # because there is no paragraph to turn into a bullet.
        if end_index <= start_index:
            return
        preset = "BULLET_CHECKBOX" if checkbox else "BULLET_DISC_CIRCLE_SQUARE"
        requests.append({
            "createParagraphBullets": {
                "range": {
                    "startIndex": start_index,
                    "endIndex": end_index,
                },
                "bulletPreset": preset,
            }
        })

    def split_mention(item):
        """Return (display_text, span) for a checklist item.

        ``span`` is the (start, end) character range of the mentioned name in
        ``display_text`` (the item with the "@" removed), or None when the
        item has no "@name" mention, in which case the item is unchanged.
        """
        at_pos = item.find("@")
        if at_pos == -1:
            return item, None
        end_pos = at_pos + 1
        while (end_pos < len(item)
               and item[end_pos] != ":"
               and not item[end_pos].isspace()):
            end_pos += 1
        if end_pos == at_pos + 1:
            return item, None  # a bare "@" is not a mention
        # Removing the "@" shifts the end of the name left by one character.
        return item[:at_pos] + item[at_pos + 1:], (at_pos, end_pos - 1)

    # Title as Heading 1
    insert_paragraph(content["title"], heading="HEADING_1")

    # Sections
    for section in content["sections"]:
        # Heading 2 for main sections
        insert_paragraph(section["heading"], heading="HEADING_2")

        # Attendees (simple bullets)
        if "items" in section:
            first = index
            for item in section["items"]:
                insert_paragraph(item)
            apply_bullets(first, index)

        # Agenda with subsections
        if "subsections" in section:
            for sub in section["subsections"]:
                insert_paragraph(sub["heading"], heading="HEADING_3")

                # NOTE: the indentation prefix of each bullet is stripped and
                # not turned into a nesting level; all bullets render flat.
                first = index
                for line in sub["bullets"]:
                    insert_paragraph(line.lstrip())
                apply_bullets(first, index)

        # Action Items: checkboxes with styled mentions
        if "checkboxes" in section:
            first = index
            for item in section["checkboxes"]:
                display_text, span = split_mention(item)
                bold_spans = []
                color_spans = []
                if span is not None:
                    bold_spans.append(span)
                    color_spans.append(span + ({"red": 0.0, "green": 0.0, "blue": 0.8},))
                insert_paragraph(display_text, bold_spans=bold_spans, color_spans=color_spans)
            apply_bullets(first, index, checkbox=True)

        # Simple bullets sections (Next Steps, Notes)
        if "bullets" in section and "subsections" not in section:
            first = index
            for line in section["bullets"]:
                insert_paragraph(line)
            apply_bullets(first, index)

    # Footer: distinct style (italic, gray, smaller)
    for footer_line in content["footer"]:
        start = index
        insert_paragraph(footer_line)
        end = index - 1  # exclude the trailing newline from the styled range
        if end > start:
            requests.append(text_style_request(
                start, end,
                {
                    "italic": True,
                    "foregroundColor": {
                        "color": {
                            "rgbColor": {"red": 0.4, "green": 0.4, "blue": 0.4}
                        }
                    },
                    "fontSize": {"magnitude": 10, "unit": "PT"}
                },
                "italic,foregroundColor,fontSize"))

    return requests


In [9]:
# Read the raw meeting-notes markdown file into a single string.
# The file must already exist at this path in the runtime; if it cannot be read,
# readFile prints an error message and `text` ends up as None.
file_path = '/assessmentData.md'
text = readFile(file_path)

In [10]:
# Convert the raw markdown string into the structured dict ("title", "sections", "footer")
# that the document-building steps below consume.
parsedText = parse_meeting_markdown(text)

In [11]:
# Create a blank document on your Google Drive, titled with the title parsed from the text file.
# Keep both the Docs service client and the new document's ID for the update step below.
docs_service, document_id = create_document(parsedText['title'])



Document created with ID: 1yppNl48TgJ3IKqGJfYHnreAXWDCWJUH7R9ZTodicYsY


In [12]:
# Create the request batch that will populate and style the new document
requests = build_requests_from_content(parsedText)

# Show the generated requests for inspection before they are sent
print(requests)

[{'insertText': {'location': {'index': 1}, 'text': 'Product Team Sync - May 15, 2023\n'}}, {'updateParagraphStyle': {'range': {'startIndex': 1, 'endIndex': 34}, 'paragraphStyle': {'namedStyleType': 'HEADING_1'}, 'fields': 'namedStyleType'}}, {'insertText': {'location': {'index': 34}, 'text': 'Attendees\n'}}, {'updateParagraphStyle': {'range': {'startIndex': 34, 'endIndex': 44}, 'paragraphStyle': {'namedStyleType': 'HEADING_2'}, 'fields': 'namedStyleType'}}, {'insertText': {'location': {'index': 44}, 'text': 'Sarah Chen (Product Lead)\n'}}, {'insertText': {'location': {'index': 70}, 'text': 'Mike Johnson (Engineering)\n'}}, {'insertText': {'location': {'index': 97}, 'text': 'Anna Smith (Design)\n'}}, {'insertText': {'location': {'index': 117}, 'text': 'David Park (QA)\n'}}, {'createParagraphBullets': {'range': {'startIndex': 44, 'endIndex': 133}, 'bulletPreset': 'BULLET_DISC_CIRCLE_SQUARE'}}, {'insertText': {'location': {'index': 133}, 'text': 'Agenda\n'}}, {'updateParagraphStyle': {'ra

In [13]:
# Send all requests to the new document in a single batchUpdate call.
# The API response is the cell's last expression, so it is displayed below.
docs_service.documents().batchUpdate(
    documentId=document_id,
    body={"requests": requests}
).execute()

{'replies': [{},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {},
  {}],
 'writeControl': {'requiredRevisionId': 'AHwbPDKhDN7Pzo7k33QriEtRkvgSY5aJ_ZT0xLXa4LZN2DSksmn5TW_SmfIgyRNYKSOompgxuDAYnH93CFauZA'},
 'documentId': '1yppNl48TgJ3IKqGJfYHnreAXWDCWJUH7R9ZTodicYsY'}