In [1]:
###

In [None]:
import os, logging, json, time, datetime, asyncio, sys, pathlib, subprocess, tempfile, shutil
import logging.handlers
from pptx import Presentation
from feedback_parsing.feedback_extraction_notes import extract_feedback_from_ppt_notes
from feedback_parsing.feedback_extraction_onslide import extract_feedback_from_ppt_onslide
from feedback_parsing.feedback_extraction_mail import extract_feedback_from_email
from feedback_parsing.feedback_classifier import classify_feedback_instructions
from agents.formatting_agent import formatting_agent
from agents.cleanup_agent import cleanup_agent
from agents.visual_enhancement_agent import visual_enhancement_agent
from utils.utils import generate_slide_context, convert_pptx_to_pdf, extract_slide_xml_from_ppt
from code_manipulation.code_generator import generate_python_code
from code_manipulation.code_executor import execute_code_in_docker
from code_manipulation.xml_code_generator import generate_modified_xml_code
from code_manipulation.xml_code_injector import inject_xml_into_ppt

from dotenv import load_dotenv
load_dotenv()

def setup_logging(log_file, max_bytes=0, backup_count=0):
    """Configure the root logger to emit INFO+ records to a file and the console.

    Handlers already attached to the root logger are removed *and closed*
    before the new ones are installed. Re-running this function (for example
    by re-executing a notebook cell) therefore neither duplicates output nor
    leaks the previously opened log file handle.

    Args:
        log_file: Path of the log file; it is opened with UTF-8 encoding.
        max_bytes: ``maxBytes`` passed to ``RotatingFileHandler``. The default
            of 0 means the file never rolls over.
        backup_count: ``backupCount`` passed to ``RotatingFileHandler``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Iterate over a copy: removeHandler mutates logger.handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    file_handler = logging.handlers.RotatingFileHandler(
        log_file,
        maxBytes=max_bytes,
        backupCount=backup_count,
        encoding="utf-8",
    )
    console_handler = logging.StreamHandler()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    for handler in (file_handler, console_handler):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)


setup_logging("presentation_automation_logs.log")


def process_with_python_pptx(pptx_path, task_specifications, slide_context_cache):
    """Generate python-pptx code for every task and run it in Docker.

    Args:
        pptx_path: Path of the presentation handed to the Docker executor.
        task_specifications: Task dicts; each is read for ``slide_number``,
            ``original_instruction``, ``task_description``, ``action`` and the
            optional ``target_element_hint``.
        slide_context_cache: Mapping of slide number to slide context; it
            must already contain an entry for every task's slide.

    Returns:
        tuple: ``(execution_success, final_output_file_path, execution_report)``.
        ``execution_success`` is forced to ``False`` when the executor reports
        success but returns no output path.
    """
    def log_error_details(log, heading, error_details):
        # Emit the heading followed by one line per failed task at the same level.
        log(heading)
        for detail in error_details:
            log(f"  - Task Index {detail.get('task_index', 'N/A')}: {detail.get('error', 'Unknown error')}")

    logging.info("------------------- Starting python-pptx based automation -------------------")

    logging.info("Generating Python code for task specifications...")
    tasks_with_code = []
    for spec in task_specifications:
        slide_no = spec["slide_number"]
        generated = generate_python_code(spec, slide_context_cache[slide_no])
        if not generated:
            logging.warning(f"Failed to generate code for task: {spec['task_description']}")
            continue

        tasks_with_code.append({
            "slide_number": slide_no,
            "generated_code": generated,
            "original_instruction": spec["original_instruction"],
            "description": spec["task_description"],
            "action": spec["action"],
            "target_element_hint": spec.get("target_element_hint", ""),
        })
        logging.info(f"Generated code for task: {spec['task_description']}")

    logging.info(f"Generated {len(tasks_with_code)} code snippets for the given task specifications.")

    logging.info("Executing Code in Docker...")
    execution_success, final_output_file_path, execution_report = execute_code_in_docker(tasks_with_code, pptx_path)

    if not execution_success:
        logging.error("Code execution in Docker failed.")
        if execution_report.get("errors"):
            log_error_details(
                logging.error,
                "Details of execution failure:",
                execution_report["errors"],
            )
        return execution_success, final_output_file_path, execution_report

    logging.info(f"Code execution completed. Status: {execution_report.get('status', 'unknown')}")
    if final_output_file_path:
        logging.info(f"Modified presentation saved to: {final_output_file_path}")
    else:
        # A "successful" run without an output file is unusable downstream.
        logging.error("Execution reported success but no output file path was returned.")
        execution_success = False

    if execution_report.get("errors"):
        log_error_details(
            logging.warning,
            f"Some code snippets failed during execution ({len(execution_report['errors'])}):",
            execution_report["errors"],
        )

    return execution_success, final_output_file_path, execution_report


def process_with_xml(pptx_path, task_specifications, slide_context_cache):
    """Apply task specifications by rewriting slide XML directly (fallback path).

    The presentation is copied into a temporary working directory, tasks are
    grouped by slide and applied one after another to that copy, and the
    result is copied to ``./output_ppts/<name>_xml_modified.pptx``. The
    working directory is removed on every exit path, including failures.

    Args:
        pptx_path: Path of the source presentation; it is never modified.
        task_specifications: Task dicts; each must contain ``slide_number``
            and may contain ``task_description``.
        slide_context_cache: Mapping of slide number to slide context; it
            must contain an entry for every slide whose XML can be extracted.

    Returns:
        tuple: ``(execution_success, final_output_path, execution_report)``.
        ``execution_success`` is ``True`` when at least one task succeeded.
        On an unexpected exception the result is ``(False, None, report)``
        with a ``task_index`` of -1 error entry appended to the report.
    """
    logging.info("------------------- Starting XML-based processing as fallback -------------------")

    execution_report = {
        "status": "unknown",
        "errors": [],
        "processed_count": 0,
        "success_count": 0
    }

    def record_failure(message, description):
        # Count the task as processed and log it under its processing-order index.
        execution_report["processed_count"] += 1
        execution_report["errors"].append({
            "task_index": execution_report["processed_count"] - 1,
            "error": message,
            "description": description
        })

    try:
        # The context manager guarantees the scratch copy is deleted even when
        # copying, XML generation or injection raises.
        with tempfile.TemporaryDirectory(prefix="ppt_xml_") as temp_dir:
            working_pptx_path = os.path.join(temp_dir, "working_copy.pptx")
            shutil.copy2(pptx_path, working_pptx_path)

            # Group tasks per slide, keeping first-seen slide order.
            slide_tasks = {}
            for task in task_specifications:
                slide_tasks.setdefault(task["slide_number"], []).append(task)

            for slide_number, tasks in slide_tasks.items():
                logging.info(f"Processing {len(tasks)} tasks for slide {slide_number}")

                current_xml = extract_slide_xml_from_ppt(working_pptx_path, slide_number)
                if not current_xml:
                    logging.error(f"Failed to extract XML for slide {slide_number}. Skipping all tasks for this slide.")
                    for task in tasks:
                        record_failure(
                            f"Failed to extract XML for slide {slide_number}",
                            task.get("task_description", "Unknown"),
                        )
                    continue

                slide_context = slide_context_cache[slide_number]

                # Apply each task sequentially
                for i, task in enumerate(tasks):
                    task_desc = task.get("task_description", f"Task #{i+1}")
                    logging.info(f"Applying task {i+1}/{len(tasks)} for slide {slide_number}: {task_desc}")

                    modified_xml = generate_modified_xml_code(
                        original_xml=current_xml,
                        agent_task_specification=task,
                        slide_context=slide_context
                    )

                    if not modified_xml:
                        logging.error(f"Failed to generate modified XML for task: {task_desc}")
                        record_failure("Failed to generate modified XML", task_desc)
                        continue

                    if modified_xml == current_xml:
                        # An unchanged slide is still counted as a successful task.
                        logging.warning(f"No XML changes made for task: {task_desc}")
                        execution_report["processed_count"] += 1
                        execution_report["success_count"] += 1
                        continue

                    if inject_xml_into_ppt(working_pptx_path, slide_number, modified_xml):
                        logging.info(f"Successfully applied XML changes for task: {task_desc}")
                        execution_report["processed_count"] += 1
                        execution_report["success_count"] += 1
                        # Advance the baseline only once the file really contains
                        # the change, so later tasks never build on XML that was
                        # rejected by the injector.
                        current_xml = modified_xml
                    else:
                        logging.error(f"Failed to inject modified XML for task: {task_desc}")
                        record_failure("Failed to inject modified XML", task_desc)

            base_filename = os.path.splitext(os.path.basename(pptx_path))[0]
            final_output_filename = f"{base_filename}_xml_modified.pptx"
            output_dir = os.path.abspath("./output_ppts")
            os.makedirs(output_dir, exist_ok=True)
            final_output_path = os.path.join(output_dir, final_output_filename)

            shutil.copy2(working_pptx_path, final_output_path)
            logging.info(f"Final modified presentation saved to: {final_output_path}")

        if execution_report["success_count"] == execution_report["processed_count"]:
            execution_report["status"] = "success"
        elif execution_report["success_count"] > 0:
            execution_report["status"] = "partial_success"
        else:
            execution_report["status"] = "failed"

        execution_success = execution_report["success_count"] > 0
        return execution_success, final_output_path, execution_report

    except Exception as e:
        logging.error(f"XML processing failed with unexpected error: {e}", exc_info=True)
        execution_report["status"] = "failed"
        execution_report["errors"].append({
            "task_index": -1,
            "error": f"Global XML processing error: {str(e)}",
            "description": "Overall XML processing"
        })
        return False, None, execution_report


def main(pptx_path, email_path="path/to/email.txt"):
    """Run the feedback-driven presentation automation pipeline end to end.

    Steps: extract feedback (notes, on-slide, e-mail), classify it into tasks,
    let the category-specific agent expand each task into task specifications,
    apply them with the python-pptx approach, and fall back to (or compare
    with) the XML approach when python-pptx fails or succeeds on fewer than
    60% of the task specifications.

    Args:
        pptx_path: Path of the presentation to process.
        email_path: Path handed to the e-mail feedback extractor.

    Returns:
        tuple: ``(final_output_path, report)``. ``final_output_path`` is
        ``None`` when no output was produced. When an approach was executed
        the report carries ``approach`` and ``final_status``; for the
        "no tasks" outcome and for caught exceptions it carries ``status``
        (``"no_tasks"`` or ``"failed"``) instead.
    """
    overall_pipeline_status = "unknown"

    def success_rate_of(report, total_tasks):
        # Share of all task specifications the approach reported as successful, in percent.
        return report.get("success_count", 0) / total_tasks * 100

    try:
        logging.info("======================= Starting PPT Automation Pipeline ========================")
        logging.info(f"Input PPTX: {pptx_path}")

        logging.info("Loading presentation...")
        prs = Presentation(pptx_path)

        pdf_path = convert_pptx_to_pdf(pptx_path)

        image_cache = {}
        slide_context_cache = {}

        # Step 1: Feedback Extraction
        logging.info("Extracting feedback from all sources...")
        feedback_notes = extract_feedback_from_ppt_notes(pptx_path)
        feedback_onslide = extract_feedback_from_ppt_onslide(pptx_path)
        feedback_mail = extract_feedback_from_email(email_path)

        all_feedback = feedback_notes + feedback_onslide + feedback_mail
        logging.info(f"Total feedback instructions extracted: {len(all_feedback)}")

        # Step 2: Instruction Interpretation
        logging.info("Classifying and extracting tasks from feedback...")
        categorized_tasks = classify_feedback_instructions(all_feedback)
        logging.info(f"Categorized Tasks: {len(categorized_tasks)}")

        # Step 3: Delegate tasks to specialized agents
        logging.info("Processing Tasks with Agents & Context...")
        agents_by_category = {
            "formatting": formatting_agent,
            "cleanup": cleanup_agent,
            "visual_enhancement": visual_enhancement_agent,
        }
        task_specifications = []
        for task in categorized_tasks:
            category = task["category"]
            slide_number = task["slide_number"]
            instruction = task["original_instruction"]

            # Generate or retrieve slide context
            if slide_number not in slide_context_cache:
                slide_context_cache[slide_number] = generate_slide_context(prs, slide_number, pdf_path, image_cache)
            slide_context = slide_context_cache[slide_number]

            # Delegate to appropriate agent
            agent = agents_by_category.get(category)
            if agent is None:
                logging.warning(f"Unknown category: {category} for instruction: '{instruction}'")
                continue

            task_with_desc = agent(task, slide_context)
            if task_with_desc:
                task_specifications.extend(task_with_desc)

        if not task_specifications:
            logging.warning("No tasks generated from the feedback. Nothing to process.")
            # Record the outcome so the closing log line does not report "unknown".
            overall_pipeline_status = "no_tasks"
            return None, {"status": "no_tasks", "message": "No tasks to process"}

        total_tasks = len(task_specifications)

        # Step 4: Try python-pptx approach first
        pptx_success, pptx_output_path, pptx_report = process_with_python_pptx(
            pptx_path,
            task_specifications,
            slide_context_cache
        )

        if pptx_success and pptx_report.get("status") in ["success", "partial_success"]:
            logging.info("Python-pptx approach was successful. Using its output.")

            success_rate = success_rate_of(pptx_report, total_tasks)

            if pptx_report["status"] == "partial_success" and success_rate < 60:
                # Fall through to the XML approach and compare both results below.
                logging.info(f"Success rate of python-pptx approach was only {success_rate:.1f}%. Trying XML approach for a possibly better result.")
            else:
                overall_pipeline_status = pptx_report["status"]
                return pptx_output_path, {
                    "approach": "python-pptx",
                    "execution_report": pptx_report,
                    "final_status": pptx_report["status"],
                    "success_rate": success_rate,
                    "output_path": pptx_output_path
                }

        # Step 5: Fallback to XML approach if python-pptx failed or had low success rate
        logging.info("Falling back to XML approach...")
        xml_success, xml_output_path, xml_report = process_with_xml(
            pptx_path,
            task_specifications,
            slide_context_cache
        )

        # Determine which result to use
        if not pptx_success and not xml_success:
            logging.error("Both approaches failed. No successful output generated.")
            final_output_path = None
            overall_pipeline_status = "failed"
            final_report = {
                "approach": "both_failed",
                "pptx_report": pptx_report,
                "xml_report": xml_report,
                "final_status": "failed"
            }
        elif not pptx_success:
            logging.info("XML approach succeeded where python-pptx failed. Using XML output.")
            final_output_path = xml_output_path
            overall_pipeline_status = xml_report["status"]
            final_report = {
                "approach": "xml_fallback",
                "execution_report": xml_report,
                "final_status": xml_report["status"],
                "success_rate": success_rate_of(xml_report, total_tasks),
                "output_path": xml_output_path
            }
        else:
            # python-pptx produced output (fully or partially) - compare results
            pptx_success_rate = success_rate_of(pptx_report, total_tasks)
            xml_success_rate = success_rate_of(xml_report, total_tasks)

            if xml_success_rate > pptx_success_rate:
                logging.info(f"XML approach had better success rate ({xml_success_rate:.1f}% vs {pptx_success_rate:.1f}%). Using XML output.")
                final_output_path = xml_output_path
                overall_pipeline_status = xml_report["status"]
                final_report = {
                    "approach": "xml_better",
                    "pptx_report": pptx_report,
                    "xml_report": xml_report,
                    "final_status": xml_report["status"],
                    "success_rate": xml_success_rate,
                    "output_path": xml_output_path
                }
            else:
                # Ties go to python-pptx.
                logging.info(f"Python-pptx approach had better or equal success rate ({pptx_success_rate:.1f}% vs {xml_success_rate:.1f}%). Using python-pptx output.")
                final_output_path = pptx_output_path
                overall_pipeline_status = pptx_report["status"]
                final_report = {
                    "approach": "pptx_better",
                    "pptx_report": pptx_report,
                    "xml_report": xml_report,
                    "final_status": pptx_report["status"],
                    "success_rate": pptx_success_rate,
                    "output_path": pptx_output_path
                }

        return final_output_path, final_report

    except FileNotFoundError as fnf_err:
        logging.error(f"File not found error during pipeline: {fnf_err}")
        overall_pipeline_status = "failed"
        return None, {"status": "failed", "error": str(fnf_err)}
    except Exception as e:
        logging.error(f"Pipeline failed with an unexpected error: {e}", exc_info=True)
        overall_pipeline_status = "failed"
        return None, {"status": "failed", "error": str(e)}
    finally:
        logging.info(f"================== Pipeline Finished with Overall Status: {overall_pipeline_status} ===================")


if __name__ == "__main__":
    # Other sample decks previously used here, all under ./input_ppts/pptx/:
    #   font_test1.pptx, font_test2.pptx, cleanup_test.pptx, cleanup_test2.pptx,
    #   table_alignment_test.pptx, table_alignment_test2.pptx, consistent3.pptx
    pptx_path = os.path.abspath("./input_ppts/pptx/consistent.pptx")

    logging.info(f"Input PPTX: {pptx_path}")
    output_path, report = main(pptx_path)

    if output_path:
        logging.info(f"Process completed. Final output at: {output_path}")
        logging.info(f"Final report: {json.dumps(report, indent=2)}")
    elif report.get("status") == "no_tasks":
        # main() returns no output path when the feedback produced no tasks;
        # that is an empty run, not a failure.
        logging.warning("Process finished without changes. No tasks were generated from the feedback.")
    else:
        logging.error("Process failed. No output generated.")


In [2]:
# Prompt template for the LLM that plans slide-XML modifications. The names in
# single braces under "Input Context" are the fill-in placeholders.
# NOTE(review): the "Parameters" line also contains literal braces in its
# example; if this template is rendered with str.format those would need
# escaping - confirm how the template is filled in before changing them.
XML_MODIFICATION_PROMPT = """
    # PowerPoint XML Modification Expert - Comprehensive Feature Set

    You are a **State-of-the-Art expert in PowerPoint XML structure**, possessing comprehensive knowledge to modify PPTX XML for a wide range of presentation enhancements.
    Your task is to generate **detailed and actionable XML modification instructions** to implement diverse PPTX changes, encompassing formatting, cleanup, and visual enhancements, based on the provided feedback instruction, task details, and slide context.

    **Your Expertise:**
    You are proficient in manipulating PPTX XML to achieve effects equivalent to actions performed via the PowerPoint UI, including but not limited to:

    - **Formatting Enhancements:** Font changes, text styling, alignment, spacing, bullet points, table formatting, graph enhancements, color adjustments, line styles, borders, template application.
    - **Cleanup Operations:** Element alignment, spacing adjustments, removing redundancies, standardizing styles, splitting tables, converting text to tables/bullets.
    - **Visual Enhancements:** Color and opacity adjustments, background refinement, icon placement, timeline optimization, layering effects, border styling, image optimization, legend placement, alternative layouts.

    **Input Context:**
    - Agent: {agent_name}
    - Slide Index: {slide_index}
    - Original User Instruction: "{original_instruction}"
    - Task Description: "{task_description}"  <- Detailed description of the PPT manipulation task.
    - Action: {action}                  <- Programmatic action category (e.g., 'change_font_style', 'resize_table').
    - Target Element Hint: "{target_element_hint}" <- Hint for the element to target (e.g., 'title', 'table').
    - Parameters: {params}                 <- Parameters for the action (e.g., {'font_name': 'Arial', 'size': 12}, {'constraint': 'fit_slide'}).
    - Slide XML Structure: "{slide_xml_structure}" <- XML content of the slide.

    **Comprehensive Instructions for XML Modification Instruction Generation:**

    1.  **Holistic Task Understanding:**  Thoroughly understand the `task_description` and `original_instruction` to grasp the user's intent for the PPTX modification. Consider the overall goal: formatting, cleanup, or visual enhancement.
    2.  **Deep Dive into Slide XML:**  Analyze the `slide_xml_structure` to identify the XML elements corresponding to the `target_element_hint` and understand their current properties and structure.
    3.  **Leverage Extensive PPTX XML Schema Knowledge:** Utilize your comprehensive knowledge of the PowerPoint Open XML Presentation Language schema to determine the precise XML elements and attributes that need modification to achieve the desired effect. Refer to the "PowerPoint XML Guidance" section below for detailed information on various XML elements and properties.
    4.  **Generate Detailed XML Modification Instructions:** Create a structured set of XML modification instructions. For each instruction, specify:
        *   **Target XML Element(s):** Use XPath expressions for precise element selection whenever possible. Be specific in targeting the correct elements based on `target_element_hint` and XML analysis.
        *   **Modification Type:** Clearly state the type of modification:
            *   **Attribute Modification:** Changing the value of an existing attribute (e.g., font name, font size, color values, position, size).
            *   **Element Creation:** Adding new XML elements (use sparingly and only when necessary, e.g., adding missing font property elements).
            *   **Element Deletion (Avoid if Possible):**  Generally avoid deleting elements unless absolutely necessary and you are certain it won't break the PPTX structure.
        *   **New Value or XML Snippet:** Provide the new attribute value or the XML snippet to be inserted or used for modification. Ensure values are in the correct format (e.g., font sizes in hundredths of a point, colors in RGB hex, positions/sizes in EMU).
    5.  **Prioritize Attribute Modification:** Whenever possible, prefer attribute modification over complex element creation or deletion, as attribute changes are generally safer and less likely to corrupt the PPTX structure.
    6.  **Maintain XML Validity:**  Crucially, ensure that all generated XML modification instructions result in **valid and well-formed PPTX XML**. Double-check XML syntax, namespaces, element relationships, and attribute values against the PPTX XML schema.
    7.  **Preserve Existing Structure and Content:**  Your XML modifications should be targeted and minimally invasive. **Preserve the existing XML structure and content** of the slide as much as possible. Only modify the specific elements and attributes necessary to achieve the requested task. Avoid making broad or sweeping changes that could unintentionally alter other parts of the slide.
    8.  **Structured Output:** Organize the XML Modification Instructions in a clear, structured text format, similar to the examples below, making them easy to parse and implement programmatically.

    ## PowerPoint XML Guidance (Comprehensive Reference):

    ### 1. Core XML Namespaces (Essential):
    - `a`:  `http://schemas.openxmlformats.org/drawingml/2006/main` (DrawingML - for shapes, text, styles, colors)
    - `p`:  `http://schemas.openxmlformats.org/presentationml/2006/main` (PresentationML - for slide structure, shapes on slides)
    - `r`:  `http://schemas.openxmlformats.org/officeDocument/2006/relationships` (Relationships - for linking parts within PPTX)
    - `c`:  `http://schemas.openxmlformats.org/drawingml/2006/chart` (DrawingML Charts - used inside chart parts)

    ### 2. Key XML Elements & Attributes (Organized by Feature Category):

    #### 2.1. Text Formatting:
    - `<p:sp>` (Shape): Container for text boxes and other shapes.
    - `<p:txBody>` (Text Body): Contains text content and formatting for a shape.
        - `<a:bodyPr>`: Properties for the text body itself (e.g., wrapping, rotation).
        - `<a:lstStyle>`: List styles (bullet points, numbering).
        - `<a:p>` (Paragraph): Represents a paragraph of text.
            - `<a:pPr>` (Paragraph Properties): Formatting for the paragraph (alignment, indentation, spacing).
                - `algn` attribute: Text alignment (values: `l`, `ctr`, `r`, `just`, `dist`). Use `pptx.enum.text.PP_PARAGRAPH_ALIGNMENT` enums for values.
            - `<a:r>` (Text Run): Represents a run of text with consistent formatting within a paragraph.
                - `<a:rPr>` (Run Properties): Formatting for the text run (font, size, bold, color). Size, bold, italic and underline are **attributes** of `<a:rPr>`, not child elements.
                    - `sz` attribute: Font size in hundredths of a point (NOT EMU). Multiply points by 100 (e.g., 12pt -> `sz="1200"`).
                    - `b` attribute: Bold (`0`=false, `1`=true), e.g. `<a:rPr b="1"/>`.
                    - `i` attribute: Italic (`0`=false, `1`=true), e.g. `<a:rPr i="1"/>`.
                    - `u` attribute: Underline type (`none|sng|dbl|...`), e.g. `<a:rPr u="sng"/>`. Use `pptx.enum.text.MSO_TEXT_UNDERLINE_TYPE` enums for values.
                    - `<a:solidFill><a:srgbClr val="RRGGBB"/></a:solidFill>`: Solid color fill for text. Use `<a:srgbClr val="RRGGBB"/>` to set RGB hex color. This child element must come before `<a:latin>` inside `<a:rPr>`.
                    - `<a:latin typeface="Font Name"/>`: Font name for Latin characters. Use `typeface` attribute.
                - `<a:t>` (Text): Actual text content of the run.

    #### 2.2. Shape Formatting & Styling:
    - `<p:spPr>` (Shape Properties): Contains properties for shape appearance and geometry.
        - `<a:xfrm>` (Transform): Defines shape position, size, rotation, and flip.
            - `<a:off x="X-EMU" y="Y-EMU"/>`: Position offset (top-left corner) in EMU. Use `x` and `y` attributes.
            - `<a:ext cx="Width-EMU" cy="Height-EMU"/>`: Size extent (width and height) in EMU. Use `cx` and `cy` attributes.
        - Fill (direct child of `<p:spPr>`; there is no `<a:fill>` wrapper element). Use exactly one of:
            - `<a:solidFill><a:srgbClr val="RRGGBB"/></a:solidFill>`: Solid color fill.
            - `<a:gradFill>`: Gradient fill (complex - refer to PPTX documentation for details).
            - `<a:noFill/>`: No fill.
        - `<a:ln w="Width-EMU">` (Line Properties): Formatting for shape outline/border. Line width is the `w` attribute in EMU (12700 EMU = 1pt), not a child element.
            - `<a:solidFill><a:srgbClr val="RRGGBB"/></a:solidFill>`: Solid color for line.
            - `<a:prstDash val="dash|dashDot|solid|..."/>`: Dash style for line. Use `pptx.enum.dml.MSO_LINE_DASH_STYLE` enums for values.

    #### 2.3. Table Formatting:
    - `<p:graphicFrame>`: Container shape for tables and charts.
    - `<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/table">`:  Identifies table data within `<p:graphicFrame>`.
    - `<a:tbl>` (Table): Table element.
        - `<a:tblPr>` (Table Properties): Properties for the table itself (style, layout).
        - `<a:tblGrid>` (Table Grid): Defines column widths.
            - `<a:gridCol w="Width-EMU"/>`: Column width in EMU.
        - `<a:tr>` (Table Row): Table row.
        - `<a:tc>` (Table Cell): Table cell.
            - `<a:tcPr>` (Table Cell Properties): Cell formatting (fill, borders, margins).
            - `<a:txBody>` (Text Body): Text content within the cell (same structure as shape text bodies).

    #### 2.4. Chart Formatting (Basic):
    - `<p:graphicFrame>`: Container shape for tables and charts.
    - `<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/chart">`: Identifies chart data within `<p:graphicFrame>`. In the slide XML it only holds a `<c:chart r:id="rId#"/>` reference; the chart definition itself lives in a separate chart part (e.g. `ppt/charts/chart1.xml`), not in the slide XML.
    - `<c:chartSpace>` (Chart Space): Root element of the chart part (very complex structure - refer to Office Open XML Part 4 - DrawingML Charts).
        - `<c:chart>` (Chart): Chart element itself.
            - `<c:title>` (Chart Title): Chart title.
                - `<c:txPr>` (Text Properties): Text formatting for chart title (similar to shape text bodies).
            - `<c:plotArea>` (Plot Area): Area where chart data is plotted. Contains the chart-type element and the axes.
                - Chart-type element (e.g. `<c:barChart>`, `<c:lineChart>`, `<c:pieChart>`): Holds the data series.
                    - `<c:ser>` (Series): Chart data series.
                        - `<c:tx>` (Series Text): Series name.
                        - `<c:cat>` (Category Axis Data): Category labels.
                        - `<c:val>` (Value Axis Data): Data values.
                - `<c:catAx>` (Category Axis): Category axis.
                - `<c:valAx>` (Value Axis): Value axis.
                    - `<c:majorGridlines>`: Major gridlines.
                    - `<c:minorGridlines>`: Minor gridlines.
                    - `<c:title>` (Axis Title): Axis title.
                    - `<c:txPr>` (Text Properties): Text formatting for the axis tick labels.
            - `<c:legend>` (Legend): Chart legend.
                - `<c:txPr>` (Text Properties): Text formatting for chart legend.

    #### 2.5. Image/Picture Elements:
    - `<p:pic>` (Picture): Image element.
        - `<p:blipFill>` (Blip Fill): Defines how the image fills the shape.
            - `<a:blip r:embed="rId#"/>`: Reference to the image data (using relationship ID).
        - `<p:spPr>` (Shape Properties): Positioning and size of the image shape.

    #### 2.6. Connector Shapes:
    - `<p:cxnSp>` (Connector Shape): Represents a connector line.
        - `<p:spPr>` (Shape Properties): Positioning and geometry of the connector.
            - `<a:ln>` (Line Properties): Formatting for the connector line (style, color, width). It is a child of `<p:spPr>`.
"""

In [1]:
import inspect
import pptx
from pptx import Presentation

def get_module_documentation(module, depth=0, max_depth=3, package_prefix="pptx"):
    """Recursively collect docstrings of the public classes in a package.

    Args:
        module: Module object to inspect.
        depth: Current recursion depth; callers normally leave it at 0.
        max_depth: Deepest recursion level that is still inspected. A call
            with ``depth > max_depth`` returns an empty dict.
        package_prefix: Dotted name of the package to stay inside. Only
            submodules named exactly ``package_prefix`` or starting with
            ``package_prefix + "."`` are descended into, so similarly named
            but unrelated packages are not followed.

    Returns:
        dict: Maps each public member name to either
        ``{"type": "class", "doc": ..., "methods": {name: doc}}`` or
        ``{"type": "module", "doc": ..., "members": {...}}``. Missing
        docstrings are reported as ``"No documentation"``. Names starting
        with an underscore are skipped, as are members that are neither
        classes nor in-package modules.
    """
    if depth > max_depth:
        return {}

    def belongs_to_package(candidate):
        # True for the package itself and for anything below it in the dotted path.
        mod_name = candidate.__name__
        return mod_name == package_prefix or mod_name.startswith(package_prefix + ".")

    docs = {}
    for name, obj in inspect.getmembers(module):
        # Filter out private attributes and methods
        if name.startswith('_'):
            continue

        if inspect.isclass(obj):
            docs[name] = {
                "type": "class",
                "doc": inspect.getdoc(obj) or "No documentation",
                # Only plain functions defined on the class are listed.
                "methods": {
                    method_name: inspect.getdoc(method) or "No documentation"
                    for method_name, method in inspect.getmembers(obj, inspect.isfunction)
                    if not method_name.startswith('_')
                },
            }
        elif inspect.ismodule(obj) and belongs_to_package(obj):
            # For in-package modules, recurse deeper
            docs[name] = {
                "type": "module",
                "doc": inspect.getdoc(obj) or "No documentation",
                "members": get_module_documentation(obj, depth + 1, max_depth, package_prefix),
            }

    return docs

import json

# Walk the imported pptx package and gather its public API documentation.
api_docs = get_module_documentation(pptx)

# Serialize to indented JSON text so it can be pasted into a prompt, then show it.
api_docs_str = json.dumps(api_docs, indent=2)
print(api_docs_str)
#save to file

def save(api_docs_str, path="api_docs.json"):
    """Write the serialized API documentation to a file and report where it went.

    Args:
        api_docs_str: Text to write, expected to be the JSON produced above.
        path: Destination file; defaults to ``api_docs.json`` in the current
            working directory. An existing file is overwritten.
    """
    # Explicit encoding keeps the output identical regardless of the platform default.
    with open(path, "w", encoding="utf-8") as f:
        f.write(api_docs_str)
    print(f"API docs saved to {path}")

save(api_docs_str)


{
  "NotesMasterPart": {
    "type": "class",
    "doc": "Notes master part.\n\nCorresponds to package file `ppt/notesMasters/notesMaster1.xml`.",
    "methods": {
      "drop_rel": "Remove relationship identified by `rId` if its reference count is under 2.\n\nRelationships with a reference count of 0 are implicit relationships. Note that only XML\nparts can drop relationships.",
      "get_image": "Return an |Image| object containing the image related to this slide by *rId*.\n\nRaises |KeyError| if no image is related by that id, which would generally indicate a\ncorrupted .pptx file.",
      "get_or_add_image_part": "Return `(image_part, rId)` pair corresponding to `image_file`.\n\nThe returned |ImagePart| object contains the image in `image_file` and is\nrelated to this slide with the key `rId`. If either the image part or\nrelationship already exists, they are reused, otherwise they are newly created.",
      "load_rels_from_xml": "load _Relationships for this part from `xml_rels`.