In [1]:
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.shapes.connector import Connector

In [2]:
# Path of the presentation to inspect
pptx_path = "test.pptx"
# Load the presentation once; later cells iterate over prs.slides
prs = Presentation(pptx_path)

In [3]:

def count_lines(slide):
    """
    Count the LINE shapes found directly in a single slide's shape collection.

    A shape is counted when its ``shape_type`` equals ``MSO_SHAPE_TYPE.LINE``.
    Only the entries yielded by ``slide.shapes`` are examined.

    Args:
        slide (Slide): A PowerPoint slide object from a .pptx file.

    Returns:
        int: How many shapes on this slide report the LINE shape type.
    """
    return sum(
        1
        for candidate in slide.shapes
        if candidate.shape_type == MSO_SHAPE_TYPE.LINE
    )

In [4]:
def extract_lines_positions(slide):
    """
    Collect the endpoints of every LINE-type shape on a slide.

    A shape is included when its ``shape_type`` equals ``MSO_SHAPE_TYPE.LINE``;
    its endpoints are read from ``begin_x``/``begin_y`` and ``end_x``/``end_y``.

    Args:
        slide (Slide): A PowerPoint slide object from a .pptx file.

    Returns:
        dict: One entry per line shape, where:
            - Key (int): The shape's position within ``slide.shapes``
              (counting every shape, not only lines).
            - Value (tuple): ``(x1, y1, x2, y2)``, with ``(x1, y1)`` the
              starting point and ``(x2, y2)`` the ending point of the line.
    """
    return {
        position: (item.begin_x, item.begin_y, item.end_x, item.end_y)
        for position, item in enumerate(slide.shapes)
        if item.shape_type == MSO_SHAPE_TYPE.LINE
    }

In [5]:
# For each slide, print the number of line shapes followed by every line's endpoints
for slide_num, slide in enumerate(prs.slides, start=1):
    line_count = count_lines(slide)
    print(line_count)

    lines = extract_lines_positions(slide)
    for line_id in lines:
        # Each stored value is a 4-tuple: start point followed by end point
        x1, y1, x2, y2 = lines[line_id]
        print(f"Line {line_id}: Start({x1}, {y1}) → End({x2}, {y2})")

8
Line 9: Start(1566054, 427306) → End(1011291, 611780)
Line 10: Start(1566054, 427306) → End(2130968, 594001)
Line 11: Start(2130968, 778381) → End(2130969, 925145)
Line 12: Start(2130969, 1057130) → End(2130969, 1185064)
Line 13: Start(1011291, 743765) → End(1011293, 925142)
Line 14: Start(1011293, 1057127) → End(1011293, 1185064)
Line 15: Start(1011293, 1317049) → End(1325105, 1484708)
Line 16: Start(2130969, 1317049) → End(1762932, 1514959)
8
Line 9: Start(1566054, 427306) → End(1011291, 611780)
Line 10: Start(1566054, 427306) → End(2130968, 594001)
Line 11: Start(2130968, 778381) → End(2130969, 925145)
Line 12: Start(2130969, 1057130) → End(2130969, 1185064)
Line 13: Start(1193645, 1368348) → End(959993, 1499401)
Line 14: Start(1948617, 1368349) → End(2182269, 1499402)
Line 15: Start(1011291, 743765) → End(1011293, 925142)
Line 16: Start(1011293, 1057127) → End(1011293, 1185064)


# Plot the extracted lines as a visual check

In [None]:
import matplotlib.pyplot as plt

def plot_lines_from_pptx(prs):
    """
    Plots all extracted lines from the PPTX file to verify correct extraction.

    Lines from every slide are overlaid on a single set of axes, each labelled
    with its slide number (starting at 1) and its shape index on that slide.
    If no line shapes are found, the empty axes are still shown but no legend
    is drawn.

    Args:
        prs: The PowerPoint presentation object.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    _fig, ax = plt.subplots(figsize=(10, 6))

    for slide_num, slide in enumerate(prs.slides, start=1):
        lines = extract_lines_positions(slide)  # Extract lines for this slide

        for line_id, (x1, y1, x2, y2) in lines.items():
            ax.plot([x1, x2], [y1, y2], marker="o", label=f"Slide {slide_num} Line {line_id}")

    # Flip Y-axis to match PPTX layout (PPTX origin is top-left, matplotlib is bottom-left)
    ax.invert_yaxis()

    ax.set_xlabel("X Coordinate")
    ax.set_ylabel("Y Coordinate")
    ax.set_title("Extracted Lines from PPTX")

    # Only build a legend when something was plotted: calling legend() on axes
    # with no labelled artists makes matplotlib emit a warning.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        ax.legend(loc="best", fontsize="small", markerscale=0.5)  # Legend to identify slides

    ax.grid(True)
    plt.show()

# Overlay the lines from every slide of the loaded presentation in one figure
plot_lines_from_pptx(prs)