In [1]:
import json
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE

In [2]:
# PowerPoint stores geometry in English Metric Units (EMU); the layout JSON
# uses pixels at 96 pixels per inch.
_EMU_PER_INCH = 914400
_PIXELS_PER_INCH = 96


def _emu_to_pixels(emu):
    """Convert an EMU length to pixels; a missing length (``None``) becomes 0.0."""
    if emu is None:
        return 0.0
    return emu / _EMU_PER_INCH * _PIXELS_PER_INCH


def slide_to_json(pptx_file, json_output, slide_index=0):
    """Dump the layout of one slide of a PowerPoint file to a JSON file.

    The JSON document has three keys: ``canvas_size`` (slide width/height in
    pixels), ``slide_index`` and ``elements``. Each element records its
    ``type`` (``"image"``, ``"text"`` or ``"line"``), its ``position``
    (``x``/``y``) and its ``size`` (``width``/``height``), all in pixels.
    Text elements additionally carry a ``content`` string. Shapes of any other
    type are skipped.

    Args:
        pptx_file: Path of the presentation to read.
        json_output: Path of the JSON file to write (overwritten if present).
        slide_index: Zero-based index of the slide to export. Defaults to 0,
            the first slide.

    Returns:
        None. The result is written to ``json_output`` and a confirmation
        line is printed.

    Note:
        An out-of-range ``slide_index`` is not checked here; the error comes
        from indexing ``prs.slides`` (presumably ``IndexError`` -- confirm
        against the installed python-pptx version).
    """
    prs = Presentation(pptx_file)

    slide_info = {
        "canvas_size": {
            "width": _emu_to_pixels(prs.slide_width),
            "height": _emu_to_pixels(prs.slide_height),
        },
        "slide_index": slide_index,
        "elements": [],
    }

    for shape in prs.slides[slide_index].shapes:
        # Classify first, so that geometry is only read for shapes that are
        # actually exported.
        shape_type = shape.shape_type
        if shape_type == MSO_SHAPE_TYPE.PICTURE:
            element_type = "image"
        elif shape_type in (MSO_SHAPE_TYPE.TEXT_BOX, MSO_SHAPE_TYPE.PLACEHOLDER):
            element_type = "text"
        elif shape_type == MSO_SHAPE_TYPE.LINE:
            element_type = "line"
        else:
            continue

        element = {
            "type": element_type,
            "position": {
                "x": _emu_to_pixels(shape.left),
                "y": _emu_to_pixels(shape.top),
            },
            "size": {
                "width": _emu_to_pixels(shape.width),
                "height": _emu_to_pixels(shape.height),
            },
        }

        if element_type == "text":
            # Not every placeholder exposes `.text`; treat those as empty.
            content = getattr(shape, "text", "")
            # Text containing exactly two 'x' characters is stored as empty.
            # NOTE(review): presumably this filters template filler text --
            # confirm the intent of this heuristic.
            element["content"] = "" if content.count('x') == 2 else content

        slide_info["elements"].append(element)

    # Explicit encoding so the output does not depend on the platform default.
    with open(json_output, 'w', encoding="utf-8") as f:
        json.dump(slide_info, f, indent=4)

    print(f"Slide info saved to {json_output}")


In [3]:
# Export the layout of the source deck to a JSON file.
pptx_file = "Participant 1001 – Macula Fundus.pptx"
json_output = "Macula_Fundus_Layout.json"
slide_to_json(pptx_file=pptx_file, json_output=json_output)

Slide info saved to Macula_Fundus_Layout.json


In [4]:
# Put the exported elements into a fixed order and save them to a new file,
# Macula_Fundus_Layout_reordered.json.
# Ordering is ascending by y position first; ties on y are broken by ascending
# x position.
def _position_key(element):
    """Sort key for a layout element: its (y, x) position."""
    position = element["position"]
    return position["y"], position["x"]


with open(json_output, 'r') as source_file:
    slide_info = json.load(source_file)

# In-place sort; like sorted(), list.sort() is stable.
slide_info["elements"].sort(key=_position_key)

reordered_json_output = "Macula_Fundus_Layout_reordered.json"
with open(reordered_json_output, 'w') as target_file:
    json.dump(slide_info, target_file, indent=4)

print(f"Reordered slide info saved to {reordered_json_output}")

Reordered slide info saved to Macula_Fundus_Layout_reordered.json


In [5]:
# Add an empty "label" entry to every element that is labelled later on:
#   - every element of type "image"
#   - every element of type "text" whose "content" is the empty string
# All other elements are left untouched.
with open(reordered_json_output, 'r') as source_file:
    slide_info = json.load(source_file)

for element in slide_info["elements"]:
    kind = element["type"]
    # "content" is only looked up for text elements (short-circuit).
    if kind == "image" or (kind == "text" and element["content"] == ""):
        element["label"] = ""

# The file is updated in place under the same name.
reordered_json_output = "Macula_Fundus_Layout_reordered.json"
with open(reordered_json_output, 'w') as target_file:
    json.dump(slide_info, target_file, indent=4)

print(f"Updated slide info saved to {reordered_json_output}")

Updated slide info saved to Macula_Fundus_Layout_reordered.json


In [6]:
# Labels, in the order in which they are assigned to the slide elements.
# Each label is a set of underscore-separated fields; the blank-line groups
# below follow the last field and the "R"/"L" field.
labels = [
    # Color Photography, "R" entries
    "iCare_Eidon_R_Macula_Color Photography",
    "Optomed_Aurora_R_Macula or Optic Disc_Color Photography",
    "Topcon_Maestro2_R_Macula_Color Photography",
    "Topcon_Triton_R_Macula, 12 x 12_Color Photography",
    "Topcon_Triton_R_Macula, 6 x 6_Color Photography",

    # Color Photography, "L" entries
    "iCare_Eidon_L_Macula_Color Photography",
    "Optomed_Aurora_L_Macula or Optic Disc_Color Photography",
    "Topcon_Maestro2_L_Macula_Color Photography",
    "Topcon_Triton_L_Macula, 12 x 12_Color Photography",
    "Topcon_Triton_L_Macula, 6 x 6_Color Photography",

    # Autofluorescence, "R" then "L"
    "iCare_Eidon_R_Macula_Autofluorescence",
    "iCare_Eidon_L_Macula_Autofluorescence",

    # Infrared Reflectance, "R" entries
    "iCare_Eidon_R_Macula_Infrared Reflectance",
    "Heidelberg_Spectralis_R_Macula_Infrared Reflectance",
    "Topcon_Maestro2_R_Macula, 6 x 6_Infrared Reflectance",
    "Zeiss_Cirrus_R_Macula_Infrared Reflectance",

    # Infrared Reflectance, "L" entries
    "iCare_Eidon_L_Macula_Infrared Reflectance",
    "Heidelberg_Spectralis_L_Macula_Infrared Reflectance",
    "Topcon_Maestro2_L_Macula, 6 x 6_Infrared Reflectance",
    "Zeiss_Cirrus_L_Macula_Infrared Reflectance",
]

In [7]:
# Give each image element its label: the n-th image element, in the order the
# elements are stored in the file, receives the n-th entry of `labels`.
with open(reordered_json_output, 'r') as source_file:
    slide_info = json.load(source_file)

# Collect the image elements; these are the same dict objects held by
# slide_info, so labelling them updates slide_info as well.
image_elements = []
for candidate in slide_info["elements"]:
    if candidate["type"] == "image":
        image_elements.append(candidate)

for position, image in enumerate(image_elements):
    image["label"] = labels[position]

with open(reordered_json_output, 'w') as target_file:
    json.dump(slide_info, target_file, indent=4)

print(f"Updated slide info saved to {reordered_json_output}")

Updated slide info saved to Macula_Fundus_Layout_reordered.json


In [8]:
# Give each text element with empty content its label: the n-th such element,
# in the order the elements are stored in the file, receives the n-th entry of
# `labels`.
with open(reordered_json_output, 'r') as source_file:
    slide_info = json.load(source_file)

# Collect the empty text elements; these are the same dict objects held by
# slide_info, so labelling them updates slide_info as well.
text_elements = []
for candidate in slide_info["elements"]:
    if candidate["type"] != "text":
        continue
    if candidate["content"] == "":
        text_elements.append(candidate)

for position, text_element in enumerate(text_elements):
    text_element["label"] = labels[position]

with open(reordered_json_output, 'w') as target_file:
    json.dump(slide_info, target_file, indent=4)

print(f"Updated slide info saved to {reordered_json_output}")

Updated slide info saved to Macula_Fundus_Layout_reordered.json
