Element → name of the color type in the theme.
<br>dk1: "Dark 1" — primary dark color.
<br>lt1: "Light 1" — primary light color.
<br>dk2, lt2, accent1–accent6: additional colors for accents and variations.
<br>hlink: hyperlink color.
<br>folHlink: visited hyperlink color.
<br>Value → hex color code (RRGGBB), the same as in HTML/CSS.
<br>For example: "1F1047" = dark purple/blue.

In [21]:
import pandas as pd
import xml.etree.ElementTree as ET
import zipfile
import os

def extract_xml(path, extract_dir="extracted_content"):
    """Unpack a ZIP-based file (such as a .pptx) into a directory.

    Args:
        path: Path of the ZIP archive to unpack.
        extract_dir: Directory that receives the archive contents. Defaults
            to "extracted_content", the location that was previously
            hard-coded, so existing one-argument calls behave the same.

    Returns:
        The directory the archive was extracted into.
    """
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    return extract_dir

# Unpack the presentation; as the last expression of a notebook cell, the
# returned directory name is echoed as the cell output.
extract_xml("Pokémon Analysis (1).pptx")

'extracted_content'

In [20]:
import pandas as pd
import xml.etree.ElementTree as ET

def extract_styling_from_xml(xml_path):
    """Collect the color scheme and font scheme of a DrawingML theme file.

    Args:
        xml_path: Path to a theme XML file.

    Returns:
        A DataFrame with the columns ``Type`` ("Color" or "Font"),
        ``Element`` (the tag name of the scheme entry without its
        namespace, e.g. "dk1" or "majorFont") and ``Value`` (the color
        value or the Latin typeface; ``None`` when neither is present).
        The three columns exist even when nothing was found.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    ns = {"a": "http://schemas.openxmlformats.org/drawingml/2006/main"}
    data = []

    # Color scheme
    for clrScheme in root.findall(".//a:clrScheme", namespaces=ns):
        for color in clrScheme:
            # Tags look like "{namespace-uri}dk1"; keep only the local name.
            color_name = color.tag.split('}')[-1]
            srgb_element = color.find('.//a:srgbClr', namespaces=ns)
            if srgb_element is not None:
                color_value = srgb_element.attrib.get('val')
            else:
                # Entries may reference a system color instead of a fixed
                # RGB value; its "lastClr" attribute carries the hex value.
                sys_element = color.find('.//a:sysClr', namespaces=ns)
                color_value = (
                    sys_element.attrib.get('lastClr')
                    if sys_element is not None
                    else None
                )
            data.append({
                'Type': 'Color',
                'Element': color_name,
                'Value': color_value
            })

    # Fonts
    fontScheme = root.find(".//a:fontScheme", namespaces=ns)
    if fontScheme is not None:
        for font_type in fontScheme:
            font_name = font_type.tag.split('}')[-1]
            latin_font = font_type.find(".//a:latin", namespaces=ns)
            typeface = latin_font.attrib.get('typeface') if latin_font is not None else None
            data.append({
                'Type': 'Font',
                'Element': font_name,
                'Value': typeface
            })

    # Fixing the columns keeps the frame's shape stable for empty results.
    return pd.DataFrame(data, columns=['Type', 'Element', 'Value'])


# Usage:
# Forward slashes are accepted as path separators on Windows as well as on
# macOS/Linux, whereas a backslash-separated path only resolves on Windows.
xml_path = "extracted_content/ppt/theme/theme2.xml"  # your XML file
df_styling = extract_styling_from_xml(xml_path)
print(df_styling)


     Type    Element   Value
0   Color        dk1  FFFFFF
1   Color        lt1  22244E
2   Color        dk2  503259
3   Color        lt2  765186
4   Color    accent1  B07CC6
5   Color    accent2  FAF6E7
6   Color    accent3  E3DFD2
7   Color    accent4  FFFFFF
8   Color    accent5  FFFFFF
9   Color    accent6  FFFFFF
10  Color      hlink  765186
11  Color   folHlink  0097A7
12   Font  majorFont   Arial
13   Font  minorFont   Arial


In [None]:
import os
from docx2pdf import convert
from pdf2image import convert_from_path
from PIL import ImageChops, Image, ImageDraw

# Visual comparison of two Word documents: both are converted to PDF,
# rendered page by page to images, and every page whose pixels differ is
# saved with the changed pixels painted red.

# --- CONFIG ---
file1 = "Zoekwoordenonderzoek .docx"
file2 = "Zoekwoordenonderzoek 2.docx"

# Intermediate PDFs and the folder that receives the highlighted pages
pdf1 = "file1.pdf"
pdf2 = "file2.pdf"
diff_dir = "diff_output"
os.makedirs(diff_dir, exist_ok=True)

# Grayscale difference (0-255) above which a pixel counts as changed
diff_threshold = 20

# Step 1: Convert DOCX -> PDF
convert(file1, pdf1)
convert(file2, pdf2)

# Step 2: Convert PDFs -> images (one image per page)
pages1 = convert_from_path(pdf1)
pages2 = convert_from_path(pdf2)

# Only pages that exist in both documents can be compared pairwise
num_pages = min(len(pages1), len(pages2))
if len(pages1) != len(pages2):
    # Say so explicitly instead of silently ignoring the trailing pages.
    print(
        f"Page count differs ({len(pages1)} vs {len(pages2)}); "
        f"only the first {num_pages} page(s) are compared."
    )

for i in range(num_pages):
    img1 = pages1[i].convert("RGB")
    img2 = pages2[i].convert("RGB")

    # Ensure same size; ImageChops.difference needs matching dimensions
    if img1.size != img2.size:
        img2 = img2.resize(img1.size)

    # Step 3: Compare images
    diff = ImageChops.difference(img1, img2)

    # Binary mask of the pixels that changed by more than the threshold.
    # The same mask decides both whether the page is reported and what is
    # painted red, so a reported page always has something highlighted.
    mask = diff.convert("L").point(lambda x: 255 if x > diff_threshold else 0)

    if mask.getbbox():
        # Paint changed pixels red on top of the first document's page
        red_overlay = Image.new("RGB", img1.size, (255, 0, 0))
        diff_highlight = Image.composite(red_overlay, img1, mask)

        out_path = os.path.join(diff_dir, f"diff_page_{i+1}.png")
        diff_highlight.save(out_path)
        print(f"Differences found on page {i+1}, saved to {out_path}")
    else:
        print(f"No visible differences on page {i+1}.")
