In [1]:
# Import Libraries
import os
import pandas as pd
import io
import glob
import win32com.client
import easyocr
from pptx import Presentation
from PIL import Image
from io import BytesIO

# Create all Paths / Dataframes
# NOTE: the path constants are blank here and must be filled in before the
# notebook is run; every later cell reads them.
duplicate_ppts_folder = r""   # decks that get renamed in place and used as placeholder templates
dynamic_ppts_folder = r""     # output of the placeholder-filling cells
powerbi_exports_path = r""    # PowerBI export decks (renamed in place after OCR)
dynamic_powerbi_folder = r""  # output of the image-attachment cell, cropped afterwards

template_path = r""           # Excel workbook holding the "Relationship Decks" sheet
standard_chart_path = r""     # deck whose picture crop settings are copied to all outputs
deck_title_col = "title"

template_df = pd.read_excel(template_path, sheet_name="Relationship Decks", usecols=[deck_title_col, 'mappedaccountid'])

# Match extensions case-insensitively so "DECK.PPTX" is not silently ignored.
duplicate_ppt_files = [f for f in os.listdir(duplicate_ppts_folder) if f.lower().endswith(('.pptx', '.ppt'))]
powerbi_ppt_files = [f for f in os.listdir(powerbi_exports_path) if f.lower().endswith(('.pptx', '.ppt'))]

# Create Deck Title Labels representing each divided "_"
labels = [""]

# Ensure both output directories exist (later cells save into each of them).
for output_folder in (dynamic_ppts_folder, dynamic_powerbi_folder):
    if output_folder:  # a blank path resolves to the current directory, which already exists
        os.makedirs(output_folder, exist_ok=True)

# PowerBI to Dynamic PPT Slide Mapping (1-based slide numbers: PowerBI slide -> dynamic deck slide)
mapping = {4:11, 5:14, 6:15, 7:16, 8:17, 9:18, 10:19, 11:24, 12:25, 13:26, 14:27, 15:28, 16:29}

# List of slide numbers to check in the Dynamic PPT
slides_to_check = list(mapping.values())


In [None]:
# Data Cleaning Function
def clean_title(title):
    """Return ``title`` cleaned up for use as a file name.

    Each of the characters ``< > : " / | ? *`` and the backslash is replaced
    by a space, surrounding whitespace is trimmed, and a single trailing
    underscore is removed.

    Parameters
    ----------
    title : str or scalar
        Raw deck title. ``None`` and float NaN (an empty spreadsheet cell)
        yield an empty string; any other non-string value is converted with
        ``str()`` first, so numeric titles no longer raise ``AttributeError``.

    Returns
    -------
    str
        The cleaned title.
    """
    # NaN is the only float that is not equal to itself.
    if title is None or (isinstance(title, float) and title != title):
        return ""

    invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
    replacement_table = str.maketrans({char: ' ' for char in invalid_chars})
    title = str(title).translate(replacement_table)

    # Trim first so an underscore followed only by whitespace (or by a
    # character that was just replaced with a space) is still removed.
    title = title.strip()
    if title.endswith('_'):
        title = title[:-1]

    return title.strip()

# Keep the sanitised title alongside the raw one ...
template_df['Cleaned Title'] = template_df[deck_title_col].map(clean_title)

# ... and flag every row whose title was altered by the cleaning step.
template_df['Edited'] = template_df[deck_title_col].ne(template_df['Cleaned Title'])

template_df


In [None]:
# Rename the decks in duplicate_ppts_folder to the cleaned template titles, then
# print every resulting file name split into its "_"-separated label values.
# Files and titles are paired purely by position, so the counts must match.
if 'Cleaned Title' in template_df.columns:
    # Only PowerPoint files take part: a bare os.listdir() also returns
    # sub-folders, stray files and Office "~$" lock files, which would both
    # break the count check and get renamed. Sorting makes the file/title
    # pairing reproducible, since os.listdir() returns entries in arbitrary order.
    duplicate_ppt_files = sorted(
        (
            f for f in os.listdir(duplicate_ppts_folder)
            if f.lower().endswith(('.pptx', '.ppt')) and not f.startswith('~$')
        ),
        key=str.lower,
    )
    num_files = len(duplicate_ppt_files)
    num_titles = len(template_df)

    if num_files == num_titles:
        for ppt_file, new_name in zip(duplicate_ppt_files, template_df['Cleaned Title']):
            # Keep the file's own extension: renaming a binary .ppt to .pptx
            # would only mislabel it.
            extension = os.path.splitext(ppt_file)[1]
            old_file_path = os.path.join(duplicate_ppts_folder, ppt_file)
            new_file_path = os.path.join(duplicate_ppts_folder, f"{new_name}{extension}")

            if os.path.exists(old_file_path):
                if os.path.exists(new_file_path):
                    print(f"Skipping renaming for {ppt_file} as {new_name}{extension} already exists.")
                    continue
                os.rename(old_file_path, new_file_path)
            else:
                print(f"File {ppt_file} does not exist in the specified folder.")

        print("Renaming completed.")

        # Refresh the list of PowerPoint files after renaming
        duplicate_ppt_files = [f for f in os.listdir(duplicate_ppts_folder) if f.endswith('.pptx')]

        # Parsing the new file names
        for file in duplicate_ppt_files:
            # splitext removes only the extension; split('.')[0] would cut a
            # title such as "Acme Inc._Retail" at its first dot.
            file_name = os.path.splitext(os.path.basename(file))[0]
            print(f"\nTitle: {file_name}\n")
            values = file_name.split('_')

            # Labels without a matching value are reported as empty strings.
            file_info = {}
            for index, label in enumerate(labels):
                file_info[label] = values[index] if index < len(values) else ""

            for label in labels:
                print(f"{label}: {file_info[label]}")

    else:
        print(f"Error: The number of files ({num_files}) does not match the number of titles ({num_titles}).")
else:
    print("Error: 'Cleaned Title' column missing in DataFrame.")


In [None]:
def replace_placeholders(slide, label_dict):
    """Fill the ``<label>`` tokens in the text runs of ``slide`` in place.

    ``<placeholder>`` is replaced by ``label_dict["placeholder"]`` when that
    entry exists and is truthy, and removed otherwise. Every other name in
    the module-level ``labels`` list is replaced by ``label_dict.get(name, "")``.

    Parameters
    ----------
    slide : object exposing ``shapes``
        Only shapes whose ``has_text_frame`` is true are visited.
    label_dict : dict
        Maps label names to their replacement text.

    Notes
    -----
    Replacement is done per run, so a token split across several runs is
    not matched. A later cell in this notebook defines another function with
    this same name, which replaces this one once that cell has been executed.
    """
    # One replacement covers both cases (fill in / blank out). The previous
    # second pass over "<placeholder>" and the nested "<LOB>" branch could
    # never find a token, because the first replace had already removed them.
    special_value = label_dict.get("placeholder") or ""

    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            for run in paragraph.runs:
                if "placeholder" in run.text:
                    run.text = run.text.replace("<placeholder>", special_value)

                for label in labels:
                    if label == "placeholder":
                        continue  # already handled above
                    placeholder = "<" + label + ">"
                    if placeholder in run.text:
                        run.text = run.text.replace(placeholder, label_dict.get(label, ""))

# Fill the placeholders of every .pptx deck and save the result under the same
# file name in dynamic_ppts_folder. The label values come from the file name,
# split on "_" and matched to `labels` by position.
if dynamic_ppts_folder:
    # presentation.save() does not create missing folders.
    os.makedirs(dynamic_ppts_folder, exist_ok=True)

for file in duplicate_ppt_files:
    if file.endswith('.pptx'):
        original_file_path = os.path.join(duplicate_ppts_folder, file)
        # splitext strips only the extension; split('.')[0] would truncate a
        # title that itself contains a dot and lose the labels after it.
        file_name = os.path.splitext(os.path.basename(file))[0]
        values = file_name.split('_')

        # Start every label at "" so labels without a value still blank out
        # their placeholder instead of being left in the slide text.
        label_dict = dict.fromkeys(labels, "")
        label_dict.update(dict(zip(labels, values)))

        presentation = Presentation(original_file_path)

        for slide in presentation.slides:
            replace_placeholders(slide, label_dict)

        new_file_path = os.path.join(dynamic_ppts_folder, file)
        presentation.save(new_file_path)
        print(f"Processed and saved: {new_file_path}")


In [None]:
# Fills out Dynamic Brackets within the PowerPoint
# Function to replace placeholders with actual values or remove them if no value is present
def replace_placeholders(slide, label_dict):
    """Substitute every ``<label>`` token in the slide's text runs, in place.

    For each name in the module-level ``labels`` list, occurrences of
    ``<name>`` inside a run are replaced by ``label_dict.get(name, "")``;
    a label missing from ``label_dict`` therefore removes its token.
    Shapes without a text frame are ignored.
    """
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            for run in paragraph.runs:
                for label in labels:
                    token = f"<{label}>"
                    if token not in run.text:
                        continue
                    run.text = run.text.replace(token, label_dict.get(label, ""))

# Loop over each file in the directory
# presentation.save() does not create missing folders, so make sure the
# destination exists before the first deck is written.
if dynamic_ppts_folder:
    os.makedirs(dynamic_ppts_folder, exist_ok=True)

for file in duplicate_ppt_files:
    if file.endswith('.pptx'):
        original_file_path = os.path.join(duplicate_ppts_folder, file)

        # splitext strips only the extension; split('.')[0] would truncate a
        # title that itself contains a dot and lose the labels after it.
        file_name = os.path.splitext(os.path.basename(file))[0]

        # Label values are the "_"-separated parts of the file name, matched
        # to `labels` by position.
        values = file_name.split('_')

        # Default every label to "" so a label without a value blanks out its
        # placeholder rather than leaving it in the slide.
        label_dict = dict.fromkeys(labels, "")
        label_dict.update(dict(zip(labels, values)))

        presentation = Presentation(original_file_path)

        for slide in presentation.slides:
            replace_placeholders(slide, label_dict)

        new_file_path = os.path.join(dynamic_ppts_folder, file)

        presentation.save(new_file_path)
        print(f"Processed and saved: {new_file_path}")


In [None]:
# Create a dictionary mapping accountid to deck title
# The keys must be plain digit strings, because the lookup below uses the text
# returned by OCR. If pandas parsed the id column as float (which happens when
# the column contains blank cells), str() yields "12345.0"; the trailing ".0"
# and any surrounding whitespace are removed so those ids still match.
account_id_to_name = pd.Series(
    template_df['Cleaned Title'].values,
    index=template_df['mappedaccountid'].astype(str).str.strip().str.replace(r'\.0$', '', regex=True),
).to_dict()

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])

# Function to extract the first encountered numeric string using EasyOCR
def ocr_image(image_stream):
    """Run the module-level EasyOCR ``reader`` on an image and return the
    first detected text made up solely of digits, or ``None`` if there is none.

    ``image_stream`` is passed to ``reader.readtext`` unchanged; the text of
    each detection is read from index 1 of the returned entries.
    """
    detections = reader.readtext(image_stream)
    digit_texts = (entry[1] for entry in detections if entry[1].isdigit())
    return next(digit_texts, None)

# Process each PowerPoint file
# Rename each PowerBI export to the cleaned deck title of the account whose
# numeric id can be read (via OCR) from a picture on the export's first slide.
PICTURE_SHAPE_TYPE = 13  # MSO_SHAPE_TYPE.PICTURE in python-pptx

for ppt_file in powerbi_ppt_files:
    if not ppt_file.lower().endswith('.pptx'):
        # python-pptx only reads the XML-based .pptx format.
        print(f"Skipping {ppt_file}: only .pptx files can be opened.")
        continue

    old_file_path = os.path.join(powerbi_exports_path, ppt_file)
    prs = Presentation(old_file_path)
    if len(prs.slides) == 0:
        print(f"Skipping {ppt_file}: the presentation has no slides.")
        continue
    first_slide = prs.slides[0]

    # Keep looking through the pictures until one yields a numeric id. Stopping
    # at the first picture would miss the id whenever another image (a logo,
    # for instance) comes first in the shape order.
    account_id = None
    for shape in first_slide.shapes:
        if shape.shape_type == PICTURE_SHAPE_TYPE:
            account_id = ocr_image(shape.image.blob)
            if account_id:
                break

    if not account_id:
        print(f"No account ID could be read from the first slide of {ppt_file}")
        continue

    new_name = account_id_to_name.get(account_id)
    if new_name:
        new_file_path = os.path.join(powerbi_exports_path, f"{new_name}.pptx")
        same_file = (
            os.path.normcase(os.path.abspath(new_file_path))
            == os.path.normcase(os.path.abspath(old_file_path))
        )
        if same_file:
            print(f"{ppt_file} already has its final name.")
        elif os.path.exists(new_file_path):
            # Never rename onto another export: os.rename raises on Windows
            # and silently replaces the target on POSIX.
            print(f"Skipping renaming for {ppt_file} as {new_name}.pptx already exists.")
        else:
            os.rename(old_file_path, new_file_path)
            print(f"Renamed {ppt_file} to {new_name}.pptx")
    else:
        print(f"No matching name found for account ID {account_id} in {ppt_file}")
    # Output the mapping of account ID to Cleaned Title
    print(f"Processed account ID: {account_id}, mapped title: {account_id_to_name.get(account_id, 'No matching title')}")

print("Renaming completed.")

In [None]:
# Loop through all PowerPoint files in the PowerBI exports directory, attach images to the corresponding dynamic PowerPoint files
# A PowerBI export is matched to the dynamic deck with the same file name. For
# each (PowerBI slide -> dynamic slide) pair in `mapping`, the first picture of
# the PowerBI slide is added to the dynamic slide at the top-left corner, sized
# to the PowerBI deck's slide dimensions. Results go to dynamic_powerbi_folder.
PICTURE_SHAPE_TYPE = 13  # MSO_SHAPE_TYPE.PICTURE in python-pptx

for powerbi_file in glob.glob(os.path.join(powerbi_exports_path, '*.pptx')):
    dynamic_file_name = os.path.basename(powerbi_file)
    dynamic_file_path = os.path.join(dynamic_ppts_folder, dynamic_file_name)

    if not os.path.exists(dynamic_file_path):
        print(f"No corresponding dynamic file found for {dynamic_file_name}")
        continue

    # Parse the export only once a target deck is known to exist.
    powerbi_presentation = Presentation(powerbi_file)
    dynamic_presentation = Presentation(dynamic_file_path)

    print(f"Connecting {powerbi_file} with {dynamic_file_path}")

    slide_height = powerbi_presentation.slide_height
    slide_width = powerbi_presentation.slide_width

    powerbi_slide_count = len(powerbi_presentation.slides)
    dynamic_slide_count = len(dynamic_presentation.slides)

    for powerbi_slide_number, dynamic_slide_number in mapping.items():
        # Slide numbers in `mapping` are 1-based. A deck that is shorter than
        # expected must not abort the whole batch with an IndexError.
        if powerbi_slide_number > powerbi_slide_count or dynamic_slide_number > dynamic_slide_count:
            print(
                f"Skipping slide mapping {powerbi_slide_number} -> {dynamic_slide_number} "
                f"for {dynamic_file_name}: slide number out of range."
            )
            continue

        powerbi_slide = powerbi_presentation.slides[powerbi_slide_number - 1]
        dynamic_slide = dynamic_presentation.slides[dynamic_slide_number - 1]

        for shape in powerbi_slide.shapes:
            if shape.shape_type == PICTURE_SHAPE_TYPE:
                image_stream = io.BytesIO(shape.image.blob)

                left = top = 0
                dynamic_slide.shapes.add_picture(image_stream, left, top, width=slide_width, height=slide_height)
                break  # only the first picture of the PowerBI slide is copied

    dynamic_presentation.save(os.path.join(dynamic_powerbi_folder, dynamic_file_name))
    print(f"Completed processing for {dynamic_file_name}")

print("All files processed.")


In [8]:
# Open PowerPoint application
# A single COM automation object shared by this cell: the two functions below
# read this global to open presentations, and the cell's last statement calls
# Application.Quit() on it.
Application = win32com.client.Dispatch("PowerPoint.Application")

# Function to get the crop dimensions of an image in PowerPoint
def get_image_crop_dimensions(presentation_path, slide_numbers):
    """Read the crop settings of the first picture on each requested slide.

    The presentation is opened through the module-level PowerPoint
    ``Application`` object and is always closed again, even when accessing a
    slide fails (for example because a slide number does not exist).

    Parameters
    ----------
    presentation_path : str
        Path of the presentation to inspect.
    slide_numbers : iterable of int
        Slide numbers, passed unchanged to ``presentation.Slides(...)``.

    Returns
    -------
    dict
        ``{slide_number: (CropLeft, CropTop, CropRight, CropBottom)}``.
        Slides that contain no picture are absent from the result.
    """
    presentation = Application.Presentations.Open(presentation_path)
    crop_dimensions = {}
    try:
        for slide_num in slide_numbers:
            slide = presentation.Slides(slide_num)
            for shape in slide.Shapes:
                if shape.Type == 13:  # msoPicture
                    pic = shape.PictureFormat
                    crop_dimensions[slide_num] = (pic.CropLeft, pic.CropTop, pic.CropRight, pic.CropBottom)
                    break  # only the first picture on the slide is used
    finally:
        # Without this, any error above would leave the file open in PowerPoint.
        presentation.Close()
    return crop_dimensions

# Get the crop dimensions from the standard chart for the specified slides
standard_crop_dimensions = get_image_crop_dimensions(standard_chart_path, slides_to_check)

if not standard_crop_dimensions:
    raise Exception("No images found on the specified slides of the standard chart presentation.")

# Function to apply crop dimensions to images on specified slides of each presentation in the directory
def apply_crop_to_all_presentations(directory_path, crop_dimensions_dict):
    """Apply per-slide crop settings to the pictures of every ``.pptx`` in a folder.

    Each presentation is opened through the module-level PowerPoint
    ``Application`` object, modified, saved in place and closed. The file is
    closed even if cropping or saving fails.

    Parameters
    ----------
    directory_path : str
        Folder whose ``.pptx`` files are processed.
    crop_dimensions_dict : dict
        ``{slide_number: (left, top, right, bottom)}`` crop values. Every
        picture on the given slide receives them.

    Returns
    -------
    None
    """
    for filename in os.listdir(directory_path):
        # "~$..." entries are the lock files Office writes next to a document
        # that is open; they carry the .pptx extension but are not presentations.
        if not filename.endswith(".pptx") or filename.startswith("~$"):
            continue

        presentation_path = os.path.join(directory_path, filename)
        presentation = Application.Presentations.Open(presentation_path)
        try:
            for slide_num, dimensions in crop_dimensions_dict.items():
                slide = presentation.Slides(slide_num)
                for shape in slide.Shapes:
                    if shape.Type == 13:  # msoPicture
                        picture_format = shape.PictureFormat
                        picture_format.CropLeft = dimensions[0]
                        picture_format.CropTop = dimensions[1]
                        picture_format.CropRight = dimensions[2]
                        picture_format.CropBottom = dimensions[3]
            presentation.Save()
        finally:
            # Always release the file so one bad deck cannot leave it open.
            presentation.Close()

# Apply the crop dimensions to all PowerPoints in the exports folder
apply_crop_to_all_presentations(dynamic_powerbi_folder, standard_crop_dimensions)

# Quit PowerPoint application
Application.Quit()
