In [1]:
### Installing the necessary libraries 
# !pip install python-pptx openpyxl

# Importing the Presentation class from the python-pptx module to handle PowerPoint operations.
from pptx import Presentation

# Importing the Workbook class from the openpyxl module to handle Excel operations.
from openpyxl import Workbook

1. Read the text content from each PowerPoint file.
2. Export the extracted text to an Excel workbook, with one sheet per PowerPoint file and one row per slide.

In [2]:
# Function to extract the text of each slide from a PowerPoint file.
def extract_text_from_ppt(ppt_file):
    """Collect the text of every slide in a PowerPoint file.

    Args:
        ppt_file: Value passed straight to ``Presentation`` (this notebook
            passes file-name strings).

    Returns:
        A list with one string per slide, in slide order. Each string is the
        ``text`` of every shape on that slide that exposes a ``text``
        attribute, with a newline after each shape's text. A slide with no
        such shapes contributes an empty string.
    """
    deck = Presentation(ppt_file)

    # Shapes that expose no ``text`` attribute are skipped rather than
    # treated as errors.
    return [
        ''.join(
            element.text + '\n'
            for element in slide.shapes
            if hasattr(element, "text")
        )
        for slide in deck.slides
    ]

In [3]:
# Function to export the extracted text into an Excel file.
# Translation table applied to every cell value before it is written.
# openpyxl rejects strings containing ASCII control characters other than
# tab, line feed and carriage return (it raises IllegalCharacterError), and
# python-pptx represents a soft line break inside a paragraph as a vertical
# tab (0x0B). The vertical tab is therefore mapped to a newline so the line
# structure survives; the remaining rejected characters are dropped.
_XLSX_TEXT_FIXES = {
    code: None for code in (*range(0x00, 0x09), 0x0C, *range(0x0E, 0x20))
}
_XLSX_TEXT_FIXES[0x0B] = '\n'


def _clean_cell_text(value):
    """Return ``value`` made safe for an Excel cell; non-strings pass through unchanged."""
    if isinstance(value, str):
        return value.translate(_XLSX_TEXT_FIXES)
    return value


def export_to_excel(data, excel_file):
    """Write extracted slide text to an Excel workbook, one sheet per PowerPoint.

    Args:
        data: Iterable of per-PowerPoint collections, each holding one text
            value per slide (the shape ``extract_text_from_ppt`` returns).
        excel_file: Destination passed to ``Workbook.save``.

    Each PowerPoint gets a sheet titled ``"PowerPoint <n>"`` (numbered from 1)
    and each slide becomes one single-cell row on that sheet. Control
    characters that cannot be stored in a cell are removed, and vertical tabs
    are converted to newlines, so that a slide containing a line break no
    longer aborts the whole export.

    If ``data`` is empty the workbook is still saved, containing only its
    default sheet with the default title.
    """
    wb = Workbook()

    for idx, slides_text in enumerate(data, 1):
        if idx == 1:
            # A new workbook already has one sheet; reuse it for the first
            # PowerPoint instead of leaving an empty sheet behind.
            ws = wb.active
            ws.title = f"PowerPoint {idx}"
        else:
            ws = wb.create_sheet(title=f"PowerPoint {idx}")

        # One row per slide, with the slide text in the first column.
        for slide_text in slides_text:
            ws.append([_clean_cell_text(slide_text)])

    wb.save(excel_file)

In [4]:
# Function to convert text from multiple PowerPoint files into separate sheets of an Excel file.
def ppt_to_excel(ppt_files, excel_file):
    """Convert several PowerPoint files into one Excel workbook.

    Args:
        ppt_files: Iterable of PowerPoint files; each one is passed to
            ``extract_text_from_ppt`` in order.
        excel_file: Output destination forwarded to ``export_to_excel``.

    The text of all files is gathered first and then handed to
    ``export_to_excel`` in a single call.
    """
    per_deck_text = []
    for deck_path in ppt_files:
        per_deck_text.append(extract_text_from_ppt(deck_path))

    export_to_excel(per_deck_text, excel_file)

In [7]:
# PowerPoint files to process; each one becomes its own sheet, in this order.
ppt_files = [
    'Wk1_Risk_Mgmt.pptx',
    'Wk2_Regulatory_Env.pptx',
    'Wk3_Compliance Privacy Governance.pptx',
    'Wk4_AI_Ethics.pptx',
    'Wk5_Cybersecurity.pptx',
    'Wk7_Data_Quality.pptx',
    'Wk8_Strategic_Planning_IT.pptx',
]

# Workbook that receives the extracted text.
output_excel_file = 'midterm_output.xlsx'

# Run the conversion: read every file above and write the workbook.
ppt_to_excel(ppt_files, output_excel_file)