<a href="https://colab.research.google.com/github/Majids-Hamm/Pharmacovigillance/blob/main/WordDataToPPTXSlides.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries for working with Word and PowerPoint files.
# This command is specific to Google Colab.
!pip install python-docx python-pptx

import docx
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.dml import MSO_THEME_COLOR
from pptx.dml.color import RGBColor
from pptx.enum.text import MSO_ANCHOR, PP_ALIGN



In [2]:
def get_table_data(doc_path, table_index):
    """
    Extracts data from a specific table in a Word document.

    The first row of the table is treated as the header and every following
    row as data. Each cell's text is stripped of surrounding whitespace.
    Problems are reported with print() and signalled by returning
    (None, None); this function does not raise for the cases listed below.

    Args:
        doc_path (str): The path to the Word document.
        table_index (int): The index of the table to extract (0-based).
            Negative values count from the end, as with a list.

    Returns:
        tuple: (header, data_rows), where header is a list of strings and
            data_rows is a list of lists of strings. Returns (None, None)
            when the document cannot be opened, when table_index is out of
            range, or when the selected table has no rows at all.
    """
    # python-docx reports a package it cannot locate with its own exception
    # class rather than FileNotFoundError. Imported here so the function does
    # not depend on an extra module-level import.
    from docx.opc.exceptions import PackageNotFoundError

    try:
        doc = docx.Document(doc_path)
    except PackageNotFoundError:
        print(f"Error: Word file not found at '{doc_path}'. Please ensure it is uploaded to the Colab environment.")
        return None, None
    except Exception as e:
        print(f"An error occurred while opening the Word file: {e}")
        return None, None

    # Accept list-style negative indices but reject anything outside the
    # valid range in either direction, so the lookup below cannot raise.
    table_count = len(doc.tables)
    if not -table_count <= table_index < table_count:
        print(f"Error: Table with index {table_index} not found. The document only has {table_count} tables.")
        return None, None

    rows = list(doc.tables[table_index].rows)
    if not rows:
        # Without a first row there is no header to return.
        print(f"Error: Table with index {table_index} has no rows.")
        return None, None

    # Extract header from the first row of the table
    header = [cell.text.strip() for cell in rows[0].cells]

    # Extract data from the remaining rows
    data_rows = [[cell.text.strip() for cell in row.cells] for row in rows[1:]]

    return header, data_rows

In [3]:
def create_slides_from_data(prs, data_rows, header_row, title, slide_layout, column_widths, rows_per_table=9):
    """
    Creates new slides with tables from the given data, applying styling to match the template.

    The data is split into consecutive chunks of at most rows_per_table rows.
    Each chunk gets its own slide, appended to prs, holding one table whose
    first row is header_row. Nothing is added when data_rows is empty.

    Args:
        prs (pptx.presentation.Presentation): The PowerPoint presentation object.
        data_rows (list): A list of lists, where each inner list is a row of data.
            Rows longer than header_row are truncated; shorter rows are padded
            with empty strings so every cell is styled consistently.
        header_row (list): A list of strings for the table header.
        title (str): The title for the new slides.
        slide_layout (pptx.slide.SlideLayout): The slide layout object to use.
        column_widths (list): A list of floats for column widths in Inches.
            Entries beyond the number of header columns are ignored.
        rows_per_table (int): Maximum number of data rows per slide
            (header excluded). Defaults to 9.

    Raises:
        ValueError: If rows_per_table is smaller than 1.
    """
    if rows_per_table < 1:
        raise ValueError("rows_per_table must be at least 1")

    # Define table styles
    header_fill_color = RGBColor(0x00, 0x20, 0x60)  # Dark blue
    header_font_color = RGBColor(0xFF, 0xFF, 0xFF)  # White
    stripe_fill_color = RGBColor(0xF2, 0xF2, 0xF2)  # Light gray
    table_font_color = RGBColor(0x00, 0x00, 0x00)  # Black
    font_name = 'Times New Roman'

    # Table geometry is the same on every slide, so compute it once.
    left = Inches(0.5)
    top = Inches(1.5)
    width = prs.slide_width - Inches(1.0)
    height = Inches(5.9)  # Fixed height to fit the rows without bulging

    column_count = len(header_row)
    slides_created = 0

    # Process data in chunks of rows_per_table rows
    for start in range(0, len(data_rows), rows_per_table):
        data_chunk = data_rows[start:start + rows_per_table]

        # Add a new slide using the template's layout
        slide = prs.slides.add_slide(slide_layout)
        slides_created += 1

        # Set the title of the slide. Layouts without a title placeholder
        # yield None here, in which case the slide simply gets no title.
        title_shape = slide.shapes.title
        if title_shape is not None:
            title_shape.text = title
            for paragraph in title_shape.text_frame.paragraphs:
                paragraph.font.size = Pt(20)
                paragraph.font.name = font_name

        table_shape = slide.shapes.add_table(len(data_chunk) + 1, column_count, left, top, width, height)
        table = table_shape.table

        # Set column widths; extra width entries have no column to apply to.
        for col_idx, w in enumerate(column_widths[:column_count]):
            table.columns[col_idx].width = Inches(w)

        # Populate and style the table
        for row_idx, row_data in enumerate([header_row] + data_chunk):
            # Ensure the row has the same number of columns as the header:
            # drop surplus values and pad missing ones with empty text.
            cell_values = list(row_data[:column_count])
            cell_values += [""] * (column_count - len(cell_values))

            is_header = row_idx == 0
            for col_idx, cell_data in enumerate(cell_values):
                cell = table.cell(row_idx, col_idx)
                cell.text = cell_data
                cell.vertical_anchor = MSO_ANCHOR.MIDDLE

                if is_header:
                    cell.fill.solid()
                    cell.fill.fore_color.rgb = header_fill_color
                elif row_idx % 2 == 1:
                    # Style data rows with alternating colors
                    cell.fill.solid()
                    cell.fill.fore_color.rgb = stripe_fill_color
                else:
                    cell.fill.background()

                # Text containing line breaks is split into several paragraphs
                # by python-pptx, so style every paragraph, not just the first.
                for paragraph in cell.text_frame.paragraphs:
                    paragraph.alignment = PP_ALIGN.CENTER
                    font = paragraph.font
                    font.name = font_name
                    if is_header:
                        font.color.rgb = header_font_color
                        font.size = Pt(18)
                        font.bold = True
                    else:
                        font.color.rgb = table_font_color
                        font.size = Pt(16)

    print(f"Created {slides_created} slides for '{title}' data.")


In [4]:
# --- Main Script ---

# Input and output locations, relative to the notebook's working directory.
docx_path = '30omztables.docx'
pptx_template_path = '30omzSlides.pptx'
output_pptx_path = 'output_sae_slides.pptx'

# 1. Read data from the Word document
print("Reading data from Word document...")
# Table 0: Serious Adverse Events (Results in death)
header_row_death, death_sae_data = get_table_data(docx_path, 0)
if not death_sae_data:
    # Covers both a failed read (None) and a table without data rows ([]).
    print("Could not retrieve death SAE data. Exiting.")
    # exit() is a helper intended for the interactive shell; raising
    # SystemExit makes the abort explicit so nothing below runs on bad data.
    raise SystemExit(1)

# Table 1: Other SAEs (Non-death)
header_row_other, other_sae_data = get_table_data(docx_path, 1)
if not other_sae_data:
    print("Could not retrieve other SAE data. Exiting.")
    raise SystemExit(1)

Reading data from Word document...


In [5]:
# python-pptx reports a template it cannot locate with its own exception
# class, so it has to be caught alongside FileNotFoundError.
from pptx.exc import PackageNotFoundError

# 2. Load the PowerPoint template
print("Loading PowerPoint template...")
try:
    prs = Presentation(pptx_template_path)
except (FileNotFoundError, PackageNotFoundError):
    print(f"Error: Template file not found at '{pptx_template_path}'. Please upload the template file.")
    # Stop here explicitly; every later step needs `prs`.
    raise SystemExit(1)

# 3. Get a reference slide layout from the template
template_slide_layout = prs.slide_layouts[0]

# 4. Keep the template's existing slides
# Existing slides are not removed. New slides will be appended to the end.
print("Preserving existing slides. New slides will be appended.")

# 5. Create slides for death SAEs
print("Generating slides for death SAEs...")
# Define the new header and reorder the data for death SAEs.
# The first two Word columns are swapped; each row must have at least 4 cells.
new_header_death = ["Subject ID", "Group", "PT", "Causality Assessment"]
reordered_death_sae_data = [[row[1], row[0], row[2], row[3]] for row in death_sae_data]
# Define custom column widths (in inches) to fit the slide
col_widths_death = [3.0, 2.0, 3.0, 3.9]
create_slides_from_data(prs, reordered_death_sae_data, new_header_death, "Serious Adverse Events-Details (Results in death)", template_slide_layout, col_widths_death)

# 6. Create slides for other SAEs
print("Generating slides for other SAEs...")
# Define the new header for other SAEs with 5 columns.
new_header_other = ["Subject ID", "Group", "PT", "Seriousness Criteria", "Causality Assessment"]
# Reorder the data to match the new header; each row must have at least 5 cells.
# NOTE(review): only the first two columns are swapped here; columns 4 and 5
# keep their Word order. Confirm the header labels match the source table.
reordered_other_sae_data = [[row[1], row[0], row[2], row[3], row[4]] for row in other_sae_data]
# Define custom column widths (in inches) to fit the slide
col_widths_other = [2.1, 1.0, 2.3, 3.7, 2.8]
create_slides_from_data(prs, reordered_other_sae_data, new_header_other, "Serious Adverse Events-Details (Other SAEs)", template_slide_layout, col_widths_other)


Loading PowerPoint template...
Preserving existing slides. New slides will be appended.
Generating slides for death SAEs...
Created 9 slides for 'Serious Adverse Events-Details (Results in death)' data.
Generating slides for other SAEs...
Created 37 slides for 'Serious Adverse Events-Details (Other SAEs)' data.


In [6]:
# 7. Save the new presentation
# Writes to output_pptx_path rather than the template path, so the template
# file itself is left untouched.
print(f"Saving the new presentation to '{output_pptx_path}'...")
prs.save(output_pptx_path)

# Only reached when save() returned without raising.
print("Process completed successfully!")

Saving the new presentation to 'output_sae_slides.pptx'...
Process completed successfully!
