In [1]:
pip install python-pptx

Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading xlsxwriter-3.2.5-py3-none-any.whl.metadata (2.7 kB)
Downloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xlsxwriter-3.2.5-py3-none-any.whl (172 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.3/172.3 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, python-pptx
Successfully installed XlsxWriter-3.2.5 python-pptx-1.0.2


In [75]:
from pptx import Presentation
# Load your presentation template
prs = Presentation("Website Assessment Report.pptx")  # Replace with your actual file name
# Choose the slide layout index you're interested in
# Slide layouts are indexed from 0
layout_index = 2  # 🔁 Change this index as needed to inspect other layouts
# Get the layout
slide_layout = prs.slide_layouts[layout_index]
# Print placeholder details
print(f"Details for layout [{layout_index}]: {slide_layout.name}")
for placeholder in slide_layout.placeholders:
    print(
        f"Placeholder index: {placeholder.placeholder_format.idx}, "
        f"Type: {placeholder.placeholder_format.type}, "
        f"Name: '{placeholder.name}'"
    )

Details for layout [2]: 2
Placeholder index: 1, Type: PICTURE (18), Name: 'Picture Placeholder 2'
Placeholder index: 0, Type: TITLE (1), Name: 'Title 1'


In [80]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.enum.shapes import PP_PLACEHOLDER
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
import numpy as np
import re, os

# --- CONFIG ---
EXCEL_FILE   = 'Sample Audit Report v1.xlsx'
TEMPLATE_PPT = 'Website Assessment Report.pptx'
OUTPUT_PPT   = 'Generated_Final.pptx'
CHART_LAYOUT = '2'
TOP_N_BARS   = 10

# --- 1) Load & Clean Data ---
df = pd.read_excel(EXCEL_FILE, sheet_name='Domain Audit Report', skiprows=3)
df.columns = df.columns.str.strip()
active = df[df['Level of Traffic Volume'] != 'N/A - Dead Links / Redirects'].copy()
active['Total_Score'] = active['Compliance Score'].str.split().str[3].astype(float)
active['Compliance Score'] = active['Compliance Score'].str.split().str[0].astype(float)
active['Normalized Compliance Score'] = active['Compliance Score'] / active['Total_Score']

# --- 2) Open PPT & Find Chart-Layout Slide ---
prs = Presentation(TEMPLATE_PPT)
# find the layout object
chart_layout = next((l for l in prs.slide_layouts if l.name == CHART_LAYOUT), None)
if not chart_layout:
    raise RuntimeError(f"Layout '{CHART_LAYOUT}' not found in template.")

# find the first slide already using that layout
first_chart_slide_idx = next(
    (i for i, s in enumerate(prs.slides) if s.slide_layout.name == CHART_LAYOUT),
    None
)
# if none exists, create one at the end
if first_chart_slide_idx is None:
    slide_for_first = prs.slides.add_slide(chart_layout)
    first_chart_slide_idx = len(prs.slides) - 1
else:
    slide_for_first = prs.slides[first_chart_slide_idx]

# --- 3) Generate Visualizations ---
viz_specs = []
for col in active.columns:
    if col == 'Total_Score':
        continue
    if active[col].nunique() >= 0.9 * len(active):
        continue

    data = active[col].dropna()
    safe = re.sub(r'[^\w\-]', '_', col)
    fig, ax = plt.subplots(figsize=(8, 5))

    if pd.api.types.is_numeric_dtype(data):
        sns.histplot(data, kde=True, stat='density', color='skyblue', edgecolor='black', ax=ax)
        m = data.mean()
        ax.axvline(m, color='red', linestyle='--', linewidth=2)
        ax.text(m, ax.get_ylim()[1]*0.9, f"µ={m:.1f}", ha='center', va='top', color='red', weight='bold')
        ax.set_title(f"{col} Distribution", weight='bold')
        suffix = 'hist'
    else:
        counts = data.value_counts()
        if len(counts) < 5:
            colors = sns.color_palette("Set2", len(counts))
            explode = [0.05]*len(counts)
            wedges, texts, autotexts = ax.pie(
                counts.values, labels=counts.index,
                autopct='%1.1f%%', startangle=90,
                shadow=True, explode=explode, colors=colors
            )
            for at in autotexts:
                at.set_fontsize(10); at.set_weight('bold')
            ax.set_title(f"Distribution of {col}", weight='bold')
            ax.axis('equal')
            suffix = 'pie'
        else:
            top = counts.head(TOP_N_BARS)
            sns.barplot(x=top.values, y=top.index, palette='viridis', edgecolor='black', ax=ax)
            ax.set_title(f"Top {TOP_N_BARS} Values in {col}", weight='bold')
            ax.set_xlabel('Count'); ax.set_ylabel(col)
            for i, v in enumerate(top.values):
                ax.text(v + max(top.values)*0.01, i, str(v), va='center', weight='bold')
            suffix = 'hbar'

    fig.tight_layout()
    img_path = f"viz_{safe}_{suffix}.png"
    fig.savefig(img_path, dpi=200)
    plt.close(fig)
    viz_specs.append((col, img_path))

# --- 4) Insert First Visualization into That Slide ---
first_col, first_img = viz_specs[0]

# Title placeholder index 0
try:
    title_ph = slide_for_first.placeholders[11]
    title_ph.text = f"{first_col} Distribution"
except KeyError:
    # fallback: manual textbox
    tb = slide_for_first.shapes.add_textbox(Inches(1), Inches(0.5), Inches(8), Inches(1))
    p = tb.text_frame.paragraphs[0]
    r = p.add_run()
    r.text = f"{first_col} Distribution"
    r.font.size = Pt(24); r.font.bold = True; r.font.color.rgb = RGBColor(0,0,0)

# Picture placeholder index 10
try:
    pic_ph = slide_for_first.placeholders[10]
    slide_for_first.shapes.add_picture(first_img, pic_ph.left, pic_ph.top, pic_ph.width, pic_ph.height)
except KeyError:
    # fallback: manual placement
    slide_for_first.shapes.add_picture(first_img, Inches(1), Inches(1.5), width=Inches(7.5), height=Inches(4.5))

# --- 5) Insert Remaining Charts Before Final Slide ---
insert_pos = len(prs.slides) - 1  # one before the last slide
for col, img_path in viz_specs[1:]:
    sld = prs.slides.add_slide(chart_layout)
    # move it just before last
    rId = prs.slides._sldIdLst[-1]
    prs.slides._sldIdLst.remove(rId)
    prs.slides._sldIdLst.insert(insert_pos, rId)
    insert_pos += 1

    # set title if placeholder exists
    if sld.shapes.title:
        sld.shapes.title.text = f"{col} Distribution"
    # find any picture placeholder
    ph = next((ph for ph in sld.placeholders
               if ph.placeholder_format.type in (PP_PLACEHOLDER.PICTURE, PP_PLACEHOLDER.CHART)
                  or 'Picture' in ph.name),
              None)
    if ph:
        sld.shapes.add_picture(img_path, ph.left, ph.top, ph.width, ph.height)
    else:
        sld.shapes.add_picture(img_path, Inches(1), Inches(1.5), width=Inches(7.5), height=Inches(4.5))

# --- 6) Save ---
prs.save(OUTPUT_PPT)
print("✅ PPT generated:", OUTPUT_PPT)



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top.values, y=top.index, palette='viridis', edgecolor='black', ax=ax)
  fig.tight_layout()

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top.values, y=top.index, palette='viridis', edgecolor='black', ax=ax)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top.values, y=top.index, palette='viridis', edgecolor='black', ax=ax)


✅ PPT generated: Generated_Final.pptx
