In [7]:
import pandas as pd
import matplotlib.pyplot as plt
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet

# Build a one-page PDF report from the Superstore CSV: headline totals, the
# best-selling category, and a bar chart of sales per category.

# Load dataset. Only the "Sales", "Profit" and "Category" columns are used.
df = pd.read_csv("/mnt/data/Sample - Superstore.csv")

# Summary calculations. Sales per category is aggregated once and reused for
# both the top-category lookup and the chart below.
category_sales = df.groupby("Category")["Sales"].sum()
total_sales = df["Sales"].sum()
total_profit = df["Profit"].sum()
top_category = category_sales.idxmax()

# Chart: Sales by Category
# The figure size is kept in named variables so the PDF embedding further down
# can reuse the same aspect ratio.
fig_width_in, fig_height_in = 6, 4
plt.figure(figsize=(fig_width_in, fig_height_in))
category_sales.plot(kind="bar")
plt.tight_layout()
chart_path = "/mnt/data/sales_by_category.png"
plt.savefig(chart_path)
plt.close()

# Create PDF
pdf_path = "/mnt/data/Superstore_Report.pdf"
styles = getSampleStyleSheet()
doc = SimpleDocTemplate(pdf_path, pagesize=A4)

story = []

# Title
story.append(Paragraph("Superstore Data Visualization Report", styles["Title"]))
story.append(Spacer(1, 12))

# Summary
story.append(Paragraph(f"<b>Total Sales:</b> ${total_sales:,.2f}", styles["Normal"]))
story.append(Paragraph(f"<b>Total Profit:</b> ${total_profit:,.2f}", styles["Normal"]))
story.append(Paragraph(f"<b>Top Category by Sales:</b> {top_category}", styles["Normal"]))
story.append(Spacer(1, 20))

# Chart
story.append(Paragraph("Sales by Category", styles["Heading2"]))
# Target width: 400 px converted to points (1 px ≈ 0.75 pt).
chart_width_pt = 400 * 0.75
# Derive the height from the figure's own aspect ratio. Passing an explicit
# width and height that do not match the saved image (previously 4:3 for a
# 3:2 figure) makes the flowable stretch the chart.
chart_height_pt = chart_width_pt * fig_height_in / fig_width_in
story.append(RLImage(chart_path, width=chart_width_pt, height=chart_height_pt))
story.append(Spacer(1, 20))

# Build PDF
doc.build(story)

print(f"PDF report generated: {pdf_path}")


ModuleNotFoundError: No module named 'reportlab'