In [None]:
To write a report — this cell can be added to any Jupyter Notebook (from Mona).
Just change the text inside the quoted strings in the report code below.

In [None]:
# To generate a Report:
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
from reportlab.lib.units import inch
# Output location for the generated report
pdf_file_path = "Chunking_Strategy_Report.pdf"

# A4 document template; it is rendered to pdf_file_path when doc.build() runs
doc = SimpleDocTemplate(pdf_file_path, pagesize=A4)

# Stock ReportLab sample styles used for the title, section headings and body text
styles = getSampleStyleSheet()
title_style = styles["Heading1"]
heading_style = styles["Heading2"]
normal_style = styles["BodyText"]

# Flowables are collected in document order; start with the title and a small gap
elements = [
    Paragraph("Chunking Strategy Analysis Report", title_style),
    Spacer(1, 0.2 * inch),
]
# Introduction: a single body paragraph. Paragraph treats the line breaks in
# this string as ordinary whitespace, so it is laid out as one flowing paragraph.
intro = """
This report summarizes the analysis of different chunking strategies applied to a 
Trustworthy AI podcast transcript and a structured PDF document. The goal was 
to evaluate how chunking strategies impact semantic preservation, retrieval 
quality, and suitability for Retrieval-Augmented Generation (RAG) systems.
"""
# Add the paragraph followed by a small vertical gap
elements.extend([
    Paragraph(intro, normal_style),
    Spacer(1, 0.2 * inch),
])
# Key Findings
elements.append(Paragraph("Key Findings", heading_style))
# One entry per finding; edit this list to change the report text.
finding_items = [
    "Fixed-size chunking is simple but frequently breaks sentences and paragraphs.",
    "Recursive character chunking preserves document structure effectively.",
    "Token-based chunking aligns best with LLM context windows.",
    "Semantic chunking preserves meaning but is computationally expensive.",
]
# Paragraph treats newlines as plain whitespace, so a multi-line string would be
# rendered as one run-on line. Join the items with explicit <br/> tags so every
# finding starts on its own line in the PDF.
findings = "<br/>".join(f"- {item}" for item in finding_items)
elements.append(Paragraph(findings, normal_style))
elements.append(Spacer(1, 0.2*inch))
# Recommendations
elements.append(Paragraph("Recommendations", heading_style))
# One entry per output line; edit this list to change the report text.
# NOTE(review): the arrow character is outside Helvetica's standard WinAnsi
# character set - confirm it renders as expected in the generated PDF.
recommendation_lines = [
    "PDF Document → Recursive Character Chunking (chunk_size=1000, overlap=200)",
    "Reason: Maintains paragraph and section boundaries for structured documents.",
    "Podcast Transcript → Token-Based Chunking (chunk_size=500 tokens, overlap=50)",
    "Reason: Optimizes conversational flow and aligns with LLM processing limits.",
]
# Markdown-style trailing spaces and newlines do not create line breaks in a
# ReportLab Paragraph (whitespace is collapsed), so use explicit <br/> tags.
recommendations = "<br/>".join(recommendation_lines)
elements.append(Paragraph(recommendations, normal_style))
elements.append(Spacer(1, 0.2*inch))
# Trade-off Table
elements.append(Paragraph("Chunking Strategy Trade-offs", heading_style))
# Plain-text table content; the first row is the header.
data = [
    ["Strategy", "Pros", "Cons", "Best For"],
    ["Fixed-Size", "Simple, predictable", "Breaks context", "Uniform text"],
    ["Recursive", "Preserves structure", "More complex", "Structured PDFs"],
    ["Token-Based", "LLM-accurate sizing", "Needs tokenizer", "Production RAG"],
    ["Semantic", "Meaning-based", "Computationally expensive", "Complex content"]
]
# Plain strings in a Table are drawn on a single line and are not wrapped, so
# longer entries (e.g. "Computationally expensive") spill over the 1.2 inch
# column. Wrapping every cell in a Paragraph lets the text wrap inside its cell.
# The 9pt Helvetica font is set on the cell style because the table-level
# FONTNAME/FONTSIZE commands do not apply to Paragraph cells.
cell_style = styles["BodyText"].clone(
    "TableCell", fontName="Helvetica", fontSize=9, leading=11, spaceBefore=0
)
# Cell text is parsed as Paragraph markup: write "&" as "&amp;" and "<" as "&lt;".
table_rows = [[Paragraph(cell, cell_style) for cell in row] for row in data]
table = Table(table_rows, colWidths=[1.2*inch]*4)
table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), colors.lightgrey),  # shade the header row
    ('GRID', (0,0), (-1,-1), 0.5, colors.grey),
    ('VALIGN', (0,0), (-1,-1), 'TOP'),  # keep wrapped and single-line cells aligned
]))
elements.append(table)
# Render all collected flowables into the PDF at pdf_file_path, then report where it went
doc.build(flowables=elements)
print(f"PDF report created successfully: {pdf_file_path}")