In [2]:
# Install ReportLab into the environment of the running kernel (%pip targets
# the kernel; !pip uses whichever pip is first on the shell PATH).
# Pinned to the release this notebook was last executed with.
%pip install reportlab==4.4.9

Collecting reportlab
  Downloading reportlab-4.4.9-py3-none-any.whl.metadata (1.7 kB)
Downloading reportlab-4.4.9-py3-none-any.whl (2.0 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 64.8 MB/s eta 0:00:00
Installing collected packages: reportlab
Successfully installed reportlab-4.4.9


In [3]:
import html
from datetime import datetime

from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
)

class ProjectDocumentationGenerator:
    """Generates the project documentation PDF with ReportLab platypus.

    The ``add_*`` methods append flowables to ``self.story``; :meth:`build`
    calls all of them in order and renders the story to ``self.filename``.
    """

    def __init__(self, filename="Project_Documentation.pdf"):
        """Create the document template, the story list and the styles.

        Args:
            filename: Path of the PDF file that :meth:`build` writes.
        """
        self.filename = filename
        # A4 page with 72-point margins on every side.
        self.doc = SimpleDocTemplate(
            self.filename,
            pagesize=A4,
            rightMargin=72, leftMargin=72,
            topMargin=72, bottomMargin=72
        )
        self.styles = getSampleStyleSheet()
        self.story = []

        # --- Custom Styles ---
        self.create_custom_styles()

    def create_custom_styles(self):
        """Define custom styles for code blocks, headers, etc."""
        # Monospaced style on a light background, used for shell snippets.
        self.code_style = ParagraphStyle(
            'CodeStyle',
            parent=self.styles['BodyText'],
            fontName='Courier',
            fontSize=9,
            leading=12,
            backColor=colors.whitesmoke,
            borderPadding=5,
            spaceAfter=10,
        )
        # Short aliases for the sample-stylesheet styles used by the sections.
        self.title_style = self.styles['Title']
        self.h1_style = self.styles['Heading1']
        self.h2_style = self.styles['Heading2']
        self.body_style = self.styles['BodyText']
        # Style for list items; the bullet glyph itself is supplied through
        # Paragraph's bulletText argument (see add_overview).
        self.bullet_style = ParagraphStyle(
            'Bullet',
            parent=self.styles['BodyText'],
            bulletIndent=10,
            leftIndent=20,
            spaceAfter=5
        )

    @staticmethod
    def _code_markup(snippet):
        """Turn a plain-text snippet into markup that is safe for Paragraph.

        Surrounding whitespace is stripped, ``&``, ``<`` and ``>`` are escaped
        so they are not read as markup, and line breaks become ``<br/>`` tags.

        Args:
            snippet: Raw (possibly multi-line) code text.

        Returns:
            str: The escaped markup string.
        """
        lines = snippet.strip().split('\n')
        return '<br/>'.join(html.escape(line, quote=False) for line in lines)

    def _add_code_block(self, snippet):
        """Append ``snippet`` to the story as a paragraph in the code style."""
        self.story.append(Paragraph(self._code_markup(snippet), self.code_style))

    def add_title_page(self):
        """Generates a professional Title Page."""
        self.story.append(Spacer(1, 2 * inch))
        self.story.append(Paragraph("PROJECT DOCUMENTATION", self.title_style))
        self.story.append(Spacer(1, 0.5 * inch))
        # Paragraph text is markup, so the literal ampersand is written as &amp;.
        self.story.append(Paragraph("<b>Job Application &amp; CV Parser</b>", self.h1_style))
        self.story.append(Paragraph("<i>with OCR Automation & n8n Integration</i>".replace(" & ", " &amp; "), self.h2_style))
        self.story.append(Spacer(1, 3 * inch))

        # Meta Info Table (the date is taken when this method runs).
        data = [
            ["Generated Date:", datetime.now().strftime("%B %d, %Y")],
            ["Version:", "1.0.0"],
            ["Author:", "Development Team"],
            ["Technology:", "Python, FastAPI, Streamlit, n8n"]
        ]
        t = Table(data, colWidths=[2*inch, 3*inch])
        t.setStyle(TableStyle([
            ('FONTNAME', (0,0), (-1,-1), 'Helvetica'),
            ('FONTSIZE', (0,0), (-1,-1), 12),
            # Grey text for the label column (column 0) only.
            ('TEXTCOLOR', (0,0), (0,-1), colors.grey),
            ('ALIGN', (0,0), (-1,-1), 'LEFT'),
        ]))
        self.story.append(t)
        self.story.append(PageBreak())

    def add_overview(self):
        """Adds the Introduction and Features section."""
        self.story.append(Paragraph("1. Executive Overview", self.h1_style))
        text = """
        This project is a full-stack automated application tracking system (ATS) starter.
        It features a user-friendly frontend for submitting job applications and a robust backend
        that performs Optical Character Recognition (OCR) on resumes (PDF, Word, or Images)
        before sending the data to an n8n workflow for downstream processing.
        """
        self.story.append(Paragraph(text, self.body_style))
        self.story.append(Spacer(1, 0.2 * inch))

        self.story.append(Paragraph("1.1 Key Features", self.h2_style))
        features = [
            "<b>Universal File Support:</b> Accepts PDF, DOCX, PNG, JPG.",
            "<b>Intelligent Pre-processing:</b> Converts files to high-contrast grayscale for OCR.",
            "<b>Rapid OCR:</b> Uses RapidOCR (ONNX) for fast text extraction.",
            "<b>Automation Ready:</b> Instant integration with n8n Webhooks."
        ]
        for feature in features:
            # Pass the glyph as bulletText so the style's bulletIndent applies
            # and wrapped lines align under the text instead of the bullet.
            self.story.append(Paragraph(feature, self.bullet_style, bulletText="•"))
        self.story.append(Spacer(1, 0.3 * inch))

    def add_architecture(self):
        """Adds the Technical Architecture section."""
        self.story.append(Paragraph("2. System Architecture", self.h1_style))

        # Architecture Table. Cells are plain strings (not Paragraph markup),
        # so the "&" below must stay unescaped.
        data = [
            ["Component", "Technology", "Description"],
            ["Frontend", "Streamlit", "User interface for file uploads."],
            ["Backend API", "FastAPI", "Handles routing and file processing."],
            ["OCR Engine", "RapidOCR", "Extracts text from images."],
            ["Image Proc", "OpenCV / Pillow", "Grayscale conversion & resizing."],
            ["Automation", "n8n", "Workflow orchestration (Email, Sheets)."]
        ]

        t = Table(data, colWidths=[1.5*inch, 1.5*inch, 3*inch])
        t.setStyle(TableStyle([
            # Header row (row 0): dark blue background, light bold text.
            ('BACKGROUND', (0,0), (-1,0), colors.darkblue),
            ('TEXTCOLOR', (0,0), (-1,0), colors.whitesmoke),
            ('ALIGN', (0,0), (-1,-1), 'LEFT'),
            ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
            ('BOTTOMPADDING', (0,0), (-1,0), 12),
            # Body rows (row 1 onwards): beige background.
            ('BACKGROUND', (0,1), (-1,-1), colors.beige),
            ('GRID', (0,0), (-1,-1), 1, colors.black),
            ('FONTSIZE', (0,0), (-1,-1), 10),
            ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
        ]))
        self.story.append(t)
        self.story.append(Spacer(1, 0.3 * inch))

    def add_installation(self):
        """Adds Setup Instructions."""
        # Paragraph text is markup, so the literal ampersand is written as &amp;.
        self.story.append(Paragraph("3. Installation &amp; Setup", self.h1_style))

        self.story.append(Paragraph("Step 1: Create Environment", self.h2_style))
        code_env = """
conda create --name ocr-env python=3.11 -y
conda activate ocr-env
        """
        self._add_code_block(code_env)

        self.story.append(Paragraph("Step 2: Install Dependencies", self.h2_style))
        self.story.append(Paragraph("Ensure you have the following requirements:", self.body_style))
        code_req = """
pip install streamlit fastapi uvicorn requests pymupdf
pip install rapidocr_onnxruntime opencv-python pillow docx2pdf
        """
        self._add_code_block(code_req)

    def add_running_guide(self):
        """Adds How to Run section."""
        self.story.append(Paragraph("4. Running the Application", self.h1_style))
        self.story.append(Paragraph("The system requires two separate terminals:", self.body_style))

        self.story.append(Paragraph("Terminal A: Backend API", self.h2_style))
        self._add_code_block("uvicorn main:app --reload")

        self.story.append(Paragraph("Terminal B: Frontend UI", self.h2_style))
        self._add_code_block("streamlit run app.py")

    def build(self):
        """Assembles the PDF.

        Appends every section to the story, then renders it to
        ``self.filename``.

        Returns:
            bool: True when the document was rendered, False when rendering
            raised; in that case the error is printed, not re-raised.
        """
        self.add_title_page()
        self.add_overview()
        self.add_architecture()
        self.add_installation()
        self.add_running_guide()

        try:
            self.doc.build(self.story)
        except Exception as e:
            # Reporting stays best-effort (print, no re-raise), but the
            # return value lets callers detect the failure.
            print(f"❌ Error generating PDF: {str(e)}")
            return False
        print(f"✅ Success! Documentation generated: {self.filename}")
        return True

# Entry point: create a generator with the default output filename and build
# the PDF. `pdf_gen` is assigned at module level, so it remains defined after
# this block runs.
if __name__ == "__main__":
    pdf_gen = ProjectDocumentationGenerator()
    pdf_gen.build()

✅ Success! Documentation generated: Project_Documentation.pdf
