### Capture Docstrings

In [12]:
# Prompt sent to the LLM once per documented symbol.  The text is a format-style
# template with exactly three placeholders: {name}, {path} and {docstring}.
# Everything else (including the worked "calculate_payment" example) is passed to
# the model verbatim, so edits here directly change the generated documentation.
# NOTE(review): "(omit the word “None” if a section truly has no content)" reads as
# contradictory next to "includes **all** of the sections"; it presumably was meant
# to say "write the word “None”" -- confirm the intent before changing the prompt.
# NOTE(review): any literal curly brace added to this text later would presumably
# need doubling ({{ / }}) for the template engine -- verify against PromptTemplate.
DOCUMENTATION_PROMPT_TEMPLATE = """
You are an expert technical writer tasked with producing **clear, complete,
developer-ready Markdown documentation** for the Python symbol `{name}`.

Node details
------------
• File Path: {path}  
• Raw Docstring:  
\"\"\"{docstring}\"\"\"

Instructions
------------
Write a Markdown block that includes **all** of the sections below (omit the
word “None” if a section truly has no content).  
Do **not** quote or reproduce the source code itself.

Sections to generate
1. **Function/Class Name and Signature** – infer the full signature if it is
   not spelled out in the docstring.
2. **Description** – high-level overview of behaviour and intent.
3. **Parameters / Attributes** – table with *name, type, description*.
4. **Expected Input** – data types, constraints, edge cases.
5. **Returns** – type and meaning of the return value.
6. **Detailed Logic** – step-by-step explanation of algorithms and any
   interactions with other components.
7. *(Optional)* **Raises / Errors** – exceptions or error conditions.
8. *(Optional)* **Usage Example** – a concise runnable snippet.

Formatting rules
• Use Markdown headers (`###`) for the top-level title and bold section names.  
• Keep line length ≤ 120 characters.  
• Do **not** invent behaviour not implied by the docstring.  
• Do **not** add conceptual-graph or dependency content.

Example layout
--------------
### calculate_payment(principal: float, annual_rate: float, num_payments: int) -> float

**Description:**  
Calculates the fixed periodic payment required to fully amortize a loan using the net present-value formula.

**Parameters:**  
| Name | Type  | Description                           |
|------|-------|---------------------------------------|
| principal | float | Initial loan amount              |
| annual_rate | float | Annual interest rate (e.g., 0.05) |
| num_payments | int | Total number of payments         |

**Expected Input:**  
• `principal` > 0  
• `annual_rate` ≥ 0  
• `num_payments` > 0

**Returns:**  
`float` – payment amount per period.

**Detailed Logic:**  
• Zero-interest shortcut divides `principal` evenly.  
• Otherwise computes periodic rate and applies amortisation formula.

Begin the Markdown documentation now.
"""


In [14]:
from __future__ import annotations

import ast
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, Union

from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_openai import AzureChatOpenAI

In [19]:
# ────────────────────────────  LLM chain  ────────────────────────────
# Builds the module-level `chain` object that `_process_file` calls once per symbol.
# Order matters: the .env file has to be loaded before `os.getenv` is evaluated.
load_dotenv()                                     # pulls key / deployment name

# `os.getenv` yields None when AZURE_OPENAI_DEPLOYMENT_NAME is unset; nothing here
# checks for that, so a missing variable is passed straight to the client.
llm = AzureChatOpenAI(
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    temperature=0.2,
    # NOTE(review): 1024 output tokens may truncate documentation for large
    # classes -- confirm this budget is sufficient.
    max_tokens=1024,
)

# Pipeline: fill DOCUMENTATION_PROMPT_TEMPLATE -> call the model -> plain string.
prompt = PromptTemplate.from_template(DOCUMENTATION_PROMPT_TEMPLATE)
chain  = prompt | llm | StrOutputParser()   

In [15]:
def _process_file(py_path: Path) -> Dict[str, Dict[str, str]]:
    """
    Parse *one* Python file and generate documentation for its top-level symbols.

    Parameters
    ----------
    py_path : Path
        Location of the Python source file to analyse.

    Returns
    -------
    dict
        ``{symbol_name: {'path', 'docstring', 'Documentation'}}`` with one entry
        per top-level function, async function or class.  Methods and nested
        definitions are not visited.  ``'docstring'`` is ``""`` when the symbol
        has none (the LLM chain is still invoked for it).  An empty dict is
        returned when the file cannot be read or parsed.

    Notes
    -----
    * The file is handed to ``ast.parse`` as raw bytes so that Python itself
      resolves the encoding.  Decoding as plain UTF-8 first would leave a
      UTF-8 BOM in the text (a syntax error for the parser) and would reject
      files that declare another encoding via a PEP 263 coding cookie, so
      such files used to be skipped silently.
    * Exceptions raised by the LLM chain are *not* caught here; they propagate
      to the caller.
    """
    try:
        tree = ast.parse(py_path.read_bytes(), filename=str(py_path))
    except (OSError, SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError as well as the
        # "source code string cannot contain null bytes" error that older
        # interpreters raise instead of SyntaxError.
        return {}                                  # skip unreadable / bad files

    path_str = str(py_path)
    out: Dict[str, Dict[str, str]] = {}
    for node in tree.body:                         # top-level defs only
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        doc = ast.get_docstring(node) or ""
        out[node.name] = {
            "path": path_str,
            "docstring": doc,
            # One LLM round-trip per symbol; the keys match the template placeholders.
            "Documentation": chain.invoke(
                {"name": node.name, "path": path_str, "docstring": doc}
            ),
        }
    return out

In [17]:
def collect_docstrings(
    root: Union[str, Path],
    *,
    workers: int | None = None
) -> Dict[str, Dict[str, str]]:
    """
    Recursively scan *root* for *.py files and return

        { symbol_name :
            { 'path': str,
              'docstring': str,
              'Documentation': str } }

    Documentation is generated by the LLM chain.
    """
    root     = Path(root).expanduser().resolve()
    workers  = workers or os.cpu_count()
    result: Dict[str, Dict[str, str]] = {}

    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {pool.submit(_process_file, p): p
                   for p in root.rglob("*.py")}
        for fut in as_completed(futures):
            result.update(fut.result())

    return result

In [20]:
# Scan the "CalculatorCode" folder (resolved against the current working directory).
# Every top-level function/class found triggers one LLM call, so this cell is slow.
docs = collect_docstrings("CalculatorCode")

### Document Creation

In [23]:
from pathlib import Path
import os

def save_docs_to_files(
    docs: dict,
    output_root: str,
    *,
    project_dir_name: str = "CalculatorCode",
) -> None:
    """
    Save each LLM-generated Markdown string to
    <output_root>/<project structure>/<py-file-stem>/<symbol>.md.

    Parameters
    ----------
    docs : dict
        Mapping ``{symbol: {'path': str, 'Documentation': str, ...}}``.
    output_root : str
        Directory that receives the generated tree; created on demand.
    project_dir_name : str, keyword-only
        Name of the folder at which the mirrored structure starts (matched
        case-insensitively against the components of each source path; the
        first match is used).  Defaults to ``"CalculatorCode"``.

    Example
    -------
    Source path:  CalculatorCode/app/core/config.py
    Output file:  <output_root>/CalculatorCode/app/core/config/<symbol>.md

    Notes
    -----
    When *project_dir_name* does not occur in a source path, the whole
    directory chain of that path (minus drive/root) is mirrored below
    *output_root*.  The root must be dropped: joining an absolute path onto
    *output_root* would discard *output_root* and write next to the sources.
    """
    output_root = Path(output_root).resolve()
    wanted = project_dir_name.lower()

    for symbol, meta in docs.items():
        # Accept Windows-style separators regardless of the current platform.
        src_path = Path(meta["path"].replace("\\", "/")).resolve()

        # Strip the anchor ("/" or "C:\\") so the remainder is always relative.
        parts = src_path.parts[1:] if src_path.anchor else src_path.parts

        # Index of the project folder; 0 (= keep every directory) if absent.
        start_idx = next(
            (i for i, part in enumerate(parts) if part.lower() == wanted),
            0,
        )

        # Directory chain up to, but excluding, the .py file itself.
        rel_parent = Path(*parts[start_idx:-1])

        # Final directory: .../<py-file-stem>/
        target_dir = output_root / rel_parent / src_path.stem
        target_dir.mkdir(parents=True, exist_ok=True)

        (target_dir / f"{symbol}.md").write_text(
            meta["Documentation"], encoding="utf-8"
        )

In [24]:
# Write one Markdown file per collected symbol beneath ./docstring_output
# (relative to the current working directory).
save_docs_to_files(docs, output_root= "docstring_output")