# Imports

In [1]:
import json
import math
import re
from pathlib import Path

import reportlab
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfgen import canvas

# Page Setup

In [2]:
# Page geometry and text styling shared by every drawing helper below.
#
# `inch` and `letter` are bound by name in the imports cell. A bare
# `import reportlab` does not by itself load the `reportlab.lib.units` or
# `reportlab.lib.pagesizes` submodules, so reaching them through attribute
# access only works when some other import has already loaded them.
INCH = inch
PAGESIZE = letter  # US Letter: 8.5x11 inches
PAGE_W, PAGE_H = PAGESIZE

# Grid: 3x3 cards per page
COLS, ROWS = 3, 3
CARDS_PER_PAGE = COLS * ROWS

# Margins and padding
# MARGIN_* is the outer page margin; CELL_PAD is the inset between a cell's
# cut lines and the box its content is laid out in.
MARGIN_X = 0.00 * INCH
MARGIN_Y = 0.00 * INCH
CELL_PAD = 0.15 * INCH

# Card cell size (computed from page + margins)
CELL_W = (PAGE_W - 2 * MARGIN_X) / COLS
CELL_H = (PAGE_H - 2 * MARGIN_Y) / ROWS

# Text styles (font names are passed straight to the canvas / pdfmetrics)
TERM_FONT = "Helvetica-Bold"
DEF_FONT = "Helvetica"
COMMENT_FONT = "Helvetica-Oblique"

TERM_SIZE = 18
DEF_SIZE = 16
COMMENT_SIZE = 11

# Line spacing multipliers (line advance = font size * multiplier)
TERM_LEADING = 1.15
DEF_LEADING = 1.20
COMMENT_LEADING = 1.15

# Cut line style
CUT_LINE_WIDTH = 0.6
CUT_LINE_GRAY = 0.65  # 0=black, 1=white

# Tag style (small label drawn in a corner of each card)
TAG_FONT = "Helvetica"
TAG_SIZE = 9
TAG_INSET = 0.07 * INCH
TAG_GRAY = 0.35

# Sanity check: report the resulting card size in inches.
print(f"CELL_W inches: {CELL_W/INCH:.3f}")
print(f"CELL_H inches: {CELL_H/INCH:.3f}")

CELL_W inches: 2.833
CELL_H inches: 3.667


# Helpers

In [3]:
def normalize_ws(s: str) -> str:
    """
    Tidy whitespace line by line while keeping explicit line breaks.

    CRLF and bare CR are converted to LF first. On each line, runs of
    spaces/tabs collapse to a single space and the line is trimmed. The
    joined result is trimmed once more, so leading and trailing blank lines
    disappear. A falsy input (None or "") yields "".
    """
    unified = (s or "").replace("\r\n", "\n").replace("\r", "\n")

    cleaned = []
    for raw_line in unified.split("\n"):
        collapsed = re.sub(r"[ \t]+", " ", raw_line)
        cleaned.append(collapsed.strip())

    return "\n".join(cleaned).strip()

def wrap_lines(text: str, font_name: str, font_size: int, max_width: float):
    """
    Break text into lines no wider than max_width for the given font.

    The text is first cleaned with normalize_ws; each explicit newline starts
    a new paragraph and an empty paragraph becomes an empty string in the
    result. Words are packed greedily, with widths measured by
    pdfmetrics.stringWidth. A single word wider than max_width is split
    character by character.

    Returns a list of strings; an empty list when the cleaned text is empty.
    """
    def fits(candidate: str) -> bool:
        return pdfmetrics.stringWidth(candidate, font_name, font_size) <= max_width

    cleaned = normalize_ws(text)
    if not cleaned:
        return []

    wrapped = []
    for para in cleaned.split("\n"):
        para = para.strip()
        if not para:
            wrapped.append("")  # keep the blank line
            continue

        pending = ""
        for word in para.split(" "):
            joined = f"{pending} {word}" if pending else word
            if fits(joined):
                pending = joined
                continue

            # The word does not fit on the current line: flush what we have.
            if pending:
                wrapped.append(pending)

            if fits(word):
                pending = word
                continue

            # The word alone is too wide: emit it in the largest pieces that
            # fit, and carry the tail over as the start of the next line.
            piece = ""
            for char in word:
                if fits(piece + char):
                    piece += char
                else:
                    if piece:
                        wrapped.append(piece)
                    piece = char
            pending = piece

        if pending:
            wrapped.append(pending)

    return wrapped

def draw_centered_block(c: canvas.Canvas, lines, font_name, font_size, leading, x_center, y_center):
    """
    Draw a stack of lines, each horizontally centered on x_center, with the
    stack as a whole placed around y_center.

    Each line advances by font_size * leading. Empty strings draw nothing
    but still take up one line of vertical space. Does nothing when `lines`
    is empty.
    """
    if not lines:
        return

    line_step = font_size * leading
    block_height = len(lines) * font_size * leading
    # First baseline sits one font size below the top edge of the block.
    baseline = y_center + block_height / 2 - font_size

    c.setFont(font_name, font_size)
    for text in lines:
        if text != "":
            c.drawCentredString(x_center, baseline, text)
        baseline -= line_step


In [4]:
def draw_cut_lines(c: canvas.Canvas):
    """
    Stroke the interior grid lines separating the card cells on the current
    page, using CUT_LINE_WIDTH and CUT_LINE_GRAY. The outer page border is
    not drawn.
    """
    c.setLineWidth(CUT_LINE_WIDTH)
    c.setStrokeGray(CUT_LINE_GRAY)

    # Vertical cuts between columns, spanning the area inside the margins.
    y_low, y_high = MARGIN_Y, PAGE_H - MARGIN_Y
    for x_cut in (MARGIN_X + k * CELL_W for k in range(1, COLS)):
        c.line(x_cut, y_low, x_cut, y_high)

    # Horizontal cuts between rows.
    x_low, x_high = MARGIN_X, PAGE_W - MARGIN_X
    for y_cut in (MARGIN_Y + k * CELL_H for k in range(1, ROWS)):
        c.line(x_low, y_cut, x_high, y_cut)

def cell_origin(col: int, row: int):
    """
    Return the bottom-left (x, y) corner of the grid cell at (col, row).

    Rows are counted from the top of the page (row=0 is the top row), while
    the returned y is measured upward from the bottom of the page.
    """
    left = MARGIN_X + col * CELL_W
    # Top edge of the cell, then down one cell height to reach its bottom.
    bottom = PAGE_H - MARGIN_Y - row * CELL_H - CELL_H
    return left, bottom

def draw_corner_tag(c: canvas.Canvas, text: str, x_left: float, y_bottom: float, w: float, h: float, corner="tl"):
    """
    Draw a small label near the top edge of the box (x_left, y_bottom, w, h).

    corner: 'tr' right-aligns the label at the top-right; any other value
    (default 'tl') left-aligns it at the top-left.
    Nothing is drawn when the text is empty or only whitespace. Font and
    fill color changes are wrapped in saveState/restoreState.
    """
    label = (text or "").strip()
    if not label:
        return

    c.saveState()
    c.setFont(TAG_FONT, TAG_SIZE)
    c.setFillGray(TAG_GRAY)

    # Baseline: inset from the top edge, then down by the tag font size.
    baseline = y_bottom + h - TAG_INSET - TAG_SIZE
    if corner == "tr":
        c.drawRightString(x_left + w - TAG_INSET, baseline, label)
    else:
        c.drawString(x_left + TAG_INSET, baseline, label)

    c.restoreState()


In [5]:
def draw_card_front(c: canvas.Canvas, item: dict, col: int, row: int, term_tag: str):
    """
    Render the front of one card into grid cell (col, row): the tag in the
    top-left corner and the item's "term" wrapped and centered in the padded
    content box. A missing "term" is treated as empty text.
    """
    cell_x, cell_y = cell_origin(col, row)

    # Content box = cell shrunk by CELL_PAD on every side.
    box_x = cell_x + CELL_PAD
    box_y = cell_y + CELL_PAD
    box_w = CELL_W - 2 * CELL_PAD
    box_h = CELL_H - 2 * CELL_PAD

    draw_corner_tag(c, term_tag, box_x, box_y, box_w, box_h, corner="tl")

    term_text = normalize_ws(item.get("term", ""))
    term_lines = wrap_lines(term_text, TERM_FONT, TERM_SIZE, box_w)

    draw_centered_block(
        c, term_lines, TERM_FONT, TERM_SIZE, TERM_LEADING,
        x_center=box_x + box_w/2,
        y_center=box_y + box_h/2
    )

def draw_card_back(c: canvas.Canvas, item: dict, col: int, row: int, defn_tag: str):
    """
    Render the back of one card into grid cell (col, row): the tag in the
    top-left corner, the item's "defn" centered slightly above the middle of
    the content box, and its "comment" centered below the middle. Missing
    keys are treated as empty text.

    The two text blocks are placed at fixed vertical offsets and are not
    checked against each other for overlap.
    """
    cell_x, cell_y = cell_origin(col, row)
    x0, y0 = cell_x + CELL_PAD, cell_y + CELL_PAD
    w = CELL_W - 2 * CELL_PAD
    h = CELL_H - 2 * CELL_PAD

    draw_corner_tag(c, defn_tag, x0, y0, w, h, corner="tl")

    def_lines = wrap_lines(normalize_ws(item.get("defn", "")), DEF_FONT, DEF_SIZE, w)
    com_lines = wrap_lines(normalize_ws(item.get("comment", "")), COMMENT_FONT, COMMENT_SIZE, w)

    center_x = x0 + w/2
    mid_y = y0 + h/2

    # (lines, font, size, leading, vertical center) for each text block,
    # drawn in order: definition above center, comment below.
    blocks = (
        (def_lines, DEF_FONT, DEF_SIZE, DEF_LEADING, mid_y + 0.12*h),
        (com_lines, COMMENT_FONT, COMMENT_SIZE, COMMENT_LEADING, mid_y - 0.22*h),
    )
    for block_lines, font, size, leading, block_y in blocks:
        draw_centered_block(
            c, block_lines, font, size, leading,
            x_center=center_x,
            y_center=block_y
        )


In [6]:
def make_flashcards_single_pdf(items, term_tag: str, defn_tag: str, out_pdf: Path):
    """
    Write all cards to a single PDF laid out for double-sided printing.

    Items are taken CARDS_PER_PAGE at a time. Each batch produces two
    consecutive pages: a sheet of fronts, then a sheet of backs:
      page 1 = fronts of batch 1
      page 2 = backs  of batch 1
      page 3 = fronts of batch 2
      page 4 = backs  of batch 2
      ...
    On the back sheet the columns are mirrored left-to-right so each back
    lands behind its own front. Print double sided and select
    "flip on long edge" when printing.
    """
    pdf = canvas.Canvas(str(out_pdf), pagesize=PAGESIZE)

    for start in range(0, len(items), CARDS_PER_PAGE):
        batch = items[start:start + CARDS_PER_PAGE]
        # (row, col) slot for each card, filling the grid row by row.
        slots = [divmod(idx, COLS) for idx in range(len(batch))]

        # Front sheet
        draw_cut_lines(pdf)
        for card, (row, col) in zip(batch, slots):
            draw_card_front(pdf, card, col, row, term_tag)
        pdf.showPage()

        # Back sheet: same rows, columns mirrored
        draw_cut_lines(pdf)
        for card, (row, col) in zip(batch, slots):
            draw_card_back(pdf, card, (COLS - 1) - col, row, defn_tag)
        pdf.showPage()

    pdf.save()


In [7]:
def build_flashcards_from_vocab_json(vocab_path, out_dir="flashcards", mark_printed=False):
    """
    Render the unprinted entries of a vocab JSON file as a flashcard PDF.

    The input file (<parents>/<name>.json) must be an object with the keys
    "term_tag", "defn_tag" and "data" (a list of entry dicts). Entries whose
    "printed" value is truthy are skipped. The PDF is written to
    <out_dir>/<name>.pdf; out_dir is created if needed.

    When mark_printed is true, a copy of the vocab is also written to
    <out_dir>/<name>_printed.json in which every entry whose term was
    rendered has "printed" set to True. That copy contains only the
    "term_tag", "defn_tag" and "data" keys; the input file is not modified.

    Returns a summary dict with the input and output paths, card counts and
    the two tags.
    """
    vocab_path = Path(vocab_path)

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_pdf = out_dir / f"{vocab_path.stem}.pdf"

    with vocab_path.open("r", encoding="utf-8") as src:
        payload = json.load(src)

    # Expected top-level layout of the vocab file.
    term_tag = payload["term_tag"]
    defn_tag = payload["defn_tag"]
    vocab = payload["data"]

    # Cards still waiting to be printed.
    pending = []
    for entry in vocab:
        if not entry.get("printed", False):
            pending.append(entry)

    make_flashcards_single_pdf(
        items=pending,
        term_tag=term_tag,
        defn_tag=defn_tag,
        out_pdf=out_pdf,
    )

    if mark_printed:
        rendered_terms = set()
        for entry in pending:
            rendered_terms.add(normalize_ws(entry.get("term", "")))

        def stamped(entry):
            # Work on a shallow copy so the loaded data is left untouched.
            clone = dict(entry)
            if normalize_ws(clone.get("term", "")) in rendered_terms:
                clone["printed"] = True
            return clone

        snapshot = {
            "term_tag": term_tag,
            "defn_tag": defn_tag,
            "data": [stamped(entry) for entry in vocab],
        }

        printed_path = out_dir / f"{vocab_path.stem}_printed.json"
        with printed_path.open("w", encoding="utf-8") as dst:
            json.dump(snapshot, dst, ensure_ascii=False, indent=2)

    summary = {
        "vocab_path": str(vocab_path),
        "output_path": str(out_pdf),
        "total_cards_in_vocab": len(vocab),
        "total_cards_rendered": len(pending),
        "term_tag": term_tag,
        "defn_tag": defn_tag,
    }
    return summary

# Flashcard PDF Generation

In [8]:
# Build the verb flashcards; mark_printed=False means no *_printed.json copy
# is written.
result = build_flashcards_from_vocab_json(
    vocab_path="vocab/verbs.json",
    mark_printed=False,
)
result

{'vocab_path': 'vocab/verbs.json',
 'output_path': 'flashcards/verbs.pdf',
 'total_cards_in_vocab': 205,
 'total_cards_rendered': 205,
 'term_tag': 'Verb',
 'defn_tag': 'Verbo'}