<a href="https://colab.research.google.com/github/FredJones4/671_controlbook_D/blob/master/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How to Make Flashcards of Your Ward

1. Click the "Run All" Button above, or just click “▶︎ Run” on the code below.

2. Down below all the lines of code, a "Choose File" button will appear. Click it, find & click the pdf of your ward directory (see [this link](https://github.com/code-for-neighborhood-needs/make_name_flashcards) for instructions), and click "Open".

3. When prompted, write down the name you would like for your [Anki](https://apps.ankiweb.net/) flashcards deck when it is uploaded to your [Anki](https://apps.ankiweb.net/) software.

4. A zip file containing the Anki deck (`.apkg`), a printable PDF of the cards, the TSV of names, and the extracted photos will download automatically.

In [None]:
# ─── 1. Install dependencies ───────────────────────────────────────────────────
!pip install pymupdf pandas matplotlib pillow genanki

# ─── 2. Upload your Ward Directory PDF ─────────────────────────────────────────
# (Click “▶︎ Run” then click the “Choose Files” button that appears below.)
from google.colab import files
import os

uploaded = files.upload()  # ← click “Choose Files” here
# With nothing uploaded, next() on the empty mapping would raise a bare
# StopIteration; fail with a message the user can act on instead.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and choose your ward directory PDF."
    )
# The mapping is keyed by file name; the first key is the PDF to process.
pdf_path = next(iter(uploaded))
print(f"✅ Uploaded: {pdf_path}")

# Derive a base name (PDF name without extension) for all outputs
base, _ = os.path.splitext(pdf_path)


# ─── 3. Import libraries & define pipeline functions ───────────────────────────
import re, random, zipfile, csv
import fitz  # PyMuPDF
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from PIL import Image
import genanki

def extract_cards(input_pdf: str, img_dir: str):
    """Extract one flashcard row per "Last, First" name found in a PDF.

    Each page is scanned line by line.  A line matching ``Last, First`` starts
    a new row; a following line made only of name characters (optionally
    prefixed with ``Preferred:``) is recorded as the preferred name of the most
    recent row on that page.  Every distinct image on the page is saved as a
    PNG in *img_dir* and attached to the not-yet-matched name on the same page
    whose line top is vertically closest to the image top.

    Args:
        input_pdf: Path of the PDF to read.
        img_dir: Directory that receives the extracted PNG files; created if
            it does not exist.

    Returns:
        A list of dicts with keys ``full_name`` ("Last, First"), ``pref_name``
        (str or None) and ``img_file`` (path of the matched PNG, or "" when no
        image was matched), in document order.
    """
    os.makedirs(img_dir, exist_ok=True)
    NAME_RE = re.compile(r"^([A-Za-z][A-Za-z'\- ]+,\s+[A-Za-z][A-Za-z'\- ]+)$")
    PREF_RE = re.compile(r"^(?:Preferred:\s*)?([A-Za-z][A-Za-z'\- ]+)$")
    rows = []
    doc = fitz.open(input_pdf)
    try:
        for page in doc:
            # Names found on this page only; images are matched per page.
            names = []
            for block in page.get_text("dict").get("blocks", []):
                for line in block.get("lines", []):
                    text = "".join(
                        span.get("text", "") for span in line.get("spans", [])
                    ).strip()
                    # Skip blanks, the "Ward Directory" header and single
                    # capital letters (presumably alphabetical section marks).
                    if (
                        not text
                        or "Ward Directory" in text
                        or (len(text) == 1 and text.isupper())
                    ):
                        continue
                    m_full = NAME_RE.match(text)
                    if m_full:
                        names.append({
                            "y": line["bbox"][1],
                            "matched": False,
                            "row_idx": len(rows),
                        })
                        rows.append({
                            "full_name": m_full.group(1),
                            "pref_name": None,
                            "img_file": "",
                        })
                    elif names:
                        m_pref = PREF_RE.match(text)
                        if m_pref:
                            rows[names[-1]["row_idx"]]["pref_name"] = m_pref.group(1)

            seen = set()
            for img_idx, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]
                if xref in seen:
                    continue
                seen.add(xref)
                pix = fitz.Pixmap(doc, xref)
                # PNG output only supports gray/RGB.  Convert CMYK (with or
                # without alpha) to RGB instead of dropping the photo or
                # failing inside save().
                if pix.n - pix.alpha >= 4:
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                img_name = f"{page.number+1:03d}_{img_idx}.png"
                img_path = os.path.join(img_dir, img_name)
                pix.save(img_path)
                pix = None  # release the pixmap's memory promptly
                rects = page.get_image_rects(xref)
                if not rects:
                    continue
                y_img = rects[0].y0
                candidates = [n for n in names if not n["matched"]]
                if not candidates:
                    continue
                closest = min(candidates, key=lambda n: abs(n["y"] - y_img))
                closest["matched"] = True
                rows[closest["row_idx"]]["img_file"] = img_path
    finally:
        # Close the document even if extraction fails part-way through.
        doc.close()
    return rows

def write_tsv(rows, tsv_path):
    """Write flashcard rows to a tab-separated file with Front/Back columns.

    Args:
        rows: Iterable of dicts with keys ``full_name`` ("Last, First"),
            ``pref_name`` (str or None) and ``img_file`` (image path or "").
        tsv_path: Destination file path; overwritten if it exists.

    The Front column is an ``<img>`` tag pointing at ``img_file`` (empty when
    there is no image).  The Back column is "First Last", where the preferred
    name replaces the first name when one is present.
    """
    with open(tsv_path, "w", newline="", encoding="utf8") as f:
        w = csv.writer(f, delimiter="\t")
        w.writerow(["Front", "Back"])
        for r in rows:
            front = f'<img src="{r["img_file"]}">' if r["img_file"] else ""
            # Split on the comma alone and strip: the separator after the
            # comma may be any whitespace (several spaces, tab, NBSP), which
            # an exact ", " split would reject or leave in the name.
            last, _, first = r["full_name"].partition(",")
            first = r["pref_name"] or first.strip()
            w.writerow([front, f"{first} {last.strip()}".strip()])

def make_pdf(tsv_path, pdf_out):
    """Render a PDF with one page per card: photo on the left, name on the right.

    Args:
        tsv_path: Tab-separated file with ``Front`` (an ``<img src="...">``
            tag) and ``Back`` (display name) columns.
        pdf_out: Path of the PDF to create.

    Rows without an image tag, without a name, or whose image file does not
    exist on disk are skipped.
    """
    # dtype=str: when no row has an image the Front column is entirely empty
    # and would otherwise be parsed as float NaN, on which .str raises.
    df = pd.read_csv(tsv_path, sep="\t", dtype=str)
    df["ImagePath"] = df["Front"].str.extract(r'<img src="([^"]+)"', expand=False)
    df["FullName"] = df["Back"]
    df = df.dropna(subset=["ImagePath", "FullName"]).reset_index(drop=True)
    with PdfPages(pdf_out) as pdf:
        for _, row in df.iterrows():
            if not os.path.isfile(row["ImagePath"]):
                continue
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8.5, 5.5))
            try:
                ax1.axis("off")
                ax2.axis("off")
                # Keep the image file open only while the page is rendered so
                # file handles do not accumulate across a large directory.
                with Image.open(row["ImagePath"]) as photo:
                    ax1.imshow(photo)
                    ax2.text(0.5, 0.5, row["FullName"],
                             ha="center", va="center", fontsize=24, weight="bold",
                             transform=ax2.transAxes)
                    pdf.savefig(fig, bbox_inches="tight")
            finally:
                # Always release the figure, even if rendering fails.
                plt.close(fig)

def strip_img_prefix(html):
    """Rewrite every ``<img src="...">`` tag in *html* to use only the file name.

    The directory part of each ``src`` path is dropped; text outside the
    matched tags is returned unchanged.
    """
    def _basename_tag(match):
        # Rebuild the tag around the final path component only.
        filename = os.path.basename(match.group(1))
        return f'<img src="{filename}">'

    return re.sub(r'<img\s+src="([^"]+)"\s*>', _basename_tag, html)

def build_apkg(tsv_path, img_dir, apkg_out, deck_name):
    """Build an Anki ``.apkg`` package from a Front/Back TSV and an image folder.

    Args:
        tsv_path: Tab-separated file whose first row is a header and whose
            remaining rows each hold exactly two columns (front, back).
        img_dir: Directory whose .png/.jpg/.jpeg files are bundled as media.
        apkg_out: Path of the package file to write.
        deck_name: Name given to the generated deck.

    Deck and model ids are freshly drawn 63-bit random integers on every call.
    """
    # Draw both ids up front, deck first, so the order of random draws is fixed.
    new_deck_id = random.getrandbits(63)
    new_model_id = random.getrandbits(63)

    deck = genanki.Deck(new_deck_id, deck_name)
    card_template = {
        "name":"Card 1",
        "qfmt":"{{Front}}",
        "afmt":'{{Front}}<hr id="answer">{{Back}}',
    }
    model = genanki.Model(
        new_model_id,
        "Simple Model",
        fields=[{"name":"Front"},{"name":"Back"}],
        templates=[card_template],
    )

    with open(tsv_path, encoding="utf-8") as tsv_file:
        records = csv.reader(tsv_file, delimiter="\t")
        next(records, None)  # skip the header row
        for front, back in records:
            # Media is referenced by bare file name inside the package.
            note_fields = [strip_img_prefix(front), strip_img_prefix(back)]
            deck.add_note(genanki.Note(model=model, fields=note_fields))

    package = genanki.Package(deck)
    image_suffixes = (".png", ".jpg", ".jpeg")
    media = []
    for entry in os.listdir(img_dir):
        if entry.lower().endswith(image_suffixes):
            media.append(os.path.join(img_dir, entry))
    package.media_files = media
    package.write_to_file(apkg_out)

def bundle_all(zip_out, tsv_path, pdf_out, img_dir, apkg_out):
    """Zip the TSV, PDF, APKG and the whole image folder into *zip_out*.

    The three output files are stored at the archive root under their base
    names.  Files under *img_dir* keep their path relative to the parent of
    *img_dir*, so the image folder name is preserved inside the archive.
    """
    images_parent = os.path.dirname(img_dir)
    archive = zipfile.ZipFile(zip_out, "w", zipfile.ZIP_DEFLATED)
    with archive:
        # Top-level outputs first, in a fixed order.
        for output_file in (tsv_path, pdf_out, apkg_out):
            archive.write(output_file, os.path.basename(output_file))
        # Then every file found anywhere below the image directory.
        for folder, _subdirs, filenames in os.walk(img_dir):
            for filename in filenames:
                on_disk = os.path.join(folder, filename)
                archive.write(on_disk, os.path.relpath(on_disk, start=images_parent))

# ─── 4. Configure & run ─────────────────────────────────────────────────────────
# Every output is named after the uploaded PDF (``base``), so all artifacts of
# one run sit side by side in the working directory.
input_pdf  = pdf_path  # name set by the upload step (``pdf_filename`` is never defined)
img_dir    = f"{base}_images"
tsv_path   = f"{base}.tsv"
pdf_out    = f"{base}.pdf"
apkg_out   = f"{base}.apkg"
zip_out    = f"{base}_bundle.zip"
deck_name  = f"{base.replace('_',' ').title()} Flashcards"

# Pipeline: PDF → rows → TSV → printable PDF + Anki package → zip bundle.
rows = extract_cards(input_pdf, img_dir)
write_tsv(rows, tsv_path)
print(f"• TSV → {tsv_path} ({len(rows)} cards)")

make_pdf(tsv_path, pdf_out)
print(f"• PDF → {pdf_out}")

build_apkg(tsv_path, img_dir, apkg_out, deck_name)
print(f"• APKG → {apkg_out}")

bundle_all(zip_out, tsv_path, pdf_out, img_dir, apkg_out)
print(f"• ZIP → {zip_out}")

# ─── 5. Download your outputs ───────────────────────────────────────────────────
# Only the bundle is downloaded by default; uncomment a line below to also
# download that artifact on its own.
# files.download(tsv_path)
# files.download(pdf_out)
# files.download(apkg_out)
files.download(zip_out)




Saving ward_directory.pdf to ward_directory (3).pdf
✅ Uploaded: ward_directory (3).pdf
