In [26]:
import pdfplumber
from pdfplumber.utils.pdfinternals import resolve_and_decode, resolve
from pprint import pprint

# Open the form PDF; the resulting `pdf` object is read by the later cell that
# looks up the AcroForm in `pdf.doc.catalog`.
# NOTE(review): the handle is not closed anywhere in the visible cells.
pdf = pdfplumber.open("data/Waymo-OL316-030324-Redacted.pdf")


In [22]:
def parse_field_helper(form_data, field, prefix=None):
    """Append the AcroForm field/value entries found in `field` to `form_data`.

    Each appended entry is a list ``[field_name, alternate_field_name,
    field_value]``. If `field` has child fields ("Kids"), they are parsed
    recursively first, so a parent's own entry follows its children's entries.

    Args:
        form_data: list that is extended in place with the parsed entries.
        field: a field dictionary, or an indirect reference to one.
        prefix: dotted name of the parent field, or None for a top-level field.

    Returns:
        None. Results are delivered by mutating `form_data`.
    """
    # `resolve` follows indirect references and hands direct objects back
    # unchanged, so both kinds of entry are accepted here.
    resolved_field = resolve(field)
    if not isinstance(resolved_field, dict):
        # A dangling reference or malformed entry has no keys to read; skip it
        # instead of failing on the membership tests below.
        return

    # Fully qualified name: parent prefix plus this field's partial name "T",
    # leaving out whichever part is missing or empty.
    partial_name = resolve_and_decode(resolved_field.get("T"))
    field_name = ".".join(part for part in (prefix, partial_name) if part)

    if "Kids" in resolved_field:
        # The Kids array may itself be stored as an indirect reference.
        for kid_field in resolve(resolved_field["Kids"]) or []:
            parse_field_helper(form_data, kid_field, prefix=field_name)

    if "T" in resolved_field or "TU" in resolved_field:
        # "T" is a field-name, but it's sometimes absent.
        # "TU" is the "alternate field name" and is often more human-readable.
        # Your PDF may have one, the other, or both.
        raw_alternate_name = resolved_field.get("TU")
        alternate_field_name = (
            resolve_and_decode(raw_alternate_name) if raw_alternate_name else None
        )
        field_value = (
            resolve_and_decode(resolved_field["V"]) if "V" in resolved_field else None
        )
        form_data.append([field_name, alternate_field_name, field_value])

# Accumulator filled in place by parse_field_helper; each entry is
# [field_name, alternate_field_name, field_value].
form_data = []
# Top-level fields listed under the document catalog's "AcroForm" dictionary.
# NOTE(review): assumes the catalog has an "AcroForm" entry with a "Fields"
# key; a PDF without a form would presumably fail here — confirm.
fields = resolve(pdf.doc.catalog["AcroForm"])["Fields"]
for field in fields:
    # No prefix is passed: these are the roots of the field hierarchy.
    parse_field_helper(form_data, field)

In [27]:
# Pretty-print the collected [field_name, alternate_field_name, field_value] entries.
pprint(form_data)

[['click to', 'click to print.', None],
 ['click to', 'click to print.', None],
 ['click to 1', 'click to clear form.', None],
 ['click to 1', 'click to clear form.', None],
 ['MANufACTuRERS NAME',
  "Section 1. Manufacturers information. Enter manufacturer's name",
  'Waymo LLC'],
 ['AVT NuMBER_2', 'Enter Ay V T  number.', None],
 ['BuSINESS NAME', 'enter BUSINESS NAME', 'Waymo LLC'],
 ['TELEPhONE NuMBER.0', 'Enter TELEPHONE NUMBER area code.', None],
 ['TELEPhONE NuMBER.1', 'Enter telephone number.', None],
 ['TELEPhONE NuMBER', None, None],
 ['DRIVERS fuLL NAME First Middle last.1', 'enter Street address.', None],
 ['DRIVERS fuLL NAME First Middle last.0',
  "Enter driver's full name. First, Middle, last.",
  None],
 ['DRIVERS fuLL NAME First Middle last', None, None],
 ['DRIVER LICENSE NuMBER.1', 'enter city.', None],
 ['DRIVER LICENSE NuMBER.0', 'enter Driver LICENSE NUMBER', None],
 ['DRIVER LICENSE NuMBER', None, None],
 ['STATE.1', 'enter STATE', None],
 ['STATE.0', 'enter STAT