Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
josch committed Jun 21, 2014
0 parents commit ce1a1bf
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions pdfrw-tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python


import sys
import os
import zlib
import Image
import StringIO

from pdfrw import PdfReader, PdfDict, PdfArray, PdfName, PdfWriter

def process_image(image):
    """Re-encode a JPEG image stream in place at a lower quality.

    image -- image dictionary; its "/Filter" entry decides what happens and
             its ``stream`` attribute holds the encoded image data.

    Only a filter equal to DCTDecode is acted on: the stream is opened
    with Image, saved again as JPEG with quality=45, and the result is
    written back to ``image.stream``.  Any other filter, FlateDecode
    included, leaves the image untouched.  Nothing is returned.
    """
    # Experiments that were left disabled in this function (not executed):
    #   * deleting the "/Mask", "/SMask" and "/ImageMask" entries;
    #   * replacing the image with a 1x1 FlateDecode placeholder built
    #     from "empty.jpg";
    #   * storing the decoded pixels zlib-compressed under FlateDecode
    #     instead of re-encoding them as JPEG.
    if not (image["/Filter"] == PdfName("DCTDecode")):
        return

    decoded = Image.open(StringIO.StringIO(image.stream))
    buf = StringIO.StringIO()
    decoded.save(buf, "JPEG", quality=45)
    # Read the bytes out before the buffer is closed.
    image.stream = buf.getvalue()
    buf.close()


def find_images(obj, visited=None):
    """Walk an object graph depth-first and hand every image to process_image().

    A PdfDict whose Type is XObject and whose Subtype is Image is passed to
    process_image().  The walk then descends into the values of every
    PdfDict and the items of every PdfArray.

    obj     -- object to inspect; anything that is neither a PdfDict nor a
               PdfArray is ignored.
    visited -- set of id() values of containers already walked.  Callers
               normally omit it; recursive calls pass the same set down.
    """
    if not isinstance(obj, (PdfDict, PdfArray)):
        return

    # A literal set() default would be built once at definition time and
    # shared by every call that omits the argument, so a later walk would
    # inherit stale ids (which may be reused after garbage collection) and
    # silently skip objects.  Create a fresh set per top-level call instead.
    if visited is None:
        visited = set()

    # Don't get stuck in an infinite loop
    myid = id(obj)
    if myid in visited:
        return
    visited.add(myid)

    if isinstance(obj, PdfDict):
        if obj.Type == PdfName.XObject and obj.Subtype == PdfName.Image:
            process_image(obj)
        # For dictionaries, recurse into the values rather than the keys.
        obj = obj.itervalues()

    for item in obj:
        find_images(item, visited)


if __name__ == '__main__':
    # Usage: <script> input.pdf output.pdf
    # Check the argument count up front so that a wrong invocation produces
    # a usage message instead of an opaque tuple-unpacking ValueError.
    if len(sys.argv) != 3:
        sys.exit("usage: %s input.pdf output.pdf" % sys.argv[0])
    inpfn, outfn = sys.argv[1:]

    reader = PdfReader(inpfn)
    # Process images reachable from the reader object before writing.
    # NOTE(review): this only has an effect if PdfReader is itself a
    # PdfDict/PdfArray -- confirm against pdfrw.
    find_images(reader)
    PdfWriter().addpages(reader.pages).write(outfn)

0 comments on commit ce1a1bf

Please sign in to comment.