Permalink
Browse files

Can now read and write metadata

 - Reading metadata occurs on files that already exist
 - Writing metadata can only occur on PDDocuments that are in progress
 - The tests are fairly simple; it may be worth generating the test PDFs
   separately with another app/tool rather than testing against files
   produced by the same tool-chain
  • Loading branch information...
1 parent 116231f commit a6531a09cf46bf751fb61d5a7eefb8456767f054 @KushalP committed Apr 9, 2012
Showing with 72 additions and 10 deletions.
  1. +6 −10 src/camelot/core.clj
  2. +28 −0 src/camelot/metadata.clj
  3. +38 −0 test/camelot/test/metadata.clj
View
@@ -4,7 +4,8 @@
(org.apache.pdfbox.pdmodel.edit PDPageContentStream)
(org.apache.pdfbox.pdmodel.font PDType1Font)
(org.apache.pdfbox.util PDFMergerUtility)
- (java.io FileInputStream)))
+ (java.io FileInputStream))
+ (:use [camelot.metadata]))
(defonce font-map
{"Times-Roman" PDType1Font/TIMES_ROMAN
@@ -32,9 +33,9 @@
[doc-map filename]
{:pre [(and (map? doc-map)
(string? filename))]}
- (let [page (PDPage.)
- doc (doto (PDDocument.)
- (.addPage page))
+ (let [page (PDPage.)
+ doc (doto (PDDocument.)
+ (.addPage page))
content (PDPageContentStream. doc page)]
(try
(.beginText content)
@@ -44,12 +45,7 @@
(.endText content)
(.close content)
(when (contains? doc-map :metadata)
- (let [meta (PDDocumentInformation.)
- data (doc-map :metadata)]
- (.setAuthor meta (data :author))
- (.setKeywords meta (str/join ", " (data :keywords)))
- (.setTitle meta (data :title))
- (.setDocumentInformation doc meta)))
+ (set-metadata doc (doc-map :metadata)))
(.save doc filename)
(finally (if (not (nil? doc))
(.close doc))))
View
@@ -0,0 +1,28 @@
+(ns camelot.metadata
+ (:import (org.apache.pdfbox.pdmodel PDDocument PDDocumentInformation)
+ (java.io FileInputStream))
+ (:require [clojure.string :as str]))
+
+(defn get-metadata
+  "Given the location of a PDF file, provides the metadata it holds.
+
+  Opens `filename`, reads its document information and returns a map with
+  the keys :author, :title, :keywords, :producer, :trapped and :creator.
+  :keywords is the stored keyword string split on \", \" into a vector, or
+  nil when the document has no keywords entry. The other values are
+  returned exactly as PDDocumentInformation reports them.
+
+  Both the input stream and the loaded document are closed before
+  returning, including when reading the metadata throws."
+  [filename]
+  ;; with-open closes in reverse binding order: the document first, then
+  ;; the underlying stream.
+  (with-open [in  (FileInputStream. filename)
+              doc (PDDocument/load in)]
+    (let [info     (.getDocumentInformation doc)
+          keywords (.getKeywords info)]
+      {:author   (.getAuthor info)
+       :title    (.getTitle info)
+       ;; getKeywords yields nil when the entry is missing; str/split would
+       ;; throw a NullPointerException on that, so only split real strings.
+       :keywords (when keywords
+                   (str/split keywords #", "))
+       :producer (.getProducer info)
+       :trapped  (.getTrapped info)
+       :creator  (.getCreator info)})))
+
+(defn set-metadata
+  "Sets metadata to the provided PDDocument.
+
+  Builds a fresh PDDocumentInformation from `data` and installs it on
+  `document`, replacing whatever document information was there before.
+  `data` is looked up under :author, :title and :keywords; the keywords
+  collection is joined with \", \" into a single string."
+  [^PDDocument document data]
+  (let [info (doto (PDDocumentInformation.)
+               (.setAuthor (data :author))
+               (.setKeywords (str/join ", " (data :keywords)))
+               (.setTitle (data :title)))]
+    (.setDocumentInformation document info)))
@@ -0,0 +1,38 @@
+(ns camelot.test.metadata
+ (:use [camelot.core]
+ [camelot.metadata]
+ [camelot.test.helpers])
+ (:use [clojure.test])
+ (:import (org.apache.pdfbox.pdmodel PDDocument)))
+
+(deftest get-metadata-produces-map
+  ;; Round trip: write a PDF with save-as, then check that get-metadata
+  ;; reads back what was written, with nil for the fields never set.
+  (let [filename (temp-pdf-filename)
+        written  {:author   "Joe Bloggs"
+                  :title    "Hello World"
+                  :keywords ["test" "hello" "world"]}
+        expected (merge written
+                        {:producer nil
+                         :trapped  nil
+                         :creator  nil})]
+    ;; Called only for its side effect of creating the file on disk.
+    (save-as {:font     "Helvetica-Bold"
+              :size     12
+              :text     "Hello World"
+              :metadata written}
+             filename)
+    (is (= (get-metadata filename) expected))))
+
+(deftest set-metadata-saves-metadata-to-pddocument
+  ;; Checks that set-metadata installs author, title and the joined
+  ;; keyword string on an in-progress PDDocument.
+  (let [metadata {:author   "Joe Bloggs"
+                  :title    "Hello World"
+                  :keywords ["test" "hello" "world"]}]
+    ;; with-open makes sure the document is closed even when an assertion
+    ;; or set-metadata throws; the original test never closed it.
+    (with-open [doc (PDDocument.)]
+      ;; A brand-new document must not already carry the values under test.
+      (let [before (.getDocumentInformation doc)]
+        (is (not= "Joe Bloggs" (.getAuthor before)))
+        (is (not= "Hello World" (.getTitle before)))
+        (is (not= "test, hello, world" (.getKeywords before))))
+      (set-metadata doc metadata)
+      ;; Re-fetch the information object after set-metadata has run.
+      (let [after (.getDocumentInformation doc)]
+        (is (= "Joe Bloggs" (.getAuthor after)))
+        (is (= "Hello World" (.getTitle after)))
+        (is (= "test, hello, world" (.getKeywords after)))))))

0 comments on commit a6531a0

Please sign in to comment.