From b3d97ffc9150da8593b0e845e1e5b680414f52d3 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Tue, 5 Jun 2012 18:37:23 -0700 Subject: [PATCH] multipart args can now include binary data http://arclanguage.org/item?id=16400 Multipart parsing now *super* slow. --- lib/srv.arc | 94 ++++++++++++++++++++++++++++++++------------------- lib/srv.arc.t | 4 +-- 2 files changed, 61 insertions(+), 37 deletions(-) diff --git a/lib/srv.arc b/lib/srv.arc index a6eb4f72e..8a95088d4 100644 --- a/lib/srv.arc +++ b/lib/srv.arc @@ -245,42 +245,61 @@ Connection: close")) (def handle-post (in out op args clen cooks ctype ip) (if (no clen) (respond-err out "Post request without Content-Length.") - (let body (string:readchars clen in) - (respond out op (+ args - (if (~begins downcase.ctype "multipart/form-data") - parseargs.body - (parse-multipart-args multipart-boundary.ctype body))) - cooks clen ctype in ip)))) + (respond out op (+ args + (if (~begins downcase.ctype "multipart/form-data") + (parseargs:string:readchars clen in) + (parse-multipart-args multipart-boundary.ctype in))) + cooks clen ctype in ip))) (def multipart-boundary(s) (let delim "boundary=" (+ "--" (cut s (+ (findsubseq delim s) len.delim))))) -(def parse-multipart-args(boundary body) - (let indices (find-all boundary body) - (accum yield - (each (index new-index) (zip indices cdr.indices) - (yield:parse-multipart-part body - (+ index len.boundary) - new-index))))) - -; a multipart boundary ends at start and starts at end -(def parse-multipart-part(body start end) - (= start (+ start 2)) ; skip first CRLF - (= end (- end 2)) ; lose the final CRLF before next boundary - ; Require a name header. +(def parse-multipart-args(boundary in) + (scan-past boundary in) ; skip prelude + (accum yield + (until (multipart-end boundary in) + (withs (headers scan-headers.in + ; only place that may contain multi-byte chars + body (scan-body boundary in)) + (when headers + (yield:parse-multipart-part headers body)))))) + +; "The final boundary is followed by two more hyphens to indicate that no +; further parts follow." +; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html +; We've already read the boundary itself. +(def multipart-end(boundary in) + (aif peekc.in + (and (is #\- it) readc.in + (or (is #\- peekc.in) + ; "one #\- shalt thou not count, + ; excepting that thou then proceed to two" + (ero "malformed multipart input; boundary followed by just one '-'. Is it the final part or isn't it?"))) + (ero "malformed multipart input; request didn't have a final boundary"))) + +(def scan-headers(in) ; "The boundary must be followed immediately either by another CRLF and the ; header fields for the next part, or by two CRLFs, in which case there are no ; header fields for the next part.." ; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html - (whenlet headers (parse-mime-header:until-2-crlfs body start) - (list (unstring:alref headers "name") - (w/table result - (= (result "contents") (past-2-crlfs body start end)) + (parse-mime-header:bytes-string:scan-past "\r\n\r\n" in)) + +(def scan-body(boundary in) + ; "The CRLF preceding the encapsulation line is considered part of the + ; boundary.." + ; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html + (scan-past (+ "\r\n" boundary) in)) + +(def parse-multipart-part(headers body) + (awhen (and headers (alref headers "name")) + (list unstring.it + (w/table multipart-arg + (= (multipart-arg "contents") body) (each (property val) headers - (if (~iso "name" property) - (= result.property val))))))) + (unless (iso "name" property) + (= multipart-arg.property val))))))) ; parse lines of the form a=b; c=d; e=f; .. ; segments without '=' are passed through as single-elem lists @@ -288,16 +307,21 @@ Connection: close")) (map [tokens _ #\=] (tokens downcase.line (orf whitec (testify #\;))))) -(def find-all(pat seq (o index 0)) - (whenlet new-index (findsubseq pat seq index) - (cons new-index (find-all pat seq (+ 1 new-index))))) - -(def until-2-crlfs(s (o n 0)) - (cut s n (posmatch "\r\n\r\n" s n))) - -(def past-2-crlfs(s n (o end)) - (cut s (+ 4 (posmatch "\r\n\r\n" s n)) - end)) +; return list of bytes until pat is encountered +; pat is read from input but dropped from result +; all chars in pat must be 1-byte +(def scan-past(pat in) + (= pat (map int (rev:coerce pat 'cons))) + (let buffer nil + (until (iso pat (firstn len.pat buffer)) + (push readb.in buffer)) + (rev:nthcdr len.pat buffer))) + +; convert list of bytes to string +(def bytes-string(l) + (coerce (map [coerce _ 'char] + l) + 'string)) ; "\"abc\"" => "abc" (def unstring(s) diff --git a/lib/srv.arc.t b/lib/srv.arc.t index ac1c007b9..9d2da0e09 100644 --- a/lib/srv.arc.t +++ b/lib/srv.arc.t @@ -8,5 +8,5 @@ ;; 33 ;; -----------------------------57651155441074198547161975-- (test-iso "parse-multipart-args works" - (parse-multipart-args "--abc" "\r\n--abc\r\nContent-Disposition: form-data; name=\"a\"\r\n\r\n34\r\n--abc\r\nContent-Disposition: form-data; name=\"b\"\r\n\r\n209\r\n--abc--\r\n") - `(("a" ,(obj "contents" "34")) ("b" ,(obj "contents" "209")))) + (parse-multipart-args "--abc" (instring "\r\n--abc\r\nContent-Disposition: form-data; name=\"a\"\r\n\r\n34\r\n--abc\r\nContent-Disposition: form-data; name=\"b\"\r\n\r\n209\r\n--abc--\r\n")) + `(("a" ,(obj "contents" (map int '(#\3 #\4)))) ("b" ,(obj "contents" (map int '(#\2 #\0 #\9))))))