Skip to content

Commit

Permalink
multipart args can now include binary data
Browse files Browse the repository at this point in the history
http://arclanguage.org/item?id=16400
Multipart parsing now *super* slow.
  • Loading branch information
akkartik committed Jun 6, 2012
1 parent 6e1c721 commit b3d97ff
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 37 deletions.
94 changes: 59 additions & 35 deletions lib/srv.arc
Expand Up @@ -245,59 +245,83 @@ Connection: close"))
(def handle-post (in out op args clen cooks ctype ip)
(if (no clen)
(respond-err out "Post request without Content-Length.")
(let body (string:readchars clen in)
(respond out op (+ args
(if (~begins downcase.ctype "multipart/form-data")
parseargs.body
(parse-multipart-args multipart-boundary.ctype body)))
cooks clen ctype in ip))))
(respond out op (+ args
(if (~begins downcase.ctype "multipart/form-data")
(parseargs:string:readchars clen in)
(parse-multipart-args multipart-boundary.ctype in)))
cooks clen ctype in ip)))

(def multipart-boundary(s)
(let delim "boundary="
(+ "--" (cut s (+ (findsubseq delim s)
len.delim)))))

(def parse-multipart-args(boundary body)
(let indices (find-all boundary body)
(accum yield
(each (index new-index) (zip indices cdr.indices)
(yield:parse-multipart-part body
(+ index len.boundary)
new-index)))))

; a multipart boundary ends at start and starts at end
(def parse-multipart-part(body start end)
(= start (+ start 2)) ; skip first CRLF
(= end (- end 2)) ; lose the final CRLF before next boundary
; Require a name header.
(def parse-multipart-args(boundary in)
(scan-past boundary in) ; skip prelude
(accum yield
(until (multipart-end boundary in)
(withs (headers scan-headers.in
; only place that may contain multi-byte chars
body (scan-body boundary in))
(when headers
(yield:parse-multipart-part headers body))))))

; "The final boundary is followed by two more hyphens to indicate that no
; further parts follow."
; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
; We've already read the boundary itself.
(def multipart-end(boundary in)
(aif peekc.in
(and (is #\- it) readc.in
(or (is #\- peekc.in)
; "one #\- shalt thou not count,
; excepting that thou then proceed to two"
(ero "malformed multipart input; boundary followed by just one '-'. Is it the final part or isn't it?")))
(ero "malformed multipart input; request didn't have a final boundary")))

(def scan-headers(in)
; "The boundary must be followed immediately either by another CRLF and the
; header fields for the next part, or by two CRLFs, in which case there are no
; header fields for the next part.."
; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
(whenlet headers (parse-mime-header:until-2-crlfs body start)
(list (unstring:alref headers "name")
(w/table result
(= (result "contents") (past-2-crlfs body start end))
(parse-mime-header:bytes-string:scan-past "\r\n\r\n" in))

(def scan-body(boundary in)
; "The CRLF preceding the encapsulation line is considered part of the
; boundary.."
; -- http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
(scan-past (+ "\r\n" boundary) in))

(def parse-multipart-part(headers body)
(awhen (and headers (alref headers "name"))
(list unstring.it
(w/table multipart-arg
(= (multipart-arg "contents") body)
(each (property val) headers
(if (~iso "name" property)
(= result.property val)))))))
(unless (iso "name" property)
(= multipart-arg.property val)))))))

; parse lines of the form a=b; c=d; e=f; ..
; segments without '=' are passed through as single-elem lists
(def parse-mime-header(line)
(map [tokens _ #\=]
(tokens downcase.line (orf whitec (testify #\;)))))

(def find-all(pat seq (o index 0))
(whenlet new-index (findsubseq pat seq index)
(cons new-index (find-all pat seq (+ 1 new-index)))))

(def until-2-crlfs(s (o n 0))
(cut s n (posmatch "\r\n\r\n" s n)))

(def past-2-crlfs(s n (o end))
(cut s (+ 4 (posmatch "\r\n\r\n" s n))
end))
; return list of bytes until pat is encountered
; pat is read from input but dropped from result
; all chars in pat must be 1-byte
(def scan-past(pat in)
(= pat (map int (rev:coerce pat 'cons)))
(let buffer nil
(until (iso pat (firstn len.pat buffer))
(push readb.in buffer))
(rev:nthcdr len.pat buffer)))

; convert list of bytes to string
(def bytes-string(l)
(coerce (map [coerce _ 'char]
l)
'string))

; "\"abc\"" => "abc"
(def unstring(s)
Expand Down
4 changes: 2 additions & 2 deletions lib/srv.arc.t
Expand Up @@ -8,5 +8,5 @@
;; 33
;; -----------------------------57651155441074198547161975--
(test-iso "parse-multipart-args works"
(parse-multipart-args "--abc" "\r\n--abc\r\nContent-Disposition: form-data; name=\"a\"\r\n\r\n34\r\n--abc\r\nContent-Disposition: form-data; name=\"b\"\r\n\r\n209\r\n--abc--\r\n")
`(("a" ,(obj "contents" "34")) ("b" ,(obj "contents" "209"))))
(parse-multipart-args "--abc" (instring "\r\n--abc\r\nContent-Disposition: form-data; name=\"a\"\r\n\r\n34\r\n--abc\r\nContent-Disposition: form-data; name=\"b\"\r\n\r\n209\r\n--abc--\r\n"))
`(("a" ,(obj "contents" (map int '(#\3 #\4)))) ("b" ,(obj "contents" (map int '(#\2 #\0 #\9))))))

0 comments on commit b3d97ff

Please sign in to comment.