Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Improve benchmarks

  • Loading branch information...
commit fa73069aef035169061bbcbb2f313bb0c62cfbf9 1 parent 28a1526
Hongli Lai authored
2  .gitignore
View
@@ -0,0 +1,2 @@
+*.dSYM
+multipart
15 MultipartReader.h
View
@@ -32,7 +32,14 @@ class MultipartReader {
std::string currentHeaderName, currentHeaderValue;
void *userData;
- void setCallbacks() {
// Sets the four reader-level callbacks (onPartBegin, onPartData, onPartEnd,
// onEnd) to NULL.
+ void resetReaderCallbacks() {
+ onPartBegin = NULL;
+ onPartData = NULL;
+ onPartEnd = NULL;
+ onEnd = NULL;
+ }
+
+ void setParserCallbacks() {
parser.onPartBegin = cbPartBegin;
parser.onHeaderField = cbHeaderField;
parser.onHeaderValue = cbHeaderValue;
@@ -108,11 +115,13 @@ class MultipartReader {
Callback onEnd;
// Default constructor: NULLs the reader-level callbacks first, then installs
// the parser-side callbacks (the former single setCallbacks() step is split
// in two by this change).
MultipartReader() {
- setCallbacks();
+ resetReaderCallbacks();
+ setParserCallbacks();
}
// Constructs the embedded parser with `boundary`, then NULLs the reader-level
// callbacks and installs the parser-side callbacks.
MultipartReader(const std::string &boundary): parser(boundary) {
- setCallbacks();
+ resetReaderCallbacks();
+ setParserCallbacks();
}
void reset() {
33 Rakefile
View
@@ -4,30 +4,35 @@ file 'multipart' => ['multipart.cpp', 'MultipartParser.h', 'MultipartReader.h']
sh 'g++ -Wall -g multipart.cpp -o multipart'
end
-task :generate_test_file do
# File task: produces the file `random` by having dd copy 100 blocks of
# 1048576 bytes (100 MiB in total) from /dev/urandom.
+file 'random' do
+ sh "dd if=/dev/urandom of=random bs=1048576 count=100"
+end
+
# Writes a single-part multipart fixture to ENV['OUTPUT']: the opening
# boundary and part headers, then the contents of the `random` file as the
# part body, then the closing boundary. Depends on the `random` file task.
# ENV['BOUNDARY'] overrides the default boundary string.
+desc "Create a test multipart file"
+task :generate_test_file => 'random' do
output = ENV['OUTPUT']
# NOTE(review): `size` is still parsed from ENV['SIZE'], but none of the added
# lines read it; only the removed /dev/urandom copy loop consulted it.
size = (ENV['SIZE'] || 1024 * 1024 * 100).to_i
- boundary = ENV['BOUNDARY'] || 'abcd'
+ boundary = ENV['BOUNDARY'] || '-----------------------------168072824752491622650073'
raise 'OUTPUT must be specified' if !output
- File.open(ENV['OUTPUT'], 'wb') do |f|
+ puts "Creating #{output}"
+ File.open(output, 'wb') do |f|
f.write("--#{boundary}\r\n")
f.write("content-type: text/plain\r\n")
f.write("content-disposition: form-data; name=\"field1\"; filename=\"field1\"\r\n")
f.write("foo-bar: abc\r\n")
f.write("x: y\r\n")
f.write("\r\n")
-
- File.open("/dev/urandom", 'rb') do |r|
- written = 0
- buf = ''
- while !r.eof? && written < size
- r.read([1024, size - written].min, buf)
- f.write(buf)
- written += buf.size
- end
- end
-
+ end
# The part body is appended by the shell instead of being copied in Ruby.
# NOTE(review): `output` is interpolated into the shell command unquoted.
+ sh "cat random >> #{output}"
+ puts "Postprocessing #{output}"
# Reopen in append mode to add the closing boundary after the payload.
+ File.open(output, 'ab') do |f|
f.write("\r\n--#{boundary}--\r\n")
end
+end
+
# Runs the three benchmark programs one after another: the compiled
# ./multipart binary (rebuilt first via the 'multipart' prerequisite), then
# rack-parser.rb under ruby, then multipart_parser.js under node.
+task :benchmark => 'multipart' do
+ sh "./multipart"
+ sh "ruby rack-parser.rb"
+ sh "node multipart_parser.js"
end
109 multipart.cpp
View
@@ -1,11 +1,19 @@
#include "MultipartParser.h"
#include "MultipartReader.h"
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
#include <stdio.h>
+#include <unistd.h>
//#define TEST_PARSER
-#define INPUT_FILE "input.txt"
-#define BOUNDARY "abcd"
-//#define BOUNDARY "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
+#define INPUT_FILE "input3.txt"
+//#define BOUNDARY "abcd"
+#define BOUNDARY "-----------------------------168072824752491622650073"
+#define TIMES 10
+#define SLURP
+#define QUIET
+
using namespace std;
@@ -67,37 +75,88 @@ int
// Benchmark driver. Builds a parser (MultipartParser when TEST_PARSER is
// defined, MultipartReader otherwise), feeds INPUT_FILE through it TIMES
// times and prints total time, time per run and throughput.
// NOTE(review): the results of stat(), fopen(), malloc() and fread() are used
// unchecked, and buf is never freed before returning.
main() {
#ifdef TEST_PARSER
MultipartParser parser;
- parser.onPartBegin = onPartBegin;
- parser.onHeaderField = onHeaderField;
- parser.onHeaderValue = onHeaderValue;
- parser.onPartData = onPartData;
- parser.onPartEnd = onPartEnd;
- parser.onEnd = onEnd;
// With QUIET defined none of the callbacks below are installed.
+ #ifndef QUIET
+ parser.onPartBegin = onPartBegin;
+ parser.onHeaderField = onHeaderField;
+ parser.onHeaderValue = onHeaderValue;
+ parser.onPartData = onPartData;
+ parser.onPartEnd = onPartEnd;
+ parser.onEnd = onEnd;
+ #endif
#else
MultipartReader parser;
- parser.onPartBegin = onPartBegin;
- parser.onPartData = onPartData;
- parser.onPartEnd = onPartEnd;
- parser.onEnd = onEnd;
+ #ifndef QUIET
+ parser.onPartBegin = onPartBegin;
+ parser.onPartData = onPartData;
+ parser.onPartEnd = onPartEnd;
+ parser.onEnd = onEnd;
+ #endif
#endif
- for (int i = 0; i < 5; i++) {
- printf("------------\n");
- parser.setBoundary(BOUNDARY);
+ struct timeval stime, etime;
+ struct stat sbuf;
+
// The file size sizes the SLURP buffer and feeds the throughput figure.
+ stat(INPUT_FILE, &sbuf);
+
// SLURP mode: the whole file is read into memory once, before the first
// timestamp, so only the feed loop is timed.
+ #ifdef SLURP
+ size_t bufsize = sbuf.st_size;
+ char *buf = (char *) malloc(bufsize);
FILE *f = fopen(INPUT_FILE, "rb");
- while (!parser.stopped() && !feof(f)) {
- char buf[1024 * 32];
- size_t len = fread(buf, 1, sizeof(buf), f);
+ fread(buf, 1, bufsize, f);
+ fclose(f);
+
+ gettimeofday(&stime, NULL);
+ for (int i = 0; i < TIMES; i++) {
+ #ifndef QUIET
+ printf("------------\n");
+ #endif
// Called before every pass; presumably this also resets the parser state
// between runs - confirm in MultipartReader/MultipartParser.
+ parser.setBoundary(BOUNDARY);
+
size_t fed = 0;
// feed() reports how many bytes it accepted; keep offering the remainder
// until everything is consumed or the parser stops.
do {
- size_t ret = parser.feed(buf + fed, len - fed);
+ size_t ret = parser.feed(buf + fed, bufsize - fed);
fed += ret;
- //printf("accepted %d bytes\n", (int) ret);
- } while (fed < len && !parser.stopped());
+ } while (fed < bufsize && !parser.stopped());
+ #ifndef QUIET
+ printf("%s\n", parser.getErrorMessage());
+ #endif
}
- printf("%s\n", parser.getErrorMessage());
- fclose(f);
- }
+ gettimeofday(&etime, NULL);
// Streaming mode: the file is reopened and read in 32 KiB chunks on every
// pass, so the file I/O happens between the two timestamps.
+ #else
+ size_t bufsize = 1024 * 32;
+ char *buf = (char *) malloc(bufsize);
+
+ gettimeofday(&stime, NULL);
+ for (int i = 0; i < TIMES; i++) {
+ #ifndef QUIET
+ printf("------------\n");
+ #endif
+ parser.setBoundary(BOUNDARY);
+
+ FILE *f = fopen(INPUT_FILE, "rb");
+ while (!parser.stopped() && !feof(f)) {
+ size_t len = fread(buf, 1, bufsize, f);
+ size_t fed = 0;
+ do {
+ size_t ret = parser.feed(buf + fed, len - fed);
+ fed += ret;
+ } while (fed < len && !parser.stopped());
+ }
+ #ifndef QUIET
+ printf("%s\n", parser.getErrorMessage());
+ #endif
+ fclose(f);
+ }
+ gettimeofday(&etime, NULL);
+ #endif
+
// Both timestamps converted to microseconds.
+ unsigned long long a = (unsigned long long) stime.tv_sec * 1000000 + stime.tv_usec;
+ unsigned long long b = (unsigned long long) etime.tv_sec * 1000000 + etime.tv_usec;
// NOTE(review): (b - a) / TIMES is an unsigned long long division, so the
// per-run figure is truncated to whole microseconds before it is scaled.
+ printf("(C++) Total: %.2f s Per run: %.2f s Throughput: %.2f MB/sec\n",
+ (b - a) / 1000000.0,
+ (b - a) / TIMES / 1000000.0,
+ ((unsigned long long) sbuf.st_size * TIMES) / ((b - a) / 1000000.0) / 1024.0 / 1024.0);
+
return 0;
}
212 rack-parser.rb
View
@@ -0,0 +1,212 @@
+# encoding: binary
+require 'stringio'
+
# Helpers used by Multipart.parse_multipart. Each one is exposed with
# module_function, so it is callable as Utils.<name>.
+module Utils
# bytesize(string): byte length of string. Uses String#bytesize when String
# responds to it, otherwise falls back to String#size.
+ if ''.respond_to?(:bytesize)
+ def bytesize(string)
+ string.bytesize
+ end
+ else
+ def bytesize(string)
+ string.size
+ end
+ end
+ module_function :bytesize
+
# unescape(s): every '+' becomes a space, then each run of %XX escapes is
# turned into the corresponding raw bytes via pack('H*').
+ # Unescapes a URI escaped string. (Stolen from Camping).
+ def unescape(s)
+ s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){
+ [$1.delete('%')].pack('H*')
+ }
+ end
+ module_function :unescape
+
# normalize_params(params, name, v): stores v in params under the (possibly
# bracketed) key in name and returns params; returns nil when no key can be
# extracted from name. k is the first key segment, `after` the remainder:
#   "k"              -> params[k] = v
#   "k[]"            -> v is appended to the Array at params[k]
#   "k[][c]", "k[]x" -> v is stored under the child key in the last Hash of
#                       the Array at params[k], or in a new Hash when the last
#                       element is not a Hash or already has that key
#   anything else    -> recurses into the Hash at params[k] with the remainder
# Raises TypeError when params[k] already holds a value of the other kind.
+ def normalize_params(params, name, v = nil)
+ name =~ %r(\A[\[\]]*([^\[\]]+)\]*)
+ k = $1 || ''
+ after = $' || ''
+
+ return if k.empty?
+
+ if after == ""
+ params[k] = v
+ elsif after == "[]"
+ params[k] ||= []
+ raise TypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array)
+ params[k] << v
+ elsif after =~ %r(^\[\]\[([^\[\]]+)\]$) || after =~ %r(^\[\](.+)$)
+ child_key = $1
+ params[k] ||= []
+ raise TypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array)
+ if params[k].last.is_a?(Hash) && !params[k].last.key?(child_key)
+ normalize_params(params[k].last, child_key, v)
+ else
+ params[k] << normalize_params({}, child_key, v)
+ end
+ else
+ params[k] ||= {}
+ raise TypeError, "expected Hash (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Hash)
+ params[k] = normalize_params(params[k], after, v)
+ end
+
+ return params
+ end
+ module_function :normalize_params
+end
+
# Multipart body parser used by the benchmark script at the end of this file
# (presumably derived from Rack's multipart parser, given the file name -
# confirm).
#
# parse_multipart(env) parses the multipart body in env['rack.input'].
#   env['CONTENT_TYPE']   - must match multipart/...boundary=...; otherwise
#                           nil is returned
#   env['CONTENT_LENGTH'] - converted with to_i and used as the count of bytes
#                           still to be read
#   env['rack.input']     - IO-like object; rewound before and after parsing
# Returns the params Hash filled in through Utils.normalize_params.
# Raises EOFError ("bad content body") when the body does not start with the
# boundary line, or when a read returns nothing before a part is complete.
+module Multipart
# File-upload bodies are kept in memory: inside this module Tempfile is
# StringIO. NOTE(review): StringIO.new takes the initial buffer contents, so
# Tempfile.new("RackMultipart") presumably starts the body as that string and
# it is overwritten from position 0 as data is appended - confirm.
+ Tempfile = StringIO
+
+ EOL = "\r\n"
+ MULTIPART_BOUNDARY = "AaB03x"
+
+ def self.parse_multipart(env)
+ unless env['CONTENT_TYPE'] =~
+ %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|n
+ nil
+ else
+ boundary = "--#{$1}"
+
+ params = {}
+ buf = ""
+ content_length = env['CONTENT_LENGTH'].to_i
+ input = env['rack.input']
+ input.rewind
+
+ boundary_size = Utils.bytesize(boundary) + EOL.size
+ bufsize = 16384
+
# The opening boundary line is consumed right below, so take it off the
# remaining byte count first.
+ content_length -= boundary_size
+
+ read_buffer = ''
+
+ status = input.read(boundary_size, read_buffer)
+ raise EOFError, "bad content body" unless status == boundary + EOL
+
# Matches a boundary (optionally preceded by EOL) followed by either EOL
# (another part follows) or "--" (closing boundary); group 1 tells which.
+ rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/n
+
# One iteration per part.
+ loop {
+ head = nil
+ body = ''
+ filename = content_type = name = nil
+
# Keep reading until the part headers have been parsed and the buffer
# contains the next boundary.
+ until head && buf =~ rx
+ if !head && i = buf.index(EOL+EOL)
+ head = buf.slice!(0, i+2) # First \r\n
+ buf.slice!(0, 2) # Second \r\n
+
+ token = /[^\s()<>,;:\\"\/\[\]?=]+/
+ condisp = /Content-Disposition:\s*#{token}\s*/i
+ dispparm = /;\s*(#{token})=("(?:\\"|[^"])*"|#{token})*/
+
+ rfc2183 = /^#{condisp}(#{dispparm})+$/i
+ broken_quoted = /^#{condisp}.*;\sfilename="(.*?)"(?:\s*$|\s*;\s*#{token}=)/i
+ broken_unquoted = /^#{condisp}.*;\sfilename=(#{token})/i
+
+ if head =~ rfc2183
+ filename = Hash[head.scan(dispparm)]['filename']
+ filename = $1 if filename and filename =~ /^"(.*)"$/
+ elsif head =~ broken_quoted
+ filename = $1
+ elsif head =~ broken_unquoted
+ filename = $1
+ end
+
+ if filename && filename !~ /\\[^\\"]/
+ filename = Utils.unescape(filename).gsub(/\\(.)/, '\1')
+ end
+
+ content_type = head[/Content-Type: (.*)#{EOL}/ni, 1]
+ name = head[/Content-Disposition:.*\s+name="?([^\";]*)"?/ni, 1] || head[/Content-ID:\s*([^#{EOL}]*)/ni, 1]
+
+ if filename
+ body = Tempfile.new("RackMultipart")
+ body.binmode if body.respond_to?(:binmode)
+ end
+
+ next
+ end
+
# The last boundary_size+4 bytes stay in buf so that a boundary split
# across two reads can still be matched.
+ # Save the read body part.
+ if head && (boundary_size+4 < buf.size)
+ body << buf.slice!(0, buf.size - (boundary_size+4))
+ end
+
+ c = input.read(bufsize < content_length ? bufsize : content_length, read_buffer)
+ raise EOFError, "bad content body" if c.nil? || c.empty?
+ buf << c
+ content_length -= c.size
+ end
+
+ # Save the rest.
+ if i = buf.index(rx)
+ body << buf.slice!(0, i)
+ buf.slice!(0, boundary_size+2)
+
# A "--" suffix marks the closing boundary; -1 is the stop signal
# checked at the bottom of the loop.
+ content_length = -1 if $1 == "--"
+ end
+
+ if filename == ""
+ # filename is blank which means no file has been selected
+ data = nil
+ elsif filename
+ body.rewind
+
+ # Take the basename of the upload's original filename.
+ # This handles the full Windows paths given by Internet Explorer
+ # (and perhaps other broken user agents) without affecting
+ # those which give the lone filename.
+ filename = filename.split(/[\/\\]/).last
+
+ data = {:filename => filename, :type => content_type,
+ :name => name, :tempfile => body, :head => head}
+ elsif !filename && content_type
+ body.rewind
+
+ # Generic multipart cases, not coming from a form
+ data = {:type => content_type,
+ :name => name, :tempfile => body, :head => head}
+ else
+ data = body
+ end
+
+ Utils.normalize_params(params, name, data) unless data.nil?
+
+ # break if we're at the end of a buffer, but not if it is the end of a field
+ break if (buf.empty? && $1 != EOL) || content_length == -1
+ }
+
+ input.rewind
+
+ params
+ end
+ end
+end
+
+require 'benchmark'
+
# Benchmark script: runs Multipart.parse_multipart over FILENAME TIMES times
# and prints total time, time per run and throughput.
+FILENAME = 'input3.txt'
+#BOUNDARY = "abcd"
+BOUNDARY = "-----------------------------168072824752491622650073"
+TIMES = 10
+SLURP = true
+
# SLURP: parse from an in-memory copy of the file; otherwise parse straight
# from an open File handle.
+if SLURP
+ io = StringIO.new(File.read(FILENAME))
+else
+ io = File.open(FILENAME, 'rb')
+end
# The same env (and the same io object) is passed to every run.
+env = {
+ 'CONTENT_LENGTH' => File.size(FILENAME),
+ 'CONTENT_TYPE' => "multipart/form-data; boundary=#{BOUNDARY}",
+ 'rack.input' => io
+}
+result = Benchmark.measure do
+ TIMES.times do
+ Multipart.parse_multipart(env)
+ end
+end
# NOTE(review): Benchmark::Tms#total is presumably CPU time (user + system)
# rather than wall-clock time, while multipart.cpp measures with
# gettimeofday - confirm the figures are comparable.
+printf "(Ruby) Total: %.2f s Per run: %.2f s Throughput: %.2f MB/sec\n",
+ result.total,
+ result.total / TIMES,
+ (File.size(FILENAME) * TIMES) / result.total / 1024.0 / 1024.0
Please sign in to comment.
Something went wrong with that request. Please try again.