Skip to content

Commit

Permalink
Make startup a lot faster
Browse files Browse the repository at this point in the history
Summary:
This greatly decreases startup time for the codebase by making the non-XHP fastpath a lot more sophisticated.

One of the biggest sources of fastpath false positives was markup in strings. The new fastpath correctly scans past strings, which means we don't have to double-parse nearly as many files.

I implemented the fastpath in re2c which is the lightest weight scanner I could find. Basically the only thing it does is convert lists of regular expressions to huge state machines. Everything else you might expect from a scanner has to be implemented from scratch, which is just what I want.

I implemented just enough of PHP's grammar to know when we're looking at actual code. Then I use the same XHP clues as I had in the old scanner.

Reviewed By: dcorson

Test Plan:
Browsed around the lite site, etc.

Revert: OK

DiffCamp Revision: 69583
  • Loading branch information
Marcel Laverdet committed Oct 18, 2009
1 parent 2100aa3 commit dbaca37
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 44 deletions.
2 changes: 1 addition & 1 deletion ext.hpp
Expand Up @@ -4,7 +4,7 @@
#endif
#include "php.h"

#define PHP_XHP_VERSION "1.3.2"
#define PHP_XHP_VERSION "1.3.3"
#define PHP_XHP_EXTNAME "xhp"

extern zend_module_entry xhp_module_entry;
Expand Down
9 changes: 6 additions & 3 deletions xhp/Makefile
Expand Up @@ -11,7 +11,7 @@ endif
all: libxhp.a libxhp.so

clean:
-rm libxhp.a xhpize parser.yacc.cpp scanner.lex.cpp scanner.lex.hpp parser.yacc.output parser.yacc.hpp *.o 2>/dev/null
-rm libxhp.a xhpize parser.yacc.cpp scanner.lex.cpp scanner.lex.hpp parser.yacc.output parser.yacc.hpp fastpath.cpp *.o 2>/dev/null

parser.yacc.cpp: parser.y
bison --debug --verbose -d -o $@ $<
Expand All @@ -24,17 +24,20 @@ scanner.lex.cpp: scanner.l

scanner.lex.hpp: scanner.lex.cpp

# Generate the C++ fast-path scanner from its re2c source.
# -c enables re2c condition support, which the <HTML>/<PHP>/<COMMENT_*>
# rule prefixes in fastpath.re depend on.
fastpath.cpp: fastpath.re
re2c -c -o $@ $<

%.o: %.cpp
$(CXX) -c $(CPPFLAGS) -o $@ $<

xhp_preprocess.o: xhp_preprocess.cpp scanner.lex.hpp parser.yacc.hpp
parser.yacc.o: scanner.lex.hpp
scanner.lex.o: parser.yacc.hpp

libxhp.a: code_rope.o scanner.lex.o parser.yacc.o xhp_preprocess.o
libxhp.a: code_rope.o scanner.lex.o parser.yacc.o fastpath.o xhp_preprocess.o
$(AR) -crs $@ $^

libxhp.so: code_rope.o scanner.lex.o parser.yacc.o xhp_preprocess.o
libxhp.so: code_rope.o scanner.lex.o parser.yacc.o fastpath.o xhp_preprocess.o
g++ -shared -Wl,-soname,libxhp.so -o libxhp.so $^

xhpize: xhpize.cpp libxhp.a
Expand Down
4 changes: 4 additions & 0 deletions xhp/fastpath.hpp
@@ -0,0 +1,4 @@
#pragma once
#include "xhp_preprocess.hpp"

// Cheap pre-scan that decides whether a buffer needs the full XHP parse.
//
//   yy    - start of the source text to scan
//   len   - number of characters in the buffer
//   flags - preprocessing flags; the scanner reads eval, short_tags and
//           asp_tags to decide where PHP code starts and ends
//
// Returns true if the PHP portions of the buffer contain something that
// might be XHP, false otherwise. xhp_preprocess() returns XHPDidNothing
// without parsing when this returns false.
// NOTE(review): the caller in xhp_preprocess.cpp passes std::string::c_str()
// and length(); confirm whether a NUL terminator is required here.
bool xhp_fastpath(const char* yy, const size_t len, const xhp_flags_t &flags);
93 changes: 93 additions & 0 deletions xhp/fastpath.re
@@ -0,0 +1,93 @@
#include "fastpath.hpp"
#include <stdio.h>
// Fast pre-scan that reports whether a buffer might contain XHP.
//
// The scanner is a small re2c state machine with four conditions:
//   HTML          - outside of PHP tags; only looks for an opening tag
//   PHP           - inside PHP code; skips string literals and comments and
//                   looks for the XHP clues listed below
//   COMMENT_EOL   - inside a # or // comment, until a newline or ?>
//   COMMENT_BLOCK - inside a block comment, until the closing marker
//
// XHP clues (only honoured in the PHP condition): "</", "/>", ":" followed
// by a letter or digit, the word "element", and ")" optionally followed by
// whitespace and then "[". The "::" rule consumes a scope-resolution
// operator first so that Foo::bar is not mistaken for a ":" clue.
//
// Parameters:
//   yy    - start of the buffer; also used directly as the scan cursor
//   len   - number of characters in the buffer
//   flags - eval selects the initial condition (PHP when set, HTML
//           otherwise); short_tags and asp_tags gate the <? and <% / %> tags
//
// Returns true as soon as a clue is matched. Returns false when YYFILL finds
// that fewer characters remain than the scanner asked for, i.e. the end of
// the buffer was reached without a clue.
bool xhp_fastpath(const char* yy, const size_t len, const xhp_flags_t &flags) {
  // Combined with the >= comparison in YYFILL, the +1 means a request for n
  // characters succeeds only while n characters remain before yy + len.
  const char* eob = yy + len + 1;
  const char* YYMARKER = NULL;
  enum {
    HTML,
    PHP,
    COMMENT_EOL,
    COMMENT_BLOCK
  } state = flags.eval ? PHP : HTML;

  #define YYCURSOR yy
  #define YYCTYPE char
  #define YYGETCONDITION() state
  #define YYFILL(ii) if (ii + YYCURSOR >= eob) return false

  for (;;) {
/*!re2c
    re2c:condenumprefix = "";
    re2c:yyfill:check = 0;
    NEWLINE = ('\r'|'\n'|'\r\n');
    WHITESPACE = [ \n\r\t]+;
    <HTML> '<?php'([ \t]|NEWLINE) {
      state = PHP;
      continue;
    }
    <HTML> '<?='|'<?' {
      // Short open tags only start PHP code when they are enabled.
      if (flags.short_tags) {
        state = PHP;
      }
      continue;
    }
    <HTML> '<%='|'<%' {
      // ASP-style open tags only start PHP code when they are enabled.
      if (flags.asp_tags) {
        state = PHP;
      }
      continue;
    }
    <HTML> [^] { continue; }
    <PHP> '?>'|'</script'WHITESPACE*'>' {
      state = HTML;
      continue;
    }
    <PHP> '%>' {
      // An ASP-style close tag ends the PHP section, like the other close
      // tags above. This used to assign PHP, which left the scanner in PHP
      // mode and let the HTML that follows be mistaken for XHP.
      if (flags.asp_tags) {
        state = HTML;
      }
      continue;
    }
    <PHP> 'b'?'\''('\\'.|'\\\n'|[^\\']+)*'\''|
          'b'?'\"'('\\'.|'\\\n'|[^\\"]+)*'\"' { continue; }
    <PHP> '#'|'//' {
      state = COMMENT_EOL;
      continue;
    }
    <PHP> '/*' {
      state = COMMENT_BLOCK;
      continue;
    }
    <PHP> '::' { continue; }
    <PHP> '</'|
          '/>'|
          ':'[a-zA-Z0-9]|
          'element'|
          ')'WHITESPACE*'[' {
      return true;
    }
    <PHP> [^] { continue; }
    <COMMENT_EOL> NEWLINE {
      state = PHP;
      continue;
    }
    <COMMENT_EOL> '?>' {
      state = HTML;
      continue;
    }
    <COMMENT_EOL> [^] { continue; }
    <COMMENT_BLOCK> [^*] { continue; }
    <COMMENT_BLOCK> '*/' {
      state = PHP;
      continue;
    }
    <COMMENT_BLOCK> [^] { continue; }
*/
  }
  return false;
}
43 changes: 4 additions & 39 deletions xhp/xhp_preprocess.cpp
@@ -1,5 +1,6 @@
#include "xhp.hpp"
#include "xhp_preprocess.hpp"
#include "fastpath.hpp"
#include <sstream>
using namespace std;
extern int xhpdebug;
Expand All @@ -22,47 +23,11 @@ XHPResult xhp_preprocess(string &in, string &out, bool isEval, string &errDescri
return xhp_preprocess(in, out, errDescription, errLineno, flags);
}

XHPResult xhp_preprocess(std::string &in, std::string &out, std::string &errDescription, uint32_t &errLineno, xhp_flags_t &flags) {
XHPResult xhp_preprocess(std::string &in, std::string &out, std::string &errDescription, uint32_t &errLineno, const xhp_flags_t &flags) {

// Does this maybe contain XHP?
// Early bail if the code doesn't contain anything that looks like XHP
char* buffer = const_cast<char*>(in.c_str());
bool maybe_xhp = false;
for (const char* jj = buffer; *jj; ++jj) {
if (*jj == '<') { // </a>
if (jj[1] == '/') {
maybe_xhp = true;
break;
}
} else if (*jj == '/') { // <a />
if (jj[1] == '>') {
maybe_xhp = true;
break;
}
} else if (*jj == ':') { // :fb:thing
if ((jj[1] >= 'a' && jj[1] <= 'z') ||
(jj[1] >= 'A' && jj[1] <= 'Z') ||
(jj[1] >= '0' && jj[1] <= '9')) {
maybe_xhp = true;
break;
} else if (jj[1] == ':') {
++jj;
}
} else if (!memcmp(jj, "element", 7)) {
maybe_xhp = true;
break;
} else if (*jj == ')') { // foo()['etc']
do {
++jj;
} while (*jj == ' ' || *jj == '\r' || *jj == '\n' || *jj == '\t');
if (*jj == '[') {
maybe_xhp = true;
break;
}
}
}

// Early bail
if (!maybe_xhp) {
if (!xhp_fastpath(buffer, in.length(), flags)) {
return XHPDidNothing;
}

Expand Down
2 changes: 1 addition & 1 deletion xhp/xhp_preprocess.hpp
Expand Up @@ -24,4 +24,4 @@ XHPResult xhp_preprocess(std::string &in, std::string &out, bool isEval,

XHPResult xhp_preprocess(std::string &in, std::string &out,
std::string &errDescription, uint32_t &errLineno,
xhp_flags_t &flags);
const xhp_flags_t &flags);

0 comments on commit dbaca37

Please sign in to comment.