forked from hhvm/xhp-lib
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: This greatly decreases startup time for the codebase by making the non-XHP fastpath a lot more sophisticated. One of the biggest fastpath false positives was markup in strings. The new fastpath correctly scans past strings which means we don't have to double parse nearly as many files. I implemented the fastpath in re2c which is the lightest weight scanner I could find. Basically the only thing it does is convert lists of regular expressions to huge state machines. Everything else you might expect from a scanner has to be implemented from scratch, which is just what I want. I implemented just enough of PHP's grammar to know when we're looking at actual code. Then I use the same XHP clues as I had in the old scanner. Reviewed By: dcorson Test Plan: Browsed around the lite site, etc. Revert: OK DiffCamp Revision: 69583
- Loading branch information
Marcel Laverdet
committed
Oct 18, 2009
1 parent
2100aa3
commit dbaca37
Showing
6 changed files
with
109 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#pragma once | ||
#include "xhp_preprocess.hpp" | ||
|
||
// Scans the `len`-byte buffer starting at `yy` for clues of XHP syntax.
// Returns true as soon as a clue is seen in PHP code, false otherwise.
// `flags` supplies the eval / short_tags / asp_tags settings used by the scan.
bool xhp_fastpath(const char* yy, const size_t len, const xhp_flags_t &flags); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#include "fastpath.hpp" | ||
#include <stdio.h> | ||
bool xhp_fastpath(const char* yy, const size_t len, const xhp_flags_t &flags) { | ||
const char* eob = yy + len + 1; | ||
const char* YYMARKER = NULL; | ||
enum { | ||
HTML, | ||
PHP, | ||
COMMENT_EOL, | ||
COMMENT_BLOCK | ||
} state = flags.eval ? PHP : HTML; | ||
|
||
#define YYCURSOR yy | ||
#define YYCTYPE char | ||
#define YYGETCONDITION() state | ||
#define YYFILL(ii) if (ii + YYCURSOR >= eob) return false | ||
|
||
for (;;) { | ||
/*!re2c | ||
re2c:condenumprefix = ""; | ||
re2c:yyfill:check = 0; | ||
NEWLINE = ('\r'|'\n'|'\r\n'); | ||
WHITESPACE = [ \n\r\t]+; | ||
<HTML> '<?php'([ \t]|NEWLINE) { | ||
state = PHP; | ||
continue; | ||
} | ||
<HTML> '<?='|'<?' { | ||
if (flags.short_tags) { | ||
state = PHP; | ||
} | ||
continue; | ||
} | ||
<HTML> '<%='|'<%' { | ||
if (flags.asp_tags) { | ||
state = PHP; | ||
} | ||
continue; | ||
} | ||
<HTML> [^] { continue; } | ||
<PHP> '?>'|'</script'WHITESPACE*'>' { | ||
state = HTML; | ||
continue; | ||
} | ||
<PHP> '%>' { | ||
if (flags.asp_tags) { | ||
state = PHP; | ||
} | ||
continue; | ||
} | ||
<PHP> 'b'?'\''('\\'.|'\\\n'|[^\\']+)*'\''| | ||
'b'?'\"'('\\'.|'\\\n'|[^\\"]+)*'\"' { continue; } | ||
<PHP> '#'|'//' { | ||
state = COMMENT_EOL; | ||
continue; | ||
} | ||
<PHP> '/*' { | ||
state = COMMENT_BLOCK; | ||
continue; | ||
} | ||
<PHP> '::' { continue; } | ||
<PHP> '</'| | ||
'/>'| | ||
':'[a-zA-Z0-9]| | ||
'element'| | ||
')'WHITESPACE*'[' { | ||
return true; | ||
} | ||
<PHP> [^] { continue; } | ||
<COMMENT_EOL> NEWLINE { | ||
state = PHP; | ||
continue; | ||
} | ||
<COMMENT_EOL> '?>' { | ||
state = HTML; | ||
continue; | ||
} | ||
<COMMENT_EOL> [^] { continue; } | ||
<COMMENT_BLOCK> [^*] { continue; } | ||
<COMMENT_BLOCK> '*/' { | ||
state = PHP; | ||
continue; | ||
} | ||
<COMMENT_BLOCK> [^] { continue; } | ||
*/ | ||
} | ||
return false; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters