Permalink
Browse files

[enhance] xhtml.opa: reduces the need for html escaping when serving …

…utf-8 page

Provides more readable HTML source and an unlimited speed improvement on complex UTF-8 characters (e.g. Chinese)

CHANGELOG Keep multibyte characters "as is" during html escaping when the target resource is utf-8
  • Loading branch information...
1 parent 81d58fc commit f94cd8179d4bf2da65e9bff3866a267a5137a92a @OpaOnWindowsNow OpaOnWindowsNow committed May 16, 2012
View
6 opabsl/jsbsl/bslString.js
@@ -1,5 +1,5 @@
/*
- Copyright © 2011 MLstate
+ Copyright © 2011, 2012 MLstate
This file is part of OPA.
@@ -119,8 +119,8 @@
return result;
}
-##register escapeHTML : string -> string
- ##args(someText)
+##register escapeHTML : bool, string -> string
+ ##args(_,someText)
{
var div = document.createElement('div');
var text = document.createTextNode(someText);
View
23 opabsl/mlbsl/bslString.ml
@@ -1,5 +1,5 @@
(*
- Copyright © 2011 MLstate
+ Copyright © 2011, 2012 MLstate
This file is part of OPA.
@@ -70,15 +70,20 @@ let have_to_be_escaped_table =
code >= 128 || String.contains Base.Utf8.except_html_char chr || (code < 32 && not (String.contains Base.Utf8.allowed_special_char chr)) in
Array.init 256 (fun code -> have_to_be_escaped (Char.unsafe_chr code))
let have_to_be_escaped (c:char) = have_to_be_escaped_table.(Char.code c)
+let not_have_to_be_escaped (c:char) = not (have_to_be_escaped_table.(Char.code c))
-(*Fails with UTF-8 -- use Cactutf?*)
-(*TODO: This looks slow -- constructing lists ?*)
-(* I think it works ok with utf8 because whenever the code is greater than 128
- * (ie we have a character of more than one byte, BaseString.len_from is used to
- * agglomerate the following bytes whose code is more than 128 (which is the end
- * of the unicode character if is the input is well formed) *)
-##register escapeHTML : string -> string
- let escapeHTML src =
+
+let utf8_byte_have_to_be_escaped = function
+ | '"' | '<' | '>' | '&' -> true
+(* | '\'' -> true *)
+ | _ -> false
+
+(* This works with utf-8 because:
+ - if the utf-8 encoding is valid, no 'one byte utf8 char' needs to be escaped when the html has utf-8 encoding,
+ - if the utf-8 encoding is not valid, all bytes of characters longer than one byte are seen as needing escaping *)
+##register escapeHTML : bool, string -> string
+let escapeHTML utf8 src =
+ let have_to_be_escaped = if utf8 then utf8_byte_have_to_be_escaped else have_to_be_escaped in
if BaseString.exists have_to_be_escaped src then
let len = String.length src in
let rec aux pos acc =
View
3 stdlib/core/string.opa
@@ -283,7 +283,8 @@ String =
remove_accents = %% Bslstring.remove_accents %%
/**
- * Escapes every characters of the string with &#xxx escapes
+ * Escapes every character of the string with &#xxx escapes if needed
+ * The boolean indicates whether we target a utf-8 user or not.
*
* Manual use of this function is strongly discouraged, as it may cause
* security risks
View
44 stdlib/core/web/resource/resource_private.opa
@@ -89,6 +89,7 @@ type resource_cache_customizers = {
}
type resource_cache_entry = {
+ doctype : option(html_resource_doctype)
uri : string
customizers : resource_cache_customizers
body : xhtml
@@ -691,15 +692,17 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
/**
* A cache for generation of xhtml resources
*/
+@private print_resource_time(t) = Log.notice("Resource Private", "Html resource computed in {t}s")
+
@private cache_for_xhtml : resource_cache_entry -> {body:xhtml; head:xhtml; mime_type:string} =
- compute_result(body:xhtml, customizations):{body:xhtml head:xhtml mime_type:string} =
+ compute_result(doctype, body:xhtml, customizations):{body:xhtml head:xhtml mime_type:string} =
+ doctype = doctype ? default_doctype.get()
{html=body_content js=raw_js_content} =
- #<Ifstatic:BENCH_SERVER>
- print_t(t) = Log.notice("Resource Private", "resource computed in {t}s")
- CoreProfiler.instrument(1, print_t){->Xhtml.prepare_for_export_as_xml_blocks(body)}
- #<Else>
- Xhtml.prepare_for_export_as_xml_blocks(body)
- #<End>
+ #<Ifstatic:BENCH_SERVER> CoreProfiler.instrument(1, print_resource_time){ -> #<End>
+ match doctype
+ {custom=_} -> Xhtml.prepare_for_export_as_xml_blocks_non_utf8(body)
+ _ -> Xhtml.prepare_for_export_as_xml_blocks(body)
+ #<Ifstatic:BENCH_SERVER> } #<End>
{body = {html=body_custom js=raw_js_body_custom}
head = {html=head_custom js=raw_js_head_custom}
@@ -711,7 +714,7 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
head = head_custom
//Additional IE-specific fix -- note that the mime type can be ignored if the resource uses [override_mime_type]
mime_type =
- match default_doctype.get() with
+ match doctype
| {xhtml1_1} -> (
match user_compat.renderer with
/* hack for IE (considers application/xhtml+xml as files to save) */
@@ -728,24 +731,24 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
| _ -> "text/html"
end }
- compute_everything(customizers, body:xhtml, user_agent) =
+ compute_everything(doctype, customizers, body:xhtml, user_agent) =
//do jlog("RECOMPUTE")
customizations = compute_customization(customizers, user_agent)
- compiled_result = compute_result(body, customizations)
+ compiled_result = compute_result(doctype, body, customizations)
compiled_result
- f({uri=_ ~customizers ~body user_agent=_}) =
+ f(~{uri=_ customizers doctype body user_agent=_}) =
customizer_cache = Cache.make(
Cache.Negotiator.always_necessary(user_agent -> compute_customization(customizers, user_agent)),
{Cache.default_options with size_limit = {some = 30}})
result_cache = Cache.make(
- Cache.Negotiator.always_necessary(user_agent -> compute_result(body, customizer_cache.get(user_agent))),
+ Cache.Negotiator.always_necessary(user_agent -> compute_result(doctype, body, customizer_cache.get(user_agent))),
{Cache.default_options with size_limit = {some = 30}})
{cache_everything =
- { ~customizers ~body ~result_cache ~customizer_cache } }
+ { ~customizers ~body ~result_cache ~customizer_cache } }
cache_options = {Cache.default_options with
size_limit = {some = 30}
@@ -763,20 +766,20 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
strategy(x:resource_cache_entry) =
(
- {uri=_ ~customizers ~body ~user_agent} = x
- if cache_xhtml_options.disable then compute_everything(customizers, body, user_agent)
+ {uri=_ ~customizers ~doctype ~body ~user_agent} = x
+ if cache_xhtml_options.disable then compute_everything(doctype, customizers, body, user_agent)
else
match global_cache.get(x) with
| {no_caching} -> //results seems variable, cache deactivated for this [uri]
- compute_everything(customizers, body, user_agent)
+ compute_everything(doctype, customizers, body, user_agent)
|~{cache_customizers} -> //the body changes, but the customizations don't seem to
if customizer_equality(cache_customizers.customizers,customizers) then
//customizers haven't changed, that's a good sign, let's continue
- compute_result(body, cache_customizers.customizer_cache.get(user_agent))
+ compute_result(doctype, body, cache_customizers.customizer_cache.get(user_agent))
else
//customizers have changed, the resource is unstable
do global_cache.put(x, {no_caching}, void) // fully deactivate caching for this resource
- compute_everything(customizers, body, user_agent)
+ compute_everything(doctype, customizers, body, user_agent)
|~{cache_everything} -> //there's something in the cache, let's check if it's correct
if customizer_equality(cache_everything.customizers,customizers) then
// customizers haven't changed, that's a good sign, let's continue
@@ -785,10 +788,10 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
else // the body has changed, fallback to caching only customization
customizer_cache = cache_everything.customizer_cache
do global_cache.put(x, {cache_customizers = {~customizers ~customizer_cache}}, void)
- compute_result(body, customizer_cache.get(user_agent))
+ compute_result(doctype, body, customizer_cache.get(user_agent))
else //customizers have changed, the resource is unstable
do global_cache.put(x, {no_caching}, void) // fully deactivate caching for this resource
- compute_everything(customizers, body, user_agent)
+ compute_everything(doctype, customizers, body, user_agent)
)
strategy
@@ -954,6 +957,7 @@ export_resource(external_css_files: list(string),
{~uri
customizers= ~{customizers external_css_files inline_css_code
external_js_files inline_js_code headers}
+ ~doctype
~body ~user_agent})
base =
View
26 stdlib/core/xhtml/xhtml.opa
@@ -160,7 +160,7 @@ Xmlns =
to_xhtml(xmlns: xmlns): xhtml = @unsafe_cast(xmlns)
/**
- * Convert a xmlns structure into a string
+ * Convert an xmlns structure into a string, assuming utf-8 encoding
*/
to_string : xmlns -> string = serialize_to_string
@@ -682,6 +682,9 @@ Xhtml =
ns_uri = "http://www.w3.org/1999/xhtml"
@private sassoc_full(namespace, name, value) : Xml.attribute = ~{ namespace name value }
+ /**
+ * Convert an xhtml structure into a string, assuming utf-8 encoding
+ */
to_string = serialize_to_string
@private
@@ -773,7 +776,14 @@ Xhtml =
* (i.e. the tags) and [js_code] contains the event handlers and the style information as a JS
* string.
*/
- prepare_for_export(_default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
+ prepare_for_export(default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
+ prepare_for_export_(true, default_ns_uri, xhtml, style_inline)
+
+ prepare_for_non_utf8_export(default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
+ prepare_for_export_(false, default_ns_uri, xhtml, style_inline)
+
+ @private
+ prepare_for_export_(utf8, _default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
(
html_buffer = Buf.create(1024)//A buffer for storing the HTML source code
js_buffer = Buf.create(1024)//A buffer for storing the JS source code -- at the last step, it is inserted in [html_buffer]
@@ -788,7 +798,7 @@ Xhtml =
rec handle_xhtml(xhtml: xhtml, depth:int) =
next = depth + 1 //next depth
match xhtml with
- | ~{ text } -> Buf.add(html_buffer,String.escape_html(text))
+ | ~{ text } -> Buf.add(html_buffer,String.escape_html(utf8,text))
| ~{ content_unsafe } -> Buf.add(html_buffer,content_unsafe)
| ~{ fragment } -> List.iter(x -> handle_xhtml(x, depth), fragment)
| ~{ xml_dialect } ->
@@ -816,7 +826,7 @@ Xhtml =
do Buf.add(html_buffer,":")
Buf.add(html_buffer,name)
do Buf.add(html_buffer,"=\"")
- do Buf.add(html_buffer,String.escape_html(value))
+ do Buf.add(html_buffer,String.escape_html(utf8,value))
Buf.add(html_buffer,"\"")
//Handle regular attributes
@@ -1074,6 +1084,13 @@ Xhtml =
js = of_string_unsafe(js_code)
~{html js}
+ prepare_for_export_as_xml_blocks_non_utf8(xhtml: xhtml) =
+ ~{html_code js_code} = prepare_for_export_(false,ns_uri,xhtml,false)
+ html = of_string_unsafe(html_code)
+ js = of_string_unsafe(js_code)
+ ~{html js}
+
+ /** Same as to_string */
serialize_to_string(xhtml: xhtml): string =
(
~{js_code html_code} = prepare_for_export(ns_uri,xhtml,false)
@@ -1082,6 +1099,7 @@ Xhtml =
String.flatten([html_code,_script_start,js_code,_script_end])
)
+ /** Same as to_string but without js_code */
serialize_as_standalone_html(xhtml: xhtml): string =
{js_code=_ ~html_code} = prepare_for_export(ns_uri,xhtml,true)
html_code

0 comments on commit f94cd81

Please sign in to comment.