Skip to content

Commit

Permalink
[enhance] xhtml.opa: reduces the need for html escaping when serving …
Browse files Browse the repository at this point in the history
…utf-8 page

Provide more readable html source and unlimited speed improvement on complex utf-8 characters (e.g. Chinese)

CHANGELOG Keep multibyte characters "as is" during html escaping when the target resource is utf-8
  • Loading branch information
OpaOnWindowsNow committed May 16, 2012
1 parent 81d58fc commit f94cd81
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 37 deletions.
6 changes: 3 additions & 3 deletions opabsl/jsbsl/bslString.js
@@ -1,5 +1,5 @@
/*
Copyright © 2011 MLstate
Copyright © 2011, 2012 MLstate
This file is part of OPA.
Expand Down Expand Up @@ -119,8 +119,8 @@
return result;
}

##register escapeHTML : string -> string
##args(someText)
##register escapeHTML : bool, string -> string
##args(_,someText)
{
var div = document.createElement('div');
var text = document.createTextNode(someText);
Expand Down
23 changes: 14 additions & 9 deletions opabsl/mlbsl/bslString.ml
@@ -1,5 +1,5 @@
(*
Copyright © 2011 MLstate
Copyright © 2011, 2012 MLstate
This file is part of OPA.
Expand Down Expand Up @@ -70,15 +70,20 @@ let have_to_be_escaped_table =
code >= 128 || String.contains Base.Utf8.except_html_char chr || (code < 32 && not (String.contains Base.Utf8.allowed_special_char chr)) in
Array.init 256 (fun code -> have_to_be_escaped (Char.unsafe_chr code))
(* Table-backed test: [have_to_be_escaped c] reads the precomputed
   [have_to_be_escaped_table] entry stored at the byte code of [c]. *)
let have_to_be_escaped (c : char) =
  let code = Char.code c in
  have_to_be_escaped_table.(code)
(* Negated table lookup: true exactly when the [have_to_be_escaped_table]
   entry stored at the byte code of [c] is false. *)
let not_have_to_be_escaped (c : char) =
  let must_escape = have_to_be_escaped_table.(Char.code c) in
  not must_escape

(*Fails with UTF-8 -- use Cactutf?*)
(*TODO: This looks slow -- constructing lists ?*)
(* I think it works ok with utf8 because whenever the code is greater than 128
* (ie we have a character of more than one byte, BaseString.len_from is used to
* agglomerate the following bytes whose code is more than 128 (which is the end
* of the unicode character if is the input is well formed) *)
##register escapeHTML : string -> string
let escapeHTML src =

(* [utf8_byte_have_to_be_escaped c] is the per-byte predicate selected by
   [escapeHTML] when its utf8 flag is set. Only the ampersand, the two angle
   brackets and the double quote are reported as needing an escape; every
   other byte, the single quote included, is currently left as is. *)
let utf8_byte_have_to_be_escaped (c : char) : bool =
  match c with
  | '&' | '<' | '>' | '"' -> true
  | _ -> false

(* This thing works with utf-8 because
- if utf8 encoding is ok, no 'one byte utf8 char' needs to be escaped if the html has utf-8 encoding,
- if utf8 is not ok, all bytes of characters longer than one byte are seen as needing escaping *)
##register escapeHTML : bool, string -> string
let escapeHTML utf8 src =
let have_to_be_escaped = if utf8 then utf8_byte_have_to_be_escaped else have_to_be_escaped in
if BaseString.exists have_to_be_escaped src then
let len = String.length src in
let rec aux pos acc =
Expand Down
3 changes: 2 additions & 1 deletion stdlib/core/string.opa
Expand Up @@ -283,7 +283,8 @@ String =
remove_accents = %% Bslstring.remove_accents %%

/**
* Escapes every character of the string with &#xxx escapes
* Escapes every character of the string with &#xxx escapes if needed
* The boolean indicates whether we target a utf-8 user or not.
*
* Manual use of this function is strongly discouraged, as it may cause
* security risks
Expand Down
44 changes: 24 additions & 20 deletions stdlib/core/web/resource/resource_private.opa
Expand Up @@ -89,6 +89,7 @@ type resource_cache_customizers = {
}

type resource_cache_entry = {
doctype : option(html_resource_doctype)
uri : string
customizers : resource_cache_customizers
body : xhtml
Expand Down Expand Up @@ -691,15 +692,17 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
/**
* A cache for generation of xhtml resources
*/
// Logging helper: emits a notice under the "Resource Private" label with [t] interpolated into the message.
// It is handed to CoreProfiler.instrument in the BENCH_SERVER branch of the resource computation.
@private print_resource_time(t) = Log.notice("Resource Private", "Html resource computed in {t}s")

@private cache_for_xhtml : resource_cache_entry -> {body:xhtml; head:xhtml; mime_type:string} =
compute_result(body:xhtml, customizations):{body:xhtml head:xhtml mime_type:string} =
compute_result(doctype, body:xhtml, customizations):{body:xhtml head:xhtml mime_type:string} =
doctype = doctype ? default_doctype.get()
{html=body_content js=raw_js_content} =
#<Ifstatic:BENCH_SERVER>
print_t(t) = Log.notice("Resource Private", "resource computed in {t}s")
CoreProfiler.instrument(1, print_t){->Xhtml.prepare_for_export_as_xml_blocks(body)}
#<Else>
Xhtml.prepare_for_export_as_xml_blocks(body)
#<End>
#<Ifstatic:BENCH_SERVER> CoreProfiler.instrument(1, print_resource_time){ -> #<End>
match doctype
{custom=_} -> Xhtml.prepare_for_export_as_xml_blocks_non_utf8(body)
_ -> Xhtml.prepare_for_export_as_xml_blocks(body)
#<Ifstatic:BENCH_SERVER> } #<End>

{body = {html=body_custom js=raw_js_body_custom}
head = {html=head_custom js=raw_js_head_custom}
Expand All @@ -711,7 +714,7 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
head = head_custom
//Additional IE-specific fix -- note that the mime type can be ignored if the resource uses [override_mime_type]
mime_type =
match default_doctype.get() with
match doctype
| {xhtml1_1} -> (
match user_compat.renderer with
/* hack for IE (considers application/xhtml+xml as files to save) */
Expand All @@ -728,24 +731,24 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
| _ -> "text/html"
end }

compute_everything(customizers, body:xhtml, user_agent) =
compute_everything(doctype, customizers, body:xhtml, user_agent) =
//do jlog("RECOMPUTE")
customizations = compute_customization(customizers, user_agent)
compiled_result = compute_result(body, customizations)
compiled_result = compute_result(doctype, body, customizations)
compiled_result


f({uri=_ ~customizers ~body user_agent=_}) =
f(~{uri=_ customizers doctype body user_agent=_}) =
customizer_cache = Cache.make(
Cache.Negotiator.always_necessary(user_agent -> compute_customization(customizers, user_agent)),
{Cache.default_options with size_limit = {some = 30}})

result_cache = Cache.make(
Cache.Negotiator.always_necessary(user_agent -> compute_result(body, customizer_cache.get(user_agent))),
Cache.Negotiator.always_necessary(user_agent -> compute_result(doctype, body, customizer_cache.get(user_agent))),
{Cache.default_options with size_limit = {some = 30}})

{cache_everything =
{ ~customizers ~body ~result_cache ~customizer_cache } }
{ ~customizers ~body ~result_cache ~customizer_cache } }

cache_options = {Cache.default_options with
size_limit = {some = 30}
Expand All @@ -763,20 +766,20 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom

strategy(x:resource_cache_entry) =
(
{uri=_ ~customizers ~body ~user_agent} = x
if cache_xhtml_options.disable then compute_everything(customizers, body, user_agent)
{uri=_ ~customizers ~doctype ~body ~user_agent} = x
if cache_xhtml_options.disable then compute_everything(doctype, customizers, body, user_agent)
else
match global_cache.get(x) with
| {no_caching} -> //results seems variable, cache deactivated for this [uri]
compute_everything(customizers, body, user_agent)
compute_everything(doctype, customizers, body, user_agent)
|~{cache_customizers} -> //the body changes, but the customizations don't seem to
if customizer_equality(cache_customizers.customizers,customizers) then
//customizers haven't changed, that's a good sign, let's continue
compute_result(body, cache_customizers.customizer_cache.get(user_agent))
compute_result(doctype, body, cache_customizers.customizer_cache.get(user_agent))
else
//customizers have changed, the resource is unstable
do global_cache.put(x, {no_caching}, void) // fully deactivate caching for this resource
compute_everything(customizers, body, user_agent)
compute_everything(doctype, customizers, body, user_agent)
|~{cache_everything} -> //there's something in the cache, let's check if it's correct
if customizer_equality(cache_everything.customizers,customizers) then
// customizers haven't changed, that's a good sign, let's continue
Expand All @@ -785,10 +788,10 @@ default_customizers = [customizer_for_google_frame,required_customizer_for_incom
else // the body has changed, fallback to caching only customization
customizer_cache = cache_everything.customizer_cache
do global_cache.put(x, {cache_customizers = {~customizers ~customizer_cache}}, void)
compute_result(body, customizer_cache.get(user_agent))
compute_result(doctype, body, customizer_cache.get(user_agent))
else //customizers have changed, the resource is unstable
do global_cache.put(x, {no_caching}, void) // fully deactivate caching for this resource
compute_everything(customizers, body, user_agent)
compute_everything(doctype, customizers, body, user_agent)
)
strategy

Expand Down Expand Up @@ -954,6 +957,7 @@ export_resource(external_css_files: list(string),
{~uri
customizers= ~{customizers external_css_files inline_css_code
external_js_files inline_js_code headers}
~doctype
~body ~user_agent})
base =
Expand Down
26 changes: 22 additions & 4 deletions stdlib/core/xhtml/xhtml.opa
Expand Up @@ -160,7 +160,7 @@ Xmlns =
to_xhtml(xmlns: xmlns): xhtml = @unsafe_cast(xmlns)

/**
* Convert a xmlns structure into a string
* Convert a xmlns structure into a string, assuming utf-8 encoding
*/
to_string : xmlns -> string = serialize_to_string

Expand Down Expand Up @@ -682,6 +682,9 @@ Xhtml =
ns_uri = "http://www.w3.org/1999/xhtml"
@private sassoc_full(namespace, name, value) : Xml.attribute = ~{ namespace name value }

/**
* Convert a xhtml structure into a string, assuming utf-8 encoding
*/
to_string = serialize_to_string

@private
Expand Down Expand Up @@ -773,7 +776,14 @@ Xhtml =
* (i.e. the tags) and [js_code] contains the event handlers and the style information as a JS
* string.
*/
prepare_for_export(_default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
prepare_for_export(default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
prepare_for_export_(true, default_ns_uri, xhtml, style_inline)

/**
 * Variant of [prepare_for_export] that forwards [false] as the first ([utf8]) argument of [prepare_for_export_].
 * That flag is handed to [String.escape_html] when text nodes and attribute values are written.
 * The remaining arguments are passed through unchanged.
 */
prepare_for_non_utf8_export(default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
prepare_for_export_(false, default_ns_uri, xhtml, style_inline)

@private
prepare_for_export_(utf8, _default_ns_uri, xhtml: xhtml, style_inline : bool): {js_code: string; html_code:string} =
(
html_buffer = Buf.create(1024)//A buffer for storing the HTML source code
js_buffer = Buf.create(1024)//A buffer for storing the JS source code -- at the last step, it is inserted in [html_buffer]
Expand All @@ -788,7 +798,7 @@ Xhtml =
rec handle_xhtml(xhtml: xhtml, depth:int) =
next = depth + 1 //next depth
match xhtml with
| ~{ text } -> Buf.add(html_buffer,String.escape_html(text))
| ~{ text } -> Buf.add(html_buffer,String.escape_html(utf8,text))
| ~{ content_unsafe } -> Buf.add(html_buffer,content_unsafe)
| ~{ fragment } -> List.iter(x -> handle_xhtml(x, depth), fragment)
| ~{ xml_dialect } ->
Expand Down Expand Up @@ -816,7 +826,7 @@ Xhtml =
do Buf.add(html_buffer,":")
Buf.add(html_buffer,name)
do Buf.add(html_buffer,"=\"")
do Buf.add(html_buffer,String.escape_html(value))
do Buf.add(html_buffer,String.escape_html(utf8,value))
Buf.add(html_buffer,"\"")

//Handle regular attributes
Expand Down Expand Up @@ -1074,6 +1084,13 @@ Xhtml =
js = of_string_unsafe(js_code)
~{html js}

/**
 * Exports [xhtml] through [prepare_for_export_] with the utf8 flag and [style_inline] both set to [false]
 * (using [ns_uri] as the namespace argument), then wraps the resulting [html_code] and [js_code]
 * strings with [of_string_unsafe].
 * @return a record [{html js}] holding the two wrapped blocks
 */
prepare_for_export_as_xml_blocks_non_utf8(xhtml: xhtml) =
// false (first argument): non utf-8 escaping; false (last argument): style_inline disabled
~{html_code js_code} = prepare_for_export_(false,ns_uri,xhtml,false)
html = of_string_unsafe(html_code)
js = of_string_unsafe(js_code)
~{html js}

/** Same as to_string */
serialize_to_string(xhtml: xhtml): string =
(
~{js_code html_code} = prepare_for_export(ns_uri,xhtml,false)
Expand All @@ -1082,6 +1099,7 @@ Xhtml =
String.flatten([html_code,_script_start,js_code,_script_end])
)

/**
 * Same as to_string but without js_code: calls [prepare_for_export] with [style_inline] set to [true]
 * and returns only the [html_code] field, discarding [js_code].
 */
serialize_as_standalone_html(xhtml: xhtml): string =
{js_code=_ ~html_code} = prepare_for_export(ns_uri,xhtml,true)
html_code
Expand Down

0 comments on commit f94cd81

Please sign in to comment.