Skip to content
This repository
tag: v317
Fetching contributors…

Cannot retrieve contributors at this time

file 68 lines (60 sloc) 3.107 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
(*
Copyright © 2011 MLstate

This file is part of OPA.

OPA is free software: you can redistribute it and/or modify it under the
terms of the GNU Affero General Public License, version 3, as published by
the Free Software Foundation.

OPA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
more details.

You should have received a copy of the GNU Affero General Public License
along with OPA. If not, see <http://www.gnu.org/licenses/>.
*)
(* depends *)
module String = BaseString

(* -- *)

let except_html_char = "\"&<>" (* normal characters that *should* be escaped in html *)
let allowed_special_char = "\t\n\r" (* special characters that *should not* be escaped in html *)

let string_of_int i =
  let r1 i = Char.chr (((i mod 64) + 128)) in
  let r2 i = Char.chr (((i / 64) mod 64) + 128) in
  if i < 128 then String.make 1 (Char.chr i)
  else if i < 2048 then
    let t = [| Char.chr ((i / 64) + 192); r1 i |] in
    String.init 2 (fun n -> t.(n))
  else if i < 65536 then
    let t = [| Char.chr (((i / 4096) mod 16) + 224); r2 i; r1 i |] in
    String.init 3 (fun n -> t.(n))
  else if i < 2097152 then
    let t = [| Char.chr (((i / 262144) mod 8) + 240); Char.chr (((i / 4096) mod 64) + 128); r2 i; r1 i |] in
    String.init 4 (fun n -> t.(n))
  else assert false

(* takes any utf8 string and converts it into html entities (escape *all* characters, which is not avised in general; please perform checks before calling it) *)
let htmlentities src =
  let rec pow n p = if p = 0 then 1 else (pow n (p - 1)) * n in
  let len = String.length src in
  let soi = Pervasives.string_of_int in
  let rec aux nbr pos i lst =
    if i = len then lst
    else
      let chr = Char.code src.[i] in
      if pos = (-1) then
        if chr >= 240 then aux ((chr mod 16) * 262144) 2 (i + 1) lst
        else if chr >= 224 then aux ((chr mod 32) * 4096) 1 (i + 1) lst
        else if chr >= 192 then aux ((chr mod 64) * 64) 0 (i + 1) lst
        else if (chr < 128 && (chr >= 32 || String.contains allowed_special_char src.[i])) || String.contains except_html_char src.[i]
        then aux 0 (-1) (i + 1) (chr::lst)
        else
          (* between 128 and 192: malformed UTF-8; we should absolutely not fail, but return the usual black question mark for invalid symbols *)
          (* between 0 and 31, except allowed_special_char, the entities seem to be illegal, so we project again to the question mark *)
          begin
            (* Journal.Interface.warning (Printf.sprintf "Warning: htmlentities: invalid UTF-8: in string %s at position %d on character of code %d" src i chr); *)
            aux 0 (-1) (i + 1) (65533::lst)
          end
      else
        let nbr = nbr + ((chr mod 64) * (pow 64 pos)) in
        if pos = 0 then aux 0 (-1) (i + 1) (nbr::lst)
        else aux nbr (pos - 1) (i + 1) lst
  in
  List.fold_right (fun item acc -> acc^"&#"^(soi item)^";") (aux 0 (-1) 0 []) ""
Something went wrong with that request. Please try again.