Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 167 lines (142 sloc) 5.788 kB
fccc685 Initial open-source release
MLstate authored
1 (*
b0fead4 @OpaOnWindowsNow [feature] libbase/jsonLex.ml: anormal float extension
OpaOnWindowsNow authored
2 Copyright © 2011, 2012 MLstate
fccc685 Initial open-source release
MLstate authored
3
4 This file is part of OPA.
5
6 OPA is free software: you can redistribute it and/or modify it under the
7 terms of the GNU Affero General Public License, version 3, as published by
8 the Free Software Foundation.
9
10 OPA is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
13 more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with OPA. If not, see <http://www.gnu.org/licenses/>.
17 *)
18 module JP = JsonParse
19 module JT = JsonTypes
20
21 (** JSON parser based on syntax described on http://www.json.org/
22
23 This lexer uses Ulex, although there is no need for utf-8 support here,
24 because ocamllex created problems with OPA. *)
25
26 (* Types *)
27 (* TODOk1 - Review this... *)
28 let regexp t_blank = [' ' '\t' '\n' '\r']
29
30 let regexp t_digit = ['0'-'9']
31 let regexp t_digits = t_digit+
6b295a9 @BourgerieQuentin [fix] Json parser: proper handle of floats between -1 and 0
BourgerieQuentin authored
32 let regexp t_int = '0'| '-''0' | '-'? ['1'-'9'] t_digit*
fccc685 Initial open-source release
MLstate authored
33 let regexp t_frac = "." t_digits
34 let regexp t_e = ("e"|"E") ("+"|"-")?
35 let regexp t_exp = t_e t_digits
36 let regexp t_number = (t_int|'0') t_frac? t_exp?
37
38 let regexp t_hexa_digit = ['0'-'9''A'-'F''a'-'f']
39 let regexp t_hexa = t_hexa_digit t_hexa_digit t_hexa_digit t_hexa_digit
40
41 let regexp t_ident = ['a'-'z''A'-'Z']['a'-'z''A'-'Z''0'-'9''_']*
42
43 let b = Buffer.create 100000
44
45 (** Lexing rules *)
46 let rec get_token = lexer
47
48 (* End of stream / file *)
49 | eof -> JP.EOF
50
51 | '{' -> JP.LCURLY
52 | '}' -> JP.RCURLY
53 | '[' -> JP.LBRACKET
54 | ']' -> JP.RBRACKET
55 | ':' -> JP.COLON
56 | ',' -> JP.COMMA
57 | t_int -> JP.INT (int_of_string (Ulexing.utf8_lexeme lexbuf))
b0fead4 @OpaOnWindowsNow [feature] libbase/jsonLex.ml: anormal float extension
OpaOnWindowsNow authored
58
59 | "NaN"
60 | "Infinity"
61 | "-Infinity"
fccc685 Initial open-source release
MLstate authored
62 | t_number -> JP.FLOAT (float_of_string (Ulexing.utf8_lexeme lexbuf))
b0fead4 @OpaOnWindowsNow [feature] libbase/jsonLex.ml: anormal float extension
OpaOnWindowsNow authored
63
fccc685 Initial open-source release
MLstate authored
64 | "true" -> JP.TRUE
65 | "false" -> JP.FALSE
66 | "null" -> JP.NIL
67 | "undefined" -> JP.NIL
68 | "u" -> JP.NIL
69
70 (* An ident without quotes *)
71 (* This is not from JSON spec, it's added only for compatibility in OPA *)
72 | t_ident -> JP.IDENT (Ulexing.utf8_lexeme lexbuf)
73
74 (* Spaces *)
75 | t_blank -> get_token lexbuf
76
77 (* Strings *)
78 | '\"' -> Buffer.reset b;
79 get_string b lexbuf
80 | _ -> failwith ("unknown token: " ^ (Ulexing.utf8_lexeme lexbuf))
81
82 (** Parse a string, handle escaping *)
83 and get_string s = lexer
84 | eof -> assert false
85 | '\"' -> JP.STRING (Buffer.contents s)
86 | "\\\"" -> Buffer.add_char s '"'; get_string s lexbuf
87 | "\\\\" -> Buffer.add_char s '\\'; get_string s lexbuf
88 | "\\/" -> Buffer.add_char s '/'; get_string s lexbuf
89 | "\\b" -> Buffer.add_char s '\b'; get_string s lexbuf
90 | "\\f" -> Buffer.add_char s '\012'; get_string s lexbuf
91 | "\\n" -> Buffer.add_char s '\n'; get_string s lexbuf
92 | "\\r" -> Buffer.add_char s '\r'; get_string s lexbuf
93 | "\\t" -> Buffer.add_char s '\t'; get_string s lexbuf
94 | "\\u" t_hexa ->
95 let lx = Ulexing.utf8_lexeme lexbuf in
96 let i = int_of_string ("0x"^(String.sub lx 2 4)) in
97 let res = Cactutf.cons i in
98 Buffer.add_string s res;
99 get_string s lexbuf
100 | [^'\\''\"']+ -> Buffer.add_string s (Ulexing.utf8_lexeme lexbuf); get_string s lexbuf
101 | _ -> failwith "unterminated string"
102
103 (** Print token contained on given string. Used for debug. *)
104 let print_tokens str =
105 let pr = function
106 | JP.EOF -> "eof"
107 | JP.LCURLY -> "{"
108 | JP.RCURLY -> "}"
109 | JP.COLON -> ":"
110 | JP.LBRACKET -> "["
111 | JP.RBRACKET -> "]"
112 | JP.COMMA -> ","
113 | JP.TRUE -> "true"
114 | JP.FALSE -> "false"
115 | JP.NIL -> "null"
116 | JP.STRING s -> "\"" ^ s ^ "\""
117 | JP.IDENT s -> "$" ^ s ^ "$"
118 | JP.INT i -> string_of_int i
119 | JP.FLOAT f -> string_of_float f
120 in
121
122 let buffer = Ulexing.from_utf8_string str in
123 Printf.printf "ml json: %S\n\n%!" str;
124 let tok = ref JP.NIL in
125 while !tok <> JP.EOF do
126 tok := get_token buffer;
127 Printf.printf "token = %s\n" (pr !tok)
128 done
129
130 (** Transform a string to type that you want with given constructor.
131 [transform emptyM addM emptyL addL cint cfloat cstring cbool cvoid str]
132
133 @param emptyM Constructor for an empty record
134 @param addM Constructor for add a field to a record
135 @param emptyL Constructor for an empty list
136 @param emptyL Constructor for add an element to a list
137 @param cint Constructor for an int
138 @param cfloat Constructor for a float
139 @param cstring Constructor for a string
140 @param cbool Constructor for a bool
141 @param cvoid Constructor for a void
142 @return Constructed value
143 *)
144 let transform utf8 (*emptyM addM emptyL addL cons_int cons_float cons_string cons_bool cons_void*) str =
145 (*
146 (* convert the JSON AST to an OPA type *)
147 let rec conv = function
148 | Int i -> cons_int i
149 | Float f -> cons_float f
150 | String s -> cons_string s
151 | Bool b -> cons_bool b
152 | Void -> cons_void ()
153 | Array l -> List.fold_right (fun e acc -> addL (conv e) acc) l (emptyL())
154 | Record l -> List.fold_right (fun (s,v) acc -> addM s (conv v) acc) l (emptyM())
155 in
156 *)
157 (*DEBUG - print_tokens str;*)
158 let buffer =
159 if utf8 then Ulexing.from_utf8_string str
160 else Ulexing.from_latin1_string str
161 in
162 let res() =
163 (* Trick from Alain Frisch to use Ulex with OCamlyacc *)
164 (* http://caml.inria.fr/pub/ml-archives/caml-list/2005/01/52cbc2cd2be4fc7ea0f00c39a760bf59.en.html *)
165 JP.json (fun _ -> get_token buffer) (Lexing.from_string "dummy") in
166 try Some((*conv*) (res())) with _ -> None
Something went wrong with that request. Please try again.