Skip to content
Browse files

[enhance] libbsl: first version of new directive parser.

  • Loading branch information...
1 parent 5746a54 commit 1f10c508fff55404431a84a79d4254d7a5969a52 @arthuraa arthuraa committed Aug 3, 2012
View
1 compiler/jslang/jsLex.mli
@@ -135,6 +135,7 @@ type token =
| CommentLine of string
| CommentTag of string * string
+val string_of_token : token -> string
val init_lexer : unit -> unit
val lex : bool -> Lexing.lexbuf -> token
val stream : bool -> Lexing.lexbuf -> token Stream.t
View
107 compiler/jslang/jsLex.mll
@@ -127,6 +127,113 @@
| CommentLine of string
| CommentTag of string * string
+(* used for debug only, not error messages *)
+let string_of_token = function
+ | Break -> "break"
+ | Case -> "case"
+ | Catch -> "catch"
+ | Continue -> "continue"
+ | Debugger -> "debugger"
+ | Default -> "default"
+ | Delete -> "delete"
+ | Do -> "do"
+ | Else -> "else"
+ | Finally -> "finally"
+ | For -> "for"
+ | Function -> "function"
+ | If -> "if"
+ | In -> "in"
+ | Instanceof -> "instanceof"
+ | New -> "new"
+ | Return -> "return"
+ | Switch -> "switch"
+ | This -> "this"
+ | Throw -> "throw"
+ | Try -> "try"
+ | Typeof -> "typeof"
+ | Var -> "var"
+ | Void -> "void"
+ | While -> "while"
+ | With -> "with"
+ | Class -> "class"
+ | Const -> "const"
+ | Enum -> "enum"
+ | Export -> "export"
+ | Extends -> "extends"
+ | Import -> "import"
+ | Super -> "super"
+ | Implements -> "implements"
+ | Interface -> "interface"
+ | Let -> "let"
+ | Package -> "package"
+ | Private -> "private"
+ | Protected -> "protected"
+ | Public -> "public"
+ | Static -> "static"
+ | Yield -> "yield"
+ | True -> "true"
+ | False -> "false"
+ | Null -> "null"
+ | Regexp (s1,s2) -> Printf.sprintf "Regexp /%s/%s" s1 s2
+ | String s -> Printf.sprintf "%S" s
+ | Ident s -> "Ident " ^ s
+ | Integer s -> s
+ | LT -> "LT"
+ | EOF -> "EOF"
+ | Lbracket -> "["
+ | Rbracket -> "]"
+ | Lcurly -> "{"
+ | Rcurly -> "}"
+ | Lparen -> "("
+ | Rparen -> ")"
+ | Dot -> "."
+ | Semic -> ";"
+ | Comma -> ","
+ | Lt -> "<"
+ | Gt -> ">"
+ | Le -> "<="
+ | Ge -> ">="
+ | EqualEqual -> "=="
+ | BangEqual -> "!="
+ | EqualEqualEqual -> "==="
+ | BangEqualEqual -> "!=="
+ | Plus -> "+"
+ | Minus -> "-"
+ | Times -> "*"
+ | Percent -> "%"
+ | PlusPlus -> "++"
+ | MinusMinus -> "--"
+ | LtLt -> "<<"
+ | GtGt -> ">>"
+ | GtGtGt -> ">>>"
+ | Amper -> "&"
+ | Bar -> "|"
+ | Chapeau -> "^"
+ | Bang -> "!"
+ | Tilda -> "~"
+ | AmperAmper -> "&&"
+ | BarBar -> "||"
+ | Question -> "?"
+ | Colon -> ":"
+ | Equal -> "="
+ | PlusEqual -> "+="
+ | MinusEqual -> "-="
+ | TimesEqual -> "*="
+ | PercentEqual -> "%="
+ | LtLtEqual -> "<<="
+ | GtGtEqual -> ">>="
+ | GtGtGtEqual -> ">>>="
+ | AmperEqual -> "&="
+ | BarEqual -> "|="
+ | ChapeauEqual -> "^="
+ | Div -> "/"
+ | DivEqual -> "/="
+ | OpenDocComment -> "/**"
+ | CloseComment -> "*/"
+ | CommentLine s -> s
+ | CommentTag (t, _) -> "@" ^ t
+
+
(* the ecmascript defines two kinds of lexing: for the places in the ast
* where a token starting with / is a regular expression, and the places where
* it is the division or the division-assignment /=
View
106 compiler/jslang/jsParse.ml
@@ -23,112 +23,6 @@ let dummy_pos = FilePos.nopos "jsParse"
let label () = Annot.next_label dummy_pos
let native_ident = JsCons.Ident.native
-(* used for debug only, not error messages *)
-let string_of_token = function
- | Break -> "break"
- | Case -> "case"
- | Catch -> "catch"
- | Continue -> "continue"
- | Debugger -> "debugger"
- | Default -> "default"
- | Delete -> "delete"
- | Do -> "do"
- | Else -> "else"
- | Finally -> "finally"
- | For -> "for"
- | Function -> "function"
- | If -> "if"
- | In -> "in"
- | Instanceof -> "instanceof"
- | New -> "new"
- | Return -> "return"
- | Switch -> "switch"
- | This -> "this"
- | Throw -> "throw"
- | Try -> "try"
- | Typeof -> "typeof"
- | Var -> "var"
- | Void -> "void"
- | While -> "while"
- | With -> "with"
- | Class -> "class"
- | Const -> "const"
- | Enum -> "enum"
- | Export -> "export"
- | Extends -> "extends"
- | Import -> "import"
- | Super -> "super"
- | Implements -> "implements"
- | Interface -> "interface"
- | Let -> "let"
- | Package -> "package"
- | Private -> "private"
- | Protected -> "protected"
- | Public -> "public"
- | Static -> "static"
- | Yield -> "yield"
- | True -> "true"
- | False -> "false"
- | Null -> "null"
- | Regexp (s1,s2) -> Printf.sprintf "Regexp /%s/%s" s1 s2
- | String s -> Printf.sprintf "%S" s
- | Ident s -> "Ident " ^ s
- | Integer s -> s
- | LT -> "LT"
- | EOF -> "EOF"
- | Lbracket -> "["
- | Rbracket -> "]"
- | Lcurly -> "{"
- | Rcurly -> "}"
- | Lparen -> "("
- | Rparen -> ")"
- | Dot -> "."
- | Semic -> ";"
- | Comma -> ","
- | Lt -> "<"
- | Gt -> ">"
- | Le -> "<="
- | Ge -> ">="
- | EqualEqual -> "=="
- | BangEqual -> "!="
- | EqualEqualEqual -> "==="
- | BangEqualEqual -> "!=="
- | Plus -> "+"
- | Minus -> "-"
- | Times -> "*"
- | Percent -> "%"
- | PlusPlus -> "++"
- | MinusMinus -> "--"
- | LtLt -> "<<"
- | GtGt -> ">>"
- | GtGtGt -> ">>>"
- | Amper -> "&"
- | Bar -> "|"
- | Chapeau -> "^"
- | Bang -> "!"
- | Tilda -> "~"
- | AmperAmper -> "&&"
- | BarBar -> "||"
- | Question -> "?"
- | Colon -> ":"
- | Equal -> "="
- | PlusEqual -> "+="
- | MinusEqual -> "-="
- | TimesEqual -> "*="
- | PercentEqual -> "%="
- | LtLtEqual -> "<<="
- | GtGtEqual -> ">>="
- | GtGtGtEqual -> ">>>="
- | AmperEqual -> "&="
- | BarEqual -> "|="
- | ChapeauEqual -> "^="
- | Div -> "/"
- | DivEqual -> "/="
- | OpenDocComment -> "/**"
- | CloseComment -> "*/"
- | CommentLine s -> s
- | CommentTag (t, _) -> "@" ^ t
-
(* redefining the modules Stream allows us to 'override' the syntax of streams
* the new peek, junk and empty function look at the first non-newline token
* (which allows us to write almost the whole parser while implicitely
View
1 compiler/libbsl.mllib
@@ -28,3 +28,4 @@ compiler/libbsl/BslError
compiler/libbsl/BslArgs
compiler/libbsl/BslTracker
compiler/libbsl/BslGeneration
+compiler/libbsl/BslJsParse
View
3 compiler/libbsl/_tags
@@ -42,3 +42,6 @@
<bslbrowser.ml> : with_mlstate_debug
<bslOcaml.ml> : with_mlstate_debug
<bslConf.ml> : with_mlstate_debug
+
+<bslJsParse.ml>: use_camlp4, camlp4orf_fixed
+<bslJsParse.ml>: use_jslang
View
329 compiler/libbsl/bslJsParse.ml
@@ -0,0 +1,329 @@
+(*
+ Copyright © 2012 MLstate
+
+ This file is part of Opa.
+
+ Opa is free software: you can redistribute it and/or modify it under the
+ terms of the GNU Affero General Public License, version 3, as published by
+ the Free Software Foundation.
+
+ Opa is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
+ more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with Opa. If not, see <http://www.gnu.org/licenses/>.
+*)
+
+module List = BaseList
+module String = BaseString
+module BD = BslDirectives
+module BT = BslTypes
+
+open JsLex
+
+(* No source positions for now *)
+let dummy_pos = FilePos.nopos "BslJsParse"
+let label () = Annot.next_label dummy_pos
+
+type tag = string
+type message = string
+
+let whitespace = Str.regexp "[ \t]*"
+
+(** When trying to interpret a comment as a bsl directive, we do the
+ following:
+
+ - One group of tags, such as "@opaName" or "@raise", is used to
+ build a BslTags.t value, that should be associated with a
+ directive later.
+
+ - Other tags, such as "@externType" and "@register", denote the
+ actual bsl directives. They are mutually exclusive: there can be
+ no comment with two or more of them.
+
+ - Tags that are recognized but badly formatted (e.g. "@register I
+ have no type") trigger an error. Other tags (such as "@param") are
+ silently ignored.
+
+ - Comments that have no directives are ignored as well.
+*)
+
+(* Example:
+
+ /**
+ * @register {int -> int} my_bypass
+ * @cpsBypass
+ *
+ */
+ function my_bypass(val) {
+ return 4;
+ }
+
+*)
+
+(** Builds a set of bsl tags based on comment annotations *)
+let collect_bsl_tags tags =
+
+ (* Reads tags with an associated set of strings *)
+ let string_set tag update bsl_tags =
+ let aux (tag', args) =
+ if tag = tag' then
+ let attributes = Str.split whitespace args in
+ Some (StringSet.from_list attributes)
+ else
+ None
+ in
+ match List.find_map aux tags with
+ | Some attributes -> Some (update bsl_tags attributes)
+ | None -> Some bsl_tags
+ in
+
+ (* Sets a flag to true if it finds the corresponding tags *)
+ let bool tag update bsl_tags =
+ let rec aux tags =
+ match tags with
+ | [] -> Some bsl_tags
+ | (tag', args) :: rest ->
+ if tag <> tag' then
+ aux rest
+ else if Str.string_match whitespace args 0 then
+ Some (update bsl_tags true)
+ else
+ None
+ in aux tags
+ in
+
+ (* List of tags, their formats and how they update a BslTags.t *)
+ let updates = [
+ string_set "noProjection" (fun t v ->
+ {t with BslTags.no_projection = Some v}
+ );
+ bool "opaName" (fun t v -> {t with BslTags.opaname = v});
+ bool "raise" (fun t v -> {t with BslTags.raise_ = v});
+ bool "cpsBypass" (fun t v -> {t with BslTags.cps_bypass = v});
+ bool "opacapi" (fun t v -> {t with BslTags.opacapi = v});
+ ] in
+
+ let rec try_updates bsl_tags updates =
+ match updates with
+ | [] -> Some bsl_tags
+ | update :: rest -> (
+ match update bsl_tags with
+ | Some bsl_tags' -> try_updates bsl_tags' rest
+ | None -> None
+ )
+ in
+
+ try_updates BslTags.default updates
+
+(** The next set of tags corresponds to directive tags. We try to
+ extract a directive from each comment line. In the end, we check
+ if a unique directive was defined or not. *)
+
+type global_read_result = [ `no_occurrences
+ | `wrong_format of message
+ | `multiple_occurrences of tag
+ | `found of tag * BD.bypasslang_directive ]
+
+type local_read_result = [ `wrong_format of message
+ | `found of BD.bypasslang_directive ]
+
+(** Extracts all occurrences of tag [keyword] *)
+let try_read_args tag
+ (arg_reader : string -> local_read_result)
+ tags =
+ let rec aux acc tags =
+ match tags with
+ | [] ->
+ Option.default_map `no_occurrences
+ (fun dir -> `found (tag, dir))
+ acc
+ | (tag', args) :: rest ->
+ if tag <> tag' then
+ aux acc rest
+ else if Option.is_some acc then
+ `multiple_occurrences tag
+ else
+ match arg_reader args with
+ | `wrong_format _ as s -> s
+ | `found directive -> aux (Some directive) rest
+ in
+ aux None tags
+
+let identifier_regexp =
+ Str.regexp "^[ \t]*\\([a-zA-Z][a-zA-Z0-9]*\\)[ \t]*$"
+
+let get_identifier string =
+ if Str.string_match identifier_regexp string 0 then
+ Some (Str.matched_group 1 string)
+ else
+ None
+
+(** Reads something of the form "@tag I.am.a_type('var1, 'var2)" *)
+let extract_type_declaration =
+ let type_regexp =
+ Str.regexp (
+ "^[ \t]*" ^ (* whitespace *)
+ "\\([^( \t]*\\)" ^ (* type name *)
+ "\\((\\([^)]*\\))\\)?" ^ (* optional type vars *)
+ "[ \t]*$" (* ending white space *)
+ )
+ in
+ let split_vars_regexp = Str.regexp "[ \t]*,[ \t]*" in
+ let var_regexp = Str.regexp "'[a-z]*" in
+ fun tag constructor ->
+ try_read_args tag (fun args ->
+ if Str.string_match type_regexp args 0 then
+ let name = Str.matched_group 1 args in
+ try
+ let vars = Str.matched_group 3 args in
+ let vars = Str.split split_vars_regexp vars in
+ if List.for_all (fun s -> Str.string_match var_regexp s 0) vars then
+ let vars = List.map (fun var ->
+ let var = Str.string_after var 1 in
+ QmlTypeVars.TypeVar.next ~name:var ()
+ ) vars in
+ `found (constructor name vars)
+ else
+ `wrong_format (
+ Printf.sprintf
+ "Couldn't read type in @%s directive"
+ tag
+ )
+ with
+ Not_found ->
+ `found (constructor name [])
+ else
+ `wrong_format (
+ Printf.sprintf
+ "Directive @%s requires a type"
+ tag
+ )
+ )
+
+(** The tag readers, one for each recognized tag *)
+
+let extract_external_type_def =
+ extract_type_declaration "externalType" (fun ty args ->
+ BD.ExternalTypeDef (ty, args, None)
+ )
+
+let extract_opa_type_def =
+ extract_type_declaration "opaType" (fun ty args ->
+ BD.OpaTypeDef (ty, args)
+ )
+
+let extract_module =
+ try_read_args "module" (fun args ->
+ match get_identifier args with
+ | Some i -> `found (BD.Module (i, None))
+ | None -> `wrong_format "Expected identifier in module declaration"
+ )
+
+let extract_end_module =
+ try_read_args "endModule" (fun args ->
+ if args = "" then
+ `found BD.EndModule
+ else
+ `wrong_format "@endModule takes no arguments"
+ )
+
+let extract_register =
+ let re = Str.regexp (
+ "^{\\([^}]*\\)}[ \t]+" ^ (* Type between brackets *)
+ "\\([a-zA-Z][a-zA-Z0-9]*\\)[ \t]*" ^ (* Bypass name *)
+ "\\([^ \t]+\\)?[ \t]*$" (* Optional source code *)
+ ) in
+ try_read_args "register" (fun args ->
+ if Str.string_match re args 0 then
+ (* TODO:
+ - Parse type
+ - Find a way of telling apart injected and not injected with
+ different name *)
+ let _ty = Str.matched_group 1 args in
+ let name = Str.matched_group 2 args in
+ try
+ let source = Str.matched_group 3 args in
+ `found (BD.Register (name, Some source, true, BT.Void dummy_pos))
+ with
+ Not_found ->
+ `found (BD.Register (name, None, false, BT.Void dummy_pos))
+ else
+ `wrong_format
+ "Format of @register is \"@register {type} key [optional source]\""
+ )
+
+let readers = [
+ extract_external_type_def;
+ extract_opa_type_def;
+ extract_module;
+ extract_end_module;
+ extract_register;
+]
+
+type extract_result =
+| NoOccurrences
+| Error of string
+| Found of BslTags.t * BD.bypasslang_directive
+
+(** Try to extract a bsl directive from a list of tags, *)
+let maybe_extract_directive tags : extract_result =
+ let extracted_directives = List.map (fun extract ->
+ extract tags
+ ) readers in
+ let rec aux acc extracted_directives =
+ match extracted_directives with
+ | [] -> (
+ match acc with
+ | None -> NoOccurrences
+ | Some (_, d) -> (
+ match collect_bsl_tags tags with
+ | Some bsl_tags -> Found (bsl_tags, d)
+ | None -> Error "Badly formatted BSL tags"
+ )
+ )
+ | extracted :: rest -> (
+ match extracted with
+ | `no_occurrences -> aux acc rest
+ | `found (name, directive) -> (
+ match acc with
+ | None -> aux (Some (name, directive)) rest
+ | Some (name', _directive') ->
+ Error (
+ Printf.sprintf
+ "Multiple directives have been found: @%s and @%s"
+ name name'
+ )
+ )
+ | `wrong_format message -> Error message
+ | `multiple_occurrences name ->
+ Error (
+ Printf.sprintf "Multiple occurrences of tag @%s" name
+ )
+ )
+ in
+ aux None extracted_directives
+
+let rec comment_tags = parser
+ | [< 'CommentLine _; tags = comment_tags >] -> tags
+ | [< 'CommentTag (t, s); tags = comment_tags >] ->
+ (t, String.replace s "\\\n" "") :: tags
+ | [< >] -> []
+
+let rec doc_comment acc = parser
+ | [< 'OpenDocComment; tags = comment_tags; 'CloseComment; stream >] -> (
+ match maybe_extract_directive tags with
+ | NoOccurrences -> doc_comment acc stream
+ | Error e -> `error e
+ | Found (bsl_tags, d) ->
+ (* TODO: analyze next parts *)
+ doc_comment ((bsl_tags, d) :: acc) stream
+ )
+ | [< _ = Stream.next; stream >] -> doc_comment acc stream
+ | [< >] -> `success (List.rev acc)
+
+let parse filename =
+ let stream, _lexbuf = JsLex.stream_of_file ~lex_comments:true filename in
+ doc_comment [] stream
View
31 compiler/libbsl/bslRegisterLib.ml
@@ -1572,16 +1572,37 @@ let parse_opa_file pprocess options f =
BRParse.parse_bslregisterparser_opalang f in
( parsed : BslDirectives.opalang_decorated_file )
-
-
-
let parse_bypass_file pprocess options filename =
let set_last_directive d = BRState.set_last_directive d in
let process_directive = bypass_process_directive in
let parsed = parse_file_factory pprocess process_directive set_last_directive options
BRParse.parse_bslregisterparser_bypasslang filename in
( parsed : BslDirectives.bypasslang_decorated_file )
+let parse_js_bypass_file_new filename =
+ match BslJsParse.parse filename with
+ | `error e ->
+ OManager.error "Error while reading file %s: %s"
+ filename e
+ | `success directives ->
+ List.iter (fun (_, directive) ->
+ match directive with
+ | BDir.ExternalTypeDef (key, _vars, _) ->
+ Format.printf "external type %s\n%!" key
+ | BDir.OpaTypeDef (key, _vars) ->
+ Format.printf "opa type def %s\n%!" key
+ | BDir.Register (key, _, _, _) ->
+ Format.printf "register %s\n%!" key
+ | _ ->
+ Format.printf "other\n%!"
+ ) directives;
+ failwith "not implemented"
+
+let parse_js_bypass_file pprocess options filename =
+ if options.BI.js_classic_bypass_syntax then
+ parse_bypass_file pprocess options filename
+ else
+ parse_js_bypass_file_new filename
(*
Main preprocessor
@@ -1628,7 +1649,7 @@ let preprocess_file session filename =
| "js" ->
- let parsed_file = parse_bypass_file session.s_pprocess session.s_options filename in
+ let parsed_file = parse_js_bypass_file session.s_pprocess session.s_options filename in
let s_rev_js_parsed_files =
parsed_file :: session.s_rev_js_parsed_files in
@@ -1639,7 +1660,7 @@ let preprocess_file session filename =
session
| "nodejs" ->
- let parsed_file = parse_bypass_file session.s_pprocess session.s_options filename in
+ let parsed_file = parse_js_bypass_file session.s_pprocess session.s_options filename in
let s_rev_nodejs_parsed_files =
parsed_file :: session.s_rev_nodejs_parsed_files in

0 comments on commit 1f10c50

Please sign in to comment.
Something went wrong with that request. Please try again.