Skip to content
Permalink
db2ddab825
Go to file
 
 
Cannot retrieve contributors at this time
97 lines (74 sloc) 3.38 KB
type wrapped_token = token * Lexing.position * Lexing.position
(* Mostly cribbed from Steffen Smolka's 2017 ocaml-parsing boilerplate:
<https://github.com/smolkaj/ocaml-parsing> *)
(* use custom lexbuffer to keep track of source location *)
module Sedlexing = LexBuffer
open LexBuffer
(** Signals a lexing error at the provided source location. *)
exception LexError of (Lexing.position * string)
(** Signals a parsing error at the provided token and its start and end locations. *)
exception ParseError of (token * Lexing.position * Lexing.position)
let error_of_exn = let open Location in function
| LexError (pos, msg) ->
let loc = { loc_start = pos; loc_end = pos; loc_ghost = false} in
Some { loc; msg; sub=[]; if_highlight=""; }
| _ -> None
(** Register exceptions for pretty printing *)
let pp_exceptions () =
(* XXX: Jane Street Core intentionally blocks `Printexc`, without a clear explanation as to why.
* Should I not be using it?
* (See: <https://ocaml.janestreet.com/ocaml-core/latest/doc/core_kernel/Core_kernel/Printexc/> *)
Caml.Printexc.register_printer (fun exn -> Core.Option.try_with (fun () ->
Location.report_exception Format.str_formatter exn;
Format.flush_str_formatter ())) ;
Location.register_error_of_exn error_of_exn
let failwith buf s = raise (LexError (buf.pos, s))
let illegal buf c =
Char.escaped c
|> Printf.sprintf "unexpected character in expression: '%s'"
|> failwith buf
(** Regular expressions *)
let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n" ]
(* TODO: I *could* crib JavaScript's definition of 'whitespace', expanding on Unicode category 'Zs'?
* <http://www.ecma-international.org/ecma-262/6.0/#table-32> *)
let whitespace = [%sedlex.regexp? ' ' | newline ]
(* FIXME: I'd really like to make this more multilingual, but I have no idea which Unicode
* categories or traits to use ... *)
let digit = [%sedlex.regexp? '0'..'9']
let letter = [%sedlex.regexp? 'A'..'Z' | 'a'..'z']
let special_initial = [%sedlex.regexp?
'!' | '$' | '%' | '&' | '*' | '/' | ':' | '<' | '=' | '>' | '?' | '^' | '_' | '~' ]
let initial = [%sedlex.regexp? letter | special_initial ]
let special_subsequent = [%sedlex.regexp? '+' | '-' | '.' | '@' ]
let subsequent = [%sedlex.regexp? initial | digit | special_subsequent ]
let peculiar_identifier = [%sedlex.regexp? '+' | '-' | "..." ]
let identifier = [%sedlex.regexp? initial, Star subsequent | peculiar_identifier ]
(** Swallow whitespace and comments. *)
let rec swallow_atmosphere buf =
match%sedlex buf with
| Plus whitespace -> swallow_atmosphere buf
| ";" -> swallow_comment buf
| _ -> ()
and swallow_comment buf =
match%sedlex buf with
| newline -> swallow_atmosphere buf
| any -> swallow_comment buf
| _ -> assert false
(** Return the next token. *)
let rec token buf =
swallow_atmosphere buf;
match%sedlex buf with
| eof -> EOF
| identifier -> IDENTIFIER (LexBuffer.Utf8.lexeme buf)
(* parenths *)
| '(' -> LEFT_PAREN
| ')' -> RIGHT_PAREN
(* YOUR TOKENS HERE... *)
| _ -> illegal buf (Char.chr (next buf))
(* wrapper around `token` that records start and end locations *)
let loc_token buf =
let () = swallow_atmosphere buf in (* dispose of garbage before recording start location *)
let loc_start = next_loc buf in
let t = token buf in
let loc_end = next_loc buf in
(t, loc_start, loc_end)
You can’t perform that action at this time.