From e0fb08fac390769c471b1453ee7566ce3b6ebd65 Mon Sep 17 00:00:00 2001 From: pad Date: Thu, 21 Oct 2010 14:16:02 -0700 Subject: [PATCH] lang_lisp: simple lexer-based highlighter --- lang_lisp/analyze/highlight_lisp.ml | 186 +++++++++++++++++++++++++++ lang_lisp/analyze/highlight_lisp.mli | 7 + lang_lisp/parsing/parser_lisp.ml | 4 + lang_ml/analyze/highlight_ml.ml | 7 +- tests/lisp/foo.lisp | 2 + visual/Makefile | 2 + visual/draw2.ml | 1 + visual/parsing2.ml | 47 ++++++- visual/parsing2.mli | 5 +- 9 files changed, 253 insertions(+), 8 deletions(-) create mode 100644 lang_lisp/analyze/highlight_lisp.mli diff --git a/lang_lisp/analyze/highlight_lisp.ml b/lang_lisp/analyze/highlight_lisp.ml index e69de29bb..3711e52c7 100644 --- a/lang_lisp/analyze/highlight_lisp.ml +++ b/lang_lisp/analyze/highlight_lisp.ml @@ -0,0 +1,186 @@ +(* Yoann Padioleau + * + * Copyright (C) 2010 Facebook + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * version 2.1 as published by the Free Software Foundation, with the + * special exception on linking described in file license.txt. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file + * license.txt for more details. + *) + +open Common + +open Ast_lisp + +module Ast = Ast_lisp +module PI = Parse_info + +open Highlight_code + +module T = Parser_lisp + +(*****************************************************************************) +(* Prelude *) +(*****************************************************************************) + +(*****************************************************************************) +(* Helpers when have global analysis information *) +(*****************************************************************************) + +let fake_no_def2 = NoUse +let fake_no_use2 = (NoInfoPlace, UniqueDef, MultiUse) + +(*****************************************************************************) +(* Code highlighter *) +(*****************************************************************************) + +let visit_toplevel ~tag_hook prefs (toplevel, toks) = + + let already_tagged = Hashtbl.create 101 in + let tag = (fun ii categ -> + tag_hook ii categ; + Hashtbl.add already_tagged ii true + ) + in + + (* -------------------------------------------------------------------- *) + (* ast phase 1 *) + (* -------------------------------------------------------------------- *) + + (* -------------------------------------------------------------------- *) + (* toks phase 1 *) + (* -------------------------------------------------------------------- *) + + (* + * note: all TCommentSpace are filtered in xs so easier to write + * rules (but regular comments are kept as well as newlines). + *) + + let rec aux_toks xs = + match xs with + | [] -> () + + (* a little bit pad specific *) + | T.TComment(ii) + ::T.TCommentNewline (ii2) + ::T.TComment(ii3) + ::T.TCommentNewline (ii4) + ::T.TComment(ii5) + ::xs -> + + let s = PI.str_of_info ii in + let s5 = PI.str_of_info ii5 in + (match () with + | _ when s =~ ".*\\*\\*\\*\\*" && s5 =~ ".*\\*\\*\\*\\*" -> + tag ii CommentEstet; + tag ii5 CommentEstet; + tag ii3 CommentSection1 + | _ when s =~ ".*------" && s5 =~ ".*------" -> + tag ii CommentEstet; + tag ii5 CommentEstet; + tag ii3 CommentSection2 + | _ when s =~ ".*####" && s5 =~ ".*####" -> + tag ii CommentEstet; + tag ii5 CommentEstet; + tag ii3 CommentSection0 + | _ -> + () + ); + aux_toks (T.TComment ii3::T.TCommentNewline ii4::T.TComment ii5::xs) + + + | x::xs -> + aux_toks xs + in + let toks' = toks +> Common.exclude (function + | T.TCommentSpace _ -> true + | _ -> false + ) + in + aux_toks toks'; + + (* -------------------------------------------------------------------- *) + (* toks phase 2 *) + + toks +> List.iter (fun tok -> + match tok with + | T.TComment ii -> + if not (Hashtbl.mem already_tagged ii) + then + (* a little bit syncweb specific *) + let s = PI.str_of_info ii in + if s =~ "(\\*[sex]:" (* yep, s e x are the syncweb markers *) + then tag ii CommentSyncweb + else tag ii Comment + + | T.TCommentNewline ii | T.TCommentSpace ii + -> () + + | T.TUnknown ii + -> tag ii Error + | T.EOF ii + -> () + + | T.TString (s,ii) -> + tag ii String + + | T.TNumber (s,ii) -> + tag ii Number + + | T.TIdent (s, ii) -> + (match s with + | "defun" + -> tag ii Keyword + | "setq" + -> tag ii KeywordObject (* hmm not really *) + + | "t" -> + tag ii Boolean + | "nil" -> + tag ii Boolean (* or Null ? *) + + | "cond" | "if" -> + tag ii KeywordConditional + + | "concat" + | "getenv" + -> tag ii Builtin + + | _ -> () + ) + + + | T.TCBracket ii + | T.TOBracket ii + -> tag ii TypeVoid (* TODO *) + + | T.TCParen ii + | T.TOParen ii + -> tag ii Punctuation + + | T.TQuote ii -> + tag ii EmbededHtml (* quote stuff is kind of like XHP after all *) + + | T.TAt ii + | T.TComma ii + | T.TBackQuote ii + -> + tag ii EmbededHtml (* quote stuff is kind of like XHP after all *) + + + + ); + + (* -------------------------------------------------------------------- *) + (* ast phase 2 *) + + () + + + + diff --git a/lang_lisp/analyze/highlight_lisp.mli b/lang_lisp/analyze/highlight_lisp.mli new file mode 100644 index 000000000..db3324dd2 --- /dev/null +++ b/lang_lisp/analyze/highlight_lisp.mli @@ -0,0 +1,7 @@ + +val visit_toplevel : + tag_hook: + (Ast_lisp.info -> Highlight_code.category -> unit) -> + Highlight_code.highlighter_preferences -> + Ast_lisp.toplevel * Parser_lisp.token list -> + unit diff --git a/lang_lisp/parsing/parser_lisp.ml b/lang_lisp/parsing/parser_lisp.ml index 7682c00db..ebe2a8e5f 100644 --- a/lang_lisp/parsing/parser_lisp.ml +++ b/lang_lisp/parsing/parser_lisp.ml @@ -15,6 +15,8 @@ open Common +module PI = Parse_info + (*****************************************************************************) (* Types *) (*****************************************************************************) @@ -88,3 +90,5 @@ let info_of_tok tok = visitor_info_of_tok (fun ii -> res := Some ii; ii) tok +> ignore; Common.some !res + +let str_of_tok x = PI.str_of_info (info_of_tok x) diff --git a/lang_ml/analyze/highlight_ml.ml b/lang_ml/analyze/highlight_ml.ml index 04d246c5d..3e413740d 100644 --- a/lang_ml/analyze/highlight_ml.ml +++ b/lang_ml/analyze/highlight_ml.ml @@ -71,13 +71,8 @@ let disable_token_phase2 = false * number and basic entities. The Ast is better for tagging idents * to figure out what kind of ident it is. *) +let visit_toplevel ~tag_hook prefs (*db_opt *) (toplevel, toks) = -let visit_toplevel - ~tag_hook - prefs - (*db_opt *) - (toplevel, toks) - = let already_tagged = Hashtbl.create 101 in let tag = (fun ii categ -> tag_hook ii categ; diff --git a/tests/lisp/foo.lisp b/tests/lisp/foo.lisp index a76b15c52..bb34da981 100644 --- a/tests/lisp/foo.lisp +++ b/tests/lisp/foo.lisp @@ -5,3 +5,5 @@ (setq debug-on-error t) ;or --debug-init (defun h(s) (concat (getenv "HOME") "/" s)) + +(setq bar 1001) diff --git a/visual/Makefile b/visual/Makefile index 710aa1dc5..a64be5308 100644 --- a/visual/Makefile +++ b/visual/Makefile @@ -38,6 +38,8 @@ INCLUDEDIRS=$(TOP)/commons $(TOP)/globals \ $(TOP)/lang_ml/analyze \ $(TOP)/lang_nw/parsing \ $(TOP)/lang_nw/analyze \ + $(TOP)/lang_lisp/parsing \ + $(TOP)/lang_lisp/analyze \ $(TOP)/lang_cpp/parsing \ $(TOP)/lang_cpp/analyze \ $(TOP)/lang_js/parsing \ diff --git a/visual/draw2.ml b/visual/draw2.ml index 3e38624d4..f0889d068 100644 --- a/visual/draw2.ml +++ b/visual/draw2.ml @@ -381,6 +381,7 @@ let draw_content2 ~cr ~layout ~context ~nblines ~file rect = | FT.PL (FT.Cplusplus | FT.C) | FT.PL (FT.Thrift) | FT.Text ("nw" | "tex" | "texi" | "web") + | FT.PL (FT.Lisp _) ) -> let column = ref 0 in diff --git a/visual/parsing2.ml b/visual/parsing2.ml index 226bf6db7..39eacaf7e 100644 --- a/visual/parsing2.ml +++ b/visual/parsing2.ml @@ -35,10 +35,13 @@ open Highlight_code *) type ast = | ML of Parse_ml.program2 + | Php of Parse_php.program2 - | Cpp of Parse_cpp.program2 | Js of Parse_js.program2 + | Cpp of Parse_cpp.program2 + + | Lisp of Parse_lisp.program2 | Noweb of Parse_nw.program2 @@ -60,6 +63,14 @@ let parse_nw_cache a = match parse_nw2 a with | Noweb a -> a | _ -> raise Impossible ) +let parse_lisp2 file = + Common.memoized _hmemo_file file (fun () -> + Lisp (Parse_lisp.parse file +> fst)) +let parse_lisp_cache a = + Common.profile_code "View.parse_lisp_cache" (fun () -> + match parse_lisp2 a with | Lisp a -> a | _ -> raise Impossible + ) + let parse_php2 file = Common.memoized _hmemo_file file (fun () -> @@ -244,6 +255,40 @@ let tokens_with_categ_of_file file hentities = ) ) +> List.flatten + | FT.PL (FT.Lisp _) -> + let h = Hashtbl.create 101 in + + let ast2 = parse_lisp_cache file in + ast2 +> List.map (fun (ast, (_str, toks)) -> + (* computing the token attributes *) + Highlight_lisp.visit_toplevel + ~tag_hook:(fun info categ -> Hashtbl.add h info categ) + prefs + (ast, toks) + ; + + (* getting the text *) + toks |> Common.map_filter (fun tok -> + let info = Parser_lisp.info_of_tok tok in + let s = Parser_lisp.str_of_tok tok in + + if not (Parse_info.is_origintok info) + then None + else + let categ = Common.hfind_option info h in + let categ = categ +> Common.fmap (fun categ -> + rewrite_categ_using_entities s categ file hentities + ) + in + Some (s, categ, + { l = Parse_info.line_of_info info; + c = Parse_info.col_of_info info; + }) + + ) + ) +> List.flatten + + | FT.Text ("nw" | "tex" | "texi" | "web") -> let h = Hashtbl.create 101 in diff --git a/visual/parsing2.mli b/visual/parsing2.mli index 26c8365c1..964ed5296 100644 --- a/visual/parsing2.mli +++ b/visual/parsing2.mli @@ -9,10 +9,13 @@ val use_arity_of_use_count : int -> Highlight_code.use_arity type ast = | ML of Parse_ml.program2 + | Php of Parse_php.program2 - | Cpp of Parse_cpp.program2 | Js of Parse_js.program2 + | Cpp of Parse_cpp.program2 + + | Lisp of Parse_lisp.program2 | Noweb of Parse_nw.program2 val _hmemo_file : (Common.filename, ast) Hashtbl.t