From 9a56be647bd0faa258a78fa2ae9225afca5b316c Mon Sep 17 00:00:00 2001 From: Matthew Fluet Date: Fri, 2 Jun 2017 19:57:28 -0400 Subject: [PATCH] Make ml.lex and ml.grm more closely follow the Definition of SML The lexer now properly distinguishes between alphanumeric and symbolic and short and long identifiers. The parser uses the fine-grained identifier classes to properly define vid, longvid, tycon, longtycon, field, strid, longstrid, sigid, and fctid classes. This fixes a minor bug where MLton erroneously accepted symbolic identifiers for strid, sigid, and fctid and erroneously accepted symbolic identifiers before "." in long identifiers. This also further extends the treatment of `=` as a special identifier by allowing including `=` in the vid class and equating the con class with the vid class. Thus, the program: datatype t = = of unit does not give a parse error, but does give a "Special identifier cannot be redefined by datata: =." error. This almost allows `=` everywhere a vid is allowed. The exception is that a vidNoEquals class is used (as was previously) in atomic patterns. The issue is with val pat = exp and fun f apat apat ... apat = exp phrases, where an `=` in the pattern is difficult to distinguish from the `=` terminating the pattern(s). It would be possible to have a set of *NoEqual pattern productions that excluded the `=` vid, which would be used for these patterns appearing in declarations. --- mlton/front-end/ml.grm | 278 ++++++++++++++++-------------- mlton/front-end/ml.lex | 16 +- regression/fail/special-ids.2.sml | 31 +++- 3 files changed, 192 insertions(+), 133 deletions(-) diff --git a/mlton/front-end/ml.grm b/mlton/front-end/ml.grm index 85a0bda161..04b332d9c3 100644 --- a/mlton/front-end/ml.grm +++ b/mlton/front-end/ml.grm @@ -141,8 +141,6 @@ structure Spec = reg (Sharing {spec = seq (s, spec), equations = equations}) | _ => reg (Seq (s, s')) end - -(* val seq = Trace.trace2 ("Spec.seq", layout, layout, layout) seq *) end fun consTopdec (d, dss) = @@ -190,6 +188,18 @@ type vb = {pat: Pat.t, type rvb = {pat: Pat.t, match: Match.t} +fun longIdFromTok (s, left, right) = + let + val syms = List.map (String.split (s, #"."), Symbol.fromString) + in + (syms, reg (left, right)) + end + +fun shortIdFromTok (s, left, right) = + (Symbol.fromString s, reg (left, right)) + +fun longIdFromShortId (sym, reg) = ([sym], reg) + fun ensureNonqualified (ss: Symbol.t list, r: Region.t): Symbol.t * Region.t = case ss of [s] => (s, r) @@ -226,7 +236,10 @@ type 'a whereAnd = wherespec list * SourcePos.t * 'a list extended: bool, negate: bool, radix: StringCvt.radix} - | LONGID of string + | SHORTALPHANUMID of string + | SHORTSYMID of string + | LONGALPHANUMID of string + | LONGSYMID of string | REAL of string | STRING of IntInf.t vector | TYVAR of string @@ -304,24 +317,22 @@ type 'a whereAnd = wherespec list * SourcePos.t * 'a list | funbinds'1' of funbind whereAnd | funbinds'2 of funbind list | funs of clauses list - | id of Symbol.t * Region.t - | idEqual of Symbol.t * Region.t - | idNoAsterisk of Symbol.t * Region.t + | idField of Symbol.t * Region.t | int of IntInf.t | longcon of Longcon.t - | longid of Symbol.t list * Region.t - | longidEqual of Symbol.t list * Region.t - | longidNoAsterisk of Symbol.t list * Region.t + | longAlphanumId of Symbol.t list * Region.t + | longSymId of Symbol.t list * Region.t | longstrid of Longstrid.t | longstrideqns of Longstrid.t list | longstrids of Longstrid.t list | longtycon of Longtycon.t | longtyconeqns of Longtycon.t list - | longvidands of Longvid.t list | longvid of Longvid.t + | longvidEqual of Longvid.t | longvidNoEqual of Longvid.t + | longvidands of Longvid.t list | match of Match.t - | numeric of int + | numericField of int | opaspat of Pat.t option | opcon of Con.t | optbar of unit @@ -341,6 +352,8 @@ type 'a whereAnd = wherespec list * SourcePos.t * 'a list | sdecs of Dec.t | sdecsPlus of Dec.t | sharespec of Equation.node + | shortAlphanumId of Symbol.t * Region.t + | shortSymId of Symbol.t * Region.t | sigbinds of sigbind list | sigbinds' of sigbind whereAnd | sigbinds'' of sigbind whereAnd @@ -404,8 +417,8 @@ type 'a whereAnd = wherespec list * SourcePos.t * 'a list | valbindTop of vb vector * rvb vector | valdesc of valdesc | valdescs of valdesc list - | var of Var.t | vid of Vid.t + | vidEqual of Vid.t | vidNoEqual of Vid.t | vids of Vid.t list | wherespec of wherespec @@ -447,11 +460,11 @@ type 'a whereAnd = wherespec list * SourcePos.t * 'a list %change -> VAL | -> THEN | -> ELSE | -> LPAREN | -> SEMICOLON | DARROW -> EQUALOP | EQUALOP -> DARROW | AND -> ANDALSO | COLON -> OF | SEMICOLON -> COMMA | COMMA -> SEMICOLON | - -> IN LONGID END | -> ELSE LONGID + -> IN SHORTALPHANUMID END | -> ELSE SHORTALPHANUMID %value CHAR (IntInf.fromInt (Char.ord #"a")) %value INT ({digits = "0", extended = false, negate = false, radix = StringCvt.DEC}) -%value LONGID ("bogus") +%value SHORTALPHANUMID ("bogus") %value REAL ("13.0") %value STRING (Vector.fromList []) %value TYVAR ("'a") @@ -566,10 +579,9 @@ strexp2 : strexp2node (Strexp.makeRegion' (strexp2node, strexp2nodeleft, strexp2noderight)) strexp2node - : longid (Strexp.Var (Longstrid.fromSymbols longid)) + : longstrid (Strexp.Var longstrid) | STRUCT strdecs END (Strexp.Struct strdecs) - | longid arg_fct - (Strexp.App (Fctid.fromSymbol (ensureNonqualified longid), arg_fct)) + | fctid arg_fct (Strexp.App (fctid, arg_fct)) | LET strdecs IN strexp END (Strexp.Let (strdecs,strexp)) arg_fct : LPAREN strexp RPAREN (strexp) @@ -675,7 +687,7 @@ typdesc : tyvars tycon ({tyvars = tyvars, valdescs : valdesc ([valdesc]) | valdesc AND valdescs (valdesc :: valdescs) -valdesc : var COLON ty (var, ty) +valdesc : vid COLON ty (Vid.toVar vid, ty) exndescs : exndesc ([exndesc]) | exndesc AND exndescs (exndesc :: exndescs) @@ -745,9 +757,9 @@ decnolocal | OPEN longstrids (Dec.Open (Vector.fromList longstrids)) | fixity vids (Dec.Fix {fixity = fixity, ops = Vector.fromList vids}) - | OVERLOAD priority var COLON ty AS longvidands + | OVERLOAD priority vid COLON ty AS longvidands (Dec.Overload (priority, - var, + Vid.toVar vid, Vector.new0 (), ty, Vector.fromList longvidands)) @@ -897,7 +909,7 @@ digit : INT end end) -numeric : INT +numericField : INT (let val {digits, extended, negate, radix} = INT fun err () = @@ -970,28 +982,28 @@ rules : rule ([rule]) rule : pat DARROW exp ((pat,exp)) elabel : field EQUALOP exp (field, (reg (fieldleft, fieldright), exp)) - | vidNoEqual constraint (if allowRecordPunExps () + | idField constraint (if allowRecordPunExps () then () - else error (reg (vidNoEqualleft, vidNoEqualright), "Record punning expressions disallowed, compile with -default-ann 'allowRecordPunExps true'") - ; (Field.Symbol (Vid.toSymbol vidNoEqual), - (reg (vidNoEqualleft, vidNoEqualright), + else error (reg (idFieldleft, idFieldright), "Record punning expressions disallowed, compile with -default-ann 'allowRecordPunExps true'") + ; (Field.Symbol (#1 idField), + (reg (idFieldleft, idFieldright), let val exp = Exp.makeRegion' (Exp.FlatApp (Vector.new1 (Exp.makeRegion' - (Exp.Var {name = Longvid.short vidNoEqual, + (Exp.Var {name = Longvid.short (Vid.fromSymbol idField), fixop = Fixop.None}, - vidNoEqualleft, vidNoEqualright))), - vidNoEqualleft, vidNoEqualright) + idFieldleft, idFieldright))), + idFieldleft, idFieldright) val exp = case constraint of NONE => exp | SOME ty => Exp.makeRegion' (Exp.Constraint (exp, ty), - vidNoEqualleft, constraintright) + idFieldleft, constraintright) in exp end))) @@ -1085,11 +1097,11 @@ aexp : OP longvid (Exp.Var {name = longvid, ieattributes : ([]) - | id ieattributes + | shortAlphanumId ieattributes (let - val id = Symbol.toString (#1 id) + val (id, reg) = shortAlphanumId in - case id of + case Symbol.toString id of "cdecl" => PrimKind.ImportExportAttribute.Cdecl :: ieattributes | "external" => PrimKind.ImportExportAttribute.External :: ieattributes | "impure" => PrimKind.ImportExportAttribute.Impure :: ieattributes @@ -1099,24 +1111,24 @@ ieattributes | "reentrant" => PrimKind.ImportExportAttribute.Reentrant :: ieattributes | "runtime" => PrimKind.ImportExportAttribute.Runtime :: ieattributes | "stdcall" => PrimKind.ImportExportAttribute.Stdcall :: ieattributes - | _ => (error (reg (idleft, idright), concat ["invalid attribute: ", id]) - ; ieattributes) + | id => (error (reg, concat ["invalid attribute: ", id]) + ; ieattributes) end) symattributes : ([]) - | id symattributes + | shortAlphanumId symattributes (let - val id = Symbol.toString (#1 id) + val (id, reg) = shortAlphanumId in - case id of + case Symbol.toString id of "alloc" => PrimKind.SymbolAttribute.Alloc :: symattributes | "external" => PrimKind.SymbolAttribute.External :: symattributes | "private" => PrimKind.SymbolAttribute.Private :: symattributes | "public" => PrimKind.SymbolAttribute.Public :: symattributes - | _ => (error (reg (idleft, idright), concat ["invalid attribute: ", id]) - ; symattributes) + | id => (error (reg, concat ["invalid attribute: ", id]) + ; symattributes) end) exp_2c : exp COMMA exp_2c (exp :: exp_2c) @@ -1139,45 +1151,49 @@ cpatnode : cpat AS cpat (Pat.makeAs (cpat1, cpat2)) | cpat COLON ty (Pat.Constraint (cpat, ty)) | apats (Pat.FlatApp (Vector.fromList apats)) -apats : apat ([apat]) - | apat apats (apat :: apats) +apats : apat ([apat]) + | apat apats (apat :: apats) -apat : apatnode (Pat.makeRegion' (apatnode, apatnodeleft, apatnoderight)) +apat + : apatnode (Pat.makeRegion' (apatnode, + apatnodeleft, + apatnoderight)) -apatnode : longvidNoEqual (Pat.Var {name = longvidNoEqual, - fixop = Fixop.None}) - | OP longvid (Pat.Var {name = longvid, - fixop = Fixop.Op}) - | const - (let - val _ = - case Const.node const of - Const.Real r => - let - open Layout - in - Control.error - (Const.region const, - seq [str "real constants are not allowed in patterns: ", - Const.layout const], - empty) - end - | _ => () - in - Pat.Const const - end) - | WILD (Pat.Wild) - | LPAREN pats RPAREN (Pat.tuple (Vector.fromList pats)) - | LBRACKET pats RBRACKET (Pat.List (Vector.fromList pats)) - | HASHLBRACKET pats RBRACKET (Pat.Vector (Vector.fromList pats)) - | LBRACE RBRACE (Pat.unit) - | LBRACE patitems RBRACE - (let - val (items, flexible) = patitems - in - Pat.Record {flexible = flexible, - items = Vector.fromList items} - end) +apatnode + : longvidNoEqual (Pat.Var {name = longvidNoEqual, + fixop = Fixop.None}) + | OP longvid (Pat.Var {name = longvid, + fixop = Fixop.Op}) + | const + (let + val _ = + case Const.node const of + Const.Real r => + let + open Layout + in + Control.error + (Const.region const, + seq [str "real constants are not allowed in patterns: ", + Const.layout const], + empty) + end + | _ => () + in + Pat.Const const + end) + | WILD (Pat.Wild) + | LPAREN pats RPAREN (Pat.tuple (Vector.fromList pats)) + | LBRACKET pats RBRACKET (Pat.List (Vector.fromList pats)) + | HASHLBRACKET pats RBRACKET (Pat.Vector (Vector.fromList pats)) + | LBRACE RBRACE (Pat.unit) + | LBRACE patitems RBRACE + (let + val (items, flexible) = patitems + in + Pat.Record {flexible = flexible, + items = Vector.fromList items} + end) pats : ([]) | pat commapats (pat :: commapats) @@ -1197,10 +1213,10 @@ patitems : patitem COMMA patitems (let val (items, f) = patitems patitem : field EQUALOP pat ((field, reg (fieldleft, fieldright), Pat.Item.Field pat)) - | vidNoEqual constraint opaspat - (Field.Symbol (Vid.toSymbol vidNoEqual), - reg (vidNoEqualleft, vidNoEqualright), - Pat.Item.Vid (vidNoEqual, constraint, opaspat)) + | vid constraint opaspat + (Field.Symbol (Vid.toSymbol vid), + reg (vidleft, vidright), + Pat.Item.Vid (vid, constraint, opaspat)) opaspat : (NONE) | AS pat (SOME pat) @@ -1261,13 +1277,14 @@ optsemicolon constOrBool : const (const) - | id (let - fun ok b = Const.makeRegion (Const.Bool b, reg (idleft, idright)) + | shortAlphanumId + (let + fun ok b = Const.makeRegion (Const.Bool b, #2 shortAlphanumId) in - case Symbol.toString (#1 id) of + case Symbol.toString (#1 shortAlphanumId) of "false" => ok false | "true" => ok true - | s => (error (#2 id, concat ["unknown boolean constant: ", s]) + | s => (error (#2 shortAlphanumId, concat ["unknown boolean constant: ", s]) ; ok false) end) @@ -1284,53 +1301,64 @@ string : STRING (CharVector.tabulate (Vector.length STRING, fn i => Char.fromInt (Int.fromIntInf (Vector.sub (STRING, i))))) -idNoAsterisk : longidNoAsterisk (ensureNonqualified longidNoAsterisk) - -id : idNoAsterisk (idNoAsterisk) - | ASTERISK ((Symbol.asterisk, reg (ASTERISKleft, ASTERISKright))) -idEqual : id (id) - | EQUALOP ((Symbol.equal, reg (EQUALOPleft, EQUALOPright))) +shortAlphanumId + : SHORTALPHANUMID + (shortIdFromTok (SHORTALPHANUMID, SHORTALPHANUMIDleft, SHORTALPHANUMIDright)) +shortSymId + : SHORTSYMID + (shortIdFromTok (SHORTSYMID, SHORTSYMIDleft, SHORTSYMIDright)) +longAlphanumId + : LONGALPHANUMID + (longIdFromTok (LONGALPHANUMID, LONGALPHANUMIDleft, LONGALPHANUMIDright)) +longSymId + : LONGSYMID + (longIdFromTok (LONGSYMID, LONGSYMIDleft, LONGSYMIDright)) + +vidNoEqual : shortAlphanumId (Vid.fromSymbol shortAlphanumId) + | shortSymId (Vid.fromSymbol shortSymId) + | ASTERISK (Vid.fromSymbol (Symbol.asterisk, + reg (ASTERISKleft, ASTERISKright))) +vidEqual : EQUALOP (Vid.fromSymbol (Symbol.equal, + reg (EQUALOPleft, EQUALOPright))) +vid : vidNoEqual (vidNoEqual) + | vidEqual (vidEqual) +longvidNoEqual : vidNoEqual (Longvid.short vidNoEqual) + | longAlphanumId (Longvid.fromSymbols longAlphanumId) + | longSymId (Longvid.fromSymbols longSymId) +longvidEqual : vidEqual (Longvid.short vidEqual) +longvid : longvidNoEqual (longvidNoEqual) + | longvidEqual (longvidEqual) + +con : vid (Vid.toCon vid) +longcon : longvid (Longvid.toLongcon longvid) + +tyvar : TYVAR (Tyvar.newString (TYVAR, {left = TYVARleft, right = TYVARright})) + +tycon : shortAlphanumId (Tycon.fromSymbol shortAlphanumId) + | shortSymId (Tycon.fromSymbol shortSymId) +longtycon : tycon (Longtycon.short tycon) + | longAlphanumId (Longtycon.fromSymbols longAlphanumId) + +idField : shortAlphanumId (shortAlphanumId) + | shortSymId (shortSymId) + | ASTERISK ((Symbol.asterisk, + reg (ASTERISKleft, ASTERISKright))) +field : idField (Field.Symbol (#1 idField)) + | numericField (Field.Int (numericField - 1)) + +strid : shortAlphanumId (Strid.fromSymbol shortAlphanumId) +longstrid : strid (Longstrid.short strid) + | longAlphanumId (Longstrid.fromSymbols longAlphanumId) + +sigid : shortAlphanumId (Sigid.fromSymbol shortAlphanumId) +fctid : shortAlphanumId (Fctid.fromSymbol shortAlphanumId) -longid - : longidNoAsterisk (longidNoAsterisk) - | ASTERISK (([Symbol.asterisk], reg (ASTERISKleft, ASTERISKright))) - -longidNoAsterisk - : LONGID - (let - val syms = List.map (String.split (LONGID, #"."), Symbol.fromString) - in - (syms, reg (LONGIDleft, LONGIDright)) - end) - -longidEqual : longid (longid) - | EQUALOP (([Symbol.equal], reg (EQUALOPleft, EQUALOPright))) - -vid : idEqual (Vid.fromSymbol idEqual) -vidNoEqual : id (Vid.fromSymbol id) vids : vid ([vid]) | vid vids (vid::vids) -var : idEqual (Var.fromSymbol idEqual) -con : id (Con.fromSymbol id) -tycon : idNoAsterisk (Tycon.fromSymbol idNoAsterisk) -tyvar : TYVAR (Tyvar.newString (TYVAR, {left = TYVARleft, - right = TYVARright})) -field : id (Field.Symbol (#1 id)) - | numeric (Field.Int (numeric - 1)) - (* numeric - 1 because fields are 0-based *) - -strid : id (Strid.fromSymbol id) -sigid : id (Sigid.fromSymbol id) + sigids : sigid ([sigid]) | sigid sigids (sigid :: sigids) -fctid : id (Fctid.fromSymbol id) - -longtycon : longidNoAsterisk (Longtycon.fromSymbols longidNoAsterisk) -longvid : longidEqual (Longvid.fromSymbols longidEqual) -longvidNoEqual : longid (Longvid.fromSymbols longid) -longcon : longid (Longcon.fromSymbols longid) -longstrid : longid (Longstrid.fromSymbols longid) longstrids : longstrid ([longstrid]) | longstrid longstrids (longstrid :: longstrids) diff --git a/mlton/front-end/ml.lex b/mlton/front-end/ml.lex index 1b874a5eee..3c567ebad1 100644 --- a/mlton/front-end/ml.lex +++ b/mlton/front-end/ml.lex @@ -263,11 +263,12 @@ eol=({cr}{nl}|{nl}|{cr}); alphanum=[A-Za-z0-9'_]; alphanumId=[A-Za-z]{alphanum}*; -tyvarId="'"{alphanum}*; sym="!"|"%"|"&"|"$"|"#"|"+"|"-"|"/"|":"|"<"|"="|">"|"?"|"@"|"\\"|"~"|"`"|"^"|"|"|"*"; symId={sym}+; -id={alphanumId}|{symId}; -longId={id}("."{id})*; + +tyvarId="'"{alphanum}*; +longSymId=({alphanumId}".")+{symId}; +longAlphanumId=({alphanumId}".")+{alphanumId}; decDigit=[0-9]; decnum={decDigit}("_"*{decDigit})*; @@ -356,11 +357,14 @@ real=(~?)(({decnum}{frac}?{exp})|({decnum}{frac}{exp}?)); "withtype" => (tok (Tokens.WITHTYPE, yytext, source, yypos)); -{tyvarId} => (tok' (Tokens.TYVAR, yytext, source, yypos)); -{longId} => +{alphanumId} => (tok' (Tokens.SHORTALPHANUMID, yytext, source, yypos)); +{symId} => (case yytext of "*" => tok (Tokens.ASTERISK, yytext, source, yypos) - | _ => tok' (Tokens.LONGID, yytext, source, yypos)); + | _ => tok' (Tokens.SHORTSYMID, yytext, source, yypos)); +{tyvarId} => (tok' (Tokens.TYVAR, yytext, source, yypos)); +{longAlphanumId} => (tok' (Tokens.LONGALPHANUMID, yytext, source, yypos)); +{longSymId} => (tok' (Tokens.LONGSYMID, yytext, source, yypos)); {real} => diff --git a/regression/fail/special-ids.2.sml b/regression/fail/special-ids.2.sml index 900c2fa8df..5f1aa11570 100644 --- a/regression/fail/special-ids.2.sml +++ b/regression/fail/special-ids.2.sml @@ -1,6 +1,33 @@ +signature SIG1 = + sig + datatype t = = of unit + end +signature SIG2 = + sig + exception = + end +signature SIG3 = + sig + val = : unit + end + +local +datatype t = = of unit +in end +local +exception = +in end + +local +val op= = () +in end + local -fun f op= () = () -fun op= () = () val rec f = fn op= => fn () => () val rec op= = fn () => () in end + +local +fun f op= () = () +fun op= () = () +in end