Skip to content

Commit

Permalink
Handle UTF8 BOM in the diff API
Browse files Browse the repository at this point in the history
  • Loading branch information
sjoelund committed Mar 17, 2021
1 parent e78b77a commit c324f25
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 7 deletions.
7 changes: 5 additions & 2 deletions OMCompiler/Compiler/Lexers/LexerModelicaDiff.mo
Expand Up @@ -43,10 +43,11 @@ end scan;

// Lexes Modelica source held in a string and returns the recognised tokens
// together with the tokens the lexer flagged as errors.
//   fileSource  - the text to tokenize
//   fileName    - name handed to lex() alongside the text; defaults to
//                 "<StringSource>"
//   tokens      - first result of lex()
//   errorTokens - second result of lex()
function scanString "Scan starts the lexical analysis, load the tables and consume the program to output the tokens"
input String fileSource "input source code file";
input String fileName = "<StringSource>";
output list<Token> tokens "return list of tokens";
output list<Token> errorTokens;
algorithm
// NOTE(review): this listing is a commit diff shown without +/- markers.
// The next statement (hard-coded "<StringSource>") is the pre-change line and
// the one after it (fileName) is its replacement. Read literally, both run and
// the second assignment overwrites the results of the first.
(tokens, errorTokens) := lex("<StringSource>",fileSource);
(tokens, errorTokens) := lex(fileName,fileSource);
end scanString;


Expand Down Expand Up @@ -1688,13 +1689,15 @@ function reportErrors
input list<Token> tokens;
protected
Integer i=0;
String content;
algorithm
for t in tokens loop
i := i+1;
if i>10 then
Error.addMessage(Error.SCANNER_ERROR_LIMIT, {});
end if;
Error.addSourceMessage(Error.SCANNER_ERROR, {tokenContent(t)}, tokenSourceInfo(t));
content := tokenContent(t);
Error.addSourceMessage(Error.SCANNER_ERROR, {StringUtil.convertCharNonAsciiToHex(content)}, tokenSourceInfo(t));
end for;
if not listEmpty(tokens) then
fail();
Expand Down
6 changes: 4 additions & 2 deletions OMCompiler/Compiler/Lexers/lexerModelicaDiff.l
Expand Up @@ -340,7 +340,7 @@ algorithm
case (e as (_,TOKEN(id=TokenId.NEWLINE)))::(Diff.Add,TOKEN(id=TokenId.NEWLINE))::rest
then (false,e::rest,tmp);
case (e as (_,TOKEN(id=TokenId.NEWLINE)))::rest then (true,rest,e::tmp);
case (Diff.Add,TOKEN(id=TokenId.WHITESPACE))::(e as (Diff.Add,t))::rest guard lastIsNewline
case (Diff.Add,TOKEN(id=TokenId.WHITESPACE))::(e as (Diff.Add,_))::rest guard lastIsNewline
then (false,rest,e::
(Diff.Add,TOKEN("WHITESPACE",TokenId.WHITESPACE,sum(" " for i in 1:depth),1,depth,0,0,0,0))
::tmp);
Expand Down Expand Up @@ -487,13 +487,15 @@ function reportErrors
input list<Token> tokens;
protected
Integer i=0;
String content;
algorithm
for t in tokens loop
i := i+1;
if i>10 then
Error.addMessage(Error.SCANNER_ERROR_LIMIT, {});
end if;
Error.addSourceMessage(Error.SCANNER_ERROR, {tokenContent(t)}, tokenSourceInfo(t));
content := tokenContent(t);
Error.addSourceMessage(Error.SCANNER_ERROR, {StringUtil.convertCharNonAsciiToHex(content)}, tokenSourceInfo(t));
end for;
if not listEmpty(tokens) then
fail();
Expand Down
8 changes: 5 additions & 3 deletions OMCompiler/Compiler/Script/CevalScriptBackend.mo
Expand Up @@ -655,7 +655,7 @@ algorithm
title,xLabel,yLabel,filename2,varNameStr,xml_filename,xml_contents,visvar_str,pwd,omhome,omlib,omcpath,os,
platform,usercflags,senddata,res,workdir,gcc,confcmd,touch_file,uname,filenameprefix,compileDir,libDir,exeDir,configDir,from,to,
gridStr, logXStr, logYStr, x1Str, x2Str, y1Str, y2Str, curveWidthStr, curveStyleStr, legendPosition, footer, autoScaleStr,scriptFile,logFile, simflags2, outputFile,
systemPath, gccVersion, gd, strlinearizeTime, suffix,cname, modeldescriptionfilename, tmpDir, tmpFile;
systemPath, gccVersion, gd, strlinearizeTime, suffix,cname, modeldescriptionfilename, tmpDir, tmpFile, bom;
list<DAE.Exp> simOptions;
list<Values.Value> vals;
Absyn.Path path,classpath,className,baseClassPath;
Expand Down Expand Up @@ -1098,6 +1098,7 @@ algorithm
algorithm
ExecStat.execStatReset();

(s1, bom) := StringUtil.stripBOM(s1);
(tokens1, errorTokens) := scanString(s1);
reportErrors(errorTokens);

Expand All @@ -1121,7 +1122,8 @@ algorithm
fail();
end if;

tokens2 := scanString(s2);
(s2, bom) := StringUtil.stripBOM(s2);
(tokens2, errorTokens) := scanString(s2);
reportErrors(errorTokens);
ExecStat.execStat("diffModelicaFileListings scan string 2");
(_,parseTree2) := SimpleModelicaParser.stored_definition(tokens2, {});
Expand Down Expand Up @@ -1186,7 +1188,7 @@ algorithm
Error.addInternalError("Unknown diffModelicaFileListings choice", sourceInfo());
then fail();
end matchcontinue;
then (cache,Values.STRING(str));
then (cache,Values.STRING(bom + str));

case (cache,_,"diffModelicaFileListings",_,_) then (cache,Values.STRING(""));

Expand Down
28 changes: 28 additions & 0 deletions OMCompiler/Compiler/Util/StringUtil.mo
Expand Up @@ -386,5 +386,33 @@ algorithm
b := CHAR_NEWLINE == MetaModelica.Dangerous.stringGetNoBoundsChecking(str, stringLength(str));
end endsWithNewline;

function convertCharNonAsciiToHex "Returns a one-character string unchanged when its character code is below 128, otherwise returns the code written as 0xNN (upper-case hex)"
  input output String s;
protected
  Integer code;
  constant String digits[:] = array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F");
algorithm
  code := stringCharInt(s);
  // Codes below 128 are passed through untouched; everything else is replaced
  // by its two-digit hexadecimal form.
  if code >= 128 then
    // digits[] is 1-based, hence the +1 on both the high and the low nibble.
    s := "0x" + digits[intDiv(code, 16)+1] + digits[intMod(code, 16)+1];
  end if;
end convertCharNonAsciiToHex;

function stripBOM "Removes a leading UTF-8 byte order mark (bytes 239 187 191) from a string and returns the mark separately"
  input output String s "in: text that may start with a BOM; out: the same text without the BOM";
  output String bom = "" "the three BOM bytes when they were present, otherwise the empty string";
protected
  Integer len;
algorithm
  len := stringLength(s);
  // A string shorter than three bytes cannot start with the mark.
  if len < 3 then
    return;
  end if;
  if stringGet(s,1) == 239 and
     stringGet(s,2) == 187 and
     stringGet(s,3) == 191 then
    // Capture the mark BEFORE s is overwritten; taking it afterwards would
    // return the first three bytes of the payload instead of the BOM.
    bom := substring(s, 1, 3);
    if len > 3 then
      s := substring(s, 4, len);
    else
      // The input is nothing but the BOM.
      // NOTE(review): substring presumably rejects a start index past the end
      // of the string, so the empty remainder is produced directly.
      s := "";
    end if;
  end if;
end stripBOM;

annotation(__OpenModelica_Interface="util");
end StringUtil;

0 comments on commit c324f25

Please sign in to comment.