Skip to content

Commit

Permalink
Allow parsing of non-UTF-8 files (give a warning, fall back to 7-bit ASCII)
Browse files Browse the repository at this point in the history

git-svn-id: https://openmodelica.org/svn/OpenModelica/trunk@17471 f25d12d1-65f4-0310-ae8a-bbce733d8d8e
  • Loading branch information
sjoelund committed Oct 1, 2013
1 parent eb30277 commit 4a5e40b
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 34 deletions.
35 changes: 16 additions & 19 deletions Compiler/runtime/systemimpl.c
Expand Up @@ -1834,26 +1834,17 @@ extern int SystemImpl__reopenStandardStream(int id,const char *filename)
return 1;
}

/*
 * Returns a newly allocated 7-bit ASCII copy of str: every byte with the
 * high bit set (anything outside 0x00-0x7F) is replaced by '?'.
 *
 * The result is NUL-terminated and owned by the caller, who must free() it.
 * Returns NULL if str is NULL or if the allocation fails.
 */
char* SystemImpl__iconv__ascii(const char * str)
{
  char *buf;
  size_t sz, i;

  if (str == NULL) {
    return NULL;
  }
  sz = strlen(str);
  /* +1 for the terminating NUL, which strlen() does not count */
  buf = malloc(sz + 1);
  if (buf == NULL) {
    return NULL;
  }
  for (i = 0; i < sz; i++) {
    /* Test the high bit on an unsigned value so the result does not depend
     * on whether plain char is signed on this platform. */
    buf[i] = ((unsigned char) str[i] & 0x80) ? '?' : str[i];
  }
  buf[sz] = '\0';
  return buf;
}
Expand All @@ -1880,9 +1871,12 @@ extern char* SystemImpl__iconv(const char * str, const char *from, const char *t
/* fprintf(stderr,"iconv(%s,to=%s,%s) of size %d, buflen %d\n",str,to,from,sz,buflen); */
ic = iconv_open(to, from);
if (ic == (iconv_t) -1) {
char *ignore = SystemImpl__iconv__ascii(str);
const char *tokens[4] = {strerror(errno),from,to,ignore};
if (printError) c_add_message(-1,ErrorType_scripting,ErrorLevel_error,gettext("iconv(\"%s\",to=\"%s\",from=\"%s\") failed: %s"),tokens,4);
if (printError) {
char *ignore = SystemImpl__iconv__ascii(str);
const char *tokens[4] = {strerror(errno),from,to,ignore};
c_add_message(-1,ErrorType_scripting,ErrorLevel_error,gettext("iconv(\"%s\",to=\"%s\",from=\"%s\") failed: %s"),tokens,4);
free(ignore);
}
return (char*) "";
}
in_str = (char*) str;
Expand All @@ -1891,9 +1885,12 @@ extern char* SystemImpl__iconv(const char * str, const char *from, const char *t
count = iconv(ic,&in_str,&sz,&res,&out_sz);
iconv_close(ic);
if (count == -1) {
char *ignore = SystemImpl__iconv__ascii(str);
const char *tokens[4] = {strerror(errno),from,to,ignore};
if (printError) c_add_message(-1,ErrorType_scripting,ErrorLevel_error,gettext("iconv(\"%s\",to=\"%s\",from=\"%s\") failed: %s"),tokens,4);
if (printError) {
char *ignore = SystemImpl__iconv__ascii(str);
const char *tokens[4] = {strerror(errno),from,to,ignore};
c_add_message(-1,ErrorType_scripting,ErrorLevel_error,gettext("iconv(\"%s\",to=\"%s\",from=\"%s\") failed: %s"),tokens,4);
free(ignore);
}
return (char*) "";
}
buf[(buflen-1)-out_sz] = 0;
Expand Down
1 change: 1 addition & 0 deletions Compiler/runtime/systemimpl.h
Expand Up @@ -97,5 +97,6 @@ extern char* SystemImpl__readFileNoNumeric(const char* filename);
extern double SystemImpl__getCurrentTime(void);
extern int SystemImpl__unescapedStringLength(const char* str);
/* Converts str from encoding `from` to encoding `to` via iconv.
 * On failure it returns an empty string; when printError is non-zero an
 * error message is reported as well. */
extern char* SystemImpl__iconv(const char * str, const char *from, const char *to, int printError);
/* Returns a malloc'ed copy of str in which every byte with the high bit set
 * is replaced by '?'. The caller is expected to free() the result. */
extern char* SystemImpl__iconv__ascii(const char * str);

#endif //__SYSTEMIMPL_H
33 changes: 18 additions & 15 deletions Parser/BaseModelica_Lexer.g
Expand Up @@ -318,35 +318,38 @@ STRING : '"' STRING_GUTS '"'
res = SystemImpl__iconv((const char*)text->chars,ModelicaParser_encoding,"UTF-8",0);
if (!*res) {
const char *strs[2];
signed char *buf = (signed char*) strdup((char*)text->chars);
signed char buf[76];
int len = strlen((const char*)buf), i;
res = SystemImpl__iconv__ascii((const char*)text->chars);
/* Avoid printing huge strings */
if (len > 75) {
len = 75;
buf[len] = 0;
buf[len-1] = '.';
buf[len-2] = '.';
buf[len-3] = '.';
len = 72;
buf[len+0] = '.';
buf[len+1] = '.';
buf[len+2] = '.';
buf[len+3] = '0';
}
for (i=0;i<len;i++) {
if (buf[i] < 0)
buf[i] = '?';
for (i=0;i<=len;i++) {
/* Don't break lines in the printed error-message */
if (buf[i] == '\n' || buf[i] == '\r')
if (res[i] == '\n' || res[i] == '\r') {
buf[i] = ' ';
} else {
buf[i] = res[i];
}
}
strs[0] = (const char*) buf;
strs[1] = ModelicaParser_encoding;
c_add_source_message(2, ErrorType_syntax, ErrorLevel_error, "The file was not encoded in \%s:\n \"\%s\".\n"
c_add_source_message(2, ErrorType_syntax, ErrorLevel_warning, "The file was not encoded in \%s:\n \"\%s\".\n"
" Defaulting to 7-bit ASCII with unknown characters replaced by '?'.\n"
" To change encoding when loading a file: loadFile(encoding=\"ISO-XXXX-YY\").\n"
" To change it in a package: add a file package.encoding at the top-level.\n"
" Note: The Modelica Language Specification only allows files encoded in UTF-8.",
strs, 2, $line, $pos+1, $line, $pos+len+1,
ModelicaParser_readonly, ModelicaParser_filename_C_testsuiteFriendly);
free(buf);
ModelicaParser_lexerError = ANTLR3_TRUE;
}
if (strcmp(ModelicaParser_encoding,"UTF-8")!=0) {
text->set8(text,res);
free(res);
/* ModelicaParser_lexerError = ANTLR3_TRUE; */
} else if (strcmp(ModelicaParser_encoding,"UTF-8")!=0) {
text->set8(text,res);
}
}
Expand Down

0 comments on commit 4a5e40b

Please sign in to comment.