Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On Windows, improve handling of Unicode in command line arguments and environment strings #528

Merged
merged 11 commits into from
Feb 11, 2017
Merged
6 changes: 6 additions & 0 deletions Configure.pl
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -512,6 +512,12 @@ ($$)
3rdparty: $thirdpartylibs 3rdparty: $thirdpartylibs
TERM TERM


# make sure to link with the correct entry point
$config{mingw_unicode} = '';
if ($config{os} eq 'mingw32') {
$config{mingw_unicode} = '-municode';
}

# read list of files to generate # read list of files to generate


open my $listfile, '<', $GENLIST open my $listfile, '<', $GENLIST
Expand Down
4 changes: 2 additions & 2 deletions build/Makefile.in
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ RM_RF = $(PERL) -MExtUtils::Command -e rm_rf
RM_F = $(PERL) -MExtUtils::Command -e rm_f RM_F = $(PERL) -MExtUtils::Command -e rm_f
DYNASM = $(LUA) @dynasmlua@ DYNASM = $(LUA) @dynasmlua@



MINGW_UNICODE = @mingw_unicode@


CONFIG = @config@ CONFIG = @config@
ADDCONFIG = ADDCONFIG =
Expand Down Expand Up @@ -488,7 +488,7 @@ clangcheck gcccheck:


moar@exe@: $(MAIN_OBJECTS) @moar@ moar@exe@: $(MAIN_OBJECTS) @moar@
$(MSG) linking $@ $(MSG) linking $@
$(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MAIN_OBJECTS) $(MAIN_LIBS) $(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MINGW_UNICODE) $(MAIN_OBJECTS) $(MAIN_LIBS)


@moarlib@: $(OBJECTS) $(THIRDPARTY) @moarlib@: $(OBJECTS) $(THIRDPARTY)
$(MSG) linking $@ $(MSG) linking $@
Expand Down
35 changes: 24 additions & 11 deletions src/io/procops.c
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
extern char **environ; extern char **environ;
# endif # endif
#else #else
# include <process.h> #include <stdlib.h>
#endif #endif


#ifdef _WIN32 #ifdef _WIN32
Expand All @@ -33,7 +33,7 @@ static wchar_t * ANSIToUnicode(MVMuint16 acp, const char *str)
static char * UnicodeToUTF8(const wchar_t *str) static char * UnicodeToUTF8(const wchar_t *str)
{ {
const int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); const int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
char * const result = (char *)MVM_malloc(len * sizeof(char)); char * const result = (char *)MVM_malloc(len + 1);


WideCharToMultiByte(CP_UTF8, 0, str, -1, result, len, NULL, NULL); WideCharToMultiByte(CP_UTF8, 0, str, -1, result, len, NULL, NULL);


Expand All @@ -49,29 +49,45 @@ static char * ANSIToUTF8(MVMuint16 acp, const char * str)
return result; return result;
} }


MVM_PUBLIC char **
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MVM_PUBLIC is not needed here, since it won't be used outside of MoarVM. :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first I did not have MVM_PUBLIC, but then link failed. Maybe it is an issue with the ordering of filenames.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's needed because it's being called from main.c, which is an executable that dynamically links against moar.dll. We can mark it MVM_PUBLIC, but in that case it must also follow the naming convention of having an MVM_ prefix, so we don't cause namespace pollution and make problems for anyone embedding MoarVM. I also suggest adding it to procops.h, which will eliminate the need for the declaration in main.c.

MVM_UnicodeToUTF8_argv(const int argc, wchar_t **wargv)
{
    /* Build a char* argument vector from a wide-character one.
     *
     * argc  - number of entries in wargv to convert.
     * wargv - wide-character argument strings.
     *
     * Returns an array of argc strings, each produced by UnicodeToUTF8(),
     * followed by a terminating NULL entry. The array is obtained from
     * MVM_malloc and is not freed here. */
    char **utf8_args = MVM_malloc((argc + 1) * sizeof(*utf8_args));
    int idx = 0;

    while (idx < argc) {
        utf8_args[idx] = UnicodeToUTF8(wargv[idx]);
        idx++;
    }

    /* One extra slot was reserved above for the NULL sentinel. */
    utf8_args[idx] = NULL;

    return utf8_args;
}

#endif #endif


MVMObject * MVM_proc_getenvhash(MVMThreadContext *tc) { MVMObject * MVM_proc_getenvhash(MVMThreadContext *tc) {
MVMInstance * const instance = tc->instance; MVMInstance * const instance = tc->instance;
MVMObject * env_hash; MVMObject * env_hash;


#ifdef _WIN32
const MVMuint16 acp = GetACP(); /* We should get ACP at runtime. */
#endif
MVMuint32 pos = 0; MVMuint32 pos = 0;
MVMString *needle = MVM_string_ascii_decode(tc, instance->VMString, STR_WITH_LEN("=")); MVMString *needle = MVM_string_ascii_decode(tc, instance->VMString, STR_WITH_LEN("="));
#ifndef _WIN32
char *env; char *env;
#else
wchar_t *env;
(void) _wgetenv(L"windows"); /* populate _wenviron */
#endif


MVM_gc_root_temp_push(tc, (MVMCollectable **)&needle); MVM_gc_root_temp_push(tc, (MVMCollectable **)&needle);


env_hash = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_hash_type); env_hash = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_hash_type);
MVM_gc_root_temp_push(tc, (MVMCollectable **)&env_hash); MVM_gc_root_temp_push(tc, (MVMCollectable **)&env_hash);


while ((env = environ[pos++]) != NULL) {
#ifndef _WIN32 #ifndef _WIN32
while ((env = environ[pos++]) != NULL) {
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, env, strlen(env)); MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, env, strlen(env));
#else #else
char * const _env = ANSIToUTF8(acp, env); while ((env = _wenviron[pos++]) != NULL) {
char * const _env = UnicodeToUTF8(env);
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, _env, strlen(_env)); MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, _env, strlen(_env));
#endif #endif


Expand Down Expand Up @@ -1227,7 +1243,6 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {
}); });
#else #else
MVMROOT(tc, clargs, { MVMROOT(tc, clargs, {
const MVMuint16 acp = GetACP();
const MVMint64 num_clargs = instance->num_clargs; const MVMint64 num_clargs = instance->num_clargs;
MVMint64 count; MVMint64 count;


Expand All @@ -1240,10 +1255,8 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {


for (count = 0; count < num_clargs; count++) { for (count = 0; count < num_clargs; count++) {
char *raw_clarg = instance->raw_clargs[count]; char *raw_clarg = instance->raw_clargs[count];
char * const _tmp = ANSIToUTF8(acp, raw_clarg);
MVMString *string = MVM_string_utf8_c8_decode(tc, MVMString *string = MVM_string_utf8_c8_decode(tc,
instance->VMString, _tmp, strlen(_tmp)); instance->VMString, raw_clarg, strlen(raw_clarg));
MVM_free(_tmp);
boxed_str = MVM_repr_box_str(tc, boxed_str = MVM_repr_box_str(tc,
instance->boot_types.BOOTStr, string); instance->boot_types.BOOTStr, string);
MVM_repr_push_o(tc, clargs, boxed_str); MVM_repr_push_o(tc, clargs, boxed_str);
Expand Down
6 changes: 6 additions & 0 deletions src/io/procops.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ MVMint64 MVM_proc_time_i(MVMThreadContext *tc);
MVMObject * MVM_proc_clargs(MVMThreadContext *tc); MVMObject * MVM_proc_clargs(MVMThreadContext *tc);
MVMnum64 MVM_proc_time_n(MVMThreadContext *tc); MVMnum64 MVM_proc_time_n(MVMThreadContext *tc);
MVMString * MVM_executable_name(MVMThreadContext *tc); MVMString * MVM_executable_name(MVMThreadContext *tc);

#ifdef _WIN32
#include <wchar.h>
MVM_PUBLIC char ** MVM_UnicodeToUTF8_argv(const int argc, wchar_t **argv);
#endif

9 changes: 9 additions & 0 deletions src/main.c
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -97,18 +97,27 @@ static int parse_flag(const char *arg)
return UNKNOWN_FLAG; return UNKNOWN_FLAG;
} }


#ifndef _WIN32
int main(int argc, char *argv[]) int main(int argc, char *argv[])
#else
int wmain(int argc, wchar_t *wargv[])
#endif
{ {
MVMInstance *instance; MVMInstance *instance;
const char *input_file; const char *input_file;
const char *executable_name = NULL; const char *executable_name = NULL;
const char *lib_path[8]; const char *lib_path[8];


#ifdef _WIN32
char **argv = MVM_UnicodeToUTF8_argv(argc, wargv);
#endif

int dump = 0; int dump = 0;
int full_cleanup = 0; int full_cleanup = 0;
int argi = 1; int argi = 1;
int lib_path_i = 0; int lib_path_i = 0;
int flag; int flag;

for (; (flag = parse_flag(argv[argi])) != NOT_A_FLAG; ++argi) { for (; (flag = parse_flag(argv[argi])) != NOT_A_FLAG; ++argi) {
switch (flag) { switch (flag) {
case FLAG_CRASH: case FLAG_CRASH:
Expand Down