Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On Windows, improve handling of Unicode in command line arguments and environment strings #528

Merged
merged 11 commits into from Feb 11, 2017
@@ -512,6 +512,12 @@ ($$)
3rdparty: $thirdpartylibs 3rdparty: $thirdpartylibs
TERM TERM


# Make sure to link with the correct entry point: MinGW builds get the
# -municode flag, every other OS gets an empty string.
$config{mingw_unicode} = $config{os} eq 'mingw32' ? '-municode' : '';

# read list of files to generate # read list of files to generate


open my $listfile, '<', $GENLIST open my $listfile, '<', $GENLIST
@@ -14,7 +14,7 @@ RM_RF = $(PERL) -MExtUtils::Command -e rm_rf
RM_F = $(PERL) -MExtUtils::Command -e rm_f RM_F = $(PERL) -MExtUtils::Command -e rm_f
DYNASM = $(LUA) @dynasmlua@ DYNASM = $(LUA) @dynasmlua@



MINGW_UNICODE = @mingw_unicode@


CONFIG = @config@ CONFIG = @config@
ADDCONFIG = ADDCONFIG =
@@ -488,7 +488,7 @@ clangcheck gcccheck:


moar@exe@: $(MAIN_OBJECTS) @moar@ moar@exe@: $(MAIN_OBJECTS) @moar@
$(MSG) linking $@ $(MSG) linking $@
$(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MAIN_OBJECTS) $(MAIN_LIBS) $(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MINGW_UNICODE) $(MAIN_OBJECTS) $(MAIN_LIBS)


@moarlib@: $(OBJECTS) $(THIRDPARTY) @moarlib@: $(OBJECTS) $(THIRDPARTY)
$(MSG) linking $@ $(MSG) linking $@
@@ -16,7 +16,7 @@
extern char **environ; extern char **environ;
# endif # endif
#else #else
# include <process.h> #include <stdlib.h>
#endif #endif


#ifdef _WIN32 #ifdef _WIN32
@@ -33,7 +33,7 @@ static wchar_t * ANSIToUnicode(MVMuint16 acp, const char *str)
static char * UnicodeToUTF8(const wchar_t *str) static char * UnicodeToUTF8(const wchar_t *str)
{ {
const int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); const int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
char * const result = (char *)MVM_malloc(len * sizeof(char)); char * const result = (char *)MVM_malloc(len + 1);


WideCharToMultiByte(CP_UTF8, 0, str, -1, result, len, NULL, NULL); WideCharToMultiByte(CP_UTF8, 0, str, -1, result, len, NULL, NULL);


@@ -49,29 +49,45 @@ static char * ANSIToUTF8(MVMuint16 acp, const char * str)
return result; return result;
} }


MVM_PUBLIC char **

This comment has been minimized.

Copy link
@zhuomingliang

zhuomingliang Feb 9, 2017

Member

MVM_PUBLIC is not needed here, since it won't be used outside of MoarVM. :)

This comment has been minimized.

Copy link
@nanis

nanis Feb 9, 2017

Author Contributor

At first I did not have MVM_PUBLIC, but then linking failed. Maybe it is an issue with the ordering of filenames.

This comment has been minimized.

Copy link
@jnthn

jnthn Feb 11, 2017

Member

It's needed because it's being called from main.c, which is built into an executable that dynamically links against moar.dll. We can mark it MVM_PUBLIC, but in that case it must also follow the naming convention of having an MVM_ prefix, so we don't cause namespace pollution and make problems for anyone embedding MoarVM. I also suggest declaring it in procops.h, which will eliminate the need for the declaration in main.c.

MVM_UnicodeToUTF8_argv(const int argc, wchar_t **wargv)
{
    /* Builds a char * vector from the wide-character argument vector:
     * one slot per argument plus a trailing NULL sentinel, allocated
     * with MVM_malloc. */
    char **utf8_args = MVM_malloc((argc + 1) * sizeof(*utf8_args));
    int n = 0;

    /* Convert each wide-character argument with UnicodeToUTF8. */
    while (n < argc) {
        utf8_args[n] = UnicodeToUTF8(wargv[n]);
        n++;
    }

    /* Terminate the vector right after the last converted entry. */
    utf8_args[n] = NULL;
    return utf8_args;
}

#endif #endif


MVMObject * MVM_proc_getenvhash(MVMThreadContext *tc) { MVMObject * MVM_proc_getenvhash(MVMThreadContext *tc) {
MVMInstance * const instance = tc->instance; MVMInstance * const instance = tc->instance;
MVMObject * env_hash; MVMObject * env_hash;


#ifdef _WIN32
const MVMuint16 acp = GetACP(); /* We should get ACP at runtime. */
#endif
MVMuint32 pos = 0; MVMuint32 pos = 0;
MVMString *needle = MVM_string_ascii_decode(tc, instance->VMString, STR_WITH_LEN("=")); MVMString *needle = MVM_string_ascii_decode(tc, instance->VMString, STR_WITH_LEN("="));
#ifndef _WIN32
char *env; char *env;
#else
wchar_t *env;
(void) _wgetenv(L"windows"); /* populate _wenviron */
#endif


MVM_gc_root_temp_push(tc, (MVMCollectable **)&needle); MVM_gc_root_temp_push(tc, (MVMCollectable **)&needle);


env_hash = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_hash_type); env_hash = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_hash_type);
MVM_gc_root_temp_push(tc, (MVMCollectable **)&env_hash); MVM_gc_root_temp_push(tc, (MVMCollectable **)&env_hash);


while ((env = environ[pos++]) != NULL) {
#ifndef _WIN32 #ifndef _WIN32
while ((env = environ[pos++]) != NULL) {
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, env, strlen(env)); MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, env, strlen(env));
#else #else
char * const _env = ANSIToUTF8(acp, env); while ((env = _wenviron[pos++]) != NULL) {
char * const _env = UnicodeToUTF8(env);
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, _env, strlen(_env)); MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, _env, strlen(_env));
#endif #endif


@@ -1227,7 +1243,6 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {
}); });
#else #else
MVMROOT(tc, clargs, { MVMROOT(tc, clargs, {
const MVMuint16 acp = GetACP();
const MVMint64 num_clargs = instance->num_clargs; const MVMint64 num_clargs = instance->num_clargs;
MVMint64 count; MVMint64 count;


@@ -1240,10 +1255,8 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {


for (count = 0; count < num_clargs; count++) { for (count = 0; count < num_clargs; count++) {
char *raw_clarg = instance->raw_clargs[count]; char *raw_clarg = instance->raw_clargs[count];
char * const _tmp = ANSIToUTF8(acp, raw_clarg);
MVMString *string = MVM_string_utf8_c8_decode(tc, MVMString *string = MVM_string_utf8_c8_decode(tc,
instance->VMString, _tmp, strlen(_tmp)); instance->VMString, raw_clarg, strlen(raw_clarg));
MVM_free(_tmp);
boxed_str = MVM_repr_box_str(tc, boxed_str = MVM_repr_box_str(tc,
instance->boot_types.BOOTStr, string); instance->boot_types.BOOTStr, string);
MVM_repr_push_o(tc, clargs, boxed_str); MVM_repr_push_o(tc, clargs, boxed_str);
@@ -29,3 +29,9 @@ MVMint64 MVM_proc_time_i(MVMThreadContext *tc);
MVMObject * MVM_proc_clargs(MVMThreadContext *tc); MVMObject * MVM_proc_clargs(MVMThreadContext *tc);
MVMnum64 MVM_proc_time_n(MVMThreadContext *tc); MVMnum64 MVM_proc_time_n(MVMThreadContext *tc);
MVMString * MVM_executable_name(MVMThreadContext *tc); MVMString * MVM_executable_name(MVMThreadContext *tc);

#ifdef _WIN32
#include <wchar.h>
/* Only declared when _WIN32 is defined. Converts the first argc
 * wide-character strings of argv to UTF-8 and returns them as a newly
 * allocated, NULL-terminated array of char *. */
MVM_PUBLIC char ** MVM_UnicodeToUTF8_argv(const int argc, wchar_t **argv);
#endif

@@ -97,18 +97,27 @@ static int parse_flag(const char *arg)
return UNKNOWN_FLAG; return UNKNOWN_FLAG;
} }


#ifndef _WIN32
int main(int argc, char *argv[]) int main(int argc, char *argv[])
#else
int wmain(int argc, wchar_t *wargv[])
#endif
{ {
MVMInstance *instance; MVMInstance *instance;
const char *input_file; const char *input_file;
const char *executable_name = NULL; const char *executable_name = NULL;
const char *lib_path[8]; const char *lib_path[8];


#ifdef _WIN32
char **argv = MVM_UnicodeToUTF8_argv(argc, wargv);
#endif

int dump = 0; int dump = 0;
int full_cleanup = 0; int full_cleanup = 0;
int argi = 1; int argi = 1;
int lib_path_i = 0; int lib_path_i = 0;
int flag; int flag;

for (; (flag = parse_flag(argv[argi])) != NOT_A_FLAG; ++argi) { for (; (flag = parse_flag(argv[argi])) != NOT_A_FLAG; ++argi) {
switch (flag) { switch (flag) {
case FLAG_CRASH: case FLAG_CRASH:
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.