Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On Windows, improve handling of Unicode in command line arguments and environment strings #528

Merged
merged 11 commits into from Feb 11, 2017
Merged
6 changes: 6 additions & 0 deletions Configure.pl
Expand Up @@ -512,6 +512,12 @@ ($$)
3rdparty: $thirdpartylibs
TERM

# make sure to link with the correct entry point
$config{mingw_unicode} = '';
if ($config{os} eq 'mingw32') {
$config{mingw_unicode} = '-municode';
}

# read list of files to generate

open my $listfile, '<', $GENLIST
Expand Down
4 changes: 2 additions & 2 deletions build/Makefile.in
Expand Up @@ -14,7 +14,7 @@ RM_RF = $(PERL) -MExtUtils::Command -e rm_rf
RM_F = $(PERL) -MExtUtils::Command -e rm_f
DYNASM = $(LUA) @dynasmlua@


MINGW_UNICODE = @mingw_unicode@

CONFIG = @config@
ADDCONFIG =
Expand Down Expand Up @@ -488,7 +488,7 @@ clangcheck gcccheck:

moar@exe@: $(MAIN_OBJECTS) @moar@
$(MSG) linking $@
$(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MAIN_OBJECTS) $(MAIN_LIBS)
$(CMD)$(LD) @ldout@$@ $(LDFLAGS) $(MINGW_UNICODE) $(MAIN_OBJECTS) $(MAIN_LIBS)

@moarlib@: $(OBJECTS) $(THIRDPARTY)
$(MSG) linking $@
Expand Down
35 changes: 24 additions & 11 deletions src/io/procops.c
Expand Up @@ -16,7 +16,7 @@
extern char **environ;
# endif
#else
# include <process.h>
#include <stdlib.h>
#endif

#ifdef _WIN32
Expand All @@ -33,7 +33,7 @@ static wchar_t * ANSIToUnicode(MVMuint16 acp, const char *str)
static char * UnicodeToUTF8(const wchar_t *str)
{
const int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
char * const result = (char *)MVM_malloc(len * sizeof(char));
char * const result = (char *)MVM_malloc(len + 1);

WideCharToMultiByte(CP_UTF8, 0, str, -1, result, len, NULL, NULL);

Expand All @@ -49,29 +49,45 @@ static char * ANSIToUTF8(MVMuint16 acp, const char * str)
return result;
}

MVM_PUBLIC char **
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MVM_PUBLIC is not needed here, since it won't be used out of moarvm. :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first I did not have MVM_PUBLIC, but then linking failed. Maybe it is an issue with the ordering of filenames.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's needed because it's being called from main.c, which is an executable that dynamically links against moar.dll. We can mark it MVM_PUBLIC, but in that case it must also follow the naming convention of having an MVM_ prefix, so we don't cause namespace pollution and make problems for anyone embedding MoarVM. I also suggest adding it to procops.h, which will eliminate the need for the decl in main.c.

MVM_UnicodeToUTF8_argv(const int argc, wchar_t **wargv)
{
/* Builds a UTF-8 copy of a wide-character argument vector: each of the
 * first argc entries of wargv is converted with UnicodeToUTF8, and the
 * resulting array is terminated with a NULL entry.
 * NOTE(review): nothing in this function frees the array or the converted
 * strings; presumably the caller owns them -- confirm. */
int i;
/* argc + 1 slots: one per argument plus the terminating NULL. */
char **argv = MVM_malloc((argc + 1) * sizeof(*argv));
for (i = 0; i < argc; ++i)
{
argv[i] = UnicodeToUTF8(wargv[i]);
}
/* The loop leaves i at the first unconverted index, which is where the
 * NULL terminator goes. */
argv[i] = NULL;
return argv;
}

#endif

MVMObject * MVM_proc_getenvhash(MVMThreadContext *tc) {
MVMInstance * const instance = tc->instance;
MVMObject * env_hash;

#ifdef _WIN32
const MVMuint16 acp = GetACP(); /* We should get ACP at runtime. */
#endif
MVMuint32 pos = 0;
MVMString *needle = MVM_string_ascii_decode(tc, instance->VMString, STR_WITH_LEN("="));
#ifndef _WIN32
char *env;
#else
wchar_t *env;
(void) _wgetenv(L"windows"); /* populate _wenviron */
#endif

MVM_gc_root_temp_push(tc, (MVMCollectable **)&needle);

env_hash = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_hash_type);
MVM_gc_root_temp_push(tc, (MVMCollectable **)&env_hash);

while ((env = environ[pos++]) != NULL) {
#ifndef _WIN32
while ((env = environ[pos++]) != NULL) {
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, env, strlen(env));
#else
char * const _env = ANSIToUTF8(acp, env);
while ((env = _wenviron[pos++]) != NULL) {
char * const _env = UnicodeToUTF8(env);
MVMString *str = MVM_string_utf8_c8_decode(tc, instance->VMString, _env, strlen(_env));
#endif

Expand Down Expand Up @@ -1227,7 +1243,6 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {
});
#else
MVMROOT(tc, clargs, {
const MVMuint16 acp = GetACP();
const MVMint64 num_clargs = instance->num_clargs;
MVMint64 count;

Expand All @@ -1240,10 +1255,8 @@ MVMObject * MVM_proc_clargs(MVMThreadContext *tc) {

for (count = 0; count < num_clargs; count++) {
char *raw_clarg = instance->raw_clargs[count];
char * const _tmp = ANSIToUTF8(acp, raw_clarg);
MVMString *string = MVM_string_utf8_c8_decode(tc,
instance->VMString, _tmp, strlen(_tmp));
MVM_free(_tmp);
instance->VMString, raw_clarg, strlen(raw_clarg));
boxed_str = MVM_repr_box_str(tc,
instance->boot_types.BOOTStr, string);
MVM_repr_push_o(tc, clargs, boxed_str);
Expand Down
6 changes: 6 additions & 0 deletions src/io/procops.h
Expand Up @@ -29,3 +29,9 @@ MVMint64 MVM_proc_time_i(MVMThreadContext *tc);
MVMObject * MVM_proc_clargs(MVMThreadContext *tc);
MVMnum64 MVM_proc_time_n(MVMThreadContext *tc);
MVMString * MVM_executable_name(MVMThreadContext *tc);

#ifdef _WIN32
#include <wchar.h>
MVM_PUBLIC char ** MVM_UnicodeToUTF8_argv(const int argc, wchar_t **argv);
#endif

9 changes: 9 additions & 0 deletions src/main.c
Expand Up @@ -97,18 +97,27 @@ static int parse_flag(const char *arg)
return UNKNOWN_FLAG;
}

#ifndef _WIN32
int main(int argc, char *argv[])
#else
int wmain(int argc, wchar_t *wargv[])
#endif
{
MVMInstance *instance;
const char *input_file;
const char *executable_name = NULL;
const char *lib_path[8];

#ifdef _WIN32
char **argv = MVM_UnicodeToUTF8_argv(argc, wargv);
#endif

int dump = 0;
int full_cleanup = 0;
int argi = 1;
int lib_path_i = 0;
int flag;

for (; (flag = parse_flag(argv[argi])) != NOT_A_FLAG; ++argi) {
switch (flag) {
case FLAG_CRASH:
Expand Down