-
Notifications
You must be signed in to change notification settings - Fork 529
/
locale.c
6981 lines (5709 loc) · 255 KB
/
locale.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* locale.c
*
* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
* 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/*
* A Elbereth Gilthoniel,
* silivren penna míriel
* o menel aglar elenath!
* Na-chaered palan-díriel
* o galadhremmin ennorath,
* Fanuilos, le linnathon
* nef aear, si nef aearon!
*
* [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
*/
/* utility functions for handling locale-specific stuff like what
* character represents the decimal point.
*
* All C programs have an underlying locale. Perl code generally doesn't pay
* any attention to it except within the scope of a 'use locale'. For most
* categories, it accomplishes this by just using different operations if it is
* in such scope than if not. However, various libc functions called by Perl
* are affected by the LC_NUMERIC category, so there are macros in perl.h that
* are used to toggle between the current locale and the C locale depending on
* the desired behavior of those functions at the moment. And, LC_MESSAGES is
* switched to the C locale for outputting the message unless within the scope
* of 'use locale'.
*
* This code now has multi-thread-safe locale handling on systems that support
* that. This is completely transparent to most XS code. On earlier systems,
* it would be possible to emulate thread-safe locales, but this likely would
* involve a lot of locale switching, and would require XS code changes.
* Macros could be written so that the code wouldn't have to know which type of
* system is being used.
*
* Table-driven code is used for simplicity and clarity, as many operations
* differ only in which category is being worked on. However the system
* categories need not be small contiguous integers, so do not lend themselves
* to table lookup. Instead we have created our own equivalent values which
* are all small contiguous non-negative integers, and translation functions
* between the two sets. For category 'LC_foo', the name of our index is
* LC_foo_INDEX_. Various parallel tables, indexed by these, are used.
*
* Many of the macros and functions in this file have one of the suffixes '_c',
* '_r', or '_i'. khw found these useful in remembering what type of locale
* category to use as their parameter. '_r' takes an int category number as
* passed to setlocale(), like LC_ALL, LC_CTYPE, etc. The 'r' indicates that
* the value isn't known until runtime. '_c' also indicates such a category
* number, but its value is known at compile time. These are both converted
* into unsigned indexes into various tables of category information, where the
* real work is generally done. The tables are generated at compile-time based
* on platform characteristics and Configure options. They hide from the code
* many of the vagaries of the different locale implementations out there. You
* may have already guessed that '_i' indicates the parameter is such an
* unsigned index. Converting from '_r' to '_i' requires run-time lookup.
* '_c' is used to get cpp to do this at compile time. To avoid the runtime
* expense, the code is structured to use '_r' at the API level, and once
* converted, everything possible is done using the table indexes.
*
* On unthreaded perls, most operations expand out to just the basic
* setlocale() calls. The same is true on threaded perls on modern Windows
* systems where the same API, after set up, is used for thread-safe locale
* handling. On other systems, there is a completely different API, specified
* in POSIX 2008, to do thread-safe locales. On these systems, our
* emulate_setlocale_i() function is used to hide the different API from the
* outside. This makes it completely transparent to most XS code.
*
* A huge complicating factor is that the LC_NUMERIC category is normally held
* in the C locale, except during those relatively rare times when it needs to
* be in the underlying locale. There is a bunch of code to accomplish this,
* and to allow easy switches from one state to the other.
*
* z/OS (os390) is an outlier. Locales really don't work under threads when
* either the radix character isn't a dot, or attempts are made to change
* locales after the first thread is created. The reason is that IBM has made
* it thread-safe by refusing to change locales (returning failure if
* attempted) any time after an application has called pthread_create() to
* create another thread. The expectation is that an application will set up
* its locale information before the first fork, and be stable thereafter. But
* perl toggles LC_NUMERIC if the locale's radix character isn't a dot, as do
* the other toggles, which are less common.
*/
/* If the environment says to, we can output debugging information during
* initialization. This is done before option parsing, and before any thread
* creation, so can be a file-level static. (Must come before #including
* perl.h) */
#ifdef DEBUGGING
static int debug_initialization = 0;
# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v)
# define DEBUG_LOCALE_INITIALIZATION_ debug_initialization
#else
# define debug_initialization 0
# define DEBUG_INITIALIZATION_set(v)
#endif
#include "EXTERN.h"
#define PERL_IN_LOCALE_C
#include "perl_langinfo.h"
#include "perl.h"
#include "reentr.h"
#ifdef I_WCHAR
# include <wchar.h>
#endif
#ifdef I_WCTYPE
# include <wctype.h>
#endif
/* Returns the Unix errno portion; ignoring any others. This is a macro here
* instead of putting it into perl.h, because unclear to khw what should be
* done generally. */
#define GET_ERRNO saved_errno
/* strlen() of a literal string constant. We might want this more general,
* but using it in just this file for now. A problem with more generality is
* the compiler warnings about comparing unlike signs */
#define STRLENs(s) (sizeof("" s "") - 1)
/* Default values come from the C locale */
static const char C_codeset[] = "ANSI_X3.4-1968";
static const char C_decimal_point[] = ".";
static const char C_thousands_sep[] = "";
/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the
* return of setlocale(), then this is extremely likely to be the C or POSIX
* locale. However, the output of setlocale() is documented to be opaque, but
* the odds are extremely small that it would return these two strings for some
* other locale. Note that VMS in these two locales includes many non-ASCII
* characters as controls and punctuation (below are hex bytes):
* cntrl: 84-97 9B-9F
* punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD
* Oddly, none there are listed as alphas, though some represent alphabetics
* http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */
#define isNAME_C_OR_POSIX(name) \
( (name) != NULL \
&& (( *(name) == 'C' && (*(name + 1)) == '\0') \
|| strEQ((name), "POSIX")))
#if defined(HAS_THREAD_SAFE_NL_LANGINFO_L) || defined(HAS_NL_LANGINFO)
# define HAS_SOME_LANGINFO
#endif
#if defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)
# define HAS_SOME_LOCALECONV
#endif
/* For use in calling my_langinfo() */
#define USE_UNDERLYING_NUMERIC ((char *) 1)
#define my_langinfo_c(item, category, locale, retbufp, retbuf_sizep, utf8ness) \
my_langinfo_i(item, category##_INDEX_, locale, retbufp, \
retbuf_sizep, utf8ness)
#ifdef USE_LOCALE
/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
* looked up. This is in the form of a C string: */
# define UTF8NESS_SEP "\v"
# define UTF8NESS_PREFIX "\f"
/* So, the string looks like:
*
* \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0
*
* where the digit 0 after the \a indicates that the locale starting just
* after the preceding \v is not UTF-8, and the digit 1 mean it is. */
STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
# define C_and_POSIX_utf8ness UTF8NESS_SEP "C" UTF8NESS_PREFIX "0" \
UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
/* The cache is initialized to C_and_POSIX_utf8ness at start up. These are
* kept there always. The remining portion of the cache is LRU, with the
* oldest looked-up locale at the tail end */
# ifdef DEBUGGING
# define setlocale_debug_string_c(category, locale, result) \
setlocale_debug_string_i(category##_INDEX_, locale, result)
# define setlocale_debug_string_r(category, locale, result) \
setlocale_debug_string_i(get_category_index(category, locale), \
locale, result)
# endif
# define toggle_locale_c(cat, locale) toggle_locale_i(cat##_INDEX_, locale)
# define restore_toggled_locale_c(cat, locale) \
restore_toggled_locale_i(cat##_INDEX_, locale)
/* Two parallel arrays indexed by our mapping of category numbers into small
* non-negative indexes; first the locale categories Perl uses on this system,
* used to do the inverse mapping. The second array is their names. These
* arrays are in mostly arbitrary order. */
STATIC const int categories[] = {
# ifdef USE_LOCALE_NUMERIC
LC_NUMERIC,
# endif
# ifdef USE_LOCALE_CTYPE
LC_CTYPE,
# endif
# ifdef USE_LOCALE_COLLATE
LC_COLLATE,
# endif
# ifdef USE_LOCALE_TIME
LC_TIME,
# endif
# ifdef USE_LOCALE_MESSAGES
LC_MESSAGES,
# endif
# ifdef USE_LOCALE_MONETARY
LC_MONETARY,
# endif
# ifdef USE_LOCALE_ADDRESS
LC_ADDRESS,
# endif
# ifdef USE_LOCALE_IDENTIFICATION
LC_IDENTIFICATION,
# endif
# ifdef USE_LOCALE_MEASUREMENT
LC_MEASUREMENT,
# endif
# ifdef USE_LOCALE_PAPER
LC_PAPER,
# endif
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE,
# endif
# ifdef USE_LOCALE_SYNTAX
LC_SYNTAX,
# endif
# ifdef USE_LOCALE_TOD
LC_TOD,
# endif
# ifdef LC_ALL
LC_ALL,
# endif
/* Placeholder as a precaution if code fails to check the return of
* get_category_index(), which returns this element to indicate an error */
-1
};
/* The top-most real element is LC_ALL */
STATIC const char * const category_names[] = {
# ifdef USE_LOCALE_NUMERIC
"LC_NUMERIC",
# endif
# ifdef USE_LOCALE_CTYPE
"LC_CTYPE",
# endif
# ifdef USE_LOCALE_COLLATE
"LC_COLLATE",
# endif
# ifdef USE_LOCALE_TIME
"LC_TIME",
# endif
# ifdef USE_LOCALE_MESSAGES
"LC_MESSAGES",
# endif
# ifdef USE_LOCALE_MONETARY
"LC_MONETARY",
# endif
# ifdef USE_LOCALE_ADDRESS
"LC_ADDRESS",
# endif
# ifdef USE_LOCALE_IDENTIFICATION
"LC_IDENTIFICATION",
# endif
# ifdef USE_LOCALE_MEASUREMENT
"LC_MEASUREMENT",
# endif
# ifdef USE_LOCALE_PAPER
"LC_PAPER",
# endif
# ifdef USE_LOCALE_TELEPHONE
"LC_TELEPHONE",
# endif
# ifdef USE_LOCALE_SYNTAX
"LC_SYNTAX",
# endif
# ifdef USE_LOCALE_TOD
"LC_TOD",
# endif
# ifdef LC_ALL
"LC_ALL",
# endif
/* Placeholder as a precaution if code fails to check the return of
* get_category_index(), which returns this element to indicate an error */
NULL
};
/* A few categories require additional setup when they are changed. This table
* points to the functions that do that setup */
STATIC void (*update_functions[]) (pTHX_ const char *) = {
# ifdef USE_LOCALE_NUMERIC
S_new_numeric,
# endif
# ifdef USE_LOCALE_CTYPE
S_new_ctype,
# endif
# ifdef USE_LOCALE_COLLATE
S_new_collate,
# endif
# ifdef USE_LOCALE_TIME
NULL,
# endif
# ifdef USE_LOCALE_MESSAGES
NULL,
# endif
# ifdef USE_LOCALE_MONETARY
NULL,
# endif
# ifdef USE_LOCALE_ADDRESS
NULL,
# endif
# ifdef USE_LOCALE_IDENTIFICATION
NULL,
# endif
# ifdef USE_LOCALE_MEASUREMENT
NULL,
# endif
# ifdef USE_LOCALE_PAPER
NULL,
# endif
# ifdef USE_LOCALE_TELEPHONE
NULL,
# endif
# ifdef USE_LOCALE_SYNTAX
NULL,
# endif
# ifdef USE_LOCALE_TOD
NULL,
# endif
/* No harm done to have this even without an LC_ALL */
S_new_LC_ALL,
/* Placeholder as a precaution if code fails to check the return of
* get_category_index(), which returns this element to indicate an error */
NULL
};
# ifdef LC_ALL
/* On systems with LC_ALL, it is kept in the highest index position. (-2
* to account for the final unused placeholder element.) */
# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
# else
/* On systems without LC_ALL, we pretend it is there, one beyond the real
* top element, hence in the unused placeholder element. */
# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
# endif
/* Pretending there is an LC_ALL element just above allows us to avoid most
* special cases. Most loops through these arrays in the code below are
* written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work
* on either type of system. But the code must be written to not access the
* element at 'LC_ALL_INDEX_' except on platforms that have it. This can be
* checked for at compile time by using the #define LC_ALL_INDEX_ which is only
* defined if we do have LC_ALL. */
STATIC unsigned int
S_get_category_index(const int category, const char * locale)
{
/* Given a category, return the equivalent internal index we generally use
* instead.
*
* 'locale' is for use in any generated diagnostics, and may be NULL
*
* Some sort of hash could be used instead of this loop, but the number of
* elements is so far at most 12 */
unsigned int i;
const char * conditional_warn_text = "; can't set it to ";
PERL_ARGS_ASSERT_GET_CATEGORY_INDEX;
# ifdef LC_ALL
for (i = 0; i <= LC_ALL_INDEX_; i++)
# else
for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)
# endif
{
if (category == categories[i]) {
dTHX_DEBUGGING;
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: index of category %d (%s) is %d\n",
__FILE__, __LINE__, category, category_names[i], i));
return i;
}
}
/* Here, we don't know about this category, so can't handle it. */
if (! locale) {
locale = "";
conditional_warn_text = "";
}
/* diag_listed_as: Unknown locale category %d; can't set it to %s */
Perl_warner_nocontext(packWARN(WARN_LOCALE),
"Unknown locale category %d%s%s",
category, conditional_warn_text, locale);
# ifdef EINVAL
SETERRNO(EINVAL, LIB_INVARG);
# endif
/* Return an out-of-bounds value */
return NOMINAL_LC_ALL_INDEX + 1;
}
STATIC const char *
S_category_name(const int category)
{
unsigned int index;
index = get_category_index(category, NULL);
if (index <= NOMINAL_LC_ALL_INDEX) {
return category_names[index];
}
return Perl_form_nocontext("%d (unknown)", category);
}
#endif /* ifdef USE_LOCALE */
#ifdef USE_POSIX_2008_LOCALE
STATIC locale_t
S_use_curlocale_scratch(pTHX)
{
/* This function is used to hide from the caller the case where the current
* locale_t object in POSIX 2008 is the global one, which is illegal in
* many of the P2008 API calls. This checks for that and, if necessary
* creates a proper P2008 object. Any prior object is deleted, as is any
* remaining object during global destruction. */
locale_t cur = uselocale((locale_t) 0);
if (cur != LC_GLOBAL_LOCALE) {
return cur;
}
if (PL_scratch_locale_obj) {
freelocale(PL_scratch_locale_obj);
}
PL_scratch_locale_obj = duplocale(LC_GLOBAL_LOCALE);
return PL_scratch_locale_obj;
}
#endif
#define setlocale_failure_panic_c( \
cat, current, failed, caller_0_line, caller_1_line) \
setlocale_failure_panic_i(cat##_INDEX_, current, failed, \
caller_0_line, caller_1_line)
/* porcelain_setlocale() presents a consistent POSIX-compliant interface to
* setlocale(). Windows requres a customized base-level setlocale() */
#ifdef WIN32
# define porcelain_setlocale(cat, locale) win32_setlocale(cat, locale)
#else
# define porcelain_setlocale(cat, locale) \
((const char *) setlocale(cat, locale))
#endif
/* The next layer up is to catch vagaries and bugs in the libc setlocale return
* value */
#ifdef stdize_locale
# define stdized_setlocale(cat, locale) \
stdize_locale(cat, porcelain_setlocale(cat, locale), \
&PL_stdize_locale_buf, &PL_stdize_locale_bufsize)
#else
# define stdized_setlocale(cat, locale) porcelain_setlocale(cat, locale)
#endif
/* The next many lines form a layer above the close-to-the-metal 'porcelain'
* and 'stdized' macros. They are used to present a uniform API to the rest of
* the code in this file in spite of the disparate underlying implementations.
* */
#ifndef USE_POSIX_2008_LOCALE
/* For non-threaded perls (which we are not to use the POSIX 2008 API on), or a
* thread-safe Windows one in which threading is invisible to us, the added
* layer just calls the base-level functions. See the introductory comments in
* this file for the meaning of the suffixes '_c', '_r', '_i'. */
# define setlocale_r(cat, locale) stdized_setlocale(cat, locale)
# define setlocale_i(i, locale) setlocale_r(categories[i], locale)
# define setlocale_c(cat, locale) setlocale_r(cat, locale)
# define void_setlocale_i(i, locale) \
STMT_START { \
if (! porcelain_setlocale(categories[i], locale)) { \
setlocale_failure_panic_i(i, NULL, locale, __LINE__, 0); \
NOT_REACHED; /* NOTREACHED */ \
} \
} STMT_END
# define void_setlocale_c(cat, locale) \
void_setlocale_i(cat##_INDEX_, locale)
# define void_setlocale_r(cat, locale) \
void_setlocale_i(get_category_index(cat, locale), locale)
# define bool_setlocale_r(cat, locale) \
cBOOL(porcelain_setlocale(cat, locale))
# define bool_setlocale_i(i, locale) \
bool_setlocale_c(categories[i], locale)
# define bool_setlocale_c(cat, locale) bool_setlocale_r(cat, locale)
# define querylocale_r(cat) setlocale_r(cat, NULL)
# define querylocale_c(cat) querylocale_r(cat)
# define querylocale_i(i) querylocale_c(categories[i])
#else /* Below uses POSIX 2008 */
/* Here, there is a completely different API to get thread-safe locales. We
* emulate the setlocale() API with our own function(s). setlocale categories,
* like LC_NUMERIC, are not valid here for the POSIX 2008 API. Instead, there
* are equivalents, like LC_NUMERIC_MASK, which we use instead, converting to
* by using get_category_index() followed by table lookup. */
# define emulate_setlocale_c(cat, locale, recalc_LC_ALL, line) \
emulate_setlocale_i(cat##_INDEX_, locale, recalc_LC_ALL, line)
/* A wrapper for the macros below. TRUE => do recalculate LC_ALL */
# define common_emulate_setlocale(i, locale) \
emulate_setlocale_i(i, locale, TRUE, __LINE__)
# define setlocale_i(i, locale) common_emulate_setlocale(i, locale)
# define setlocale_c(cat, locale) setlocale_i(cat##_INDEX_, locale)
# define setlocale_r(cat, locale) \
setlocale_i(get_category_index(cat, locale), locale)
# define void_setlocale_i(i, locale) ((void) setlocale_i(i, locale))
# define void_setlocale_c(cat, locale) \
void_setlocale_i(cat##_INDEX_, locale)
# define void_setlocale_r(cat, locale) ((void) setlocale_r(cat, locale))
# define bool_setlocale_i(i, locale) cBOOL(setlocale_i(i, locale))
# define bool_setlocale_c(cat, locale) \
bool_setlocale_i(cat##_INDEX_, locale)
# define bool_setlocale_r(cat, locale) cBOOL(setlocale_r(cat, locale))
# define querylocale_i(i) my_querylocale_i(i)
# define querylocale_c(cat) querylocale_i(cat##_INDEX_)
# define querylocale_r(cat) querylocale_i(get_category_index(cat,NULL))
# ifndef USE_QUERYLOCALE
# define USE_PL_CURLOCALES
# else
# define isSINGLE_BIT_SET(mask) isPOWER_OF_2(mask)
/* This code used to think querylocale() was valid on LC_ALL. Make sure
* all instances of that have been removed */
# define QUERYLOCALE_ASSERT(index) \
__ASSERT_(isSINGLE_BIT_SET(category_masks[index]))
# if ! defined(HAS_QUERYLOCALE) && defined(_NL_LOCALE_NAME)
# define querylocale_l(index, locale_obj) \
(QUERYLOCALE_ASSERT(index) \
nl_langinfo_l(_NL_LOCALE_NAME(categories[index]), locale_obj))
# else
# define querylocale_l(index, locale_obj) \
(QUERYLOCALE_ASSERT(index) \
querylocale(category_masks[index], locale_obj))
# endif
# endif
# if defined(__GLIBC__) && defined(USE_LOCALE_MESSAGES)
# define HAS_GLIBC_LC_MESSAGES_BUG
# include <libintl.h>
# endif
/* A fourth array, parallel to the ones above to map from category to its
* equivalent mask */
STATIC const int category_masks[] = {
# ifdef USE_LOCALE_NUMERIC
LC_NUMERIC_MASK,
# endif
# ifdef USE_LOCALE_CTYPE
LC_CTYPE_MASK,
# endif
# ifdef USE_LOCALE_COLLATE
LC_COLLATE_MASK,
# endif
# ifdef USE_LOCALE_TIME
LC_TIME_MASK,
# endif
# ifdef USE_LOCALE_MESSAGES
LC_MESSAGES_MASK,
# endif
# ifdef USE_LOCALE_MONETARY
LC_MONETARY_MASK,
# endif
# ifdef USE_LOCALE_ADDRESS
LC_ADDRESS_MASK,
# endif
# ifdef USE_LOCALE_IDENTIFICATION
LC_IDENTIFICATION_MASK,
# endif
# ifdef USE_LOCALE_MEASUREMENT
LC_MEASUREMENT_MASK,
# endif
# ifdef USE_LOCALE_PAPER
LC_PAPER_MASK,
# endif
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE_MASK,
# endif
# ifdef USE_LOCALE_SYNTAX
LC_SYNTAX_MASK,
# endif
# ifdef USE_LOCALE_TOD
LC_TOD_MASK,
# endif
/* LC_ALL can't be turned off by a Configure
* option, and in Posix 2008, should always be
* here, so compile it in unconditionally.
* This could catch some glitches at compile
* time */
LC_ALL_MASK,
/* Placeholder as a precaution if code fails to check the return of
* get_category_index(), which returns this element to indicate an error */
0
};
# define my_querylocale_c(cat) my_querylocale_i(cat##_INDEX_)
STATIC const char *
S_my_querylocale_i(pTHX_ const unsigned int index)
{
/* This function returns the name of the locale category given by the input
* index into our parallel tables of them.
*
* POSIX 2008, for some sick reason, chose not to provide a method to find
* the category name of a locale, discarding a basic linguistic tenet that
* for any object, people will create a name for it. Some vendors have
* created a querylocale() function to do just that. This function is a
* lot simpler to implement on systems that have this. Otherwise, we have
* to keep track of what the locale has been set to, so that we can return
* its name so as to emulate setlocale(). It's also possible for C code in
* some library to change the locale without us knowing it, though as of
* September 2017, there are no occurrences in CPAN of uselocale(). Some
* libraries do use setlocale(), but that changes the global locale, and
* threads using per-thread locales will just ignore those changes. */
int category;
const locale_t cur_obj = uselocale((locale_t) 0);
const char * retval;
PERL_ARGS_ASSERT_MY_QUERYLOCALE_I;
assert(index <= NOMINAL_LC_ALL_INDEX);
category = categories[index];
DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: my_querylocale_i(%s) on %p\n",
__FILE__, __LINE__, category_names[index], cur_obj));
if (cur_obj == LC_GLOBAL_LOCALE) {
retval = porcelain_setlocale(category, NULL);
}
else {
# ifdef USE_QUERYLOCALE
/* We don't currently keep records when there is querylocale(), so have
* to get it anew each time */
retval = (index == LC_ALL_INDEX_)
? calculate_LC_ALL(cur_obj)
: querylocale_l(index, cur_obj);
# else
/* But we do have up-to-date values when we keep our own records */
retval = PL_curlocales[index];
# endif
}
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: my_querylocale_i(%s) returning '%s'\n",
__FILE__, __LINE__, category_names[index], retval));
return retval;
}
# define LOOPING -1 /* Special value for 'recalc_LC_ALL' parameter below */
# ifdef USE_PL_CURLOCALES
STATIC const char *
S_update_PL_curlocales_i(pTHX_
const unsigned int index,
const char * new_locale,
/* 0 => skip recalculating LC_ALL;
* 1 => do recalculate LC_ALL
* LOOPING => this call is part of a loop that goes
* through all categories by index-order.
* Recalculate only on the final iteration,
* after all values are known */
int recalc_LC_ALL)
{
/* This is a helper function for emulate_setlocale_i(), mostly used to
* make that function easier to read. */
PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I;
assert(index <= NOMINAL_LC_ALL_INDEX);
if (index == LC_ALL_INDEX_) {
unsigned int i;
/* For LC_ALL, we change all individual categories to correspond */
/* PL_curlocales is a parallel array, so has same
* length as 'categories' */
for (i = 0; i < LC_ALL_INDEX_; i++) {
Safefree(PL_curlocales[i]);
PL_curlocales[i] = savepv(new_locale);
}
recalc_LC_ALL = TRUE;
}
else {
/* Update the single category's record */
Safefree(PL_curlocales[index]);
PL_curlocales[index] = savepv(new_locale);
if (recalc_LC_ALL == LOOPING) {
recalc_LC_ALL = (index == NOMINAL_LC_ALL_INDEX - 1);
}
}
if (recalc_LC_ALL) { /* And recalculate LC_ALL */
Safefree(PL_curlocales[LC_ALL_INDEX_]);
PL_curlocales[LC_ALL_INDEX_] =
savepv(calculate_LC_ALL(PL_curlocales));
}
return PL_curlocales[index];
}
STATIC const char *
S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale, const line_t line)
{
/* This function parses the value of the LC_ALL locale, assuming glibc
* syntax, and sets each individual category on the system to the proper
* value.
*
* This is likely to only ever be called from one place, so exists to make
* the calling function easier to read by moving this ancillary code out of
* the main line.
*
* The locale for each category is independent of the other categories.
* Often, they are all the same, but certainly not always. Perl, in fact,
* usually keeps LC_NUMERIC in the C locale, regardless of the underlying
* locale. LC_ALL has to be able to represent the case of when there are
* varying locales. Platforms have differing ways of representing this.
* Because of this, the code in this file goes to lengths to avoid the
* issue, generally looping over the component categories instead of
* referring to them in the aggregate, wherever possible. However, there
* are cases where we have to parse our own constructed aggregates, which use
* the glibc syntax. */
unsigned int i;
const char * s = locale;
const char * e = locale + strlen(locale);
const char * p = s;
const char * category_end;
const char * name_start;
const char * name_end;
const char * locale_on_entry = savepv(querylocale_c(LC_ALL));
const char * retval;
PERL_ARGS_ASSERT_SETLOCALE_FROM_AGGREGATE_LC_ALL;
/* If the string that gives what to set doesn't include all categories,
* the omitted ones get set to "C". To get this behavior, first set
* all the individual categories to "C", and override the furnished
* ones below. FALSE => No need to recalculate LC_ALL, as this is a
* temporary state */
if (! emulate_setlocale_c(LC_ALL, "C", FALSE, line)) {
setlocale_failure_panic_c(LC_ALL, locale_on_entry,
"C", __LINE__, line);
NOT_REACHED; /* NOTREACHED */
}
while (s < e) {
/* Parse through the category */
while (isWORDCHAR(*p)) {
p++;
}
category_end = p;
if (*p++ != '=') {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
/* Parse through the locale name */
name_start = p;
while (p < e && *p != ';') {
if (! isGRAPH(*p)) {
Perl_croak(aTHX_
"panic: %s: %d: Unexpected character in locale name '%02X",
__FILE__, __LINE__, *(p-1));
}
p++;
}
name_end = p;
/* Space past the semi-colon */
if (p < e) {
p++;
}
/* Find the index of the category name in our lists */
for (i = 0; i < LC_ALL_INDEX_; i++) {
char * individ_locale;
/* Keep going if this isn't the index. The strnNE() avoids a
* Perl_form(), but would fail if ever a category name could be
* a substring of another one, like if there were a
* "LC_TIME_DATE" */
if strnNE(s, category_names[i], category_end - s) {
continue;
}
individ_locale = Perl_form(aTHX_ "%.*s",
(int) (name_end - name_start), name_start);
/* FALSE => Don't recalculate LC_ALL; we'll do it ourselves after
* the loop */
if (! emulate_setlocale_i(i, individ_locale, FALSE, line)) {
/* But if we have to back out, do fix up LC_ALL */
if (! emulate_setlocale_c(LC_ALL, locale_on_entry, TRUE, line))
{
Safefree(locale_on_entry);
setlocale_failure_panic_i(i, individ_locale,
locale, __LINE__, line);
NOT_REACHED; /* NOTREACHED */
}
Safefree(locale_on_entry);
return NULL;
}
/* Found and handled the desired category */
break;
}
s = p;
}
/* Here we have set all the individual categories by recursive calls;
* update the LC_ALL entry as well. We can't just use the input 'locale'
* as the value may omit categories whose locale is 'C'. khw thinks it's
* better to store a complete LC_ALL. So calculate it. */
retval = savepv(calculate_LC_ALL(PL_curlocales));
Safefree(PL_curlocales[LC_ALL_INDEX_]);
PL_curlocales[LC_ALL_INDEX_] = retval;
Safefree(locale_on_entry);
return retval;
}
STATIC const char *
S_find_locale_from_environment(pTHX_ const unsigned int index)
{
/* On systems without querylocale(), it is problematic getting the results
* of the POSIX 2008 equivalent of setlocale(category, "") (which gets the
* locale from the environment).
*
* To ensure that we know exactly what those values are, we do the setting
* ourselves, using the documented algorithm (assuming the documentation is
* correct) rather than use "" as the locale. This will lead to results
* that differ from native behavior if the native behavior differs from the
* standard documented value, but khw believes it is better to know what's
* going on, even if different from native, than to just guess.
*
* Another option would be, in a critical section, to save the global
* locale's current value, and do a straight setlocale(LC_ALL, ""). That
* would return our desired values, destroying the global locale's, which
* we would then restore. But that could cause races with any other thread
* that is using the global locale and isn't using the mutex. And, the
* only reason someone would have done that is because they are calling a
* library function, like in gtk, that calls setlocale(), and which can't
* be changed to use the mutex. That wouldn't be a problem if this were to
* be done before any threads had switched, say during perl construction
* time. But this code would still be needed for the general case. */
const char * default_name;
unsigned int i;
const char * locale_names[LC_ALL_INDEX_];
/* We rely on PerlEnv_getenv() returning a mortalized copy */
const char * const lc_all = PerlEnv_getenv("LC_ALL");
/* Use any "LC_ALL" environment variable, as it overrides everything
* else. */
if (lc_all && strNE(lc_all, "")) {
return lc_all;
}
/* Otherwise, we need to dig deeper. Unless overridden, the default is
* the LANG environment variable; "C" if it doesn't exist. */
default_name = PerlEnv_getenv("LANG");
if (! default_name || strEQ(default_name, "")) {
default_name = "C";
}
/* If setting an individual category, use its corresponding value found in
* the environment, if any; otherwise use the default we already
* calculated. */
if (index != LC_ALL_INDEX_) {
const char * const new_value = PerlEnv_getenv(category_names[index]);
return (new_value && strNE(new_value, ""))
? new_value
: default_name;
}
/* Here, we are getting LC_ALL. Any categories that don't have a
* corresponding environment variable set should be set to 'default_name'
*
* Simply find the values for all categories, and call the function to
* compute LC_ALL. */
for (i = 0; i < LC_ALL_INDEX_; i++) {
const char * const env_override = PerlEnv_getenv(category_names[i]);
locale_names[i] = (env_override && strNE(env_override, ""))
? env_override
: default_name;
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: find_locale_from_environment i=%d, name=%s, locale=%s\n",
__FILE__, __LINE__, i, category_names[i], locale_names[i]));
}
return calculate_LC_ALL(locale_names);
}
# endif /* Need PL_curlocales[] */
STATIC const char *
S_emulate_setlocale_i(pTHX_
const unsigned int index,
const char * new_locale,
const int recalc_LC_ALL,
const line_t line)
{
/* This function effectively performs a setlocale() on just the current
* thread; thus it is thread-safe. It does this by using the POSIX 2008
* locale functions to emulate the behavior of setlocale(). Similar to
* regular setlocale(), the return from this function points to memory that
* can be overwritten by other system calls, so needs to be copied
* immediately if you need to retain it. The difference here is that
* system calls besides another setlocale() can overwrite it.
*
* By doing this, most locale-sensitive functions become thread-safe. The
* exceptions are mostly those that return a pointer to static memory.
*
* This function takes our internal index of the 'category' setlocale is
* called with, and the 'new_locale' to set the category to. It uses the
* index to find the category mask that the POSIX 2008 functions use.
*
* The function can be called recursively on all the individual categories,
* when the outer call is for LC_ALL. It would be unnecessary work to
* recalculate LC_ALL in the middle of all this; so the 'recalc_LC_ALL'
* parameter is set to LOOPING in this circumstance, to indicate to
* recalculate only on the final category; 0 to indicate to not recalculate
* at all; and 1 to indicate to do so unconditionally */
int mask;
locale_t old_obj;
locale_t new_obj;
const char * old_messages_locale = NULL;
PERL_ARGS_ASSERT_EMULATE_SETLOCALE_I;
assert(index <= NOMINAL_LC_ALL_INDEX);