-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
XmlCollation.cs
498 lines (421 loc) · 18.7 KB
/
XmlCollation.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
//------------------------------------------------------------------------------
// <copyright file="XmlCollation.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------
using System.Collections;
using System.ComponentModel;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.InteropServices;
using System.IO;
namespace System.Xml.Xsl.Runtime {
using Res = System.Xml.Utils.Res;
[EditorBrowsable(EditorBrowsableState.Never)]
public sealed class XmlCollation {
// lgid support for sort
// Windows locale identifiers (LCIDs) used by Create() to map the "sort" collation option
// onto an alternate-sort culture.
//
// Plain language IDs (low 16 bits of an LCID; compared against GetLangID(cultInfo.LCID)):
private const int deDE = 0x0407;
private const int huHU = 0x040E;
private const int jaJP = 0x0411;
private const int kaGE = 0x0437;
private const int koKR = 0x0412;
private const int zhTW = 0x0404;
private const int zhCN = 0x0804;
private const int zhHK = 0x0C04;
private const int zhSG = 0x1004;
private const int zhMO = 0x1404;
// Full LCIDs that combine a language ID with a non-zero sort ID in the bits above bit 15
// (same layout that MakeLCID produces). Each one is passed directly to new CultureInfo(int).
private const int zhTWbopo = 0x030404;
private const int deDEphon = 0x010407;
private const int huHUtech = 0x01040e;
private const int kaGEmode = 0x010437;
// Invariant: compops == options.CompareOptions, i.e. (options & ~Options.Mask)
// cultInfo may be null, in which case the Culture property falls back to CultureInfo.CurrentCulture.
private CultureInfo cultInfo;
// CompareOptions bits plus the UpperFirst / EmptyGreatest / DescendingOrder extension flags.
private Options options;
// Cached CompareOptions portion of 'options'; assigned in both constructors.
private CompareOptions compops;
/// <summary>
/// Packed bit set holding a System.Globalization.CompareOptions value together with three
/// collation-specific flags (upper-first, empty-greatest and descending-order).
/// </summary>
private struct Options {
    public const int FlagUpperFirst      = 0x1000;
    public const int FlagEmptyGreatest   = 0x2000;
    public const int FlagDescendingOrder = 0x4000;

    // Bits owned by this struct; every remaining bit is interpreted as CompareOptions.
    private const int Mask = FlagUpperFirst | FlagEmptyGreatest | FlagDescendingOrder;

    private int value;

    /// <summary>Wrap an already packed bit pattern.</summary>
    public Options(int value) {
        this.value = value;
    }

    /// <summary>Return true if any bit of <paramref name="flag"/> is set.</summary>
    public bool GetFlag(int flag) {
        return 0 != (flag & this.value);
    }

    /// <summary>Turn the bits of <paramref name="flag"/> on or off.</summary>
    public void SetFlag(int flag, bool value) {
        this.value = value ? (this.value | flag) : (this.value & ~flag);
    }

    public bool UpperFirst {
        get { return (this.value & FlagUpperFirst) != 0; }
        set { SetFlag(FlagUpperFirst, value); }
    }

    public bool EmptyGreatest {
        get { return (this.value & FlagEmptyGreatest) != 0; }
    }

    public bool DescendingOrder {
        get { return (this.value & FlagDescendingOrder) != 0; }
    }

    public bool IgnoreCase {
        get { return (this.value & (int)CompareOptions.IgnoreCase) != 0; }
    }

    public bool Ordinal {
        get { return (this.value & (int)CompareOptions.Ordinal) != 0; }
    }

    /// <summary>
    /// The CompareOptions portion of the bit set. Assigning replaces that portion and
    /// leaves the three extension flags as they were.
    /// </summary>
    public CompareOptions CompareOptions {
        get {
            int withoutExtensionFlags = this.value & ~Mask;
            return (CompareOptions)withoutExtensionFlags;
        }
        set {
            int incoming = (int)value;
            Debug.Assert((incoming & Mask) == 0);
            this.value = incoming | (this.value & Mask);
        }
    }

    /// <summary>Expose the raw packed bits.</summary>
    public static implicit operator int(Options options) {
        return options.value;
    }
}
//-----------------------------------------------
// Constructors
//-----------------------------------------------
/// <summary>
/// Build a collation from a culture (may be null) and a packed option set.
/// </summary>
private XmlCollation(CultureInfo cultureInfo, Options options) {
    // Cache the CompareOptions portion so it does not have to be unmasked on every use.
    this.compops  = options.CompareOptions;
    this.options  = options;
    this.cultInfo = cultureInfo;
}
//-----------------------------------------------
// Create
//-----------------------------------------------
/// <summary>
/// Singleton collation that sorts according to Unicode code points
/// (invariant culture, CompareOptions.Ordinal).
/// </summary>
// readonly: the instance is created once here and never reassigned anywhere in this class.
private static readonly XmlCollation cp = new XmlCollation(CultureInfo.InvariantCulture, new Options((int)CompareOptions.Ordinal));

/// <summary>
/// Shared code-point collation instance.
/// </summary>
internal static XmlCollation CodePointCollation {
    get { return cp; }
}
/// <summary>
/// Create a collation from its URI literal, throwing if the literal is not supported.
/// </summary>
internal static XmlCollation Create(string collationLiteral) {
    const bool throwOnError = true;
    return Create(collationLiteral, throwOnError);
}
/// <summary>
/// Map a collation URI literal onto an XmlCollation.
/// </summary>
/// <remarks>
/// This function is used in both parser and F&amp;O library, so it strictly maps valid literals to XmlCollation.
/// The URI path (after the leading '/') names the language; an empty path leaves the culture unset.
/// The URI query is a '&amp;'-separated list of name=value pairs. Names and values are matched
/// case-insensitively. Flag options accept 0/false (clear) or 1/true (set) and are applied one by one
/// in the order given. Unrecognized option names, option values or sort names are rejected.
/// </remarks>
/// <param name="collationLiteral">Collation URI; must not be null.</param>
/// <param name="throwOnError">
/// When true, an unsupported literal raises an exception; when false, null is returned instead.
/// </param>
/// <returns>The collation, or null if the literal is unsupported and throwOnError is false.</returns>
/// <exception cref="XslTransformException">
/// throwOnError is true and the language, collation, option, option value or sort name is not supported.
/// </exception>
internal static XmlCollation Create(string collationLiteral, bool throwOnError) {
    Debug.Assert(collationLiteral != null, "collation literal should not be null");

    if (collationLiteral == XmlReservedNs.NsCollCodePoint) {
        return CodePointCollation;
    }

    Uri collationUri;
    CultureInfo cultInfo = null;
    Options options = new Options();

    if (throwOnError) {
        // Any exception raised by the Uri constructor propagates to the caller unchanged.
        collationUri = new Uri(collationLiteral);
    } else {
        if (!Uri.TryCreate(collationLiteral, UriKind.Absolute, out collationUri)) {
            return null;
        }
    }

    string authority = collationUri.GetLeftPart(UriPartial.Authority);
    if (authority == XmlReservedNs.NsCollationBase) {
        // Language
        // at least a '/' will be returned for Uri.LocalPath
        string lang = collationUri.LocalPath.Substring(1);
        if (lang.Length != 0) {
            // Create culture from RFC 1766 string
            try {
                cultInfo = new CultureInfo(lang);
            }
            catch (ArgumentException) {
                if (!throwOnError) return null;
                throw new XslTransformException(Res.Coll_UnsupportedLanguage, lang);
            }
        }
        // else: use default culture of current thread (cultInfo stays null)
    } else if (collationUri.IsBaseOf(new Uri(XmlReservedNs.NsCollCodePoint))) {
        // language with codepoint collation is not allowed
        options.CompareOptions = CompareOptions.Ordinal;
    } else {
        // Unrecognized collation
        if (!throwOnError) return null;
        throw new XslTransformException(Res.Coll_Unsupported, collationLiteral);
    }

    // Sort & Compare option
    // at least a '?' will be returned for Uri.Query if not empty
    string query = collationUri.Query;
    string sort = null;

    if (query.Length != 0) {
        foreach (string option in query.Substring(1).Split('&')) {
            string[] pair = option.Split('=');

            if (pair.Length != 2) {
                if (!throwOnError) return null;
                throw new XslTransformException(Res.Coll_BadOptFormat, option);
            }

            // Both halves are upper-cased so that all matching below is case-insensitive.
            string optionName = pair[0].ToUpper(CultureInfo.InvariantCulture);
            string optionValue = pair[1].ToUpper(CultureInfo.InvariantCulture);

            if (optionName == "SORT") {
                // Remembered in upper case; resolved after all options have been read.
                sort = optionValue;
            }
            else {
                int flag;

                switch (optionName) {
                case "IGNORECASE":      flag = (int)CompareOptions.IgnoreCase; break;
                case "IGNORENONSPACE":  flag = (int)CompareOptions.IgnoreNonSpace; break;
                case "IGNORESYMBOLS":   flag = (int)CompareOptions.IgnoreSymbols; break;
                case "IGNOREKANATYPE":  flag = (int)CompareOptions.IgnoreKanaType; break;
                case "IGNOREWIDTH":     flag = (int)CompareOptions.IgnoreWidth; break;
                case "UPPERFIRST":      flag = Options.FlagUpperFirst; break;
                case "EMPTYGREATEST":   flag = Options.FlagEmptyGreatest; break;
                case "DESCENDINGORDER": flag = Options.FlagDescendingOrder; break;
                default:
                    if (!throwOnError) return null;
                    throw new XslTransformException(Res.Coll_UnsupportedOpt, pair[0]);
                }

                switch (optionValue) {
                case "0": case "FALSE": options.SetFlag(flag, false); break;
                case "1": case "TRUE" : options.SetFlag(flag, true ); break;
                default:
                    if (!throwOnError) return null;
                    throw new XslTransformException(Res.Coll_UnsupportedOptVal, pair[0], pair[1]);
                }
            }
        }
    }

    // upperfirst option is only meaningful when not ignore case
    if (options.UpperFirst && options.IgnoreCase)
        options.UpperFirst = false;

    // other CompareOptions are only meaningful if Ordinal comparison is not being used
    if (options.Ordinal) {
        options.CompareOptions = CompareOptions.Ordinal;
        options.UpperFirst = false;
    }

    // new cultureinfo based on alternate sorting option
    if (sort != null && cultInfo != null) {
        int lgid = GetLangID(cultInfo.LCID);

        // 'sort' was upper-cased above, so the labels must be upper case too. String case labels
        // are matched case-sensitively; lower-case labels could never match and every sort
        // option would be rejected as unsupported.
        switch (sort) {
        case "BOPO":
            if (lgid == zhTW) {
                cultInfo = new CultureInfo(zhTWbopo);
            }
            break;
        case "STRK":
            if (lgid == zhCN || lgid == zhHK || lgid == zhSG || lgid == zhMO) {
                cultInfo = new CultureInfo(MakeLCID(cultInfo.LCID, /*Stroke*/ 0x02));
            }
            break;
        case "UNI":
            if (lgid == jaJP || lgid == koKR) {
                cultInfo = new CultureInfo(MakeLCID(cultInfo.LCID, /*Unicode*/ 0x01));
            }
            break;
        case "PHN":
            if (lgid == deDE) {
                cultInfo = new CultureInfo(deDEphon);
            }
            break;
        case "TECH":
            if (lgid == huHU) {
                cultInfo = new CultureInfo(huHUtech);
            }
            break;
        case "MOD":
            // ka-GE(Georgian - Georgia) Modern Sort: 0x00010437
            if (lgid == kaGE) {
                cultInfo = new CultureInfo(kaGEmode);
            }
            break;
        case "PRON": case "DICT": case "TRAD":
            // es-ES(Spanish - Spain) Traditional: 0x0000040A
            // They are removing 0x040a (Spanish Traditional sort) in NLS+.
            // So if you create 0x040a, it's just like 0x0c0a (Spanish International sort).
            // Thus I don't handle it differently.
            break;
        default:
            if (!throwOnError) return null;
            throw new XslTransformException(Res.Coll_UnsupportedSortOpt, sort);
        }
    }
    return new XmlCollation(cultInfo, options);
}
//-----------------------------------------------
// Collection Support
//-----------------------------------------------
// Equals and GetHashCode are overridden because UniqueList<XmlCollation> needs value equality.
/// <summary>
/// Two collations are equal when their packed options and their cultures are equal.
/// </summary>
public override bool Equals(object obj) {
    XmlCollation that = obj as XmlCollation;
    if (that == null) {
        return false;
    }
    if (that == this) {
        return true;
    }
    int mine = this.options;
    int theirs = that.options;
    return mine == theirs && object.Equals(this.cultInfo, that.cultInfo);
}
/// <summary>
/// Hash of the packed options, xor-ed with the culture's hash when a culture is set.
/// </summary>
public override int GetHashCode() {
    int optionBits = this.options;
    return (this.cultInfo == null)
        ? optionBits
        : optionBits ^ this.cultInfo.GetHashCode();
}
//-----------------------------------------------
// Serialization Support
//-----------------------------------------------
// Denotes the current thread locale.
// Written by GetObjectData in place of an LCID when cultInfo is null, and mapped back to a
// null cultInfo by the BinaryReader constructor.
private const int LOCALE_CURRENT = -1;
/// <summary>
/// Serialize this collation as two 32-bit integers: the culture's LCID (or LOCALE_CURRENT
/// when no culture is set) followed by the packed options.
/// </summary>
internal void GetObjectData(BinaryWriter writer) {
    // NOTE: Only the LCID of the CultureInfo is serialized. It seems to suffice for our purposes.
    Debug.Assert(this.cultInfo == null || this.cultInfo.Equals(new CultureInfo(this.cultInfo.LCID)),
        "Cannot serialize CultureInfo correctly");

    int lcid = LOCALE_CURRENT;
    if (this.cultInfo != null) {
        lcid = this.cultInfo.LCID;
    }
    writer.Write(lcid);

    int packedOptions = this.options;
    writer.Write(packedOptions);
}
/// <summary>
/// Deserialize a collation written by GetObjectData: an LCID (or LOCALE_CURRENT) followed by
/// the packed options.
/// </summary>
internal XmlCollation(BinaryReader reader) {
    int lcid = reader.ReadInt32();
    if (lcid == LOCALE_CURRENT) {
        this.cultInfo = null;
    }
    else {
        this.cultInfo = new CultureInfo(lcid);
    }

    int packedOptions = reader.ReadInt32();
    this.options = new Options(packedOptions);
    this.compops = this.options.CompareOptions;
}
//-----------------------------------------------
// Compare Properties
//-----------------------------------------------
/// <summary>True when the upper-first flag is set in the options.</summary>
internal bool UpperFirst {
    get { return this.options.GetFlag(Options.FlagUpperFirst); }
}
/// <summary>True when the empty-greatest flag is set in the options.</summary>
internal bool EmptyGreatest {
    get { return this.options.GetFlag(Options.FlagEmptyGreatest); }
}
/// <summary>True when the descending-order flag is set in the options.</summary>
internal bool DescendingOrder {
    get { return this.options.GetFlag(Options.FlagDescendingOrder); }
}
/// <summary>
/// Culture used for comparisons: the collation's own culture, or the current thread
/// culture when none was specified.
/// </summary>
internal CultureInfo Culture {
    get { return this.cultInfo ?? CultureInfo.CurrentCulture; }
}
//-----------------------------------------------
//
//-----------------------------------------------
/// <summary>
/// Create a sort key that can be compared quickly with other keys.
/// </summary>
/// <remarks>
/// The key is produced by the CompareInfo of <see cref="Culture"/> using this collation's compare
/// options. When the upper-first flag is set, the case weights inside the raw key bytes are
/// inverted before the key is wrapped; otherwise the SortKey itself is wrapped (release builds).
/// </remarks>
/// <param name="s">String to build the key for.</param>
/// <returns>An XmlStringSortKey built from either the SortKey or its (possibly modified) bytes.</returns>
internal XmlSortKey CreateSortKey(string s) {
SortKey sortKey;
byte[] bytesKey;
int idx;
// Let the culture's CompareInfo build the sort key for s.
sortKey = Culture.CompareInfo.GetSortKey(s, this.compops);
// Create an XmlStringSortKey using the SortKey if possible
#if DEBUG
// In debug-only code, test other code path more frequently:
// only descending, non-upper-first keys take the SortKey path here; every other key falls
// through to the byte[] path below.
if (!UpperFirst && DescendingOrder)
return new XmlStringSortKey(sortKey, DescendingOrder);
#else
if (!UpperFirst)
return new XmlStringSortKey(sortKey, DescendingOrder);
#endif
// Get byte buffer from SortKey and modify it
bytesKey = sortKey.KeyData;
if (UpperFirst && bytesKey.Length != 0) {
// By default lower-case is always sorted first for any locale (verified by empirical testing).
// In order to place upper-case first, invert the case weights in the generated sort key.
// Skip to case weight section (3rd weight section)
// NOTE(review): the scans below are not bounds-checked. They assume the key contains at least
// two 0x01 separator bytes followed by a third one; a key without them would index past the
// end of the array. Confirm the key layout on the target platform.
idx = 0;
// Advance to the first 0x01 separator.
while (bytesKey[idx] != 1)
idx++;
// Advance to the second 0x01 separator.
do {
idx++;
}
while (bytesKey[idx] != 1);
// Invert all case weights (including terminating 0x1)
// The loop ends right after a byte becomes 0xfe, i.e. after inverting a 0x01 byte.
do {
idx++;
bytesKey[idx] ^= 0xff;
}
while (bytesKey[idx] != 0xfe);
}
return new XmlStringSortKey(bytesKey, DescendingOrder);
}
#if not_used
/// <summary>
/// Compare two strings under this collation. Returns a negative value if str1 sorts before str2,
/// 0 if they are equal, and a positive value if str1 sorts after str2. The sign is flipped when
/// the collation is descending.
/// </summary>
internal int Compare(string str1, string str2) {
    CultureInfo culture = Culture;
    int order;

    if (this.options.Ordinal) {
        // Normalize the ordinal result to -1, 0 or 1.
        int raw = string.CompareOrdinal(str1, str2);
        order = (raw < 0) ? -1 : ((raw > 0) ? 1 : 0);
    }
    else if (!UpperFirst) {
        order = culture.CompareInfo.Compare(str1, str2, this.compops);
    }
    else {
        // First compare case-insensitive, then break ties by considering case (reversed)
        order = culture.CompareInfo.Compare(str1, str2, this.compops | CompareOptions.IgnoreCase);
        if (order == 0) {
            order = -culture.CompareInfo.Compare(str1, str2, this.compops);
        }
    }

    return DescendingOrder ? -order : order;
}
/// <summary>
/// Search str1 for str2 using this collation's culture and compare options.
/// Returns the index of str2 within str1, or -1 if str2 does not occur in str1.
/// </summary>
internal int IndexOf(string str1, string str2) {
    CompareInfo comparer = Culture.CompareInfo;
    return comparer.IndexOf(str1, str2, this.compops);
}
/// <summary>
/// Return true if str1 ends with str2 under this collation.
/// </summary>
internal bool IsSuffix(string str1, string str2) {
    if (!this.options.Ordinal) {
        return Culture.CompareInfo.IsSuffix (str1, str2, this.compops);
    }

    // Ordinal: compare the tail of str1 with the whole of str2, code unit by code unit.
    int tailStart = str1.Length - str2.Length;
    return tailStart >= 0
        && String.CompareOrdinal(str1, tailStart, str2, 0, str2.Length) == 0;
}
/// <summary>
/// Return true if str1 starts with str2 under this collation.
/// </summary>
internal bool IsPrefix(string str1, string str2) {
    if (!this.options.Ordinal) {
        return Culture.CompareInfo.IsPrefix (str1, str2, this.compops);
    }

    // Ordinal: compare the head of str1 with the whole of str2, code unit by code unit.
    return str1.Length >= str2.Length
        && String.CompareOrdinal(str1, 0, str2, 0, str2.Length) == 0;
}
#endif
//-----------------------------------------------
// Helper Functions
//-----------------------------------------------
/// <summary>
/// Combine the low 16 bits of <paramref name="langid"/> with the low 4 bits of
/// <paramref name="sortid"/>, placing the sort bits at positions 16-19.
/// </summary>
private static int MakeLCID(int langid, int sortid) {
    int languageBits = langid & 0xffff;
    int sortBits = (sortid & 0xf) << 16;
    return languageBits | sortBits;
}
/// <summary>
/// Extract the language ID, i.e. the low 16 bits, from an LCID.
/// </summary>
private static int GetLangID(int lcid) {
    const int LanguageMask = 0xffff;
    return lcid & LanguageMask;
}
}
}