# ICU / intl

In [103]:
out(function () {
    yield ['Blabla', 'üí©'];
}, $locales);

[
  "[34mcs_CZ[39m" => [
    "[32mBlabla: üí©[39m",
  ],
  "[34mja_JP[39m" => [
    "[32mBlabla: üí©[39m",
  ],
]

In [1]:
function canonicalize ($item) {
    if (is_array($item)) {
        return implode(', ', $item);
    }
    
    if (is_null($item)) {
        return 'null';
    }
    
    if (is_bool($item)) {
        return $item ? 'true' : 'false';
    }
    
    if (is_int($item) || is_float($item)) {
        return "{$item}";
    }
    
    if (is_string($item)) {
        return $item;
    }
    
    return "";
}

function single_out(callable $function, ...$args) {
    $out = [];
    foreach ($function(...$args) as $item) {
        if (is_null($item[0])) {
            $out[] = canonicalize($item[1]);
            continue;
        }
        
        $key = canonicalize($item[0]);
        $value = canonicalize($item[1]);
        $out[] = "{$key}: {$value}";
    }
    return $out;
}

function out (callable $function, ?array $locales = null) {
    if (is_null($locales)) {
        return single_out($function);
    }
    
    $out = [];
    foreach ($locales as $locale) {
        $out[$locale] = single_out($function, $locale);
    }
    
    return $out;
}

$locales = ['cs_CZ', 'ja_JP'];

[
  "[32mcs_CZ[39m",
  "[32mja_JP[39m",
]

## √övod

Jistƒõ ji≈æ V√°s nƒõkdy potkala nƒõkter√° z¬†n√°sleduj√≠c√≠ch situac√≠:

* Na≈°li jste ƒåesko v¬†seznamu zem√≠ zahraban√© a≈æ kdesi za Zimbabwe.
* Zvolili jste ≈°patn√© datum, proto≈æe t√Ωden v¬†grafick√©m kalend√°≈ôi nezaƒç√≠nal pondƒõl√≠m.
* Nebyli jste si jisti, kter√° ƒç√°st data je den a kter√° mƒõs√≠c.
* Zkop√≠rovali jste ‚Äû2 soubor≈Ø‚Äú.
* P≈ôi≈°el V√°m e-mail nadepsan√Ω ‚ÄûByl(a) jste s¬†na≈°imi slu≈æbami spokojen(a)?‚Äú
* Nevƒõdƒõli jste, v¬†jak√©m ƒçasov√©m p√°smu jsou uvedeny ƒçasy v¬†j√≠zdn√≠m ≈ô√°du.
* M√≠sto popisk≈Ø tlaƒç√≠tek jste vidƒõli jen v√Ωpustku.
* Nechtƒõnƒõ jste zadali stovky m√≠sto jednotek kv≈Øli desetinn√© teƒçce m√≠sto ƒç√°rky.

Nejen to n√°m pom√°h√° vy≈ôe≈°it knihovna ICU a datab√°ze CLDR.

Spousta program√°tor≈Ø by byla r√°da, kdyby v≈°ichni na svƒõtƒõ pou≈æ√≠vali jednotn√Ω syst√©m prakticky v≈°eho, nejl√©pe i jen jedin√Ω jazyk. Nƒõkter√Ωm by snad staƒçilo, kdyby v≈°ichni aspo≈à pou≈æ√≠vali latinku a arabsk√© ƒç√≠slice. Jen≈æe‚Ä¶¬†lidi nezmƒõn√≠te a software dƒõl√°te pro nƒõ.

Podpora ciz√≠ch jazyk≈Ø je z√°sadn√≠. Nesm√≠ se st√°t, ≈æe rozd√≠ly v¬†jazyce, znakov√© sadƒõ ƒçi n√°rodn√≠m prost≈ôed√≠ zp≈Øsob√≠, ≈æe aplikace nebude fungovat. Dob≈ôe udƒõlan√° lokalizace a p≈ôizp≈Øsoben√≠ n√°rodn√≠mu prost≈ôed√≠, kter√© nen√≠ z√°sadn√≠ pro funkƒçnost, je dal≈°√≠m krokem: v√Ωraznƒõ zlep≈°uje to, jak je n√°m pou≈æ√≠v√°n√≠ aplikace p≈ô√≠jemn√©.

### P≈ô√≠klady

* Nejhor≈°√≠: Kv≈Øli rozd√≠lu v¬†nastaven√≠ oddƒõlovaƒç≈Ø tis√≠c≈Ø nep≈Øjde odeslat formul√°≈ô s¬†ƒç√°stkou.
* Nep≈ô√≠jemn√©: Nadpisy ƒçl√°nk≈Ø, kter√© nejsou latinkou, vytvo≈ô√≠ nesrozumiteln√© adresy: _------_ m√≠sto _privet_.
* Kosmetick√©: Datum se zobraz√≠ s¬†jin√Ωm po≈ôad√≠m jednotliv√Ωch souƒç√°st√≠.

### Co u≈æ jsem sly≈°el

* Sestavujte vƒõty tak, aby nebylo pot≈ôeba mƒõnit tvary slov.
* Nem≈Ø≈æeme ty rusk√© e-maily pos√≠lat napsan√© latinkou?

## IntlDateFormatter

In [2]:
out(function () {
    $dateFormat = IntlDateFormatter::LONG;
    $timeFormat = IntlDateFormatter::LONG;
    $timeZone = new DateTimeZone('Europe/Prague');
    $formatter = new IntlDateFormatter('cs_CZ', $dateFormat, $timeFormat, $timeZone);
    $dateTime = new DateTime;
    yield [null, $formatter->format($dateTime)];
}, $locales);

[
  "[34mcs_CZ[39m" => [
    "[32m20. ≈ô√≠jna 2018 5:50:08 SELƒå[39m",
  ],
  "[34mja_JP[39m" => [
    "[32m20. ≈ô√≠jna 2018 5:50:08 SELƒå[39m",
  ],
]

## NumberFormatter

In [14]:
out(function ($locale) {
    $formatter = new NumberFormatter($locale, NumberFormatter::DECIMAL);
    yield ['default', $formatter->format(123456789.0123456)];
    $formatter->setAttribute(NumberFormatter::MAX_FRACTION_DIGITS, 4);
    yield ['MAX_FRACTION_DIGITS', $formatter->format(123456789.0123456)];
}, $locales);

[
  "[34mcs_CZ[39m" => [
    "[32mdefault: 123¬†456¬†789,012[39m",
    "[32mMAX_FRACTION_DIGITS: 123¬†456¬†789,0123[39m",
  ],
  "[34mja_JP[39m" => [
    "[32mdefault: 123,456,789.012[39m",
    "[32mMAX_FRACTION_DIGITS: 123,456,789.0123[39m",
  ],
]

In [15]:
out(function ($locale) {
    $formatter = new NumberFormatter($locale, NumberFormatter::SPELLOUT);
    yield [null, $formatter->format(123456)];
}, $locales);

[
  "[34mcs_CZ[39m" => [
    "[32msto dvacet t≈ôi tis√≠c ƒçty≈ôi sta pades√°t ≈°est[39m",
  ],
  "[34mja_JP[39m" => [
    "[32mÂçÅ‰∫å‰∏á‰∏âÂçÉÂõõÁôæ‰∫îÂçÅÂÖ≠[39m",
  ],
]

In [5]:
out(function ($locale) {
    $formatter = new NumberFormatter($locale, NumberFormatter::CURRENCY);
    yield [null, $formatter->format(1234.5678)];
}, $locales);

[
  "[34mcs_CZ[39m" => [
    "[32m1¬†234,57¬†Kƒç[39m",
  ],
  "[34mja_JP[39m" => [
    "[32mÔø•1,235[39m",
  ],
]

## Collator

In [6]:
out(function () {
    $birds = ['ch≈ô√°stal', 'ƒç√≠≈æek', '≈•uh√Ωk', 'datel', 'ƒç√°p', 'turpan', 'cetie'];

    sort($birds);
    yield ['sort', $birds];
    
    $collator = new Collator('cs_CZ');
    $collator->sort($birds);

    yield ['Collator#sort', $birds];
});

[
  "[32msort: cetie, ch≈ô√°stal, datel, turpan, ƒç√°p, ƒç√≠≈æek, ≈•uh√Ωk[39m",
  "[32mCollator#sort: cetie, ƒç√°p, ƒç√≠≈æek, datel, ch≈ô√°stal, ≈•uh√Ωk, turpan[39m",
]

In [26]:
out(function () {
    $words = ['≈°arka', '≈°ƒÖla', 'itin', 'ypaƒç', 'suolas', 'ilgas'];
    $collator = new Collator('lt_LT');
    $collator->sort($words);
    
    yield [null, $words];
});

[
  "[32milgas, ypaƒç, itin, suolas, ≈°ƒÖla, ≈°arka[39m",
]

P≈ô√≠klad ze ≈æivota: Telefonn√≠ seznam nƒõkter√Ωch star≈°√≠ch Noki√≠ spr√°vnƒõ ≈ôadil *ch* a≈æ za *h*. Vyhled√°v√°n√≠ v≈°ak zpracov√°val postupnƒõ po jednotliv√Ωch p√≠smenech. Jm√©na obsahuj√≠c√≠ *ch* tak nebylo mo≈æn√© vyhledat zad√°n√≠m *ch* do vyhled√°vac√≠ho pole.

## MessageFormatter

U≈æ jsem sly≈°el i v√Ωzvy: tak ty vƒõty pi≈°te tak, aby se slova nemƒõnila v¬†z√°vislosti na ƒç√≠sle.

> Poƒçet jablek, kter√° mƒõla babka: 4.

Jako v√°≈ænƒõ?

In [7]:
out(function() {
    foreach ($counts = [5, 1.1] as $count) {
        yield ["{$count}", "Mƒõla babka $count jablek."];
    }
});

[
  "[32m5: Mƒõla babka 5 jablek.[39m",
  "[32m1.1: Mƒõla babka 1.1 jablek.[39m",
]

In [8]:
out(function () {
    $pattern = 'Mƒõla babka {count, number} {count, plural, one {jablko} '  .
                                                               'few {jablka} ' . 
                                                               'many {jablka} ' .
                                                               'other {jablek}}.';
    $formatter = new MessageFormatter('cs_CZ', $pattern);

    $counts = [1, 4, 5, 5.5, 0];
    foreach ($counts as $count) {
        yield ["{$count}", $formatter->format(['count' => $count])];
    }
});

[
  "[32m1: Mƒõla babka 1 jablko.[39m",
  "[32m4: Mƒõla babka 4 jablka.[39m",
  "[32m5: Mƒõla babka 5 jablek.[39m",
  "[32m5.5: Mƒõla babka 5,5 jablka.[39m",
  "[32m0: Mƒõla babka 0 jablek.[39m",
]

In [9]:
out(function() {
    $pattern = '{gender, select, m {{subject} mƒõl jablka.} ' .
                                'f {{subject} mƒõla jablka.} ' .
                                'n {{subject} mƒõlo jablka.} ' .
                                'other {{subject} si nezaslou≈æ√≠ jablka.}}';
    $formatter = new MessageFormatter('cs_CZ', $pattern);

    $examples = [['subject' => 'Dƒõdek', 'gender' => 'm'],
                 ['subject' => 'Babka', 'gender' => 'f'],
                 ['subject' => 'Vnouƒçe', 'gender' => 'n']];
    foreach ($examples as $example) {
        yield [$example, $formatter->format($example)];
    }
});

[
  "[32mDƒõdek, m: Dƒõdek mƒõl jablka.[39m",
  "[32mBabka, f: Babka mƒõla jablka.[39m",
  "[32mVnouƒçe, n: Vnouƒçe mƒõlo jablka.[39m",
]

## Normalizer

In [10]:
out(function () {
    $original = 'k' . 'u' . 'Ãä' . 'n' . 'Ãå';
    yield ['original', $original];
    yield ['original mb_strlen', mb_strlen($original)];
    yield ['original Normalizer::isNormalized', Normalizer::isNormalized($original)];

    $normalized = Normalizer::normalize($original);
    
    yield ['normalized', $normalized];
    yield ['normalized mb_strlen', mb_strlen($normalized)];
    yield ['normalized Normalizer::isNormalized', Normalizer::isNormalized($normalized)];
    
    yield ['==', $original == $normalized];
});

[
  "[32moriginal: k≈Ø≈à[39m",
  "[32moriginal mb_strlen: 5[39m",
  "[32moriginal Normalizer::isNormalized: false[39m",
  "[32mnormalized: k≈Ø≈à[39m",
  "[32mnormalized mb_strlen: 3[39m",
  "[32mnormalized Normalizer::isNormalized: true[39m",
  "[32m==: false[39m",
]

Narazil jsem na chybu v j√°d≈ôe Ruby: p≈ôi vytvo≈ôen√≠ souboru na souborov√©m syst√©mu HFS souborov√Ω syst√©m n√°zev souboru normalizuje, kdy≈æ se p≈ôed√° nenormalizovan√Ω. Ruby si ale ponechalo p≈Øvodn√≠ nenormalizovan√Ω n√°zev, pod kter√Ωm zapsan√Ω soubor nebylo mo≈æn√© nal√©zt.

## Transliterate

In [101]:
out(function () {
    $ids = ['uk-uk_Latn/BGN', 'ru-ru_Latn/BGN'];
    foreach ($ids as $id) {
        $transliterator = Transliterator::create($id);
        yield [$id, $transliterator->transliterate('–õ—É–≥–∞–Ω—Å–∫')];
    }

    $ids = ['Upper', 'tr-Upper'];
    foreach ($ids as $id) {
        $transliterator = Transliterator::create($id);
        yield [$id, $transliterator->transliterate('binbir')];
    }
});

[
  "[32muk-uk_Latn/BGN: Luhansk[39m",
  "[32mru-ru_Latn/BGN: Lugansk[39m",
  "[32mUpper: BINBIR[39m",
  "[32mtr-Upper: Bƒ∞NBƒ∞R[39m",
]

## IntlChar

In [11]:
out(function () {
    $chars = ['≈à', '‚ÑÉ', '„Åµ', 'Í∞®', 'üç∫'];
    foreach ($chars as $char) {
        yield [$char, IntlChar::charname($char)];
    }
});

[
  "[32m≈à: LATIN SMALL LETTER N WITH CARON[39m",
  "[32m‚ÑÉ: DEGREE CELSIUS[39m",
  "[32m„Åµ: HIRAGANA LETTER HU[39m",
  "[32mÍ∞®: HANGUL SYLLABLE GAELS[39m",
  "[32müç∫: BEER MUG[39m",
]

In [12]:
out(function () {
    $chars = ['≈á', '≈à', 'üç∫'];
    foreach ($chars as $char) {
        yield ["{$char} islower", IntlChar::islower($char)];
        yield ["{$char} isgraph", IntlChar::isgraph($char)];
    }
});

[
  "[32m≈á islower: false[39m",
  "[32m≈á isgraph: true[39m",
  "[32m≈à islower: true[39m",
  "[32m≈à isgraph: true[39m",
  "[32müç∫ islower: false[39m",
  "[32müç∫ isgraph: true[39m",
]

In [13]:
out(function () {
    $char = '¬†';
    $trimmed = trim($char);
    
    yield ['mb_strlen', mb_strlen($trimmed)];
    yield ['IntlChar::isWhiteSpace', IntlChar::isWhiteSpace($char)];
    yield ['IntlChar::isUWhiteSpace', IntlChar::isUWhiteSpace($char)];
});

[
  "[32mmb_strlen: 1[39m",
  "[32mIntlChar::isWhiteSpace: false[39m",
  "[32mIntlChar::isUWhiteSpace: true[39m",
]

# üßúüèø‚Äç‚ôÇÔ∏è