Skip to content

Commit

Permalink
Complete Inflector transliterations.
Browse files Browse the repository at this point in the history
  • Loading branch information
euromark committed Apr 23, 2014
1 parent 466714d commit 7e6bc48
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 47 deletions.
116 changes: 116 additions & 0 deletions lib/Cake/Test/Case/Utility/InflectorTest.php
Expand Up @@ -30,6 +30,108 @@
*/
class InflectorTest extends CakeTestCase {

/**
* A list of chars to test transliteration.
*
* @var array
*/
public static $maps = array (
'de' => array ( /* German */
'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue', 'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'ß' => 'ss',
'ẞ' => 'SS'
),
'latin' => array (
'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Å' => 'A', 'Ă' => 'A', 'Æ' => 'AE', 'Ç' =>
'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I',
'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ő' => 'O', 'Ø' => 'O',
'Ș' => 'S', 'Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ű' => 'U',
'Ý' => 'Y', 'Þ' => 'TH', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a',
'å' => 'a', 'ă' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' =>
'o', 'ô' => 'o', 'õ' => 'o', 'ő' => 'o', 'ø' => 'o', 'ș' => 's', 'ț' => 't', 'ù' => 'u', 'ú' => 'u',
'û' => 'u', 'ű' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y'
),
'el' => array ( /* Greek */
'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z', 'η' => 'h', 'θ' => 'th',
'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => 'x', 'ο' => 'o', 'π' => 'p',
'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y', 'φ' => 'f', 'χ' => 'x', 'ψ' => 'ps', 'ω' => 'w',
'ά' => 'a', 'έ' => 'e', 'ί' => 'i', 'ό' => 'o', 'ύ' => 'y', 'ή' => 'h', 'ώ' => 'w', 'ς' => 's',
'ϊ' => 'i', 'ΰ' => 'y', 'ϋ' => 'y', 'ΐ' => 'i',
'Α' => 'A', 'Β' => 'B', 'Γ' => 'G', 'Δ' => 'D', 'Ε' => 'E', 'Ζ' => 'Z', 'Η' => 'H', 'Θ' => 'TH',
'Ι' => 'I', 'Κ' => 'K', 'Λ' => 'L', 'Μ' => 'M', 'Ν' => 'N', 'Ξ' => 'X', 'Ο' => 'O', 'Π' => 'P',
'Ρ' => 'R', 'Σ' => 'S', 'Τ' => 'T', 'Υ' => 'Y', 'Φ' => 'F', 'Χ' => 'X', 'Ψ' => 'PS', 'Ω' => 'W',
'Ά' => 'A', 'Έ' => 'E', 'Ί' => 'I', 'Ό' => 'O', 'Ύ' => 'Y', 'Ή' => 'H', 'Ώ' => 'W', 'Ϊ' => 'I',
'Ϋ' => 'Y'
),
'tr' => array ( /* Turkish */
'ş' => 's', 'Ş' => 'S', 'ı' => 'i', 'İ' => 'I', 'ç' => 'c', 'Ç' => 'C', 'ğ' => 'g', 'Ğ' => 'G'
),
'ru' => array ( /* Russian */
'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo', 'ж' => 'zh',
'з' => 'z', 'и' => 'i', 'й' => 'j', 'к' => 'k', 'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o',
'п' => 'p', 'р' => 'r', 'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c',
'ч' => 'ch', 'ш' => 'sh', 'щ' => 'sh', 'ы' => 'y', 'э' => 'e', 'ю' => 'yu',
'я' => 'ya',
'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 'Е' => 'E', 'Ё' => 'Yo', 'Ж' => 'Zh',
'З' => 'Z', 'И' => 'I', 'Й' => 'J', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O',
'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ц' => 'C',
'Ч' => 'Ch', 'Ш' => 'Sh', 'Щ' => 'Sh', 'Ы' => 'Y', 'Э' => 'E', 'Ю' => 'Yu',
'Я' => 'Ya',
),
'uk' => array ( /* Ukrainian */
'Є' => 'Ye', 'І' => 'I', 'Ї' => 'Yi', 'Ґ' => 'G', 'є' => 'ye', 'і' => 'i', 'ї' => 'yi', 'ґ' => 'g'
),
'cs' => array ( /* Czech */
'č' => 'c', 'ď' => 'd', 'ě' => 'e', 'ň' => 'n', 'ř' => 'r', 'š' => 's', 'ť' => 't', 'ů' => 'u',
'ž' => 'z', 'Č' => 'C', 'Ď' => 'D', 'Ě' => 'E', 'Ň' => 'N', 'Ř' => 'R', 'Š' => 'S', 'Ť' => 'T',
'Ů' => 'U', 'Ž' => 'Z'
),
'pl' => array ( /* Polish */
'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n', 'ó' => 'o', 'ś' => 's', 'ź' => 'z',
'ż' => 'z', 'Ą' => 'A', 'Ć' => 'C', 'Ł' => 'L', 'Ń' => 'N', 'Ó' => 'O', 'Ś' => 'S',
'Ź' => 'Z', 'Ż' => 'Z'
),
'ro' => array ( /* Romanian */
'ă' => 'a', 'â' => 'a', 'î' => 'i', 'ș' => 's', 'ț' => 't', 'Ţ' => 'T', 'ţ' => 't'
),
'lv' => array ( /* Latvian */
'ā' => 'a', 'č' => 'c', 'ē' => 'e', 'ģ' => 'g', 'ī' => 'i', 'ķ' => 'k', 'ļ' => 'l', 'ņ' => 'n',
'š' => 's', 'ū' => 'u', 'ž' => 'z', 'Ā' => 'A', 'Č' => 'C', 'Ē' => 'E', 'Ģ' => 'G', 'Ī' => 'I',
'Ķ' => 'K', 'Ļ' => 'L', 'Ņ' => 'N', 'Š' => 'S', 'Ū' => 'U', 'Ž' => 'Z'
),
'lt' => array ( /* Lithuanian */
'ą' => 'a', 'č' => 'c', 'ę' => 'e', 'ė' => 'e', 'į' => 'i', 'š' => 's', 'ų' => 'u', 'ū' => 'u', 'ž' => 'z',
'Ą' => 'A', 'Č' => 'C', 'Ę' => 'E', 'Ė' => 'E', 'Į' => 'I', 'Š' => 'S', 'Ų' => 'U', 'Ū' => 'U', 'Ž' => 'Z'
),
'vn' => array ( /* Vietnamese */
'Á' => 'A', 'À' => 'A', 'Ả' => 'A', 'Ã' => 'A', 'Ạ' => 'A', 'Ă' => 'A', 'Ắ' => 'A', 'Ằ' => 'A', 'Ẳ' => 'A', 'Ẵ' => 'A', 'Ặ' => 'A', 'Â' => 'A', 'Ấ' => 'A', 'Ầ' => 'A', 'Ẩ' => 'A', 'Ẫ' => 'A', 'Ậ' => 'A',
'á' => 'a', 'à' => 'a', 'ả' => 'a', 'ã' => 'a', 'ạ' => 'a', 'ă' => 'a', 'ắ' => 'a', 'ằ' => 'a', 'ẳ' => 'a', 'ẵ' => 'a', 'ặ' => 'a', 'â' => 'a', 'ấ' => 'a', 'ầ' => 'a', 'ẩ' => 'a', 'ẫ' => 'a', 'ậ' => 'a',
'É' => 'E', 'È' => 'E', 'Ẻ' => 'E', 'Ẽ' => 'E', 'Ẹ' => 'E', 'Ê' => 'E', 'Ế' => 'E', 'Ề' => 'E', 'Ể' => 'E', 'Ễ' => 'E', 'Ệ' => 'E',
'é' => 'e', 'è' => 'e', 'ẻ' => 'e', 'ẽ' => 'e', 'ẹ' => 'e', 'ê' => 'e', 'ế' => 'e', 'ề' => 'e', 'ể' => 'e', 'ễ' => 'e', 'ệ' => 'e',
'Í' => 'I', 'Ì' => 'I', 'Ỉ' => 'I', 'Ĩ' => 'I', 'Ị' => 'I', 'í' => 'i', 'ì' => 'i', 'ỉ' => 'i', 'ĩ' => 'i', 'ị' => 'i',
'Ó' => 'O', 'Ò' => 'O', 'Ỏ' => 'O', 'Õ' => 'O', 'Ọ' => 'O', 'Ô' => 'O', 'Ố' => 'O', 'Ồ' => 'O', 'Ổ' => 'O', 'Ỗ' => 'O', 'Ộ' => 'O', 'Ơ' => 'O', 'Ớ' => 'O', 'Ờ' => 'O', 'Ở' => 'O', 'Ỡ' => 'O', 'Ợ' => 'O',
'ó' => 'o', 'ò' => 'o', 'ỏ' => 'o', 'õ' => 'o', 'ọ' => 'o', 'ô' => 'o', 'ố' => 'o', 'ồ' => 'o', 'ổ' => 'o', 'ỗ' => 'o', 'ộ' => 'o', 'ơ' => 'o', 'ớ' => 'o', 'ờ' => 'o', 'ở' => 'o', 'ỡ' => 'o', 'ợ' => 'o',
'Ú' => 'U', 'Ù' => 'U', 'Ủ' => 'U', 'Ũ' => 'U', 'Ụ' => 'U', 'Ư' => 'U', 'Ứ' => 'U', 'Ừ' => 'U', 'Ử' => 'U', 'Ữ' => 'U', 'Ự' => 'U',
'ú' => 'u', 'ù' => 'u', 'ủ' => 'u', 'ũ' => 'u', 'ụ' => 'u', 'ư' => 'u', 'ứ' => 'u', 'ừ' => 'u', 'ử' => 'u', 'ữ' => 'u', 'ự' => 'u',
'Ý' => 'Y', 'Ỳ' => 'Y', 'Ỷ' => 'Y', 'Ỹ' => 'Y', 'Ỵ' => 'Y', 'ý' => 'y', 'ỳ' => 'y', 'ỷ' => 'y', 'ỹ' => 'y', 'ỵ' => 'y',
'Đ' => 'D', 'đ' => 'd'
),
'ar' => array ( /* Arabic */
'أ' => 'a', 'ب' => 'b', 'ت' => 't', 'ث' => 'th', 'ج' => 'g', 'ح' => 'h', 'خ' => 'kh', 'د' => 'd',
'ذ' => 'th', 'ر' => 'r', 'ز' => 'z', 'س' => 's', 'ش' => 'sh', 'ص' => 's', 'ض' => 'd', 'ط' => 't',
'ظ' => 'th', 'ع' => 'aa', 'غ' => 'gh', 'ف' => 'f', 'ق' => 'k', 'ك' => 'k', 'ل' => 'l', 'م' => 'm',
'ن' => 'n', 'ه' => 'h', 'و' => 'o', 'ي' => 'y'
),
'sr' => array ( /* Serbian */
'ђ' => 'dj', 'ј' => 'j', 'љ' => 'lj', 'њ' => 'nj', 'ћ' => 'c', 'џ' => 'dz',
'Ђ' => 'Dj', 'Ј' => 'j', 'Љ' => 'Lj', 'Њ' => 'Nj', 'Ћ' => 'C', 'Џ' => 'Dz',
),
'az' => array ( /* Azerbaijani */
'ç' => 'c', 'ə' => 'e', 'ğ' => 'g', 'ı' => 'i', 'ş' => 's',
'Ç' => 'C', 'Ə' => 'E', 'Ğ' => 'G', 'İ' => 'I', 'Ş' => 'S',
)
);

/**
* tearDown
*
Expand Down Expand Up @@ -259,6 +361,20 @@ public function testInflectorSlug() {
$this->assertEquals('non_breaking_space', $result);
}

/**
* Test slug() with a complete list of special chars.
*
* @return void
*/
public function testInflectorSlugCharList() {
foreach (self::$maps as $language => $list) {
foreach ($list as $from => $to) {
$result = Inflector::slug($from);
$this->assertEquals($to, $result, $from . ' (' . $language . ') should be ' . $to . ' - but is ' . $result);
}
}
}

/**
* testInflectorSlugWithMap method
*
Expand Down
137 changes: 90 additions & 47 deletions lib/Cake/Utility/Inflector.php
Expand Up @@ -176,56 +176,99 @@ class Inflector {
* @var array
*/
protected static $_transliteration = array(
'/ä|æ|ǽ/' => 'ae',
'/ö|œ/' => 'oe',
'/ü/' => 'ue',
'/Ä/' => 'Ae',
'/Ü/' => 'Ue',
'/Ö/' => 'Oe',
'/À|Á|Â|Ã|Å|Ǻ|Ā|Ă|Ą|Ǎ/' => 'A',
'/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª/' => 'a',
'/Ç|Ć|Ĉ|Ċ|Č/' => 'C',
'/ç|ć|ĉ|ċ|č/' => 'c',
'/Ð|Ď|Đ/' => 'D',
'/ð|ď|đ/' => 'd',
'/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě/' => 'E',
'/è|é|ê|ë|ē|ĕ|ė|ę|ě/' => 'e',
'/Ĝ|Ğ|Ġ|Ģ/' => 'G',
'/ĝ|ğ|ġ|ģ/' => 'g',
'/Ĥ|Ħ/' => 'H',
'/ĥ|ħ/' => 'h',
'/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ/' => 'I',
'/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı/' => 'i',
'/Ĵ/' => 'J',
'/ĵ/' => 'j',
'/Ķ/' => 'K',
'/ķ/' => 'k',
'/Ĺ|Ļ|Ľ|Ŀ|Ł/' => 'L',
'/ĺ|ļ|ľ|ŀ|ł/' => 'l',
'/Ñ|Ń|Ņ|Ň/' => 'N',
'/ñ|ń|ņ|ň|ʼn/' => 'n',
'/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ/' => 'O',
'/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º/' => 'o',
'/Ŕ|Ŗ|Ř/' => 'R',
'/ŕ|ŗ|ř/' => 'r',
'/Ś|Ŝ|Ş|Ș|Š/' => 'S',
'/ś|ŝ|ş|ș|š|ſ/' => 's',
'/Ţ|Ț|Ť|Ŧ/' => 'T',
'/ţ|ț|ť|ŧ/' => 't',
'/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ/' => 'U',
'/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ/' => 'u',
'/Ý|Ÿ|Ŷ/' => 'Y',
'/ý|ÿ|ŷ/' => 'y',
'/Ŵ/' => 'W',
'/ŵ/' => 'w',
'/Ź|Ż|Ž/' => 'Z',
'/ź|ż|ž/' => 'z',
'/À|Á|Â|Ã|Å|Ǻ|Ā|Ă|Ą|Ǎ|Α|Ά|А|Ả|Ạ|Ắ|Ằ|Ẳ|Ẵ|Ặ|Ấ|Ầ|Ẩ|Ẫ|Ậ/' => 'A',
'/Æ|Ǽ/' => 'AE',
'/ß/' => 'ss',
'/Ä/' => 'Ae',
'/Β|Б/' => 'B',
'/Ç|Ć|Ĉ|Ċ|Č|Ц|Ћ/' => 'C',
'/Ч/' => 'Ch',
'/Ð|Ď|Đ|Δ|Д/' => 'D',
'/Ђ/' => 'Dj',
'/Џ/' => 'Dz',
'/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě|Ε|Έ|Е|Э|Ẻ|Ẽ|Ẹ|Ế|Ề|Ể|Ễ|Ệ|Ə/' => 'E',
'/Φ|Ф/' => 'F',
'/Ĝ|Ğ|Ġ|Ģ|Γ|Г|Ґ/' => 'G',
'/Ĥ|Ħ|Η|Ή|Х/' => 'H',
'/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ|Ι|Ί|Ϊ|И|І|Ỉ|Ị/' => 'I',
'/IJ/' => 'IJ',
'/ij/' => 'ij',
'/Ĵ|Й/' => 'J',
'/Ķ|Κ|К/' => 'K',
'/Ĺ|Ļ|Ľ|Ŀ|Ł|Λ|Л/' => 'L',
'/Љ/' => 'Lj',
'/Μ|М/' => 'M',
'/Ñ|Ń|Ņ|Ň|Ν|Н/' => 'N',
'/Њ/' => 'Nj',
'/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ|Ο|Ό|О|Ỏ|Ọ|Ố|Ồ|Ổ|Ỗ|Ộ|Ớ|Ờ|Ở|Ỡ|Ợ/' => 'O',
'/Œ/' => 'OE',
'/ƒ/' => 'f'
'/Ö/' => 'Oe',
'/Π|П/' => 'P',
'/Ψ/' => 'PS',
'/Ŕ|Ŗ|Ř|Ρ|Р/' => 'R',
'/Ś|Ŝ|Ş|Ș|Š|Σ|С/' => 'S',
'/ẞ/' => 'SS',
'/Ш|Щ/' => 'Sh',
'/Ţ|Ț|Ť|Ŧ|Τ|Т/' => 'T',
'/Þ|Θ/' => 'TH',
'/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ|У|Ủ|Ụ|Ứ|Ừ|Ử|Ữ|Ự/' => 'U',
'/Ü/' => 'Ue',
'/В/' => 'V',
'/Ŵ|Ω|Ώ/' => 'W',
'/Ξ|Χ/' => 'X',
'/Ý|Ÿ|Ŷ|Υ|Ύ|Ϋ|Ы|Ỳ|Ỷ|Ỹ|Ỵ/' => 'Y',
'/Я/' => 'Ya',
'/Є/' => 'Ye',
'/Ї/' => 'Yi',
'/Ё/' => 'Yo',
'/Ю/' => 'Yu',
'/Ź|Ż|Ž|Ζ|З/' => 'Z',
'/Ж/' => 'Zh',
'/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª|α|ά|а|ả|ạ|ắ|ằ|ẳ|ẵ|ặ|ấ|ầ|ẩ|ẫ|ậ|أ/' => 'a',
'/ع/' => 'aa',
'/ä|æ|ǽ/' => 'ae',
'/β|б|ب/' => 'b',
'/ç|ć|ĉ|ċ|č|ц/ћ' => 'c',
'/ч/' => 'ch',
'/ð|ď|đ|δ|д|د|ض/' => 'd',
'/ђ/' => 'dj',
'/џ/' => 'dz',
'/è|é|ê|ë|ē|ĕ|ė|ę|ě|ε|έ|е|э|ẻ|ẽ|ẹ|ế|ề|ể|ễ|ệ|ə/' => 'e',
'/ƒ|φ|ф|ف/' => 'f',
'/ĝ|ğ|ġ|ģ|γ|г|ґ|ج/' => 'g',
'/غ/' => 'gh',
'/ĥ|ħ|η|ή|х|ح|ه/' => 'h',
'/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı|ι|ί|ϊ|ΐ|и|і|ỉ|ị/' => 'i',
'/ij/' => 'ij',
'/ĵ|й|ј|Ј/' => 'j',
'/ķ|κ|к|ق|ك/' => 'k',
'/خ/' => 'kh',
'/ĺ|ļ|ľ|ŀ|ł|λ|л|ل/' => 'l',
'/љ/' => 'lj',
'/μ|м|م/' => 'm',
'/ñ|ń|ņ|ň|ʼn|ν|н|ن/' => 'n',
'/њ/' => 'nj',
'/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º|ο|ό|о|ỏ|ọ|ố|ồ|ổ|ỗ|ộ|ớ|ờ|ở|ỡ|ợ|و/' => 'o',
'/ö|œ/' => 'oe',
'/π|п/' => 'p',
'/ψ/' => 'ps',
'/ŕ|ŗ|ř|ρ|р|ر/' => 'r',
'/ś|ŝ|ş|ș|š|ſ|σ|ς|с|س|ص/' => 's',
'/ш|щ|ش/' => 'sh',
'/ß/' => 'ss',
'/ţ|ț|ť|ŧ|τ|т|ت|ط/' => 't',
'/þ|θ|ث|ذ|ظ/' => 'th',
'/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ|у|ủ|ụ|ứ|ừ|ử|ữ|ự/' => 'u',
'/ü/' => 'ue',
'/в/' => 'v',
'/ŵ|ω|ώ/' => 'w',
'/ξ|χ/' => 'x',
'/ý|ÿ|ŷ|υ|ύ|ΰ|ϋ|ы|ỳ|ỷ|ỹ|ỵ|ي/' => 'y',
'/я/' => 'ya',
'/є/' => 'ye',
'/ї/' => 'yi',
'/ё/' => 'yo',
'/ю/' => 'yu',
'/ź|ż|ž|ζ|з|ز/' => 'z',
'/ж/' => 'zh',
);

/**
Expand Down

0 comments on commit 7e6bc48

Please sign in to comment.