diff --git a/CHANGELOG.md b/CHANGELOG.md index e2fda163..f058b879 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,17 +24,18 @@ Unreleased - Adds all UniMorph languages to morphology. (\#393) - Added `data/covering_grammar/tsv/fre_latn_phonemic.tsv` (\#398) - Added `data/covering_grammar/lib/make_test_file.py` (\#396, \#399) -- Scraped Komi-Zyrian (`kpv`). (\#400) -- Scraped Makasar (`mak`). (\#415, #419) -- Scraped Zou (`zom`). (\#421) -- Scraped Wiyot (`wiy`). (\#422) -- Scrape Sidamo (`sid`). (\#423) -- Scraped Central Atlas Tamazight (`tzm`). (\#429) -- Scraped Chibcha (`chb`). (\#430) -- Scraped Kashmiri (`kas`). (\#431) -- Added Malayalam (`mal`) scrape. (\#434) -- Configuration and initial scrape for Dhivehi (`div`, Maldivian). (\#437) -- Gujarati scrape (`guj`). (\#445) +- Added Komi-Zyrian (`kpv`). (\#400) +- Added Makasar (`mak`). (\#415, \#419) +- Added Zou (`zom`). (\#421) +- Added Wiyot (`wiy`). (\#422) +- Added Sidamo (`sid`). (\#423) +- Added Central Atlas Tamazight (`tzm`). (\#429) +- Added Chibcha (`chb`). (\#430) +- Added Kashmiri (`kas`). (\#431) +- Added Malayalam (`mal`). (\#434) +- Added Dhivehi (`div`). (\#437) +- Added Gujarati (`guj`). (\#445) +- Added Karelian (`krl`). 
(\#447) #### Changed diff --git a/data/scrape/README.md b/data/scrape/README.md index 12f83079..08be8468 100644 --- a/data/scrape/README.md +++ b/data/scrape/README.md @@ -171,6 +171,7 @@ | [TSV](tsv/kor_hang_narrow.tsv) | kor | Korean | Korean | Hangul | | False | Narrow | False | 16,402 | | [TSV](tsv/kor_hang_narrow_filtered.tsv) | kor | Korean | Korean | Hangul | | True | Narrow | False | 14,141 | | [TSV](tsv/kpv_cyrl_broad.tsv) | kpv | Komi-Zyrian | Komi-Zyrian | Cyrillic | | False | Broad | True | 321 | +| [TSV](tsv/krl_latn_broad.tsv) | krl | Karelian | Karelian | Latin | | False | Broad | True | 106 | | [TSV](tsv/kxd_latn_broad.tsv) | kxd | Brunei | Brunei Malay | Latin | | False | Broad | True | 346 | | [TSV](tsv/lad_latn_broad.tsv) | lad | Ladino | Ladino | Latin | | False | Broad | True | 101 | | [TSV](tsv/lao_laoo_broad.tsv) | lao | Lao | Lao | Lao | | False | Broad | False | 513 | diff --git a/data/scrape/lib/languages.json b/data/scrape/lib/languages.json index 715df443..94bcda79 100644 --- a/data/scrape/lib/languages.json +++ b/data/scrape/lib/languages.json @@ -865,7 +865,10 @@ "iso639_name": "Karelian", "wiktionary_name": "Karelian", "wiktionary_code": "krl", - "casefold": true + "casefold": true, + "script": { + "latn": "Latin" + } }, "kas": { "iso639_name": "Kashmiri", diff --git a/data/scrape/tsv/krl_latn_broad.tsv b/data/scrape/tsv/krl_latn_broad.tsv new file mode 100644 index 00000000..a68b5b69 --- /dev/null +++ b/data/scrape/tsv/krl_latn_broad.tsv @@ -0,0 +1,106 @@ +aberi ɑ b e r i +aiga ɑ i̯ ɡ ɑ +aika ɑ i̯ k ɑ +akottuo ɑ k o tː u o̯ +ativo ɑ t i v o +eklen e k l e n +hammas h ɑ mː ɑ s +hauki h ɑ u̯ k i +hiän h i æ̯ n +homeh h o m e h +hyö h y ø̯ +hämehikki h æ m e h i kː i +höperö h ø p e r ø +igä i ɡ æ +ikä i k æ +istuo i s t u o̯ +iäni i æ̯ n i +iče i t͡ʃ e +iččeh i t͡ʃː e h +iččeni i t͡ʃː e n i +iččes i t͡ʃː e s +j j +jalga j ɑ l ɡ ɑ +jalka j ɑ l k ɑ +jänis j æ n i s +kaheksan k ɑ h e k s ɑ n +kakla k ɑ k l ɑ +kaksi k ɑ k s i +kaunis k ɑ 
u̯ n i s +ken k e n +kennih k e nː i h +kirja k i r j ɑ +koira k o i̯ r ɑ +kolmas k o l m ɑ s +kolme k o l m e +ku k u +kukko k u kː o +kumpani k u m p ɑ n i +kumpi k u m p i +kuusi k uː s i +kuva k u ʋ ɑ +kyly k y l y +kymmenen k y mː e n e n +kynkä k y n k æ +kätyt k æ t y t +käyvvä k æ y̯ ʋː æ +laksi l ɑ k s i +laukku l ɑ u̯ kː u +lukku l u kː u +maido m ɑ i̯ d o +maito m ɑ i̯ t o +mehiläini m e h i l æ i̯ n i +mennä m e nː æ +meččo m e t͡ʃː o +mi m i +mie m i e̯ +miljardi m i l j a r d i +miljoona m i l j oː n ɑ +minih m i n i h +myö m y ø̯ +mägrä m æ ɡ r æ +mäkrä m æ k r æ +männä m æ nː æ +ne n e +nellä n e lː æ +niken n i k e n +nimi n i m i +nuo n u o̯ +nämä n æ m æ +očča o t͡ʃː ɑ +palava p ɑ l ɑ v ɑ +pereh p e r e h +piirdiä p iː r d i æ +pitkä p i t k æ +poiga p o i̯ ɡ ɑ +poika p o i̯ k ɑ +pyssy p y sː y +rahvas r ɑ h v ɑ s +riähkä r i æ̯ h k æ +sana s ɑ n ɑ +sanakirja s ɑ n ɑ k i r j ɑ +sata s ɑ t ɑ +se s e +seiččemen s e i̯ t͡ʃː e m e n +sie s i e̯ +toini t o i̯ n i +tuhat t u h ɑ t +tuli t u l i +tuo t u o̯ +tuuli t uː l i +työ t y ø̯ +tämmöni t æ mː ø n i +tämä t æ m æ +törky t ø r k y +viijes v iː j e s +viisi v iː s i +väkövä v æ k ø v æ +yheksän y h e k s æ n +yksi y k s i +yksitoista y k s i t o i̯ s t ɑ +yö y̯ ø +ämmö æ mː ø +č t͡ʃ +šada ʃ ɑ d ɑ +šana ʃ ɑ n ɑ +šanakirja ʃ ɑ n ɑ k i r j ɑ diff --git a/data/scrape/tsv_summary.tsv b/data/scrape/tsv_summary.tsv index 55b52ecb..f7a99ede 100644 --- a/data/scrape/tsv_summary.tsv +++ b/data/scrape/tsv_summary.tsv @@ -169,6 +169,7 @@ kmr_latn_broad.tsv kmr Northern Kurdish Northern Kurdish Latin False Broad Fals kor_hang_narrow.tsv kor Korean Korean Hangul False Narrow False 16402 kor_hang_narrow_filtered.tsv kor Korean Korean Hangul True Narrow False 14141 kpv_cyrl_broad.tsv kpv Komi-Zyrian Komi-Zyrian Cyrillic False Broad True 321 +krl_latn_broad.tsv krl Karelian Karelian Latin False Broad True 106 kxd_latn_broad.tsv kxd Brunei Brunei Malay Latin False Broad True 346 lad_latn_broad.tsv lad Ladino Ladino 
Latin False Broad True 101 lao_laoo_broad.tsv lao Lao Lao Lao False Broad False 513