In [64]:
# Load the Excel file
import pandas as pd
path_data = r'../../UKR_DATA/'
path_rules = r'UKR_transliteration/ukrainian_transliteration_rules.xlsx'

transliteration_df = pd.read_excel(f"{path_data}{path_rules}")

# create a transliteration dictionary mapping the cyrillic to its latin counterpart, but not the exceptions
translit_dict = dict(zip(transliteration_df['Cyrillic'], transliteration_df['Latin1']))


In [65]:
# 1. Define a function that finds "ї" after Cyrillic vowels and replaces it with 'yi'
def find_ї_after_vowels(string):
    # List all the Ukrainian vowels    
    list_vowels = ['а', 'е', 'є', 'и', 'i', 'ї', 'й', 'о', 'у', 'ю', 'я']
    
    # create a list of all the characters comprising the string
    char_list = list(string)
    
    # loop through each character in the list and when 'ї' follows a vowel 
    # defined in the list_vowels change it to 'yi'
    for count, value in enumerate(char_list):
        if char_list[count-1] in list_vowels and char_list[count] == 'ї':
            char_list[count] = 'yi'
        
    # rebuild the string
    modified_str = "".join(char_list)

    # return the modified string
    return modified_str

# 2. Define a function that deals with all the other exceptions and then maps the characters that remain to their appropriate Latin counterpart                   
def transliterate_the_rest(string):
    new_str = ''

    # change 'зг' in 'zgh' 
    if 'зг' in string:
        string = string.replace('зг', 'zgh')

    # if a string starts with 'є' change it into 'ye'
    if string[0] == 'є':
        new_str = 'ye'
        new_str = new_str + string[1:]
        string = new_str
    
    # if a string starts with 'й' change it into 'y'
    elif string[0] == 'й':
        new_str = 'y'
        new_str = new_str + string[1:]
        string = new_str

    # if a string starts with 'ю' change it into 'yu'
    elif string[0] == 'ю':
        new_str = 'yu'
        new_str = new_str + string[1:]
        string = new_str

    # if a string starts with 'я' change it into 'ya'
    elif string[0] == 'я':
        new_str = 'ya'
        new_str = new_str + string[1:]
        string = new_str

    # if a string starts with 'ї' change it into 'yi'
    elif string[0] == 'ї':
        new_str = 'yi'
        new_str = new_str + string[1:]
        string = new_str

    # Transliterate the remaining characters that have not yet been transliterated
    output_str = ''.join(translit_dict[char] if char in translit_dict else char for char in string)
    
    # return the final string
    return output_str

# Sequence both functions into a single one
def transliterate_ukrainian(string):
    modified_str = find_ї_after_vowels(string)
    output_str = transliterate_the_rest(modified_str)
    return output_str

In [69]:
ukrainian_words = [
    'абетка-абетка',     # Test for 'ї' following a vowel
    'вино',       # Test for 'ї' following a vowel
    'гора',       # Test for 'ї' following a vowel
    'дім',        # Test for 'ї' following a vowel
    'єж',         # Test for 'ї' at the beginning of a word
    'згода',      # Test for 'зг' exception
    'зірка',      # Test for 'ї' following a vowel
    'ідея',       # Test for 'ї' following a vowel
    'їжа',        # Test for 'ї' following a vowel
    'йогурт',     # Test for 'ї' following a vowel
    'окно',       # Test for 'ї' following a vowel
    'пісок',      # Test for 'ї' following a vowel
    'усміх',      # Test for 'ї' following a vowel
    'футбол',     # Test for 'ї' following a vowel
    'хвиля',      # Test for 'ї' following a vowel
    'цирк',       # Test for 'ї' following a vowel
    'чайка',      # Test for 'ї' following a vowel
    'школа',      # Test for 'ї' following a vowel
    'щасливий',   # Test for 'ї' following a vowel
    'юнак',       # Test for 'ї' following a vowel
    'ялинка',     # Test for 'ї' following a vowel
    'єксперт'     # Test for 'є' exception
    'абеткаї',     # Test for 'ї' following a vowel in find_ї_after_vowels
    'виної',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'гораї',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'дімї',        # Test for 'ї' following a vowel in find_ї_after_vowels
    'єжї',         # Test for 'ї' at the beginning of a word in find_ї_after_vowels
    'згодаї',      # Test for 'зг' exception in transliterate_the_rest
    'зіркаї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'ідеяї',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'їжаї',        # Test for 'ї' following a vowel in find_ї_after_vowels
    'йогуртї',     # Test for 'ї' following a vowel in find_ї_after_vowels
    'окної',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'пісокї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'усміхї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'футболї',     # Test for 'ї' following a vowel in find_ї_after_vowels
    'хвиляї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'циркї',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'чайкаї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'школаї',      # Test for 'ї' following a vowel in find_ї_after_vowels
    'щасливийї',   # Test for 'ї' following a vowel in find_ї_after_vowels
    'юнакї',       # Test for 'ї' following a vowel in find_ї_after_vowels
    'ялинкаї',     # Test for 'ї' following a vowel in find_ї_after_vowels
    ]

for x in ukrainian_words:
    print(x)
    result1 = find_ї_after_vowels(x)
    print(result1)
    # result2 = transliterate_the_rest(result1)
    # print(result2)
    result3 = transliterate_ukrainian(x)
    print(result3)

абетка-абетка
абетка-абетка
abetka-abetka
вино
вино
vyno
гора
гора
hora
дім
дім
dіm
єж
єж
yezh
згода
згода
zghoda
зірка
зірка
zіrka
ідея
ідея
іdeia
їжа
yiжа
yizha
йогурт
йогурт
yohurt
окно
окно
okno
пісок
пісок
pіsok
усміх
усміх
usmіkh
футбол
футбол
futbol
хвиля
хвиля
khvylia
цирк
цирк
tsyrk
чайка
чайка
chaika
школа
школа
shkola
щасливий
щасливий
shchaslyvyi
юнак
юнак
yunak
ялинка
ялинка
yalynka
єкспертабеткаї
єкспертабеткаyi
yekspertabetkayi
виної
виноyi
vynoyi
гораї
гораyi
horayi
дімї
дімї
dіmi
єжї
єжї
yezhi
згодаї
згодаyi
zghodayi
зіркаї
зіркаyi
zіrkayi
ідеяї
ідеяyi
іdeiayi
їжаї
yiжаyi
yizhayi
йогуртї
йогуртї
yohurti
окної
окноyi
oknoyi
пісокї
пісокї
pіsoki
усміхї
усміхї
usmіkhi
футболї
футболї
futboli
хвиляї
хвиляyi
khvyliayi
циркї
циркї
tsyrki
чайкаї
чайкаyi
chaikayi
школаї
школаyi
shkolayi
щасливийї
щасливийyi
shchaslyvyiyi
юнакї
юнакї
yunaki
ялинкаї
ялинкаyi
yalynkayi
