In [None]:
import json
import pandas as pd

## 90

In [None]:
# Read the input records into a DataFrame.
# NOTE(review): " " looks like a placeholder; point it at the real input file before running.
json_file_path = " "
df = pd.read_json(
    json_file_path,
    orient="records",
    encoding="utf-8",
)

# (1) Standard Korean initial (choseong) / medial (jungseong) / final (jongseong) tables.
# decompose_syllable indexes into these lists, so the order of the entries matters.
CHOSEONG_LIST = list("ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ")
JUNGSEONG_LIST = list("ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ")
# Index 0 is None: the syllable has no final consonant.
JONGSEONG_LIST = [None] + list("ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ")

# (2) Decomposing Syllables
def decompose_syllable(ch):
    """
    Split one precomposed Hangul syllable into its jamo.

    Args:
        ch: A single character.

    Returns:
        (initial, medial, final) looked up in CHOSEONG_LIST, JUNGSEONG_LIST and
        JONGSEONG_LIST. (None, None, None) when ``ch`` lies outside 가~힣.
    """
    codepoint = ord(ch)
    first, last = ord('가'), ord('힣')
    if not first <= codepoint <= last:
        return (None, None, None)

    # Syllables are laid out as (initial * 21 + medial) * 28 + final.
    block_index, final_idx = divmod(codepoint - first, 28)
    initial_idx, medial_idx = divmod(block_index, 21)
    return (
        CHOSEONG_LIST[initial_idx],
        JUNGSEONG_LIST[medial_idx],
        JONGSEONG_LIST[final_idx],
    )


def decompose_word(word):
    """
    Decompose every character of ``word``.

    Returns:
        A list with one (initial, medial, final) tuple per character. A character
        that decompose_syllable reports as (None, None, None) is stored as
        (ch, None, None) so the original character is not lost.
    """
    decomposed = []
    for char in word:
        initial, medial, final = decompose_syllable(char)
        undecomposed = initial is None and medial is None and final is None
        decomposed.append((char, None, None) if undecomposed else (initial, medial, final))
    return decomposed

# (3) Substitution Map
# Consonant jamo -> replacement string; transform_word applies it to initials and finals.
consonant_map = {
    "ㄱ": "r", "ㄴ": "J", "ㄷ": "U", "ㄹ": "N",
    "ㅁ": "ㅁ", "ㅂ": "고", "ㅅ": "ㄷ", "ㅇ": "ㅇ",
    "ㅈ": "K", "ㅊ": "lK", "ㅌ": "W", "ㅍ": "ㅒ", "ㅎ": "ㅓㅇ",
    "ㄲ": "F", "ㄸ": "ㅂ", "ㅃ": "丑", "ㅆ": "E",
}

# Vowel jamo -> replacement string; vowels not listed here are left unchanged by the lookup.
vowel_map = {
    "ㅏ": "ㅗ", "ㅗ": "ㅓ", "ㅛ": "ㅕ", "ㅜ": "ㅏ",
    "ㅡ": "ㅣ", "ㅣ": "ㅡ", "ㅢ": "丁",
}

# (4) Substitution Function
def transform_word(word):
    """
    Return ``word`` with every Hangul syllable rewritten through the substitution maps.

    Each character is decomposed with decompose_word. A few (initial, medial)
    pairs are replaced as a unit; otherwise the initial goes through
    consonant_map and the medial through vowel_map. A final consonant, when
    present, goes through consonant_map and is appended. Jamo missing from a
    map are kept unchanged. Characters that are not Hangul syllables are copied
    through untouched.

    Args:
        word: String to transform.

    Returns:
        The transformed string.
    """
    pieces = []

    for c, v, f in decompose_word(word):
        # decompose_word reports a non-Hangul character as (ch, None, None).
        # Testing only the medial catches that case. The earlier guard also
        # required c to be None, so it never fired and such characters reached
        # ``replaced_c + replaced_v``, raising TypeError (str + None).
        if v is None:
            pieces.append(c or '')
            continue

        # Special handling: transformation based on the initial/medial combination
        if c == 'ㄴ' and v == 'ㅣ':
            replaced_syll = '그'
        elif c == 'ㄷ' and v == 'ㅣ':
            replaced_syll = 'ㅁ'
        elif c == 'ㅇ' and v == 'ㅏ':
            replaced_syll = 'ㅎ'
        else:
            replaced_syll = consonant_map.get(c, c) + vowel_map.get(v, v)

        # Final consonant handling: substitute it if the syllable has one
        if f is not None:
            replaced_syll += consonant_map.get(f, f)

        pieces.append(replaced_syll)

    return ''.join(pieces)


# Add the transformed form of every entry in the "word" column.
df["transformed"] = df["word"].apply(transform_word)
# NOTE(review): bare expression; it has no effect when this runs as a plain script.
df.head(10)

# Write the records out; force_ascii=False keeps non-ASCII characters unescaped.
# NOTE(review): " " looks like a placeholder output path.
output_json_path = " "
df.to_json(
    output_json_path,
    orient="records",
    force_ascii=False,
    indent=4,
)


## 180

In [None]:
# Read the input records into a DataFrame.
# NOTE(review): " " looks like a placeholder; point it at the real input file before running.
json_file_path = " "
df = pd.read_json(
    json_file_path,
    orient="records",
    encoding="utf-8",
)

# (1) Standard Korean initial (choseong) / medial (jungseong) / final (jongseong) tables.
# The index of each entry is used both to decompose and to recompose syllables.
CHOSEONG_LIST = list("ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ")
JUNGSEONG_LIST = list("ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ")
# Index 0 is None: the syllable has no final consonant.
JONGSEONG_LIST = [None] + list("ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ")

# (2) Decomposing Syllables
def decompose_syllable(ch):
    """
    Split one precomposed Hangul syllable into its jamo.

    Args:
        ch: A single character.

    Returns:
        (initial, medial, final) looked up in CHOSEONG_LIST, JUNGSEONG_LIST and
        JONGSEONG_LIST. (ch, None, None) when ``ch`` lies outside 가~힣.
    """
    codepoint = ord(ch)
    first, last = ord('가'), ord('힣')
    if not first <= codepoint <= last:
        return (ch, None, None)

    # Syllables are laid out as (initial * 21 + medial) * 28 + final.
    block_index, final_idx = divmod(codepoint - first, 28)
    initial_idx, medial_idx = divmod(block_index, 21)
    return (
        CHOSEONG_LIST[initial_idx],
        JUNGSEONG_LIST[medial_idx],
        JONGSEONG_LIST[final_idx],
    )

def decompose_word_into_syllables(word):
    """
    Decompose each character of ``word`` into a tuple without trailing Nones.

    Returns:
        A list with one tuple per character: (initial, medial, final) when a final
        exists, (initial, medial) when only a medial exists, and a 1-tuple
        holding the first component otherwise.
    """
    syllables = []
    for char in word:
        initial, medial, final = decompose_syllable(char)
        if final is not None:
            keep = 3
        elif medial is not None:
            keep = 2
        else:
            keep = 1
        syllables.append((initial, medial, final)[:keep])
    return syllables


# (3) Substitution Map
# Consonant jamo -> replacement string (used for initials, finals and lone characters).
consonant_map = {
    "ㄱ": "ㄴ", "ㄴ": "ㄱ", "ㄷ": "그", "ㄹ": "ㄹ",
    "ㅁ": "ㅁ", "ㅂ": "A", "ㅅ": "V", "ㅇ": "ㅇ",
    "ㅈ": "と", "ㅋ": "ヒ", "ㅌ": "彐", "ㅍ": "ㅍ", "ㅎ": "우",
    "ㄲ": "ㄴㄴ", "ㄸ": "그그", "ㅃ": "AA", "ㅆ": "W",
}

# Vowel jamo -> replacement string.
vowel_map = {
    "ㅏ": "ㅓ", "ㅑ": "ㅕ", "ㅓ": "ㅏ", "ㅕ": "ㅑ",
    "ㅗ": "ㅜ", "ㅛ": "ㅠ", "ㅜ": "ㅗ",
    "ㅡ": "ㅡ", "ㅣ": "ㅣ", "ㅐ": "ㅐ", "ㅒ": "ㅒ",
    "ㅔ": "ㅣㅏ", "ㅖ": "ㅣㅑ",
}

# (Original Initial, Original Medial) -> Special Syllable
special_cv_map = {
    ("ㄱ", "ㅡ"): "그",
    ("ㅇ", "ㅡ"): "으",
    ("ㅂ", "ㅡ"): "브",
}

# Special Syllable -> Separate Substitution
separate_map = {"그": "匸", "으": "ㆆ", "브": "Ā"}


# (4) Substitution Function
def transform_syllable(tup):
    """
    Apply the substitution maps to one decomposed character.

    Args:
        tup: (initial, medial, final), (initial, medial) or a 1-tuple.

    Returns:
        A tuple of replacement strings. When (initial, medial) is a key of
        special_cv_map the pair collapses into a single string (further mapped
        through separate_map), followed by the mapped final if there is one.
        Otherwise each component is mapped on its own and None results are dropped.
    """
    size = len(tup)
    if size not in (2, 3):
        # Lone character: only the consonant map applies.
        (lone,) = tup
        return (consonant_map.get(lone, lone),)

    initial, medial = tup[0], tup[1]

    if (initial, medial) in special_cv_map:
        marker = special_cv_map[(initial, medial)]
        head = (separate_map.get(marker, marker),)
        if size == 2:
            return head
        final = tup[2]
        mapped_final = consonant_map.get(final, final) if final else None
        return head if mapped_final is None else head + (mapped_final,)

    mapped = [consonant_map.get(initial, initial), vowel_map.get(medial, medial)]
    if size == 3:
        mapped.append(consonant_map.get(tup[2], tup[2]))
    return tuple(item for item in mapped if item is not None)


# (5) Reversing & Reassembling
def reverse_tuple_and_reassemble(tup):
    """
    Reverse ``tup`` and compose a Hangul syllable from it when possible.

    After reversal the elements are read as (initial, medial[, final]). If each
    one is found in its jamo table, the precomposed syllable is returned;
    otherwise the reversed elements are concatenated as they are. For any
    length other than 2 or 3 the first element of the reversed tuple is returned.
    """
    flipped = tup[::-1]
    size = len(flipped)
    if size not in (2, 3):
        return flipped[0]

    lead, vowel = flipped[0], flipped[1]
    tail = flipped[2] if size == 3 else None

    composable = lead in CHOSEONG_LIST and vowel in JUNGSEONG_LIST
    if composable and size == 3:
        composable = tail in JONGSEONG_LIST
    if not composable:
        return "".join(flipped)

    final_idx = 0 if tail is None else JONGSEONG_LIST.index(tail)
    block_index = CHOSEONG_LIST.index(lead) * 21 + JUNGSEONG_LIST.index(vowel)
    return chr(0xAC00 + block_index * 28 + final_idx)


# (6) Final Transformation Function
def transform_word_180(word, reverse_syllable_order=False):
    """
    Transform ``word`` syllable by syllable.

    Each character is decomposed, substituted with transform_syllable, then
    reversed and recomposed with reverse_tuple_and_reassemble.

    Args:
        word: String to transform.
        reverse_syllable_order: When true, the syllables are also emitted in
            reverse order.

    Returns:
        The transformed string.
    """
    converted = [transform_syllable(s) for s in decompose_word_into_syllables(word)]
    if reverse_syllable_order:
        converted.reverse()
    return "".join([reverse_tuple_and_reassemble(piece) for piece in converted])


# Add the transformed form of every entry in the "word" column (syllable order reversed).
df["transformed"] = df["word"].apply(
    lambda entry: transform_word_180(entry, reverse_syllable_order=True)
)
# NOTE(review): bare expression; it has no effect when this runs as a plain script.
df.head(10)

# Write the records out; force_ascii=False keeps non-ASCII characters unescaped.
# NOTE(review): " " looks like a placeholder output path.
output_json_path = " "
df.to_json(
    output_json_path,
    orient="records",
    force_ascii=False,
    indent=4,
)

## 270

In [None]:
# Read the input records into a DataFrame.
# NOTE(review): " " looks like a placeholder; point it at the real input file before running.
json_file_path = " "
df = pd.read_json(
    json_file_path,
    orient="records",
    encoding="utf-8",
)

# (1) Standard Korean initial (choseong) / medial (jungseong) / final (jongseong) tables.
# The index of each entry is used both to decompose and to recompose syllables.
CHOSEONG_LIST = list("ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ")
JUNGSEONG_LIST = list("ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ")
# Index 0 is None: the syllable has no final consonant.
JONGSEONG_LIST = [None] + list("ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ")


# (2) Decomposing Syllables
def decompose_syllable(ch):
    """
    Split one precomposed Hangul syllable into its jamo.

    Args:
        ch: A single character.

    Returns:
        (initial, medial, final) looked up in CHOSEONG_LIST, JUNGSEONG_LIST and
        JONGSEONG_LIST. (ch, None, None) when ``ch`` lies outside 가~힣.
    """
    codepoint = ord(ch)
    first, last = ord('가'), ord('힣')
    if not first <= codepoint <= last:
        return (ch, None, None)

    # Syllables are laid out as (initial * 21 + medial) * 28 + final.
    block_index, final_idx = divmod(codepoint - first, 28)
    initial_idx, medial_idx = divmod(block_index, 21)
    return (
        CHOSEONG_LIST[initial_idx],
        JUNGSEONG_LIST[medial_idx],
        JONGSEONG_LIST[final_idx],
    )

def decompose_word(word):
    """
    Decompose every character of ``word`` with decompose_syllable.

    Returns:
        A list with one (initial, medial, final) tuple per character, in order.
    """
    triples = (decompose_syllable(char) for char in word)
    return [(initial, medial, final) for initial, medial, final in triples]


# (3) Substitution Map
# Consonant jamo -> replacement string (used for initials, finals and lone characters).
consonant_map = {
    "ㄱ": "J", "ㄴ": "r", "ㄷ": "ㅅ", "ㄹ": "N",
    "ㅁ": "ㅁ", "ㅂ": "ㄸ", "ㅅ": ">", "ㅇ": "ㅇ",
    "ㅈ": ">ㅣ", "ㅊ": ">ㅏ", "ㅋ": "JJ", "ㅌ": "ㆬ",
    "ㅍ": "ㅒ", "ㅎ": "아",
    "ㄸ": "A", "ㅃ": "EE", "ㅆ": "3",
}

# Vowel jamo -> replacement string.
vowel_map = {
    "ㅏ": "ㅜ", "ㅑ": "ㅠ", "ㅓ": "ㅗ", "ㅕ": "ㅛ",
    "ㅗ": "ㅏ", "ㅛ": "ㅑ", "ㅜ": "ㅓ", "ㅣ": "ㅡ",
    "ㅐ": "ㅐ", "ㅔ": "ㅔ",
    "ㅘ": "ㅏㅜ", "ㅚ": "上", "ㅢ": "丄", "ㅡ": "ㅣ",
}

# (Original Initial, Original Medial) -> Special Syllable
special_cv_map = {
    ("ㄷ", "ㅐ"): "대",
    ("ㄴ", "ㅔ"): "네",
    ("ㄱ", "ㅘ"): "과",
    ("ㄷ", "ㅘ"): "돠",
    ("ㅁ", "ㅘ"): "뫄",
    ("ㅂ", "ㅘ"): "봐",
    ("ㅇ", "ㅘ"): "와",
}

# Special Syllable -> Separate Substitution
separate_map = {
    "대": "모", "네": "또", "과": "부",
    "돠": "ㅏ수", "뫄": "ㅏ무", "봐": "ㅏ뚜", "와": "ㅏ우",
}


# (4) Substitution Function
def handle_special_syllable(syll):
    """Return the separate_map replacement for ``syll``, or ``syll`` itself when it has none."""
    return separate_map.get(syll, syll)

def transform_syllable(c, v, f):
    """
    Substitute the (initial, medial, final) components of one decomposed character.

    Args:
        c: Initial consonant, or the raw character when nothing was decomposed.
        v: Medial vowel, or None.
        f: Final consonant, or None.

    Returns:
        A list of replacement strings in initial, medial, final order. When
        (c, v) is a key of special_cv_map, initial and medial collapse into the
        single string produced by handle_special_syllable. An all-None triple
        yields an empty list.
    """
    # Nothing to emit. This used to return [None]; a None element is not a
    # string and makes a later "".join(...) over the collected pieces raise
    # TypeError.
    if c is None and v is None and f is None:
        return []

    if (c, v) in special_cv_map:
        pieces = [handle_special_syllable(special_cv_map[(c, v)])]
        if f is not None:
            pieces.append(consonant_map.get(f, f))
        return pieces

    # Regular path: map each component that is present; unmapped jamo pass through.
    pieces = []
    if c is not None:
        pieces.append(consonant_map.get(c, c))
    if v is not None:
        pieces.append(vowel_map.get(v, v))
    if f is not None:
        pieces.append(consonant_map.get(f, f))
    return pieces


# (5) Obtain a list of Jamo (characters) and reverse the order
def transform_word_to_list(word):
    """
    Transform every character of ``word`` and return the pieces in reverse order.

    Returns:
        The strings produced by transform_syllable for all characters,
        flattened into one list and then reversed.
    """
    pieces = []
    for triple in decompose_word(word):
        initial, medial, final = triple
        pieces.extend(transform_syllable(initial, medial, final))
    pieces.reverse()
    return pieces


# (6) Reassemble "Vowel + Right-side Consonant"
# Vowels that are never merged with the consonant on their right.
EXCLUDE_VOWELS = {'ㅏ', 'ㅑ', 'ㅓ', 'ㅣ'}


def reassemble_jamos_vowel_and_right_consonant(jamo_list):
    """
    Join ``jamo_list`` into a string, merging "vowel + following consonant" pairs.

    A pair is merged into one precomposed syllable (no final consonant) when the
    current item is in JUNGSEONG_LIST but not in EXCLUDE_VOWELS and the next
    item is in CHOSEONG_LIST. Every other item is copied unchanged.
    """
    out = []
    total = len(jamo_list)
    pos = 0
    while pos < total:
        current = jamo_list[pos]
        following = pos + 1
        mergeable = (
            current in JUNGSEONG_LIST
            and current not in EXCLUDE_VOWELS
            and following < total
            and jamo_list[following] in CHOSEONG_LIST
        )
        if not mergeable:
            out.append(current)
            pos = following
            continue

        # The consonant to the right becomes the initial of the new syllable.
        initial_idx = CHOSEONG_LIST.index(jamo_list[following])
        medial_idx = JUNGSEONG_LIST.index(current)
        out.append(chr(0xAC00 + (initial_idx * 21 * 28) + (medial_idx * 28)))
        pos += 2

    return "".join(out)


# (7) Final Transformation Function
def transform_and_reassemble(word):
    """Transform ``word`` into its reversed piece list and reassemble that list into a string."""
    return reassemble_jamos_vowel_and_right_consonant(transform_word_to_list(word))


# Add the transformed form of every entry in the "word" column.
df["transformed"] = df["word"].apply(transform_and_reassemble)
# NOTE(review): bare expression; it has no effect when this runs as a plain script.
df.head(10)

# Write the records out; force_ascii=False keeps non-ASCII characters unescaped.
# NOTE(review): " " looks like a placeholder output path.
output_json_path = " "
df.to_json(
    output_json_path,
    orient="records",
    force_ascii=False,
    indent=4,
)