In [None]:
import random
import pandas as pd
import hgtk

In [None]:
# Destination workbook and source workbook locations.
# Both are blank placeholders and must be filled in before this cell is run.
output_path = " "
file_path = " "

# Parse the source workbook into the DataFrame used by the rest of this cell
df = pd.read_excel(io=file_path)

def decompose_and_shuffle(word):
    """
    Split a Korean word into its individual jamo and build a shuffled copy.

    Each Hangul syllable is decomposed by ``hgtk`` into its initial consonant,
    medial vowel, and (when present) final consonant. The "ᴥ" delimiter that
    ``hgtk`` places in its output is stripped before further processing.

    Args:
        word (str): Korean word for decomposition and shuffling.

    Returns:
        tuple:
            - String of decomposed characters joined by commas.
            - String of the same characters shuffled and joined by commas.

        When ``word`` is not a string, or decomposition fails, the input is
        returned unchanged as ``(word, word)`` so that a single bad cell does
        not abort a whole-column ``apply``.
    """
    # Non-string cells (e.g. NaN/None coming from empty spreadsheet cells)
    # cannot be decomposed; pass them through untouched.
    if not isinstance(word, str):
        return word, word

    try:
        # Hangul decomposition followed by removal of the syllable delimiter
        decomposed = hgtk.text.decompose(word).replace("ᴥ", "")
    except Exception:
        # Deliberate best-effort fallback: keep the original word on failure.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long-running apply can still be stopped.
        return word, word

    jamo = list(decomposed)

    # Build the ordered string first, because shuffle() reorders in place
    ordered_comma = ",".join(jamo)
    random.shuffle(jamo)
    shuffled_comma = ",".join(jamo)

    return ordered_comma, shuffled_comma

# Decompose every entry of the 'words' column exactly once.
# The (original, shuffled) pairs are collected in a list rather than unpacked
# via zip(*...), because unpacking an empty zip raises ValueError when the
# DataFrame has no rows.
decomposition_pairs = [decompose_and_shuffle(word) for word in df['words']]

# Both columns come from the same pairs, so each row's shuffled string is a
# permutation of that row's ordered string.
df['Combination'] = [ordered for ordered, _ in decomposition_pairs]
df['Combination_random'] = [shuffled for _, shuffled in decomposition_pairs]

# DataFrame export to an Excel file (row index omitted)
df.to_excel(output_path, index=False)

# Success message output
print("success")

In [None]:
# Locations of the Excel workbook to read and the JSON file to write.
# Both are blank placeholders and must be filled in before this cell is run.
excel_file = " "
json_file = " "

# Read the workbook into a DataFrame
df = pd.read_excel(io=excel_file)

# Every 'Beginner' row is retained unchanged
beginner_df = df.loc[df['level'] == 'Beginner']

# 'Intermediate' rows are down-sampled to 1101 rows; the fixed random_state
# makes the same rows get drawn on every run.
intermediate_df = df.loc[df['level'] == 'Intermediate'].sample(n=1101, random_state=42)

# Stack the two subsets with a fresh 0..n-1 index, then cast every column to
# str so the JSON writer only ever sees strings.
# NOTE(review): the cast also stringifies missing values (e.g. NaN -> 'nan');
# confirm that is the intended output.
final_df = pd.concat([beginner_df, intermediate_df], ignore_index=True).astype(str)

# Write one JSON object per row; force_ascii=False keeps Korean text readable
# instead of emitting \u escapes.
final_df.to_json(json_file, orient="records", force_ascii=False, indent=4)

print(f"Success: {json_file}")