In [1]:
import pandas as pd

# ==========================================
# 1. INPUT: Core Wordlist (87 Roots)
# ==========================================
# Each (root, gloss) pair below is expanded into a {"root": ..., "meaning": ...}
# record; list order is the order in which the roots are processed later on.
core_wordlist = [
    {"root": root_form, "meaning": gloss}
    for root_form, gloss in (
        ("gamet", "hand"),
        ("wiri", "left"),
        ("wanan", "right"),
        ("bitih", "leg/foot"),
        ("daan", "road/path"),
        ("tangoy", "to swim"),
        ("tapok", "dust"),
        ("katat", "skin"),
        ("gorot", "back"),
        ("tyan", "belly"),
        ("botol", "bone"),
        ("agtay", "liver"),
        ("soso", "breast"),
        ("abaya", "shoulder"),
        ("daya", "blood"),
        ("olo", "head"),
        ("leey", "neck"),
        ("sabot", "hair"),
        ("arong", "nose"),
        ("angot", "to sniff/smell"),
        ("bebey", "mouth"),
        ("ngipin", "tooth"),
        ("dila", "tongue"),
        ("kalis", "to laugh"),
        ("akis", "to cry"),
        ("soka", "to vomit"),
        ("kan", "to eat"),
        ("inom", "to drink"),
        ("kayat", "to bite"),
        ("sepsep", "to suck"),
        ("toly", "ear"),
        ("ingar", "to hear"),
        ("mata", "eye"),
        ("kit", "to see"),
        ("elek", "to sleep"),
        ("taynep", "to dream"),
        ("tekre", "to sit"),
        ("ideng", "to stand"),
        ("lalaki", "man/male"),
        ("babayi", "woman/female"),
        ("anak", "child"),
        ("ahawa", "spouse"),
        ("ina", "mother"),
        ("tatay", "father"),
        ("bali", "house"),
        ("atep", "roof"),
        ("ngaran", "name"),
        ("robir", "rope"),
        ("tayi", "to sew"),
        ("kadayem", "needle"),
        ("takaw", "to steal"),
        ("pati", "to kill"),
        ("tadem", "sharp"),
        ("obra", "to work"),
        ("tanem", "to plant"),
        ("pili", "to choose"),
        ("pespes", "to squeeze"),
        ("kotkot", "to dig"),
        ("haliw", "to buy"),
        ("bantak", "to throw"),
        ("aso", "dog"),
        ("manok", "bird/chicken"),
        ("salay", "egg"),
        ("pakpak", "wing"),
        ("lompad", "to fly"),
        ("ikoy", "tail"),
        ("olay", "snake"),
        ("bolati", "worm"),
        ("gigang", "spider"),
        ("kona", "fish"),
        ("yamot", "root"),
        ("bonga", "fruit"),
        ("bato", "stone"),
        ("boyangin", "sand"),
        ("ranom", "water"),
        ("asin", "salt"),
        ("langit", "sky"),
        ("bulan", "moon"),
        ("bitoen", "star"),
        ("gonem", "cloud"),
        ("rapeg", "rain"),
        ("kodor", "thunder"),
        ("kimat", "lightning"),
        ("emot", "warm"),
        ("rayep", "cold"),
        ("albet", "wet"),
        ("byat", "heavy"),
    )
]

# ==========================================
# 2. THE GENERATOR LOGIC (Rules A & B)
# ==========================================

def apply_forward_gemination(root):
    """
    Rule A: Simulates Assimilation/Gemination.
    Input: 'bato' -> Output: 'mammato'

    The first character of the root selects the prefixed form
    (the comparison is case-sensitive; only lowercase initials match):
      - 'b', 'p'      -> initial dropped, 'mamm' prepended
      - 't', 'd', 's' -> initial dropped, 'mann' prepended
      - 'k'           -> initial dropped, 'mang' prepended
      - anything else -> root kept whole, 'man' prepended

    An empty root has no initial to assimilate, so it takes the default
    branch and yields 'man' instead of raising IndexError. This matches
    apply_forward_reduction, which returns the bare suffixes for ''.

    Args:
        root: Root word as a string.

    Returns:
        The prefixed form as a string.
    """
    # Slice rather than index: root[:1] is '' for an empty root, whereas
    # root[0] would raise IndexError.
    first_char = root[:1]

    # Membership is tested against tuples, not strings: '' in "bp" is True
    # for substring checks and would misroute the empty root.
    if first_char in ('b', 'p'):
        # Bilabial: Replace with 'mm'
        return "mamm" + root[1:]
    elif first_char in ('t', 'd', 's'):
        # Alveolar: Replace with 'nn'
        return "mann" + root[1:]
    elif first_char in ('k',):
        # Velar: Replace with 'mang' (Standard nasal substitution)
        return "mang" + root[1:]
    else:
        # Default: Just add 'man' prefix
        return "man" + root

def apply_forward_reduction(root):
    """
    Rule B: Vowel Reduction (Syncope) ahead of the '-en' / '-an' suffixes.
    Example: 'atep' -> ('atpen', 'atpan')

    When the root contains two or more of the letters a/e/i/o/u, the last
    such letter is removed before suffixing; otherwise the root is used
    unchanged.

    Returns:
        A 2-tuple: (form ending in 'en', form ending in 'an').
    """
    vowel_positions = [pos for pos, letter in enumerate(root) if letter in "aeiou"]

    if len(vowel_positions) < 2:
        # Too short to reduce: suffix the full root.
        stem = root
    else:
        # Cut out the final vowel, e.g. atep -> atp (CVCVC -> CVCC).
        drop_at = vowel_positions[-1]
        stem = root[:drop_at] + root[drop_at + 1:]

    return stem + "en", stem + "an"

# ==========================================
# 3. APPLY RULES TO DATASET
# ==========================================

# One output row per entry of core_wordlist, in wordlist order.
augmented_data = []

for item in core_wordlist:
    root, meaning = item['root'], item['meaning']

    # Rule A (gemination) is applied first, then Rule B (reduction),
    # which yields the -en and -an forms together.
    gem_form = apply_forward_gemination(root)
    red_en, red_an = apply_forward_reduction(root)

    # Keys are inserted in the order the columns appear in the results table.
    row = {"Root": root, "Meaning": meaning}
    row["Gen: Geminated (Rule A)"] = gem_form
    row["Gen: Reduced -en (Rule B)"] = red_en
    row["Gen: Reduced -an (Rule B)"] = red_an
    augmented_data.append(row)

# ==========================================
# 4. VIEW RESULTS
# ==========================================

# Build the results table: one row per root, one column per generated form.
df = pd.DataFrame(augmented_data)

# Display options to show all columns
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# len(df) counts rows (one per root), not generated forms. Every row carries
# one form per "Gen: ..." column, so the variant total is rows x those columns.
generated_columns = [col for col in df.columns if str(col).startswith("Gen:")]
variant_count = len(df) * len(generated_columns)

print(f"Successfully generated {variant_count} variants from {len(core_wordlist)} roots.")
# `display` is not defined in this cell; presumably the IPython/Jupyter
# rich-display helper supplied by the notebook environment.
display(df)

# Optional: Download to CSV
# df.to_csv("bolinao_generated_variants.csv", index=False)

Successfully generated 87 variants from 87 roots.


Unnamed: 0,Root,Meaning,Gen: Geminated (Rule A),Gen: Reduced -en (Rule B),Gen: Reduced -an (Rule B)
0,gamet,hand,mangamet,gamten,gamtan
1,wiri,left,manwiri,wiren,wiran
2,wanan,right,manwanan,wannen,wannan
3,bitih,leg/foot,mammitih,bithen,bithan
4,daan,road/path,mannaan,danen,danan
...,...,...,...,...,...
82,kimat,lightning,mangimat,kimten,kimtan
83,emot,warm,manemot,emten,emtan
84,rayep,cold,manrayep,raypen,raypan
85,albet,wet,manalbet,albten,albtan
