In [None]:
import pandas as pd

# Load the lexicon CSV (decoded as Latin-1) and report how many rows it holds.
df = pd.read_csv("Bolinao_Lexicon.csv", encoding='latin-1')
total_words = len(df)
print("Dataset loaded. Total words:", total_words)

def undo_botolan_rules(word: str) -> list:
    """
    Propose candidate roots for *word* by reversing Botolan Sambal affixes.

    The rules follow the affix descriptions cited inline (Antworth, 1979).
    Each numbered rule group is tested independently, so a single word can
    collect candidates from several groups. The results are guesses only:
    nothing here checks that a candidate is a real word.

    Args:
        word: Surface form to analyse. It is lower-cased and stripped of
            surrounding whitespace before any rule is tried.

    Returns:
        A list of distinct, non-empty candidate roots in the order the rules
        produced them. The list is empty when *word* is not a string or no
        rule applies.
    """
    if not isinstance(word, str):
        return []

    word = word.lower().strip()
    candidates = []

    # --- RULE 1: The "Amp-" Imperfective (Progressive) ---
    # Reference: Antworth (1979), p. 27
    # Longer prefixes are tested first so that e.g. "ampag" is not swallowed
    # by the generic "amp" branch.
    if word.startswith("ampag"):
        candidates.append(word[5:])       # ampaglinis -> linis
    elif word.startswith(("ampaka", "ampaki")):
        candidates.append(word[6:])       # ampakakanta -> kanta, ampakilako -> lako
    elif word.startswith("ampang"):
        # Could be ampang + root OR nasal assimilation (k/g/ng)
        base = word[6:]
        candidates.append(base)           # ampanggawa -> gawa
        candidates.append("k" + base)     # assimilation: ng -> k
        candidates.append("g" + base)     # assimilation: ng -> g
    elif word.startswith("amp"):
        # Assimilation to bilabials: restore a root-initial p (or b)
        base = word[3:]
        candidates.append("p" + base)     # ampahok -> pahok
        candidates.append("b" + base)
        candidates.append(base)
    elif word.startswith("amb"):
        # The nasal surfaces as "m" before a b-initial root; the "amp" branch
        # above can never see these forms, so strip only the "am".
        candidates.append(word[2:])       # ambato -> bato
    elif word.startswith("an"):
        # Assimilation to dental/alveolar or general prefix
        candidates.append(word[2:])       # anlomateng -> lomateng

    # --- RULE 2: The "Ni-" Metathesis (Perfective) ---
    # "-in- metathesizes to ni- before l and y"
    # Reference: Antworth (1979), p. 26
    if word.startswith("ni"):
        stem = word[2:]
        # Strict Botolan Rule: this usually applies to 'l' and 'y' roots
        if stem.startswith(("l", "y")):
            candidates.append(stem)
            # Suffixes often paired with the aspect change (-an/-en)
            if stem.endswith(("an", "en")):
                candidates.append(stem[:-2])  # nilinisan -> linis

    # --- RULE 3: Pluralized Verbs (Mipa-) ---
    # Reference: Antworth (1979), p. 25
    if word.startswith("mipag"):
        candidates.append(word[5:])       # mipagalih -> alih
    elif word.startswith("mipang"):
        base = word[6:]
        candidates.append(base)
        candidates.append("k" + base)

    # --- RULE 4: Social/Reciprocal (Maki-, Mi-) ---
    # Reference: Antworth (1979), p. 23-24
    if word.startswith("maki"):
        candidates.append(word[4:])       # makilako -> lako
    elif word.startswith("mi"):
        stem = word[2:]
        candidates.append(stem)           # mitapon -> tapon

        # Reduplication in reciprocals: a doubled first two letters
        # (mitatapon -> tapon)
        if len(stem) >= 4 and stem[:2] == stem[2:4]:
            candidates.append(stem[2:])

    # dict.fromkeys de-duplicates while keeping first-seen order, so results
    # are reproducible between runs (a set's iteration order is not). Empty
    # strings arise when the word is nothing but a prefix and are dropped.
    return [root for root in dict.fromkeys(candidates) if root]

# Index the lexicon once by normalised headword (stripped, lower-cased).
# Each entry keeps the number of rows sharing that headword and their distinct
# non-null English meanings in first-seen order. Building this up front avoids
# re-normalising and re-scanning the whole column for every candidate root.
normalized_words = df['word'].str.strip().str.lower()
lexicon_index = {}
for key, root_meaning in zip(normalized_words, df['meaning_english']):
    if pd.isna(key):
        # Missing or non-string headword: it can never equal a candidate root.
        continue
    entry = lexicon_index.setdefault(key, {"count": 0, "meanings": {}})
    entry["count"] += 1
    if not pd.isna(root_meaning):
        # Dict keys de-duplicate while preserving insertion order.
        entry["meanings"][root_meaning] = None

confirmed_botolan_matches = []

for word, meaning in zip(df['word'], df['meaning_english']):
    if pd.isna(word):
        continue

    for r in undo_botolan_rules(word):
        key = r.strip().lower()
        if not key:
            # An empty candidate carries no information; never look it up.
            continue

        entry = lexicon_index.get(key)
        if entry is None:
            continue

        # One record per (word, candidate root) pair that exists in the lexicon.
        confirmed_botolan_matches.append({
            "botolan_rule_applied": "Yes",
            "original_word": word,
            "original_meaning": meaning,
            "predicted_root": r,
            "root_meaning": "; ".join(entry["meanings"]),
            "match_count": entry["count"]
        })

botolan_df = pd.DataFrame(confirmed_botolan_matches)
print(f"Total Botolan-Rule Matches Found: {len(botolan_df)}")
if not botolan_df.empty:
    print(botolan_df.head(10))
botolan_df.to_csv("botolan_rules_compatibility_test.csv", index=False)

Dataset loaded. Total words: 20307
Total Botolan-Rule Matches Found: 510
  botolan_rule_applied original_word  \
0                  Yes       ampaya'   
1                  Yes       ampaya'   
2                  Yes       anem'em   
3                  Yes         anina   
4                  Yes     anrarawan   
5                  Yes        ansain   
6                  Yes      ansarain   
7                  Yes    ansaraytaw   
8                  Yes     ansarayti   
9                  Yes      ansaytaw   

                                    original_meaning predicted_root  \
0                Protective care, upholding someone.          paya'   
1                Protective care, upholding someone.          baya'   
2  The degree of warmness that is the same as the...          em'em   
3  An expression of an unmeasurable thing such as...            ina   
4                  A long necked and tail dragonfly.        rarawan   
5  Identifies a specific grouping of things close...        