### Myanmar Open WordNet walkthrough

###### Thura Aung @ LU L

In [3]:
# Singapore owned Myanmar Open WordNet inspired by Princeton WordNet
# Maintainer: Wenjie WANG (王 文杰 wáng wén jié） < wwang5 + @ + ntu.edu.sg> Computational Linguistics Lab, Linguistics & Multilingual Studies, Nanyang Technological University
# under a Creative Commons Attribution 4.0 International License

In [2]:
# website: https://wordnet.burmese.sg/
# link: https://github.com/myanmaropenwordnet/mow

In [41]:
# format
!head -5 mmWORDNET.txt

## MOW 0.1.3	mya	CC BY 4.0
00001740-v	mya:lemma	ရှူ
00005815-v	mya:lemma	ချောင်းဆိုး
00007328-v	mya:lemma	သမ်း
00007846-n	mya:lemma	မနုဿ


In [1]:
# small dataset :(

In [63]:
def load_wordnet(file_path):
    wordnet = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) >= 3:
                synset_id = parts[0]
                pos_tag = parts[1]
                words = parts[2:]
                if synset_id not in wordnet:
                    wordnet[synset_id] = []
                wordnet[synset_id].extend(words)
    return wordnet

def find_synonyms(wordnet, word):
    synonyms = []
    for synset_id, words in wordnet.items():
        if word in words:
            synonyms.extend(words)
    # Not found OOV return itself
    if len(synonyms) == 0:
        synonyms.append(word)
    return synonyms

file_path = './mmWORDNET.txt'  
wordnet = load_wordnet(file_path)

word_to_find = 'ကရော်ကမည်'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ကရော်ကမည်': ကပြက်ကချော်, ကရော်ကမည်, ကရော်ကမည်, ကပြက်ကချော်, ပြက်တီးပြက်ချော်, ကပြက်ကချော်, ပေါ့ပေါ့တန်တန်, ပေါ့ရွှတ်ရွှတ်, ကရော်ကမည်


In [60]:
word_to_find = 'မြည်တွန်တောက်တီး'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'မြည်တွန်တောက်တီး': တွတ်, မြည်တွန်တောက်တီး, မြည်, မြည်တွန်တောက်တီး, တွတ်, မြည်တွန်တောက်တီး


In [31]:
word_to_find = 'အမည်မှည့်'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'အမည်မှည့်': မှည့်, ပညတ်, အမည်မှည့်, သမုတ်


In [32]:
!wc mmWORDNET.txt

    1476    4739   62541 mmWORDNET.txt


In [62]:
# no synonyms return itself
word_to_find = 'ရန်ဖြစ်'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ရန်ဖြစ်': ရန်ဖြစ်


In [34]:
# can find real synonyms
word_to_find = 'ဆင်တူ'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ဆင်တူ': တူ, ဆင်, ဆင်တူ, တူ, ဆင်, ဆင်တူ, တူ, ဆင်တူ, တူ, ဆင်, ဆင်တူ


In [42]:
# here are their ids
# same ids
# 02665282-v	mya:lemma	တူ
# 02665282-v	mya:lemma	ဆင်
# 02665282-v	mya:lemma	ဆင်တူ

In [37]:
# derived words not the same - little pig, pig and wild boar
words_to_find = ['ဝက်ကလေး', 'ဝက်', 'တောဝက်']
for word_to_find in words_to_find:
    synonyms = find_synonyms(wordnet, word_to_find)
    print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ဝက်ကလေး': ဝက်ကလေး
Synonyms for 'ဝက်': ဝက်
Synonyms for 'တောဝက်': တောဝက်


In [40]:
# Not synonym but similar (help vs save)
words_to_find = ['မစ', 'ကယ်မ', 'ကယ်']
for word_to_find in words_to_find:
    synonyms = find_synonyms(wordnet, word_to_find)
    print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'မစ': မစ, မစ, မစ
Synonyms for 'ကယ်မ': ကယ်မ
Synonyms for 'ကယ်': ကယ်


In [64]:
# also added for oov 
# return itself
word_to_find = 'ဂေါ်'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ဂေါ်': ဂေါ်


In [67]:
def load_wordnet(file_path):
    wordnet = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) >= 3:
                synset_id = parts[0]
                pos_tag = parts[1]
                words = parts[2:]
                if synset_id not in wordnet:
                    wordnet[synset_id] = {'lemma': [], 'antonym': []}
                
                if pos_tag == 'mya:lemma':
                    wordnet[synset_id]['lemma'].extend(words)
                elif pos_tag == 'antonym':
                    wordnet[synset_id]['antonym'].extend(words)
    return wordnet

def find_antonyms(wordnet, word):
    antonyms = []
    for synset_id, info in wordnet.items():
        if word in info['lemma']:
            antonyms.extend(info['antonym'])
    # Not found OOV return itself
    if len(antonyms) == 0:
        antonyms.append(word)
    return antonyms

file_path = './mmWORDNET.txt'  
wordnet = load_wordnet(file_path)

word_to_find = 'ရှူ' 
antonyms = find_antonyms(wordnet, word_to_find)
print(f"Antonyms for '{word_to_find}': {', '.join(antonyms)}")

Antonyms for 'ရှူ': ရှူ


In [69]:
# can find real antonyms
word_to_find = 'ဆင်တူ'  
synonyms = find_antonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'ဆင်တူ': ဆင်တူ


In [70]:
word_to_find = 'မြည်တွန်တောက်တီး'  
synonyms = find_synonyms(wordnet, word_to_find)
print(f"Synonyms for '{word_to_find}': {', '.join(synonyms)}")

Synonyms for 'မြည်တွန်တောက်တီး': မြည်တွန်တောက်တီး


In [None]:
# No antonym ?
# Yes there is no annotation for antonym in our wordnet file !

grep "antonym" mmWORDNET.txt