Skip to content

Commit

Permalink
Add pythainlp.util.morse
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Dec 12, 2023
1 parent 3b6daf0 commit 2899ad8
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docs/api/util.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,13 @@ Modules
:members:

The `Trie` class is a data structure for efficient dictionary operations. It's a valuable resource for managing and searching word lists and dictionaries in a structured and efficient manner.

.. autofunction:: pythainlp.util.morse.morse_encode
:noindex:

The `pythainlp.util.morse.morse_encode` function converts text to Morse code.

.. autofunction:: pythainlp.util.morse.morse_decode
:noindex:

The `pythainlp.util.morse.morse_decode` function converts Morse code to text.
209 changes: 209 additions & 0 deletions pythainlp/util/morse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
# Thai character -> Morse code table.
#
# Several characters deliberately share one code (for example ค/ฆ, ศ/ษ/ส,
# ไ/ใ), so a reverse lookup built from this table is lossy. The entry order
# is significant for such a reverse lookup: when codes collide, the character
# listed last is the one that survives.
#
# Two keys ("ั้" and "ฯลฯ") are longer than one character.
THAI_MORSE_CODE = {
    # Consonants
    "ก": "--.",
    "ข": "-.-.",
    "ค": "-.-",
    "ฆ": "-.-",
    "ง": "-.--.",
    "จ": "-..-.",
    "ฉ": "----",
    "ช": "-..-",
    "ฌ": "-..-",
    "ซ": "--..",
    "ญ": ".---",
    "ด": "-..",
    "ถ": "-.-..",
    "ฐ": "-.-..",
    "ฑ": "-..--",
    "ฒ": "-..--",
    "ท": "-..--",
    "ธ": "-..--",
    "ณ": "-.",
    "น": "-.",
    "บ": "-...",
    "ป": ".--.",
    "ผ": "--.-",
    "ฝ": "-.-.-",
    "พ": ".--..",
    "ภ": ".--..",
    "ฟ": "..-.",
    "ม": "--",
    "ย": "-.--",
    "ร": ".-.",
    "ล": ".-..",
    "ฬ": ".-..",
    "ว": ".--",
    "ศ": "...",
    "ษ": "...",
    "ส": "...",
    "ห": "....",
    "อ": "-...-",
    "ฮ": "--.--",
    "ฎ": "-..",
    "ต": "-",
    "ฏ": "-",
    "ฤ": ".-.--",
    # Tone marks, diacritics and special signs
    "่": "..-",
    "้": "...-",
    "๊": "--...",
    "๋": ".-.-.",
    "ั": ".--.-",
    "็": "---..",
    "์": "--..-",
    "ั้": ".---.",
    "ฯ": "--.-.",
    "ฯลฯ": "---.-",
    "ๆ": "---.-",
    # Vowels
    "ะ": ".-...",
    "า": ".-",
    "ิ": "..-..",
    "ี": "..",
    "ึ": "..--.",
    "ื": "..--",
    "ุ": "..-.-",
    "ู": "---.",
    "เ": ".",
    "แ": ".-.-",
    "โ": "---",
    "ไ": ".-..-",
    "ใ": ".-..-",
    "ำ": "...-.",
}
# Latin letter / digit / punctuation -> Morse code table.
#
# Keys are upper-case letters, digits and a few punctuation marks. Every code
# in this table is unique, so it can be inverted without loss.
ENGLISH_MORSE_CODE = {
    # Letters
    "A": ".-",
    "B": "-...",
    "C": "-.-.",
    "D": "-..",
    "E": ".",
    "F": "..-.",
    "G": "--.",
    "H": "....",
    "I": "..",
    "J": ".---",
    "K": "-.-",
    "L": ".-..",
    "M": "--",
    "N": "-.",
    "O": "---",
    "P": ".--.",
    "Q": "--.-",
    "R": ".-.",
    "S": "...",
    "T": "-",
    "U": "..-",
    "V": "...-",
    "W": ".--",
    "X": "-..-",
    "Y": "-.--",
    "Z": "--..",
    # Digits
    "0": "-----",
    "1": ".----",
    "2": "..---",
    "3": "...--",
    "4": "....-",
    "5": ".....",
    "6": "-....",
    "7": "--...",
    "8": "---..",
    "9": "----.",  # was missing: "9" could not be encoded or decoded
    # Punctuation
    ",": "--..--",
    ".": ".-.-.-",
    "?": "..--..",
    ";": "-.-.-.",
    ":": "---...",
    "'": ".----.",
    "-": "-....-",
    "/": "-..-.",
    # Opening and closing parentheses have distinct codes; "(" previously
    # carried the closing-parenthesis code and ")" was absent.
    "(": "-.--.",
    ")": "-.--.-",
}

# Reverse lookup tables (Morse code -> character) used for decoding.
# Built with comprehensions so no loop variables leak into the module
# namespace. When several characters share one code, the character that
# appears last in the source table wins.
decodingeng = {code: char for char, code in ENGLISH_MORSE_CODE.items()}
decodingthai = {
    code.replace(" ", ""): char for char, code in THAI_MORSE_CODE.items()
}

# Strip any spaces from the Thai codes. The dict is updated in place (same
# object, same keys) from a freshly built mapping rather than being
# reassigned while its own items view is being iterated.
THAI_MORSE_CODE.update(
    {char: code.replace(" ", "") for char, code in THAI_MORSE_CODE.items()}
)


def morse_encode(text: str, lang: str = "th") -> str:
    """
    Convert text to Morse code (Thai and English are supported).

    The text is upper-cased and looked up one character at a time. The
    per-character codes are joined with a single space; a character that
    is not in the selected table contributes a single space instead of a
    code.

    :param str text: Text to encode
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Morse code
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_encode

        print(morse_encode("แมว", lang="th"))
        # output: .-.- -- .--

        print(morse_encode("cat", lang="en"))
        # output: -.-. .- -
    """
    # Select the lookup table first; everything after that is shared.
    if lang == "th":  # Thai
        table = THAI_MORSE_CODE
    elif lang == "en":  # English
        table = ENGLISH_MORSE_CODE
    else:
        raise NotImplementedError(f"This function doesn't support {lang}.")

    return ' '.join(table.get(char, ' ') for char in text.upper())


def morse_decode(morse_text: str, lang: str = "th") -> str:
    """
    Convert Morse code to text (simple table lookup).

    The input is split on single spaces and each piece is looked up in the
    reverse table for the chosen language.

    * Thai: unknown pieces are dropped and all whitespace is removed from
      the result. Several Thai characters share one Morse code, so the
      decoded text can contain wrong characters; a spell corrector can be
      used to repair them.
    * English: unknown pieces become a space and runs of whitespace in the
      result are collapsed to a single space.

    :param str morse_text: Morse code
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Text
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_decode

        print(morse_decode(".-.- -- .--", lang="th"))
        # output: แมว

        print(morse_decode("-.-. .- -", lang="en"))
        # output: CAT
    """
    # Reject unsupported languages before touching the input.
    if lang not in ("th", "en"):
        raise NotImplementedError(f"This function doesn't support {lang}.")

    pieces = morse_text.split(' ')

    if lang == "th":
        decoded = ''.join(decodingthai.get(piece, '') for piece in pieces)
        return ''.join(decoded.split())

    decoded = ''.join(decodingeng.get(piece, ' ') for piece in pieces)
    return ' '.join(decoded.split())
9 changes: 9 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
remove_trailing_repeat_consonants,
)
from pythainlp.util.spell_words import spell_word
from pythainlp.util.morse import morse_encode, morse_decode


class TestUtilPackage(unittest.TestCase):
Expand Down Expand Up @@ -835,5 +836,13 @@ def test_remove_repeat_consonants(self):
"อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ",
)

def test_morse_encode(self):
    """morse_encode yields the expected Morse code for Thai and English."""
    cases = (
        ("แมว", "th", ".-.- -- .--"),
        ("cat", "en", "-.-. .- -"),
    )
    for text, lang, expected in cases:
        self.assertEqual(morse_encode(text, lang=lang), expected)

def test_morse_decode(self):
    """morse_decode recovers the expected text for Thai and English."""
    cases = (
        (".-.- -- .--", "th", "แมว"),
        ("-.-. .- -", "en", "CAT"),
    )
    for code, lang, expected in cases:
        self.assertEqual(morse_decode(code, lang=lang), expected)

# def test_abbreviation_to_full_text(self):
# self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

0 comments on commit 2899ad8

Please sign in to comment.