# Koiwriter
Translates an English word into Tsevhu (Koilang) and then writes it in Koiwrit.

### Imports

In [24]:
import json
from pathlib import Path
from typing import List, Union
from xml.dom.minidom import parse

import pandas as pd

## Read the Excel sheet with the dictionary

In [25]:
# Load the dictionary: columns A-E of the "Dictionary" sheet in koilang.xlsx.
dictionary_pd = pd.read_excel(
    Path("koilang.xlsx"),
    sheet_name="Dictionary",
    usecols="A:E",
)
dictionary_pd

Unnamed: 0,Word,Pronunciation,POS,Meaning,Verb Class
0,'eujkae,ʔœʒke,v,rot away,4
1,'eujvha,ʔœʒβɑ,v,rot,3
2,'iis'en,ʔɪsʔɛn,v,ripple; reflect light (often from water); spre...,1
3,aekak,ekɑk,v,land on; slam/smack into; crash; hit hard enou...,4
4,aekkae,ekke,v,smack; hit hard enough to injure someone,4
...,...,...,...,...,...
2521,yrieun,əɾiœn,adj,loyal; faithful,
2522,yue,juɛ,adj,dry,
2523,yul,jul,adj,early,
2524,yuvi,juvi,adj,dark grey (like rain clouds),color


## Choose a translation

In [26]:
# Direction of the lookup: True searches the dictionary's 'Meaning' column
# (English -> Koilang); False searches the 'Word' column instead.
english_to_koilang = True
# Text to look up; rows whose searched column contains it are offered as candidates.
word_to_translate = "sun"

In [27]:
# Search English meanings when translating into Koilang; otherwise search the
# Koilang words themselves (kept for a future reverse translation).
search_column = 'Meaning' if english_to_koilang else 'Word'

# regex=False: the search text is matched literally, so characters such as
#   "(" or "." in it cannot break or widen the search.
# na=False: rows whose cell is empty count as "no match" instead of producing
#   a NaN mask, which .loc refuses to index with.
matches_mask = dictionary_pd[search_column].str.contains(
    word_to_translate, regex=False, na=False
)

# Build a fresh frame (no in-place edits on a slice of dictionary_pd) and
# number the candidates from 1 so the printed index doubles as the menu.
results = dictionary_pd.loc[matches_mask].reset_index(drop=True)
results.index += 1
num_of_results = len(results.index)

# One-row DataFrame holding the selected entry, or None when nothing matched.
chosen_translation = None

if num_of_results == 0:
    print("No translations found.")

elif num_of_results == 1:
    print("Translation:")
    chosen_translation = results.iloc[[0]]

else:
    print("Multiple options exist. Please choose one:")
    print(results, end="\n\n")

    # Keep asking until a number from the menu is entered; every rejected
    # input gets an explanation instead of a silent re-prompt.
    while chosen_translation is None:
        choice = input("Choice (1, 2, 3, ...): ").strip()
        try:
            choice_number = int(choice)
        except ValueError:
            choice_number = None

        if choice_number is not None and 1 <= choice_number <= num_of_results:
            chosen_translation = results.iloc[[choice_number - 1]]
        else:
            print(f"Invalid choice {choice!r}: enter a number from 1 to {num_of_results}.")

    print("Chosen translation:")

# Nothing to show when the search came back empty.
if chosen_translation is not None:
    print(chosen_translation)
        
    

Multiple options exist. Please choose one:
         Word Pronunciation POS  \
1     aesiivh          esɪβ   v   
2   kombaekae       kombeke   v   
3         nak           nɑk   v   
4     li'enak       liʔɛnɑk   n   
5       Oitje           NaN   n   
6        soem          soɛm   n   
7     soemrha       soɛmr̥ɑ   n   
8     soemsii        soɛmsɪ   n   
9    soemsyun      soɛmsʲun   n   
10       syun          sjun   n   

                                              Meaning Verb Class  
1   sink; drown; go below; slip under; under(groun...          3  
2   divide apart; split apart; divide asunder (imp...          4  
3                        sunbathe; sit out in the sun          1  
4   lizard (lit. sunbathing shield (shield often r...     animal  
5   earth deity (connected to a myth of the sun in...        NaN  
6                                                 sun        NaN  
7                      sunrise (can shorten to sorha)        NaN  
8                       sunset (can

## Separate word into writeable parts

In [28]:
def rules_for_tokens(word: str) -> List[str]:
    """Split a Koilang word into the tokens that each map to one ripple.

    A token is either a single character or one of the two-character
    combinations accepted by ``both_chars_go_together`` (a consonant followed
    by "h", or a vowel pair). The word is scanned left to right and a valid
    pair is always preferred over a single character. If the language evolves
    with more complex rules, consider switching to a parser such as PEST.

    Args:
        word (str): Word to tokenize. Surrounding whitespace is stripped and
            the word is lower-cased before tokenizing.

    Returns:
        List[str]: Tokens in order of appearance. An empty word gives an
            empty list; a one-character word gives that single character.
    """
    # Second character of a pair -> first characters it may combine with.
    pair_starts = {
        'h': ('c', 'k', 'p', 's', 't', 'v'),  # consonant digraphs ending in 'h'
        'e': ('a',),                          # vowel pairs ending in 'e'
        'i': ('a', 'i', 'o'),                 # vowel pairs ending in 'i'
        'u': ('a', 'e'),                      # vowel pairs ending in 'u'
    }

    def both_chars_go_together(char_before: str, char_now: str) -> bool:
        # True when the two adjacent characters form a single token.
        return char_before in pair_starts.get(char_now, ())

    word = word.strip().lower()
    word_length = len(word)

    tokens: List[str] = []
    index = 0
    while index < word_length:
        has_next_char = index + 1 < word_length
        if has_next_char and both_chars_go_together(word[index], word[index + 1]):
            # Consume both characters as one token.
            tokens.append(word[index:index + 2])
            index += 2
        else:
            # Lone character, including the final one and one-character words.
            tokens.append(word[index])
            index += 1

    return tokens

In [29]:
# Tokenize the chosen word. An apostrophe token is stored as a backtick
# (presumably to match the names used by the ripple assets -- TODO confirm).
chosen_word = chosen_translation["Word"].values[0]
tokens = ["`" if token == "'" else token for token in rules_for_tokens(chosen_word)]
tokens

['ae', 's', 'ii', 'vh']

## Determine the best orientation for each ripple

In [30]:
# Get density data
with open(Path("ripples/density_data.json")) as f:
    density_data = json.loads(f.read())

In [31]:
# Compass directions in clockwise order, 45 degrees apart.
_COMPASS = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]

# translate_orientation[str(q)][d] is direction d after q clockwise quarter
# turns; one quarter turn moves a direction two places along the compass.
translate_orientation = {
    str(quarter_turns): {
        direction: _COMPASS[(place + 2 * quarter_turns) % len(_COMPASS)]
        for place, direction in enumerate(_COMPASS)
    }
    for quarter_turns in range(4)
}

# Choose orientations based on densities
orientations = []
previous_dense_above_dir = None
previous_quarters = None

for position, token in enumerate(tokens):
    ripple = density_data[token]

    if position == 0:
        # The first token is never rotated, so its dense-above directions
        # are recorded exactly as stored.
        chosen_orientation = 0
        previous_dense_above_dir = ripple['dense_above_dir']
    else:
        # Begin the search where the previous ripple ended.
        start_orientation = (orientations[-1] + previous_quarters) % 4
        dense_below_dir = ripple['dense_below_dir']

        # Falls back to 0 when every one of the four orientations clashes.
        chosen_orientation = 0
        for step in range(4):
            candidate = (step + start_orientation) % 4
            rotation = translate_orientation[str(candidate)]
            rotated_dense_below = {rotation[d] for d in dense_below_dir}

            # Valid when the dense-below side of this ripple shares no
            # direction with the dense-above side of the previous ripple.
            if rotated_dense_below.isdisjoint(previous_dense_above_dir):
                chosen_orientation = candidate
                break

        # Remember this ripple's dense-above directions, rotated the way the
        # ripple itself will be drawn, for the next token.
        chosen_rotation = translate_orientation[str(chosen_orientation)]
        previous_dense_above_dir = [chosen_rotation[d] for d in ripple['dense_above_dir']]

    orientations.append(chosen_orientation)
    previous_quarters = ripple['quarters']

orientations
    
    

[0, 1, 2, 3]

## Draw the ripples

In [32]:
# Rendered size of the image in pixels, and the side of its square viewBox.
canvas_size = 500
view_box_size = 500
# Ripples are scaled and rotated about the centre of the viewBox.
origin = view_box_size // 2
# Each ripple is drawn at this fraction of the size of the one after it;
# the last token is drawn at full size.
shrink_factor = 0.65

# Create the output file. The SVG carries no XML declaration, so readers will
# treat it as UTF-8; write it as UTF-8 rather than the platform default.
with open(Path("output.svg"), 'w', encoding="utf-8") as output_f:
    output_f.write(f'<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="{canvas_size}" height="{canvas_size}" viewBox="0 0 {view_box_size} {view_box_size}">\n')

    # Copy the <path> elements of each token's SVG into its own group,
    # scaled by position and rotated by the orientation chosen for it.
    num_tokens = len(tokens)
    for i, token in enumerate(tokens):
        doc = parse(str(Path(f"ripples/images/{token}.svg")))
        scale_value = shrink_factor ** (num_tokens - (i + 1))

        output_f.write(f'\t<g id="{token}_{i}" transform-origin="{origin} {origin}" transform="scale({scale_value}) rotate({90 * orientations[i]})">\n')
        for child_elem in doc.getElementsByTagName("path"):
            output_f.write(f'\t\t{child_elem.toprettyxml()}\n')
        output_f.write('\t</g>\n')

    output_f.write('</svg>\n')