## 1 - Packages

- time
- os
- [selenium](https://selenium-python.readthedocs.io)

In [None]:
import time
import os
from selenium import webdriver
from selenium.webdriver.common.by import By

## 2 - Constants

In [None]:
# Address that the webdriver loads.
SITE_URL = 'http://www.ali-soft.com/portal/index.php'
# Short site label; used as the name of the output sub-directory.
SITE_NAME = 'ali-soft'
# Characters treated as letters: only these receive a diacritic class during evaluation.
ALTR_LIST = 'ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوىي'
# Single diacritic marks: stripped by remove_diacritics and recognised when they follow a letter.
DIAC_LIST = ['َ', 'ً', 'ِ', 'ٍ', 'ُ', 'ٌ', 'ْ', 'ّ']
# Diacritic classes; the position in this list (plus one) is the class id.
# NOTE(review): the trailing entries appear to be two-mark combinations — confirm.
CLSS_LIST = ['َ', 'ً', 'ُ', 'ٌ', 'ِ', 'ٍ', 'ْ', 'ّ', 'َّ', 'ًّ', 'ُّ', 'ٌّ', 'ِّ', 'ٍّ']

# Class ids are 1-based positions in CLSS_LIST; the two tables are inverse lookups
# (class string -> id, id -> class string).
DIAC_MAPPING = {mark: class_id for class_id, mark in enumerate(CLSS_LIST, start=1)}
REV_DIAC_MAPPING = dict(enumerate(CLSS_LIST, start=1))

## 3 - Helper functions

In [None]:
def remove_diacritics(content):
    """Return *content* with every character listed in DIAC_LIST dropped."""
    marks = set(''.join(DIAC_LIST))
    return ''.join(symbol for symbol in content if symbol not in marks)

## 4 - Get test data

In [None]:
file_path = '../test.txt'
# Read through a context manager so the handle is closed, and decode explicitly
# instead of relying on the platform default encoding.
# NOTE(review): assumes the test file is stored as UTF-8 — confirm.
with open(file_path, encoding='utf-8') as file_reader:
    lines = file_reader.readlines()

In [None]:
# Length is measured in space-separated tokens, not characters.
longest_line = max(len(line.split(' ')) for line in lines)
print('Maximum line length:', longest_line, 'words')

## 5 - Initialize webdriver

Initialize a new webdriver using the Selenium Chrome driver. Optionally, it can be started in `headless` mode so that the browser GUI does not show up.

In [None]:
# Build the Chrome options first so flags can be added before the browser starts.
options = webdriver.ChromeOptions()
# Uncomment to run Chrome without a visible window:
# options.add_argument('headless')
driver = webdriver.Chrome(options=options)

## 6 - Go to the website

In [None]:
# Load the page at SITE_URL in the browser session.
driver.get(SITE_URL)

## 7 - Test the website

- Get the input text area
- Get the output text area
- Send each line to the site and wait for the diacritized version

In [None]:
# Locators use find_element(By.…): the find_element_by_* helpers were removed in Selenium 4.3.
# Enter the outer frame first; the button is looked up while this frame is the
# active context, before descending into the nested frame.
driver.switch_to.frame(driver.find_element(By.ID, 'modfram1_3'))
button = driver.find_element(By.CSS_SELECTOR, '#mce_50 button')
# Descend into the nested frame that holds the editable element.
# NOTE(review): the ids suggest a TinyMCE editor — confirm against the page.
driver.switch_to.frame(driver.find_element(By.ID, 'content_ifr'))
content = driver.find_element(By.ID, 'tinymce')
# Start from an empty editor body.
driver.execute_script("arguments[0].innerHTML=''", content)

In [None]:
# Send every test line through the site and collect the text it returns.
result = []
for line in lines:
    # Place the diacritic-free line into the editor body.
    driver.execute_script("arguments[0].innerHTML=arguments[1]", content, remove_diacritics(line))
    # The button was located in the parent frame, so step out to click it, then step back in.
    driver.switch_to.parent_frame()
    button.click()
    # find_element(By.ID, …) replaces find_element_by_id, which was removed in Selenium 4.3.
    driver.switch_to.frame(driver.find_element(By.ID, 'content_ifr'))
    # Fixed pause before reading the editor back.
    # NOTE(review): presumably long enough for the site to respond — confirm.
    time.sleep(1)
    result.append(content.text)

## 8 - Evaluating

In [None]:
def _diacritic_classes(text):
    """Return one diacritic class id for each character of *text* found in ALTR_LIST.

    A letter that is not followed by a mark from DIAC_LIST gets class 0.
    Otherwise the class id comes from DIAC_MAPPING, using the next two marks
    together when that pair is itself a key of DIAC_MAPPING and the single
    following mark otherwise.
    """
    classes = []
    for idx, ch in enumerate(text):
        if ch not in ALTR_LIST:
            continue

        label = 0
        if idx + 1 < len(text) and text[idx + 1] in DIAC_LIST:
            cur_diac = text[idx + 1]
            if idx + 2 < len(text) and text[idx + 2] in DIAC_LIST and cur_diac + text[idx + 2] in DIAC_MAPPING:
                cur_diac += text[idx + 2]
            label = DIAC_MAPPING[cur_diac]

        classes.append(label)
    return classes


# Count, letter by letter, how often the predicted class matches the reference class.
equal = 0
not_equal = 0
for original, prediction in zip(lines, result):
    print(original, prediction)

    Y_true = _diacritic_classes(original)
    Y_pred = _diacritic_classes(prediction)

    # zip stops at the shorter sequence, so a prediction containing more letters
    # than the reference no longer indexes past the end of Y_true.
    for y_true, y_pred in zip(Y_true, Y_pred):
        # Letters without a diacritic in the reference are not scored.
        if y_true == 0:
            continue

        if y_true == y_pred:
            equal += 1
        else:
            not_equal += 1

    # NOTE(review): this unconditional break limits the evaluation to the first
    # line only; it looks like a debugging leftover — confirm before removing.
    break

# Guard the percentage against an empty comparison (no scored letters at all).
total = equal + not_equal
accuracy = equal / total * 100 if total else 0.0
print(equal, not_equal, accuracy)

## 9 - Write the content

In [None]:
# Store the predictions beside the input file, inside a sub-directory named after the site.
# os.path helpers replace the manual split on os.sep, which fails on Windows for '/'-style paths.
file_name = os.path.basename(file_path)
output_dir = os.path.join(os.path.dirname(file_path), SITE_NAME)
# exist_ok avoids FileExistsError when the directory is already there (e.g. on a re-run).
os.makedirs(output_dir, exist_ok=True)
# NOTE(review): file_path is rebound to the output location, as before; re-running this
# cell without re-running the input cell will therefore nest another SITE_NAME directory.
file_path = os.path.join(output_dir, file_name)
print('Writing:', file_path)
with open(file_path, mode='w', encoding='utf-8') as file_writer:
    # Write the collected prediction strings, one per line. The original passed the
    # `content` web element to write(), which only accepts str.
    file_writer.writelines(prediction.rstrip('\n') + '\n' for prediction in result)
print('Finished!')