# Parse LaTeX file and convert into DeepL-friendly format

## Input LaTeX source from file and parse

In [1]:
from pylatexenc.latexwalker import LatexWalker, LatexCharsNode, LatexEnvironmentNode, LatexMacroNode, LatexSpecialsNode
import re

input_file = input()
with open(input_file) as f:
    latex = f.read()
w = LatexWalker(latex)
(nodelist, pos, len_) = w.get_latex_nodes(pos=0)

 /Users/SoChigusa/works/Latex2DeepL/test/test.tex


## Look for \begin{document} ... \end{document} environment

In [3]:
# nodelist = nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist[-1].nodelist ##################################################
env = [n for n in nodelist if n.isNodeType(LatexEnvironmentNode)]
doc = [e for e in env if e.environmentname=='document']
if len(doc)==1:
    doc = doc[0]
else:
    print('Unexpected format with more than one document environment')

## Look for successive LaTeX special expressions and replace them by the format P(pos)L(len)

In [4]:
format8 = '{:008}'
def replaceSpecial(node):
    if node.isNodeType(LatexCharsNode):
        s = node.chars
    else:
        s = ' P'+format8.format(node.pos)+'L'+format8.format(node.len)+' '
    return s

In [5]:
def replaceSuccessiveTags(match):
    n_space = len(match.group(3))
    return 'P'+match.group(1)+'L'+format8.format(int(match.group(2))+int(match.group(4))+n_space)

In [6]:
dnl = doc.nodelist
str_list = [replaceSpecial(n) for n in dnl]
latex_rep = ''.join(str_list)
while re.search('P(\d{8})L(\d{8}) ([ \n]+) P\d{8}L(\d{8})', latex_rep) is not None:
    latex_rep = re.sub('P(\d{8})L(\d{8}) ([ \n]+) P\d{8}L(\d{8})', replaceSuccessiveTags, latex_rep)

## Preprocess the string and translate using DeepL

In [7]:
paras = re.split('\n\n', latex_rep)
paras = [p.replace('\n', ' ') for p in paras]

In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
import pyperclip as ppc

options = Options()
options.add_argument('--disable-gpu')
options.add_argument('--disable-extensions')
options.add_argument('--proxy-server="direct://"')
options.add_argument('--proxy-bypass-list=*')
options.add_argument('--start-maximized')

DRIVER_PATH = '/Users/SoChigusa/works/Latex2DeepL/bin/chromedriver'
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)

load_url = 'https://www.deepl.com/ja/translator#en/ja'
driver.get(load_url)

clipboard = ppc.paste()
stextarea = driver.find_element_by_css_selector(
    '.lmt__textarea.lmt__source_textarea.lmt__textarea_base_style')
ttextarea = driver.find_element_by_css_selector(
    '.lmt__textarea.lmt__target_textarea.lmt__textarea_base_style')

In [15]:
def translateParagraph(par):
    if par == '' or par == '\n':
        return par
    ppc.copy(par)
    stextarea.send_keys(Keys.COMMAND, 'v')
    translated_text = ''
    while not translated_text:
        time.sleep(1)
        translated_text = ttextarea.get_property('value')
    stextarea.send_keys(Keys.COMMAND, 'a')
    stextarea.send_keys(Keys.BACKSPACE)
    return translated_text

In [13]:
paras_JP = [translateParagraph(par) for par in paras]
latex_JP = '\n\n'.join(paras_JP)

NoSuchWindowException: Message: no such window: window was already closed
  (Session info: chrome=90.0.4430.93)


In [168]:
ppc.copy(clipboard)
driver.quit()

## Debug: comparing original tags and "translated" ones

In [169]:
def posl2tag(posl):
    return 'P'+posl[0]+'L'+posl[1]

In [170]:
pattern = re.compile('P(\d{8})L(\d{8})')
tag0 = pattern.findall(latex_rep)
tag1 = pattern.findall(latex_JP) + re.compile('p(\d{8})l(\d{8})').findall(latex_JP)
tag_miss = list((set(tag0) ^ set(tag1)) & set(tag0))
tag_miss = [posl2tag(t) for t in tag_miss]
(tag_miss)

['P00025060L00000045',
 'P00010787L00000036',
 'P00012554L00000190',
 'P00024973L00000007',
 'P00024985L00000007',
 'P00025010L00000045']

In [184]:
target = [p for p in paras if tag_miss[2] in p]
paras.index(target[0])

8

In [185]:
paras[8]

'The dominant contribution to  P00012392L00000011  typically comes from the wino loops. In the limit that all SUSY masses are equal and neglecting the threshold corrections, the above expressions give  P00012554L00000190 As it is evident from the above equation, even for large values of  P00012811L00000021 , the typical mass scale of the involved supersymmetric particles (sleptons and electroweakinos) is below 1 P00012939L00000002 TeV. This is confirmed by our numerical results in Figure  P00012999L00000014  (see the blue and purple shaded regions). The fact that an explanation of  P00013088L00000011  prefers a light spectrum of sleptons and electroweakinos has been re-emphasized recently in several studies of the MSSM P00013219L00000249  and of MSSM extensions P00013491L00000045 . It is possible to accommodate the preferred value for  P00013592L00000014  for a somewhat heavier spectrum ( P00013640L00000026  TeV) in corners of parameter space with either a very large  P00013727L0000000

In [186]:
paras_JP[8]

'P00012392L00000011には、通常、ウィノループからの寄与が大きい。上の式から明らかなように，P00012811L00000021の値が大きい場合でも，関係する超対称粒子（スリープトンとエレクトロウィーキーノ）の典型的な質量スケールは1 P00012939L00000002 TeV以下です。これは、図P00012999L00000014（青と紫の斜線部分を参照）の数値計算結果によって確認されています。P00013088L00000011の説明は、スリープトンとエレクトロウェアキノスの軽いスペクトルを好むという事実は、最近、MSSM P00013219L00000249やMSSM拡張P00013491L00000045のいくつかの研究で再強調されています。P00013592L00000014の望ましい値は、非常に大きなP00013727L00000005項P00013737L00000032か、非常に大きなP00013799L00000011を持つパラメータ空間の隅で、やや重いスペクトル( P00013640L00000026 TeV)に対応することが可能です。しかし、P00013841L00000005の値が大きい場合、MSSMスカラーポテンシャルは電荷破壊極小値を示す可能性があり、真空安定性を考慮すると、パラメータ空間が強く制約されます。P00014010L00000022の値が非常に大きい場合、ボトムとタウのユカワカップリングは、GUTスケールの前にランダウ極を形成します（例：P00014120L00000028）。 '

## Replace P(pos)L(len) to original LaTeX formula

In [177]:
def replaceRule(match):
    (pos, l) = (int(match.group(1)), int(match.group(2)))
    return latex[pos:pos+l]

In [178]:
latex_fin = re.sub('P(\d{8})L(\d{8})', replaceRule, latex_JP)
latex_fin = re.sub('p(\d{8})l(\d{8})', replaceRule, latex_fin) # DeepL sometimes translates P->p and L->l
latex_fin = latex[:doc.pos]+'\\begin{document}\n'+latex_fin+'\n\\end{document}'

In [179]:
with open('DraftSG_JP.tex', mode='w') as f:
    f.write(latex_fin)