# Parse LaTeX file and convert into DeepL-friendly format

## Parse LaTeX and generate tree structure

In [175]:
import regex
from anytree import Node, RenderTree

# Patterns for \begin{NAME} and \end{NAME}; group 1 captures NAME.
# NAME may itself contain balanced braces: each nested "{...}" is matched by
# recursing into group 1 via (?1).  The previous (?R) recursed into the whole
# pattern (including the \begin / \end prefix), so a plain nested brace pair
# inside NAME could never match.
env1 = regex.compile(r'\\begin\{((?>[^\{\}]+|\{(?1)\})*)\}')
env2 = regex.compile(r'\\end\{((?>[^\{\}]+|\{(?1)\})*)\}')
def latex2tree(latex, tree, texdict, pos=1, structure=None):
    """Split LaTeX source at \\begin{...} / \\end{...} markers into a tree.

    The text between two consecutive markers becomes a leaf node named
    'n<pos>' (dirtype='plain') attached to the innermost open environment,
    and that text is stored in ``texdict`` under the same name.  Every
    \\begin{NAME} adds a child node called NAME (dirtype=NAME) which becomes
    the new innermost environment; every \\end{...} closes the innermost one.

    Parameters
    ----------
    latex : str
        LaTeX source to split.
    tree : Node
        Root node; used as the initial parent when ``structure`` is empty.
    texdict : dict
        Filled in place with ``{'n<pos>': text}`` entries.
    pos : int, optional
        Number used for the first text node; it grows by one per marker.
    structure : list of Node, optional
        Stack of currently open nodes (innermost last).  ``None`` or an
        empty list means "start at ``tree``".  A non-empty list passed by the
        caller is pushed to / popped from in place.

    Returns
    -------
    None
        ``tree`` and ``texdict`` are modified in place.

    Notes
    -----
    The markers are located with the module-level patterns ``env1``
    (\\begin) and ``env2`` (\\end).  The scan is a loop rather than one
    recursive call per marker, so documents with many environments do not
    hit Python's recursion limit.
    """
    # None default instead of a shared mutable [] default.
    if not structure:
        structure = [tree]

    cursor = 0  # offset in `latex` where the current text segment starts
    while True:
        name = 'n' + str(pos)
        begin = env1.search(latex, cursor)
        end = env2.search(latex, cursor)

        # The text segment is always attached before any environment node
        # opened at its end, which keeps sibling order equal to source order.
        Node(name, parent=structure[-1], dirtype='plain')

        # end of document: no marker left, the rest is one last text segment
        if begin is None and end is None:
            texdict[name] = latex[cursor:]
            return

        # A \begin wins when it comes first, or when no \end is left at all
        # (unclosed environment) -- the latter used to raise AttributeError.
        opens = begin is not None and (end is None or begin.start() < end.start())
        marker = begin if opens else end
        texdict[name] = latex[cursor:marker.start()]

        if opens:
            # begin environment: descend into a new child node
            env_name = begin.group(1)
            structure.append(Node(env_name, parent=structure[-1], dirtype=env_name))
        else:
            # end environment: climb back up; an unmatched \end that closes
            # the last open node restarts the stack at the root
            structure.pop()
            if not structure:
                structure = [tree]

        cursor = marker.end()
        pos += 1

In [99]:
# Hand-unrolled walk over the first \begin ... \end pair of the document:
# text before \begin -> n1, text inside the environment -> n2, remainder -> n3.
# NOTE(review): this cell reads `input_file` and `latex_orig`, which it does
# not define itself -- they must already exist in the kernel.
tree = Node(input_file, parent=None)
structure = [tree]
texdict = {}
# Group 1 of each pattern is the environment name.
env1 = regex.compile(r'\\begin\{((?>[^\{\}]+|(?R))*)\}')
env2 = regex.compile(r'\\end\{((?>[^\{\}]+|(?R))*)\}')

latex = latex_orig

# Step 1: text up to the first \begin, then open the environment node.
pos = 1
name = 'n%d' % pos
match = env1.search(latex)
Node(name, parent=structure[-1])
child = Node('env', parent=tree, dirtype=match.group(1))
structure.append(child)
texdict[name] = latex[:match.start()]
latex = latex[match.end():]

# Step 2: text up to the next \end belongs to the open environment; close it.
pos += 1
name = 'n%d' % pos
match = env2.search(latex)
Node(name, parent=structure[-1])
structure.pop()
texdict[name] = latex[:match.start()]
latex = latex[match.end():]

# Step 3: if no further \begin follows, the remainder is the last text node.
pos += 1
name = 'n%d' % pos
match = env1.search(latex)
if match is None:
    Node(name, parent=structure[-1])
    texdict[name] = latex

reply.tex
├── n1
├── env
│   └── n2
└── n3


In [185]:
# input_file = input()  # alternative: read the file name from standard input
input_file = 'reply.tex'
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can garble non-ASCII characters.
with open(input_file, encoding='utf-8') as f:
    latex_orig = f.read()

# Build the environment tree and the node-name -> text mapping.
tree = Node(input_file, parent=None)
texdict = {}
latex2tree(latex_orig, tree, texdict)

In [186]:
# Print one line per node: the branch prefix from RenderTree, then the node name.
for pre, fill, node in RenderTree(tree):
    print(pre, node.name, sep='')

reply.tex
├── n1
├── document
│   ├── n2
│   ├── enumerate
│   │   ├── n3
│   │   ├── align*
│   │   │   └── n4
│   │   └── n5
│   ├── n6
│   ├── flushleft
│   │   └── n7
│   └── n8
└── n9


In [187]:
texdict['n1']

'\\documentclass[12pt]{article}\n\\usepackage{amsmath,amssymb}\n\\usepackage{color}\n\\usepackage{ulem}\n\\def\\rem#1{ {\\bf\\textcolor{red}{($\\clubsuit$ #1 $\\clubsuit$)}}}\n'

In [188]:
texdict['n2']

'\n\nWe thank the referee very much for careful reading and useful comments that help to improve our draft.\nReplies to the comments are listed below.\n\n'

In [189]:
texdict['n3']

'\n    \\item Do the authors have a specific material candidate in mind modelled by the FKMH model?\n    \n    The material example of Fe-doped Bismuth Selenide (Li et al, 2010), is not FKMH (to my understanding). In this case the CM axion is a purely longitudinal magnon. The longitudinal case does not have the simple Heisenberg interpretation or Kittel splitting of FKMH.\n    \n    Can the present treatment be extended to this case of a longitudinal CM axion?\n    \n    \\textbf{Reply}:\n    %%\n    We agree that the Fe-doped BiSe3 is different from the FKMH model. To our understanding, the axion as the longitudinal magnon in the Fe-doped BiSe3 is not simply expressed by the magnon creation/annihilation operator (at least at the linear level). Thus we think that some extended formalism is required to describe the CM axion in the Fe-doped BiSe3, although we have not found it.\n    %%\n    \n    \\item Eq. 5.11 is formally divergent when $m_a=m_m$. Finite linewidth of the magnon regulat

In [190]:
texdict['n4']

'\n        b \\sim 0.006  \\,\\mathrm{meV}\n        \\left( \\frac{B_0}{2\\,\\mathrm{T}} \\right)\n        \\left( \\frac{1}{\\epsilon} \\right)^{1/2}\n        |D|\n        \\left( u_{\\vec{0}} - v_{\\vec{0}} \\right)\n        \\left( \\frac{m_a}{ 1\\,\\mathrm{meV}} \\right)^{1/2}\n        \\left( \\frac{d}{(0.3\\,\\mathrm{keV})^{-1}} \\right)^{3/2},\n    '

In [191]:
texdict['n5']

'\n    which is much smaller than the assumed value $m\\sim 1\\,\\mathrm{meV}$ with a moderate choice of $|D| \\sim (u_{\\vec{0}}-v_{\\vec{0}}) \\sim O(1)$.\n    Thus, we confirm that CM axion mixing to the polariton is small and the effects of mixing on dispertion relations are negligible.\n    Also, it can be checked from Eqs. (5.14) and (5.29) that the small $b$ parameter works as a suppression factor to the signal rate $dN_{\\text{signal}} / dt$, which is proportional to $(b/m)^2$.\n    To achieve this conslusion, we assumed that the effect of the magnetic field and mixing are neglected so that the frequency of a magnon mode $\\omega_{\\vec{0}}$ can be used to approximate the mass of the axionic polariton.\n    %%\n    \n    \\item It is stated “We do not discuss in detail the detection method of generated magnons in this paper; they might be observed through the conversion into photons at the boundary of the material." This conversion was shown explicitly in the paper appearing at

In [192]:
texdict['n6']

'\n\n\nWe hope that the modified version is suitable for publication in JHEP.\\\\\n\nSincerely,\\\\\n\n'

In [193]:
texdict['n7']

'\nSo Chigusa\\\\\nTakeo Moroi\\\\\nKazunori Nakayama\n'

In [194]:
texdict['n8']

'\n\n\n'

In [195]:
texdict['n9']

'\n'