In [None]:
from pathlib import Path

import udapi
from udapi.block.ud.fixchain import FixChain
from udapi.block.ud.fixleaf import FixLeaf
from udapi.block.ud.fixmultisubjects import FixMultiSubjects
from udapi.block.ud.fixpunct import FixPunct
from udapi.block.ud.fixrightheaded import FixRightheaded
from udapi.block.ud.setspaceafterfromtext import SetSpaceAfterFromText

In [None]:
# Treebank file to process, given relative to the parent directory of `repo_dir`.
# Alternative input:
# filename = "UD_output.conllu"
filename = "UD_Norwegian-NynorskLIA/no_nynorsklia-ud-dev.conllu"

# `repo_dir` is the parent of the current working directory; the treebank is
# looked up one level above that.
repo_dir = Path.cwd().parent
ud_path = repo_dir.parent / filename

# Fail early and name the path that was tried. `is_file()` also rejects a
# directory that happens to have the expected name.
assert ud_path.is_file(), f"CoNLL-U input file not found: {ud_path}"

# The path is handed on as a plain string under the name UDFILE.
UDFILE = str(ud_path)

In [None]:
# Build the udapi Document from the file at UDFILE; the cells below operate on `doc`.
doc = udapi.Document(filename=UDFILE)

In [None]:
# Processing full document
#
# Each udapi block below is run once over `doc`, in the order listed, and the
# result is then written to "out.conllu".

spaceafter = SetSpaceAfterFromText()
fixpunct = FixPunct(check_paired_punct_upos=True)
fix_chain = FixChain()
fix_multisubj = FixMultiSubjects()
fix_right = FixRightheaded()
fix_leaf = FixLeaf(deprels="aux,cop,case,mark,cc,det")

pipeline = (
    spaceafter,
    fixpunct,
    fix_chain,
    fix_multisubj,
    fix_right,
    fix_leaf,
)
for block in pipeline:
    block.run(document=doc)

doc.store_conllu("out.conllu")

In [None]:
# Run validation script at the end
validation_script = repo_dir / "tools/validate.py"

!pdm run python $validation_script --max-err 0 --lang no out.conllu

In [None]:
# Processing individual nodes
#
# For the first two bundles: draw the tree, then print every VERB node whose
# VerbForm feature is an empty string.

for bundle in doc.bundles[:2]:
    tree = bundle.get_tree()
    tree.draw(layout="align", attributes="ord,form,feats")
    for node in tree.descendants:
        if node.upos != "VERB" or node.feats["VerbForm"] != "":
            continue
        # Uncomment to fill in the missing value:
        # node.feats["VerbForm"] = "Fin"
        print(node.form, node.feats)

# Uncomment to write the document back out:
# doc.store_conllu("out.conllu")