## How to use the usfm-grammar Python APIs


### Installation

#### From PyPI

In [None]:
# Good to set up a virtual environment
# requires python >= 3.10
!pip install usfm-grammar

#### From the code base

In [None]:
! cd python-usfm-parser/
! pip install -e . # from the python-usfm-parser

In [None]:
# Run the usfm_grammar.py script with -h to list its command-line options.
! python usfm_grammar.py -h # to view the command line options

In [None]:
! cd src/
! python grammar_rebuild.py # to bring the changes, after update on the local tree-sitter-usfm grammar

### Parsing an input USFM

In [None]:
from usfm_grammar import USFMParser, Filter # importing from the local module, not from an installed library

In [None]:
# Read the sample USFM file as UTF-8 text. The context manager closes the file
# handle as soon as the content has been read.
with open("sample.usfm", "r", encoding='utf8') as usfm_file:
    input_usfm_str = usfm_file.read()

print(input_usfm_str)

In [None]:
# Build a parser from the USFM text; the cells that follow reuse my_parser.
my_parser = USFMParser(input_usfm_str)

In [None]:
# Inspect the parser's errors to validate the input USFM file.
# The remaining operations still work even if there are small errors.
my_parser.errors 

#### Converting to other formats and extracting specific contents via filters

In [None]:
# Convert the parsed USFM to a dict (no filter argument given).
my_parser.to_dict()

In [None]:
# Same conversion, explicitly passing Filter.ALL.
my_parser.to_dict(Filter.ALL)

In [None]:
# Dict output restricted with Filter.NOTES
# (presumably the notes in the text; confirm against the usfm-grammar docs).
my_parser.to_dict(Filter.NOTES)

In [None]:
# Dict output restricted with Filter.NOTES_TEXT
# (presumably only the text of the notes; confirm against the usfm-grammar docs).
my_parser.to_dict(Filter.NOTES_TEXT)

In [None]:
# Convert the parsed USFM to a list of rows; it is kept in table_output so the
# next cell can print it as tab-separated text.
table_output = my_parser.to_list()
table_output


In [None]:
# Print the table as tab-separated text, one row per line.
print(*("\t".join(row) for row in table_output), sep="\n")

In [None]:
# Rows selected with Filter.NOTES, printed as tab-separated lines.
table_output = my_parser.to_list(Filter.NOTES)
print("\n".join(map("\t".join, table_output)))


In [None]:
# Dict output restricted with Filter.SCRIPTURE_PARAGRAPHS
# (presumably scripture text with paragraph markers; confirm against the usfm-grammar docs).
my_parser.to_dict(Filter.SCRIPTURE_PARAGRAPHS)

In [None]:
# Rows selected with Filter.SCRIPTURE_PARAGRAPHS, printed as tab-separated lines.
table_output = my_parser.to_list(Filter.SCRIPTURE_PARAGRAPHS)
print("\n".join("\t".join(row) for row in table_output))


In [None]:
from lxml import etree

# Convert the parsed USFM to a USX XML element and show it as indented text.
usx_elem = my_parser.to_usx()
print(etree.tostring(usx_elem, pretty_print=True, encoding="unicode"))

#### USX conversion and validation of the generated USX file

In [None]:
# usfm_grammar must be importable from the kernel's own environment (see the
# Installation section). No machine-specific site-packages directory is added
# to sys.path here, so the notebook stays portable across machines.
from usfm_grammar import USFMParser, Filter
from lxml import etree

# Read the USFM source as UTF-8; the context manager closes the file handle.
with open("origin.usfm", "r", encoding='utf8') as usfm_file:
    input_usfm_str = usfm_file.read()
my_parser = USFMParser(input_usfm_str)

# Convert to a USX element and serialise it to a string, which the validation
# cell further down parses again.
usx_elem = my_parser.to_usx()
usx_str = etree.tostring(usx_elem, encoding="unicode")

usx_str

In [None]:
from lxml import etree

# Load the USX schema (RELAX NG compact syntax). The encoding is given
# explicitly, like the other file reads in this notebook, so the result does
# not depend on the platform's default encoding.
with open("../schemas/usx.rnc", encoding='utf8') as f:
    usxrnc_doc = f.read()

# Build the validator once the file is closed; only the text is needed.
# NOTE(review): lxml's compact-syntax support relies on the optional rnc2rng
# package; confirm it is installed in the kernel's environment.
relaxng = etree.RelaxNG.from_rnc_string(usxrnc_doc)

In [None]:


from io import StringIO

# Parse the generated USX string back into an XML document and check it
# against the RELAX NG schema loaded earlier.
usx_f = StringIO(usx_str)
doc = etree.parse(usx_f)
if not relaxng.validate(doc):
    # Re-run the check with assertValid so the failure details are reported.
    relaxng.assertValid(doc)
else:
    print("valid")

#### To work with the syntax tree itself

In [None]:
# Get the syntax tree of the parsed USFM; as the last expression in the cell,
# its value is shown as the cell output.
my_parser.to_syntax_tree()