-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fa47624
commit 95962a5
Showing
4 changed files
with
1,563 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,15 @@ | ||
The MyCapytain local file implementation | ||
======================================== | ||
Working with Local CapiTainS XML File | ||
===================================== | ||
|
||
Introduction | ||
############ | ||
|
||
The module `MyCapytain.resources.local.text` requires the `guidelines of Capitains <https://capitains.github.io/pages/guidelines.html>`_ to be implemented in your files. | ||
|
||
Basics and examples | ||
################### | ||
|
||
Getting all passages from a text | ||
******************************** | ||
|
||
.. code-block:: python | ||
# We import the correct classes from the local module | ||
from MyCapytain.resources.texts.local import Text, Passage | ||
# We open a file | ||
with open("/tests/testing_data/texts/sample.xml") as f: | ||
# We initiate a Text object giving the IO instance to resource argument | ||
text = Text(resource=f) | ||
# Text objects have a citation property | ||
# len(Citation(...)) gives the depth of the citation scheme | ||
# in the case of this sample, this would be 3 (Book, Poem, Line) | ||
for ref in text.getChildren(level=len(text.citation)): | ||
# We retrieve a Passage object for each reference that we find | ||
# We can pass the reference in many ways, including as a list of strings | ||
psg = text.getPassage(ref.split("."), hypercontext=False) | ||
# We print the passage, excluding its <note> nodes | ||
print("\t".join([ref, psg.text(exclude=["note"])])) | ||
The class :class:`MyCapytain.resources.texts.locals.tei.Text` requires the \ | ||
`guidelines of Capitains <https://capitains.github.io/pages/guidelines.html>`_ to be implemented in your file. | ||
|
||
Example | ||
####### | ||
|
||
.. literalinclude:: Text.py | ||
:language: python | ||
:linenos: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# We import the correct classes from the local module | ||
from MyCapytain.resources.texts.locals.tei import Text | ||
from MyCapytain.common.constants import Mimetypes, NS | ||
from lxml.etree import tostring | ||
|
||
# We open a file | ||
with open("./tests/testing_data/examples/text.martial.xml") as f: | ||
# We initiate a Text object giving the IO instance to resource argument | ||
text = Text(resource=f) | ||
|
||
# Text objects have a citation property | ||
# len(Citation(...)) gives the depth of the citation scheme | ||
# in the case of this sample, this would be 3 (Book, Poem, Line) | ||
for ref in text.getReffs(level=len(text.citation)): | ||
# We retrieve a Passage object for each reference that we find | ||
# We can pass the reference in many ways, including as a list of strings | ||
# We use the simple parameter to get a fairly simple object | ||
# simple=True builds a plain object that has only the targeted node inside of it | ||
psg = text.getTextualNode(subreference=ref, simple=True) | ||
# We print the passage, excluding its <tei:note> nodes | ||
print("\t".join([ref, psg.export(Mimetypes.PLAINTEXT, exclude=["tei:note"])])) | ||
|
||
""" | ||
You'll print something like the following : | ||
1.pr.1 Spero me secutum in libellis meis tale temperamen- | ||
1.pr.2 tum, ut de illis queri non possit quisquis de se bene | ||
1.pr.3 senserit, cum salva infimarum quoque personarum re- | ||
1.pr.4 verentia ludant; quae adeo antiquis auctoribus defuit, ut | ||
1.pr.5 nominibus non tantum veris abusi sint, sed et magnis. | ||
1.pr.6 Mihi fama vilius constet et probetur in me novissimum | ||
""" | ||
|
||
# It is possible that what you're interested in is a little more complex | ||
# Like for example, getting a specific text sample with a specific reference | ||
# In TEI ! | ||
|
||
# We open another file, such as one of Cicero's texts | ||
with open("./tests/testing_data/examples/text.cicero.xml") as f: | ||
# We initiate a Text object giving the IO instance to resource argument | ||
text = Text(resource=f) | ||
# We are specifically interested in the portion 28-30 | ||
# Note that we won't use 28-30 because cross-passage references won't work properly | ||
p28_29 = text.getTextualNode("28-29") | ||
|
||
# And we want to be able to work with the xml | ||
# To be injected in a third party API for lemmatization purposes | ||
xml = p28_29.export(Mimetypes.XML.Std) | ||
print("XML of 28-29") | ||
print(xml) | ||
print("------------") | ||
|
||
# But what we really want to do is remove the notes from the XML. | ||
# So we export to an LXML Object | ||
document = p28_29.export(Mimetypes.PYTHON.ETREE) | ||
# We remove some XML | ||
for element in document.xpath("//tei:note", namespaces=NS): | ||
element.getparent().remove(element) | ||
# And we print using lxml's tostring function | ||
print("Clean XML of 28-29") | ||
print(tostring(document, encoding=str)) | ||
print("------------") | ||
|
Oops, something went wrong.