In [1]:
from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

In [2]:
import spacy
from spacy import displacy

from adept.preprocess import preprocessors
from adept.components.registry import ComponentsRegistry
from adept.components.sentencizer import Sentencizer

In [3]:
nlp = spacy.load("en_core_web_trf", exclude=["parser"])

In [4]:
registry = ComponentsRegistry(nlp)
registry.add_components([
    Sentencizer,
    # PeriodSentencizer
])

INIT custom_sentencizer


In [None]:
# doc = nlp(text)

doc = registry.nlp(text)

In [7]:
for sent in doc.sents:
    print(sent)

Perennials, 6-65+ cm (usually rhizomatous, sometimes stoloniferous).
Stems 1(-4), erect, simple or branched, densely lanate-tomentose to glabrate.
Leaves petiolate (proximally) or sessile (distally, weakly clasping and gradually reduced);
blades oblong or lanceolate, 3.5-35+ cm × 5-35 mm, 1-2-pinnately lobed (ultimate lobes ± lanceolate, often arrayed in multiple planes), faces glabrate to sparsely tomentose or densely lanate.
Heads 10-100+, in simple or compound, corymbiform arrays.
Phyllaries 20-30 in ± 3 series, (light green, midribs dark green to yellowish, margins green to light or dark brown) ovate to lanceolate, abaxial faces tomentose.
Receptacles convex;
paleae lanceolate, 1.5-4 mm.
Ray florets (3-)5-8, pistillate, fertile;
corollas white or light pink to deep purple, laminae 1.5-3 × 1.5-3 mm.
Disc florets 10-20;
corollas white to grayish white, 2-4.5 mm.
Cypselae 1-2 mm (margins broadly winged).
2n = 18, 27, 36, 45, 54, 63, 72 (including counts from Europe).
Morphologic chara

In [73]:
from itertools import chain

x = ['1', '2;3', '3;4;5', '6', '7;8']

list(chain(*[s.split(';') for s in x]))

['1', '2', '3', '3', '4', '5', '6', '7', '8']

In [148]:
t = [1,2,3,4,5,6,7,8]
split_points = [3, 6]



In [152]:

# :3
# 3:6
# 6:



    

    
    

[[1, 2, 3], [4, 5, 6], [7, 8]]


In [8]:
from adept.config import ADEPT_DIR

In [9]:
import yaml

In [19]:
spacy_config = yaml.safe_load((ADEPT_DIR / 'project.yml').open()).get('vars', {})

In [20]:
spacy_config['file']['traits']

'traits.jsonl'

In [21]:
nlp = spacy.load("en_core_web_trf")

In [31]:
from spacy.pipeline import EntityRuler
from adept.components.base import BaseComponent
from pathlib import Path
from adept.config import CORPUS_DIR, spacy_config
from adept.utils.expand import ExpandSpan
import enum

from spacy.matcher import Matcher
from spacy.tokens import Span, Token
from adept.config import unit_registry
from spacy.util import filter_spans
from spacy.tokens import Span, Token, Doc


class ExpandSpan():
    
    """
    Expand span, so it incorporates neighbouring tokens with characters or pos_tag
    """
    
    class Direction(enum.Enum):
        FORWARD = 1
        BACKWARD = -1     
    
    def __init__(self, characters: list, pos_tags: list = None, entity_types: list = None):
        self.characters = characters
        self.pos_tags = pos_tags if pos_tags else []
        self.entity_types = entity_types if entity_types else []
        
    def __call__(self, doc: Doc, token: Token):
        end_token = self._expand(doc, token, direction=self.Direction.FORWARD)
        start_token = self._expand(doc, token, direction=self.Direction.BACKWARD)
        return Span(doc, start_token.i, end_token.i + 1) 
    
    def _expand(self, doc: Doc, token: Token, direction):    
        i = token.i
        while True:
            next_i = i + direction.value
            
            # Do not look beyond the edges of the doc
            if next_i >= len(doc) or next_i < 0:
                break

            next_token = doc[next_i]            
            if self._is_matching_token(next_token):
                # Fix 6-65+ cm (usually rhizomatous                 
                if direction==self.Direction.FORWARD and next_token.text == '(' and not self._is_matching_token(doc[next_token.i + direction.value]):
                    break
                    
                i = next_token.i
            else:
                break
        
        return doc[i]
    
    def _is_matching_token(self, token: Token):
        return token.lower_ in self.characters or token.pos_ in self.pos_tags or token.ent_type_ in self.entity_types 
    

class Numeric(BaseComponent):
    
    name = "numeric"
    
    numeric_entity_types = ['QUANTITY', 'CARDINAL']
    
   
    
    def __init__(self, nlp):                 

        super().__init__(nlp)
        self.matcher = Matcher(nlp.vocab)          
        self.matcher.add('NUMERIC', [[{"POS": "NUM"}]]) 
        
        Span.set_extension("is_modified_numeric", default=None, force=True)
        
        
    def __call__(self, doc):
        
        ents = set()
        orig_ents = set(doc.ents)
        
        expand_numeric = ExpandSpan(
            ['-', '(', ')', '+', 'cm', 'mm', 'm', '×', 'x', ('ca', '.')], 
            pos_tags=['NUM'],
            entity_types=['QUANTITY', 'CARDINAL']
        )
        
        for _, start, end in self.matcher(doc):
            token = doc[start]
            # if not token.ent_type_ in self.numeric_entity_types:                
            span = expand_numeric(doc, token)            
            has_unit = [t for t in span if t.lower_ in ['mm', 'cm', 'm']]
            
            label = 'QUANTITYX' if has_unit else 'CARDINALX'

            ent = Span(doc, span.start, span.end, label=label)
      
            ents.add(ent)
            
        combined_ents = set(filter_spans(ents | orig_ents))                        
        doc.ents = combined_ents
        
        # Get all the new ents, and label the sent as a training sent         
        new_ents = combined_ents.difference(orig_ents)
        
        for new_ent in new_ents:
            new_ent.sent._.is_modified_numeric = True
        
        return doc
                                  
                                        
            
ner = Numeric(nlp)

# text = 'A glabrous perennial 15-30(-40) cm. Rhizomes far-creeping producing tufts of 1-3 shoots at ± regular intervals. Roots pale yellow-brown. Scales grey brown, often tinged wine red, soon becoming fibrous.           Stems trigonous, smooth below, rough above, ± decumbent at base. Lvs 12-40 x 2-4 mm, concave, often curved, thick, bluntly keeled or channelled, gradually tapering to a trigonous point up to 5 cm long, mid green, ± shiny. Ligule 1-4 mm, broadly ovate or almost truncate. Lower sheaths lfless, persistent, thick, white, tinged with red. Infl 1/5-1/4 lngth of stem. Lower bract lf-like, about as long as infl, not sheathing, upper glumaceous.  Male spike 1(-2), 10-15 x 3 mm, fusiform. Male glumes 3-4 mm, ovate, acute or obtuse, dark purple-black with hyaline margin. Female spikes 1-2(-3), ± contiguous, (5-)10-15(-20) x 4-5 mm, ovoid or subglobose, erect, lower peduncled, upper ±  sessile. Female glumes 2-3 mm, ovate, acute, dark purplish-brown with pale midrib and hyaline margin, shorter than fr.  Utricle 3-3.5 mm, smooth, ±          inflated, dark purple green in upper half, shiny. Beak 0.5 mm, ±          notched. Stigmas 2, rarely 3. Nut 2 mm, subglobose.' 
text = "Herbs, perennial, 40-100 cm tall, with long rhizomes; stems erect, unbranched or branched in upper part, often with short sterile branches at leaf axils above middle, striate, usually white villous. Leaves sessile; leaf blade lanceolate, oblong-lanceolate, or sublinear, 5-20 x 1-2.5 cm, (2 or)3-pinnatisect, abaxially densely villous, adaxially densely depressed glandular punctuate; ultimate segments lanceolate to linear, 0.5-1.5 x 0.3-0.5 mm, apex cartilaginous-mucronulate. Synflorescence a terminal flat-topped panicle 2-6 cm in diam. Capitula many. Involucres oblong or subovoid, ca. 4 x 3 mm; phyllaries in 3 rows, elliptic or oblong, 1.5-3 x 1-1.3 mm, scarious margin pale yellow or brown; midvein convex. Paleae oblong-elliptic, scarious, abaxially yellow gland-dotted. Ray florets 5; lamina white, pink, or violet-red, suborbicular, 1.5-3 x 2-2.5 mm, apex 2- or 3-denticulate. Disk florets yellow, tubular, 2.2-3 mm, exterior gland-dotted, apex 5-lobed. Achenes greenish, oblong, ca. 2 mm, with white lateral ribs. Corona absent. flower. and fruit. Jul-Sep. 2n = 18, 27, 36 + 0-2B, 45, 54 + 0-3B, 72."

doc = nlp(text)


doc = ner(doc)

training_data = []

def has_complex_numeric_ent(sent):
    
    return True
    
    MIN_NUMBER_LENGTH = 1
    
    # We don't want to add a load of training data with cardinals of length 1
    # So get the maximum length of numeric chars, and skip if not > 1         
    max_num_len = max([sum([len(t) for t in ent if t.pos_ == 'NUM']) for ent in sent.ents])

    return max_num_len > MIN_NUMBER_LENGTH


sents = [sent for sent in doc.sents if sent._.is_modified_numeric and has_complex_numeric_ent(sent)]

for sent in sents:
    training_data.append(
        (sent.text, [[ent.start, ent.end, ent.label_] for ent in sent.ents])
    )    

html =  displacy.render(doc, style='ent', jupyter=True)        
        
print(training_data)
        
        
# 

# = [
#   ("Tokyo Tower is 333m tall.", [(0, 11, "BUILDING")]),
# ]            

# for ent in doc.ents:
#     print(ent)
#     print(ent.label_)

# displacy.render(doc, style="ent")
            

INIT numeric


[('Herbs, perennial, 40-100 cm tall, with long rhizomes; stems erect, unbranched or branched in upper part, often with short sterile branches at leaf axils above middle, striate, usually white villous.', [[4, 8, 'QUANTITYX']]), ('Leaves sessile; leaf blade lanceolate, oblong-lanceolate, or sublinear, 5-20 x 1-2.5 cm, (2 or)3-pinnatisect, abaxially densely villous, adaxially densely depressed glandular punctuate; ultimate segments lanceolate to linear, 0.5-1.5 x 0.3-0.5 mm, apex cartilaginous-mucronulate.', [[55, 63, 'QUANTITYX'], [64, 66, 'CARDINALX'], [86, 94, 'QUANTITY']]), ('Involucres oblong or subovoid, ca.', []), ('4 x 3 mm; phyllaries in 3 rows, elliptic or oblong, 1.5-3 x 1-1.3 mm, scarious margin pale yellow or brown; midvein convex.', [[131, 132, 'CARDINAL'], [138, 146, 'QUANTITYX']]), ('Ray florets 5; lamina white, pink, or violet-red, suborbicular, 1.5-3 x 2-2.5 mm, apex 2- or 3-denticulate.', [[172, 173, 'CARDINALX'], [186, 194, 'QUANTITYX'], [196, 197, 'CARDINALX'], [198,

In [42]:
def is_complex_numeric_ent(ent):
    
    MIN_NUMBER_LENGTH = 1
    
    # We don't want to add a load of training data with cardinals of length 1
    # So get the maximum length of numeric chars, and skip if not > 1         
    num_len = sum([len(t) for t in ent if t.pos_ == 'NUM'])

    return num_len > MIN_NUMBER_LENGTH  


matcher = Matcher(nlp.vocab)
matcher.add('NUMERIC', [[{"POS": "NUM"}]])

expand_numeric = ExpandSpan(
    ['-', '(', ')', '+', 'cm', 'mm', 'm', '×', 'x', ('ca', '.')], 
    pos_tags=['NUM'],
    entity_types=['QUANTITY', 'CARDINAL']
)

doc = registry.nlp(text)
orig_ents = set(doc.ents)
ents = set()

for _, start, end in matcher(doc):  
    token = doc[start]      
    span = expand_numeric(doc, token)            
    has_unit = [t for t in span if t.lower_ in ['mm', 'cm', 'm']]            
    label = 'QUANTITYX' if has_unit else 'CARDINALX'
    ents.add(Span(doc, span.start, span.end, label=label))            

# Combine the new and original ents
# combined_ents = set(filter_spans(ents | orig_ents))

# Get all the new ents, and label the sent as a training sent         
new_ents = ents.difference(orig_ents)

new_ents = {e for e in new_ents if is_complex_numeric_ent(e)}

combined_ents = set(filter_spans(new_ents | orig_ents))                        
doc.ents = combined_ents

sents = {ent.sent for ent in new_ents}

displacy.render(doc4 x 3 mm, style='ent', jupyter=True)


In [73]:
nlp = spacy.load("en_core_web_trf")

In [74]:
class Sentencizer(BaseComponent):
    
    """
    Sentencizer, to split sentences on semicolons and periods.
    
    NB: If we just add is_sent_start for each semicolon, the default
    parser will split incorrectly
    """
    
    name = 'custom_sentencizers1'

    pipeline_config = {'first': True}

    def __call__(self, doc):
        
        print('CALLS')
        
        for token in doc[1:]:
            if self._is_sent_end(doc, doc[token.i - 1]):
                token.is_sent_start = True
            else:
                token.is_sent_start = False
                
        return doc
    
    def _is_sent_end(self, doc, token):
        return any([
            self._is_semicolon(doc, token),
            self._is_period(doc, token),
        ])
    
    @staticmethod
    def _is_semicolon(doc, token):
        return token.text == ";"
    
    @staticmethod
    def _is_period(doc, token):
        next_token = doc[token.i + 1]

        # Ensure the next word starts with a capital         
        return token.text == "." and next_token.shape_.startswith('X')

registry = ComponentsRegistry(nlp)
registry.add_components([
    Sentencizer,
])

doc = registry.nlp(text)

INIT custom_sentencizers1
CALLS


In [71]:
for token in doc:
    print(dir(token))
    print(token.shape_.startswith('X'))
    break

['_', '__bytes__', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__pyx_vtable__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__unicode__', 'ancestors', 'check_flag', 'children', 'cluster', 'conjuncts', 'dep', 'dep_', 'doc', 'ent_id', 'ent_id_', 'ent_iob', 'ent_iob_', 'ent_kb_id', 'ent_kb_id_', 'ent_type', 'ent_type_', 'get_extension', 'has_dep', 'has_extension', 'has_head', 'has_morph', 'has_vector', 'head', 'i', 'idx', 'iob_strings', 'is_alpha', 'is_ancestor', 'is_ascii', 'is_bracket', 'is_currency', 'is_digit', 'is_left_punct', 'is_lower', 'is_oov', 'is_punct', 'is_quote', 'is_right_punct', 'is_sent_end', 'is_sent_start', 'is_space', 'is_stop', 'is_title', 'is_upper', 'lang', 'lang_', 'left_edge', 'lefts', 'lemma', 'lemma_', 'lex', 'lex_id', 'like_email', 'like

In [75]:
for sent in doc.sents:
    print(sent)

Herbs, perennial, 40-100 cm tall, with long rhizomes;
stems erect, unbranched or branched in upper part, often with short sterile branches at leaf axils above middle, striate, usually white villous.
Leaves sessile;
leaf blade lanceolate, oblong-lanceolate, or sublinear, 5-20 x 1-2.5 cm, (2 or)3-pinnatisect, abaxially densely villous, adaxially densely depressed glandular punctuate;
ultimate segments lanceolate to linear, 0.5-1.5 x 0.3-0.5 mm, apex cartilaginous-mucronulate.
Synflorescence a terminal flat-topped panicle 2-6 cm in diam.
Capitula many.
Involucres oblong or subovoid, ca. 4 x 3 mm;
phyllaries in 3 rows, elliptic or oblong, 1.5-3 x 1-1.3 mm, scarious margin pale yellow or brown;
midvein convex.
Paleae oblong-elliptic, scarious, abaxially yellow gland-dotted.
Ray florets 5;
lamina white, pink, or violet-red, suborbicular, 1.5-3 x 2-2.5 mm, apex 2- or 3-denticulate.
Disk florets yellow, tubular, 2.2-3 mm, exterior gland-dotted, apex 5-lobed.
Achenes greenish, oblong, ca. 2 mm,

In [67]:
# for ent in doc.ents:
#     print(ent.label_, ent.text, ent.start)    

# def get_sentence_subject_nouns(sent):
#     # Try and identify which group match to use, by examining the sentence nouns
#     # Try nsubj, and then ROOT and NOUN
#     subject_nouns = [t for t in sent if t.dep_ == "nsubj"] + [t for t in sent if t.dep_ == "ROOT" and t.pos_ == "NOUN"]  
#     return subject_nouns      
    
def is_subject_noun(token):
    return token.dep_ == "nsubj" or (token.dep_ == "ROOT" and token.pos_ == "NOUN")
    
def sent_get_subject_part(sent, parts):
    for part in parts:
        if any([t for t in part if is_subject_noun(t)]):
            return part
            
    
for sent in doc.sents:
    parts = [e for e in sent.ents if e.label_ == 'PART']

    if not parts:
        continue
        
    subject_part = sent_get_subject_part(sent, parts)    

    if not subject_part:     
        first_part = parts[0]
        # Is the first part the first sentence, then use it as a subject        
        if first_part.start == sent.start:
            subject_part = parts[0]
        # Or if the first part is the second word, preceeded by a modifier         
        # For example: lower bract         
        elif first_part.start == sent.start + 1 and sent[0].dep_ == 'amod':
            subject_part = parts[0]
            
            
    print('--')
    print(subject_part)
    print(sent)
    

--
Rhizomes
Rhizomes far-creeping producing tufts of 1-3 shoots at ± regular intervals.
--
Roots
Roots pale yellow-brown.
--
Scales
Scales grey brown, often tinged wine red, soon becoming fibrous.
--
Stems
Stems trigonous, smooth below, rough above, ± decumbent at base.
--
Ligule
Ligule 1-4 mm, broadly ovate or almost truncate.
--
sheaths
Lower sheaths lfless, persistent, thick, white, tinged with red.
--
None
Infl 1/5-1/4 lngth of stem.
--
bract
Lower bract lf-like, about as long as infl, not sheathing, upper glumaceous.
--
spike
Male spike 1(-2), 10-15 x 3 mm, fusiform.
--
glumes
Male glumes 3-4 mm, ovate, acute or obtuse, dark purple-black with hyaline margin.
--
spikes
Female spikes 1-2(-3), ± contiguous, (5-)10-15(-20) x 4-5 mm, ovoid or subglobose, erect, lower peduncled, upper ±  sessile.
--
glumes
Female glumes 2-3 mm, ovate, acute, dark purplish-brown with pale midrib and hyaline margin, shorter than fr.  
--
Utricle
Utricle 3-3.5 mm, smooth, ±          inflated, dark purple g

In [68]:
nlp = spacy.load("en_core_web_trf")



In [69]:
t = 'Herbs, perennial, 40-100 cm tall, with long rhizomes; stems erect, unbranched or branched in upper part, often with short sterile branches at leaf axils above middle, striate, usually white villous. Leaves sessile; leaf blade lanceolate, oblong-lanceolate, or sublinear, 5-20 × 1-2.5 cm, (2 or)3-pinnatisect, abaxially densely villous, adaxially densely depressed glandular punctuate; ultimate segments lanceolate to linear, 0.5-1.5 × 0.3-0.5 mm, apex cartilaginous-mucronulate. Synflorescence a terminal flat-topped panicle 2-6 cm in diam. Capitula many. Involucres oblong or subovoid, ca. 4 × 3 mm; phyllaries in 3 rows, elliptic or oblong, 1.5-3 × 1-1.3 mm, scarious margin pale yellow or brown; midvein convex. Paleae oblong-elliptic, scarious, abaxially yellow gland-dotted. Ray florets 5; lamina white, pink, or violet-red, suborbicular, 1.5-3 × 2-2.5 mm, apex 2- or 3-denticulate. Disk florets yellow, tubular, 2.2-3 mm, exterior gland-dotted, apex 5-lobed. Achenes greenish, oblong, ca. 2 mm, with white lateral ribs. Corona absent. flower. and fruit. Jul-Sep. 2n = 18, 27, 36 + 0-2B, 45, 54 + 0-3B, 72.'

doc = nlp(t)



In [72]:
for ent in doc.ents:
    print(ent)
    print(ent.label_)

40-100 cm
QUANTITY
5-20 ×
QUANTITY
2
CARDINAL
0.5-1.5 ×
QUANTITY
2-6 cm
QUANTITY
ca. 4 × 3 mm
QUANTITY
3
CARDINAL
1.5-3 ×
QUANTITY
5
CARDINAL
1.5-3
QUANTITY
2-2.5 mm
QUANTITY
2-
CARDINAL
3
CARDINAL
2.2-3 mm
QUANTITY
5
CARDINAL
ca. 2 mm
QUANTITY
Jul-Sep. 2n
DATE
18
CARDINAL
27
CARDINAL
36
CARDINAL
45
CARDINAL
54
CARDINAL
0
CARDINAL
72
CARDINAL


In [73]:
x = '×'
repr(x)

"'×'"

In [74]:
!pip install unidecode

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting unidecode
  Downloading Unidecode-1.3.4-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 3.3 MB/s eta 0:00:01
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.4
You should consider upgrading via the '/Users/ben/Library/Caches/pypoetry/virtualenvs/adept-igwYr31N-py3.9/bin/python -m pip install --upgrade pip' command.[0m


In [75]:
s = '5.5–33 cm × 1.4–4.7 mm'

In [76]:
from unidecode import unidecode
clean = unidecode(s)

In [77]:
clean

'5.5-33 cm x 1.4-4.7 mm'

In [78]:
for i in s:
    print(repr(i))

20154

In [80]:
s.encode("unicode_escape")

b'5.5\\u201333 cm \\xd7 1.4\\u20134.7 mm'

In [81]:
clean.encode("unicode_escape")

b'5.5-33 cm x 1.4-4.7 mm'

In [120]:
idx = [[1,2], [1,3], [1,4], [7,8], [6, 9]]

In [119]:
for y in x:
    r = range(*y)
    print(r)

range(1, 2)
range(1, 3)
range(1, 4)
range(7, 8)
range(6, 9)


In [130]:
from spacy.util import filter_spans

spans = [doc[x:y] for x,y in idx]

# for x,y in idx:
#     doc[x:y]

filter_spans(spans)

[glabrous perennial 15, ) cm.]

In [188]:
1*-1

-1

In [189]:
0 => -1
1 => 1

-1

In [190]:
0 - 0

0

In [207]:
0*2 -1

-1

In [314]:
from reportlab.graphics import renderPM

In [330]:
svg = """<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"   xmlns:svg="http://www.w3.org/2000/svg"><div class="entities" style="line-height: 2.5; direction: ltr">Stems 
<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
    1(-4)
    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">CARDINAL</span>
</mark>
, erect, simple or branched, densely lanate-tomentose to glabrate. </div></svg>"""

In [331]:

i = 1
# output_path = Path(f"./{i}.png").open('w')
# output_path.open("w", encoding="utf-8").write(svg)
output_path = f"./{i}.png"
print(svg)

svg2png(bytestring=svg, write_to=output_path, parent_width=500, parent_height=500)

<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"   xmlns:svg="http://www.w3.org/2000/svg"><div class="entities" style="line-height: 2.5; direction: ltr">Stems 
<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
    1(-4)
    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">CARDINAL</span>
</mark>
, erect, simple or branched, densely lanate-tomentose to glabrate. </div></svg>
