In [1]:
from chemdataextractor import Document
from chemdataextractor.model import BaseModel, Compound, StringType, ModelType
from chemdataextractor.model.units import TemperatureModel, LengthModel
from chemdataextractor.parse import R, I, W, Optional, merge, join, AutoSentenceParser
from chemdataextractor.doc import Paragraph, Heading, Sentence
from pprint import pprint
from lxml import etree

In [2]:
class WorkingTemperature(TemperatureModel):
    """A working (operating) temperature property.

    Subclasses ``TemperatureModel`` so that value/unit handling comes from the
    base temperature model. A record needs a specifier phrase (one of the
    alternatives in ``specifier_expr``); the associated compound is optional.
    """

    # Alternatives are listed in their original order. Every token is wrapped
    # in I(...) so the whole phrase is matched the same way; the matched tokens
    # are merged into one string by the `join` action.
    specifier_expr = (
        (I('heat') + I('-') + I('sink') + I('temperature') + I('of'))
        | (I('operating') + I('temperature'))
        | (I('operation') + I('temperature'))
        | (I('maximum') + I('temperature'))
        | (I('working') + I('temperature'))
        # 'mode' used to be a bare string here instead of I('mode').
        | (I('pulsed') + I('mode'))
        # Spelling fixed: was 'continous', which cannot match "continuous".
        | (I('continuous') + I('wave') + I('mode'))
        | (I('maximum') + I('heat') + I('-') + I('sink') + I('temperature'))
    ).add_action(join)

    specifier = StringType(parse_expression=specifier_expr, required=True, contextual=True)
    compound = ModelType(Compound, required=False, contextual=True)
    # Relax the 'names' requirement on the compound model used by this field.
    # NOTE(review): this assigns through compound.model_class, so it looks like
    # it changes the shared Compound model for the whole kernel, not only for
    # WorkingTemperature - confirm that this is intended.
    compound.model_class.fields['names'].required = False
    parsers = [AutoSentenceParser()]

In [3]:
# Sample text taken from the abstract; the numbers were altered slightly so
# that it is easy to see which part of the text each record comes from.
s1 = Sentence(
    'The laser operated up to a heat-sink temperature of 110K in pulsed mode, 95K in continuous wave (cw) mode, and the threshold current density at 5K was ∼140A∕cm2.'
)
# NOTE(review): this single Sentence actually holds two sentences
# ("... of 111K. The threshold ..."); the recorded output for it shows
# raw_units 'K.' - consider splitting the text if that is not wanted.
s2 = Sentence(
    'In pulsed operation, the device lased up to a maximum heat-sink temperature (Tmax) of 111K. The threshold current density (Jth) at 6K was 142A∕cm2. '
)
# Each sentence gets its own one-element model list.
s1.models, s2.models = [WorkingTemperature], [WorkingTemperature]

In [4]:
# Stand-alone parser instance, separate from the one listed in
# WorkingTemperature.parsers; later cells call sentence_parser.root.scan(...)
# on it to print the phrase the grammar matches.
# NOTE(review): this instance is created with lenient=True while the parser in
# WorkingTemperature.parsers uses the default arguments, so the printed phrases
# may not correspond exactly to what produces the records - confirm.
sentence_parser = AutoSentenceParser(lenient=True)
# Attach the model to the parser; presumably `root` is derived from it - verify
# against the ChemDataExtractor parser documentation.
sentence_parser.model = WorkingTemperature

In [5]:
# Print every phrase the parser grammar matches in s1 as serialized XML,
# each followed by a blank line.
for scan_hit in sentence_parser.root.scan(s1.tagged_tokens):
    matched_phrase = scan_hit[0]  # first item of the scan result is what gets serialized
    print(etree.tostring(matched_phrase), '\n')

# Then show the records extracted from the same sentence.
pprint(s1.records.serialize())

b'<root_phrase><specifier>heat - sink temperature of</specifier><raw_value>110</raw_value><raw_units>K</raw_units><IN>in</IN><specifier>pulsed mode</specifier><COMMA>,</COMMA><raw_value>95</raw_value><raw_units>K</raw_units><IN>in</IN><JJ>continuous</JJ><NN>wave</NN><LRB>(</LRB><NN>cw</NN><RRB>)</RRB><NN>mode</NN><COMMA>,</COMMA><CC>and</CC><DT>the</DT><NN>threshold</NN><JJ>current</JJ><NN>density</NN><IN>at</IN><raw_value>5</raw_value><raw_units>K</raw_units></root_phrase>' 

[{'WorkingTemperature': {'raw_units': 'K',
                         'raw_value': '110',
                         'specifier': 'heat - sink temperature of',
                         'units': 'Kelvin^(1.0)',
                         'value': [110.0]}}]


In [6]:
# Print every phrase the parser grammar matches in s2 as serialized XML,
# each followed by a blank line.
for scan_hit in sentence_parser.root.scan(s2.tagged_tokens):
    matched_phrase = scan_hit[0]  # first item of the scan result is what gets serialized
    print(etree.tostring(matched_phrase), '\n')

# Then show the records extracted from the same sentence.
pprint(s2.records.serialize())

b'<root_phrase><specifier>maximum heat - sink temperature</specifier><LRB>(</LRB><NN>Tmax</NN><RRB>)</RRB><IN>of</IN><raw_value>111</raw_value><raw_units>K.</raw_units><DT>The</DT><NN>threshold</NN><JJ>current</JJ><NN>density</NN><LRB>(</LRB><NN>Jth</NN><RRB>)</RRB><IN>at</IN><raw_value>6</raw_value><raw_units>K</raw_units></root_phrase>' 

[{'WorkingTemperature': {'raw_units': 'K.',
                         'raw_value': '111',
                         'specifier': 'maximum heat - sink temperature',
                         'units': 'Kelvin^(1.0)',
                         'value': [111.0]}}]
