# Automated parsing

Automated parsers in ChemDataExtractor will extract data from tables and from simple sentences.
First, we need to import the required elements from ChemDataExtractor:

In [1]:
from chemdataextractor.doc import Document
from chemdataextractor.doc.table_new import Table
from chemdataextractor.model.units import TemperatureModel
from chemdataextractor.model.model import Compound, ModelType, StringType
from chemdataextractor.parse.elements import I
from chemdataextractor.parse.actions import join

Next, we define a model. We set two required fields: the mandatory `specifier` and a `compound`.

In [2]:
class GlassTransitionTemperature(TemperatureModel):
    """Temperature model describing a glass transition temperature.

    Declares two fields, both with ``required=True``: a ``specifier``
    string parsed with ``specifier_expr`` and a nested ``compound`` model.
    """

    # Accept either the three-token phrase or the abbreviation "Tg"; the
    # ``join`` parse action is attached to the combined expression.
    specifier_expr = (
        (I('Glass') + I('transition') + I('temperature'))
        | I('Tg')
    ).add_action(join)

    specifier = StringType(
        parse_expression=specifier_expr,
        required=True,
        contextual=True,
        updatable=True,
    )
    compound = ModelType(
        Compound,
        required=True,
        contextual=True,
    )

Finally, we can parse a paper:

In [3]:
# Build a Document from the paper's XML file on disk.
doc = Document.from_file("./data/j.jallcom.2016.03.103.xml")
# Assign GlassTransitionTemperature as the only model for this document.
doc.models = [GlassTransitionTemperature]

# Print the serialized form of every record the document yields.
for record in doc.records:
    print(record.serialize())

{'GlassTransitionTemperature': {'raw_value': '1000', 'raw_units': '°C', 'value': [1000.0], 'units': 'Celsius^(1.0)', 'specifier': 'glass transition temperature', 'compound': {'Compound': {'names': ['S0925-8388(16)30672-7']}}}}
{'GlassTransitionTemperature': {'raw_value': '485', 'raw_units': 'K', 'value': [485.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tg', 'compound': {'Compound': {'names': ['Ge20Se78Sn2']}}}}
{'GlassTransitionTemperature': {'raw_value': '670', 'raw_units': 'K', 'value': [670.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Ge20Se78Sn2']}}}}
{'GlassTransitionTemperature': {'raw_value': '185', 'raw_units': 'K', 'value': [185.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Ge20Se78Sn2']}}}}
{'GlassTransitionTemperature': {'raw_value': '493', 'raw_units': 'K', 'value': [493.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tg', 'compound': {'Compound': {'names': ['Ge20Se76Sn4']}}}}
{'GlassTransitionTemperat