Skip to content

Commit

Permalink
Renaming OmniParser to just Parser
Browse files Browse the repository at this point in the history
  • Loading branch information
lukehsiao committed Jul 18, 2018
1 parent 0b2c1bf commit 9a0b531
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 28 deletions.
4 changes: 2 additions & 2 deletions fonduer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from fonduer.candidates.models import candidate_subclass
from fonduer.learning import GenerativeModel, SparseLogisticRegression
from fonduer.meta import Meta
from fonduer.parser import OmniParser
from fonduer.parser import Parser
from fonduer.parser.models import Document, Figure, Sentence, Table
from fonduer.parser.preprocessors import HTMLDocPreprocessor
from fonduer.supervision.annotations import load_gold_labels
Expand Down Expand Up @@ -52,9 +52,9 @@
"NumberMatcher",
"OmniFigures",
"OmniNgrams",
"OmniParser",
"OrganizationMatcher",
"PDFPreprocessor",
"Parser",
"PersonMatcher",
"RegexMatchEach",
"RegexMatchSpan",
Expand Down
4 changes: 2 additions & 2 deletions fonduer/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from fonduer.parser.parser import OmniParser
from fonduer.parser.parser import Parser

__all__ = ["OmniParser"]
__all__ = ["Parser"]
16 changes: 8 additions & 8 deletions fonduer/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)


class OmniParser(UDFRunner):
class Parser(UDFRunner):
def __init__(
self,
structural=True, # structural information
Expand All @@ -43,8 +43,8 @@ def __init__(
# Use spaCy as our lingual parser
self.lingual_parser = Spacy()

super(OmniParser, self).__init__(
OmniParserUDF,
super(Parser, self).__init__(
ParserUDF,
structural=structural,
blacklist=blacklist,
flatten=flatten,
Expand All @@ -65,7 +65,7 @@ def clear(self, session, **kwargs):
session.query(Candidate).delete()


class OmniParserUDF(UDF):
class ParserUDF(UDF):
def __init__(
self,
structural,
Expand All @@ -89,7 +89,7 @@ def __init__(
a regex and _replace_ is a character string. All occurrences of _pattern_ in the
text will be replaced by _replace_.
"""
super(OmniParserUDF, self).__init__(**kwargs)
super(ParserUDF, self).__init__(**kwargs)

# structural (html) setup
self.structural = structural
Expand Down Expand Up @@ -125,7 +125,7 @@ def apply(self, x, **kwargs):
if self.visual:
if not self.pdf_path:
logger.error("Visual parsing failed: pdf_path is required")
for _ in self.parse_structure(document, text):
for _ in self.parse(document, text):
pass
# Add visual attributes
filename = self.pdf_path + document.name
Expand All @@ -141,7 +141,7 @@ def apply(self, x, **kwargs):
document.name, document.sentences, self.pdf_path
)
else:
yield from self.parse_structure(document, text)
yield from self.parse(document, text)

def _flatten(self, node):
"""Construct a string containing the child's text/tail appended to the node.
Expand Down Expand Up @@ -433,7 +433,7 @@ def _parse_node(self, node, state):
# Now, process the Sentence
yield from self._parse_sentence(node, state)

def parse_structure(self, document, text):
def parse(self, document, text):
"""Depth-first search over the provided tree.
Implemented as an iterative procedure. The structure of the state
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
Intersect,
LambdaFunctionMatcher,
Meta,
OmniParser,
Parser,
RegexMatchSpan,
Sentence,
SparseLogisticRegression,
Expand Down Expand Up @@ -57,7 +57,7 @@ def test_e2e(caplog):

doc_preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

corpus_parser = OmniParser(
corpus_parser = Parser(
structural=True, lingual=True, visual=True, pdf_path=pdf_path
)
corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
Expand Down
28 changes: 14 additions & 14 deletions tests/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
import os

from fonduer import Meta
from fonduer.parser import OmniParser
from fonduer.parser import Parser
from fonduer.parser.models import Document, Sentence
from fonduer.parser.parser import OmniParserUDF
from fonduer.parser.parser import ParserUDF
from fonduer.parser.preprocessors import HTMLDocPreprocessor
from fonduer.parser.spacy_parser import Spacy

Expand All @@ -22,7 +22,7 @@ def test_parse_md_details(caplog):
"""Unit test of the final results stored in the database of the md document.
This test only looks at the final results such that the implementation of
the OmniParserUDF's apply() can be modified.
the ParserUDF's apply() can be modified.
"""
caplog.set_level(logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -36,8 +36,8 @@ def test_parse_md_details(caplog):
# Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

# Create an OmniParser and parse the md document
omni = OmniParser(
# Create a Parser and parse the md document
omni = Parser(
structural=True, tabular=True, lingual=True, visual=True, pdf_path=pdf_path
)
omni.apply(preprocessor, parallelism=PARALLEL)
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_parse_md_details(caplog):


def test_simple_tokenizer(caplog):
"""Unit test of OmniParser on a single document with lingual features off."""
"""Unit test of Parser on a single document with lingual features off."""
caplog.set_level(logging.INFO)
logger = logging.getLogger(__name__)
session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()
Expand All @@ -122,7 +122,7 @@ def test_simple_tokenizer(caplog):
# Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

omni = OmniParser(structural=True, lingual=False, visual=True, pdf_path=pdf_path)
omni = Parser(structural=True, lingual=False, visual=True, pdf_path=pdf_path)
omni.apply(preprocessor, parallelism=PARALLEL)

doc = session.query(Document).order_by(Document.name).all()[1]
Expand All @@ -148,7 +148,7 @@ def test_simple_tokenizer(caplog):


def test_parse_document_diseases(caplog):
"""Unit test of OmniParser on a single document.
"""Unit test of Parser on a single document.
This tests both the structural and visual parse of the document.
"""
Expand All @@ -169,8 +169,8 @@ def test_parse_document_diseases(caplog):
# Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

# Create an OmniParser and parse the diseases document
omni = OmniParser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
# Create a Parser and parse the diseases document
omni = Parser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
omni.apply(preprocessor, parallelism=PARALLEL)

# Grab the diseases document
Expand Down Expand Up @@ -241,7 +241,7 @@ def test_spacy_integration(caplog):
max_docs = 2
doc_preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

corpus_parser = OmniParser(
corpus_parser = Parser(
structural=True, lingual=True, visual=False, pdf_path=pdf_path
)
corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
Expand Down Expand Up @@ -276,14 +276,14 @@ def test_parse_style(caplog):
# Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

# Create an OmniParser and parse the md document
omni = OmniParser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
# Create a Parser and parse the md document
omni = Parser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
omni.apply(preprocessor, parallelism=PARALLEL)

# Grab the document
doc = session.query(Document).order_by(Document.name).all()[0]

# Grab the sentences parsed by the OmniParser
# Grab the sentences parsed by the Parser
sentences = list(session.query(Sentence).order_by(Sentence.sentence_num).all())

logger.warning("Doc: {}".format(doc))
Expand Down

0 comments on commit 9a0b531

Please sign in to comment.