Skip to content

Commit

Permalink
fixed "formasaurus run" CLI command
Browse files: browse the repository at this point in the history
  • Loading branch information
kmike committed Dec 18, 2015
1 parent c0d50f9 commit 0334b71
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
28 changes: 18 additions & 10 deletions formasaurus/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Usage:
formasaurus train <modelfile> [--data-folder <path>]
formasaurus run <modelfile> <url> [--threshold <probability>]
formasaurus run <url> [modelfile] [--threshold <probability>]
formasaurus check-data [--data-folder <path>]
formasaurus evaluate (forms|fields|all) [--cv <n_folds>] [--data-folder <path>]
formasaurus -h | --help
Expand All @@ -14,7 +14,7 @@
Options:
--data-folder <path> path to the data folder
--cv <n_folds> use <n_folds> for cross-validation [default: 20]
--threshold <probability> don't display predictions with probability below this threshold [default: 0.01]
--threshold <probability> don't display predictions with probability below this threshold [default: 0.05]
To train an extractor for HTML form classification use "train" command.
Expand Down Expand Up @@ -73,13 +73,21 @@ def main():
print("No forms found.")
return

for form, probs in result:
print("-"*40)
for form, info in result:
print("\n")
print("="*60)
print(get_cleaned_form_html(form))
print("")
for tp, prob in Counter(probs).most_common():
tp_full = ex.form_types_inv[tp]
print("%s %0.1f%%" % (tp_full, prob * 100), end=' ')
print("-"*60)
print("Form type: ", end="")
for form_tp, prob in Counter(info['form']).most_common():
print("%s %0.1f%%" % (form_tp, prob * 100), end=' ')

print("\n\nField types:")
for field_name, probs in info['fields'].items():
print(field_name, end=': ')
for field_tp, prob in Counter(probs).most_common():
print("%s %0.1f%%" % (field_tp, prob * 100), end=' ')
print("")

print("")

Expand All @@ -92,13 +100,13 @@ def main():
)

if args['forms'] or args['all']:
print("Evaluating form classifier...")
print("Evaluating form classifier...\n")
formtype_model.print_classification_report(annotations,
n_folds=n_folds)
print("")

if args['fields'] or args['all']:
print("Evaluating form field classifier...")
print("Evaluating form field classifier...\n")
fieldtype_model.print_classification_report(annotations,
n_folds=n_folds)

Expand Down
1 change: 1 addition & 0 deletions formasaurus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,5 @@ def download(url):
"""
Download a web page from url, return its content as unicode.
"""
url = add_scheme_if_missing(url)
return requests.get(url).text

0 comments on commit 0334b71

Please sign in to comment.