Merge branch 'develop' into 'master'
Develop

See merge request tbic/etude!52
paulheider committed May 2, 2019
2 parents b99a2ec + 6049b59 commit f0ca132
Showing 7 changed files with 599 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -19,7 +19,7 @@ __pycache__
.coverage
coverage.xml
junit.xml
htmlcov/
cov_*/

# ############################
# pyinstaller related files
66 changes: 64 additions & 2 deletions README.rst
@@ -277,6 +277,62 @@ such as the parent class or long description.
| macro-average by type | 340.0 | 8.0 | 0.0 | 105.0 |
+--------------------------------+-------+-----+-----+-------+

Custom Evaluation Print-Outs
--------------------------------

Most of your evaluation output customization can be handled by the command-line arguments described above.
Sometimes, however, you will need to generate output that exactly matches very specific formatting requirements.
For these instances, ETUDE supports custom print functions.
Currently, those print functions must be hard-coded into `scoring_metrics.py`.
Our roadmap includes loading and triggering these print functions from a standard folder, which will make the system much more modular.
Until then, you can see an example custom print-out that targets the `2018 n2c2 Track 1 <https://www.aclweb.org/portal/content/2018-n2c2-nlp-shared-task-and-workshop>`_ output format.
The configurations for this sample are in our sister repository:
`ETUDE Engine Configs for n2c2 <https://github.com/MUSC-TBIC/etude-engine-configs/tree/master/n2c2>`_.
The original evaluation script for the competition, used as a point of reference, can be found on GitHub:
`Evaluation scripts for the 2018 N2C2 shared tasks on clinical NLP <https://github.com/filannim/2018_n2c2_evaluation_scripts>`_.

.. code:: bash

   export ETUDE_DIR=etude-engine
   export ETUDE_CONFIGS_DIR=etude-engine-configs
   export N2C2_DATA=/tmp/n2c2

   python ${ETUDE_DIR}/etude.py \
       --reference-input ${N2C2_DATA}/train_annotations \
       --reference-config ${ETUDE_CONFIGS_DIR}/n2c2/2018_n2c2_track-1.conf \
       --test-input ${N2C2_DATA}/train_annotations \
       --test-config ${ETUDE_CONFIGS_DIR}/n2c2/2018_n2c2_track-1.conf \
       --no-metrics \
       --print-custom "2018 n2c2 track 1" \
       --fuzzy-match-flag exact \
       --file-suffix ".xml" \
       --empty-value 0.0

   ******************************************* TRACK 1 ********************************************
                     ------------ met -------------    ------ not met -------    -- overall ---
                     Prec.   Rec.    Speci.  F(b=1)    Prec.   Rec.    F(b=1)    F(b=1)  AUC
   Abdominal         1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Advanced-cad      1.0000  1.0000  0.0000  1.0000    0.0000  0.0000  0.0000    0.5000  0.5000
   Alcohol-abuse     0.0000  0.0000  1.0000  0.0000    1.0000  1.0000  1.0000    0.5000  0.5000
   Asp-for-mi        1.0000  1.0000  0.0000  1.0000    0.0000  0.0000  0.0000    0.5000  0.5000
   Creatinine        1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Dietsupp-2mos     1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Drug-abuse        0.0000  0.0000  1.0000  0.0000    1.0000  1.0000  1.0000    0.5000  0.5000
   English           1.0000  1.0000  0.0000  1.0000    0.0000  0.0000  0.0000    0.5000  0.5000
   Hba1c             1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Keto-1yr          0.0000  0.0000  1.0000  0.0000    1.0000  1.0000  1.0000    0.5000  0.5000
   Major-diabetes    1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Makes-decisions   1.0000  1.0000  0.0000  1.0000    0.0000  0.0000  0.0000    0.5000  0.5000
   Mi-6mos           1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
                     ------------------------------    ----------------------    --------------
   Overall (micro)   1.0000  1.0000  1.0000  1.0000    1.0000  1.0000  1.0000    1.0000  1.0000
   Overall (macro)   0.7692  0.7692  0.6923  0.7692    0.6923  0.6923  0.6923    0.7308  0.7308

   10 files found
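
Internally, each custom print-out is an ordinary function in `scoring_metrics.py` that `etude.py` calls whenever the matching name is passed to `--print-custom`. The sketch below shows how a new print function could be wired in; the function and format name are hypothetical, and only the `print_2018_n2c2_track1` hook actually exists:

.. code:: python

   ## In scoring_metrics.py (hypothetical sketch; the signature mirrors
   ## the existing print_2018_n2c2_track1 function)
   def print_my_custom_format( score_card , file_mapping , args = None ):
       ## Walk score_card here and render it in whatever layout the
       ## downstream tool expects
       pass

   ## Then add 'my custom format' to the choices list of the
   ## --print-custom argument in args_and_configs.py and dispatch on it
   ## in etude.py's score_ref_set():
   ##
   ##     if( 'my custom format' in args.print_custom ):
   ##         scoring_metrics.print_my_custom_format( score_card ,
   ##                                                 file_mapping ,
   ##                                                 args = args )
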
Configuring Annotation Extraction
=================================

@@ -297,6 +353,11 @@ and end attribute are required for a pattern to be scorable.
End Attr: (required; end offset attribute name)
Text Attr: (optional; not used by anything currently)


Additional interesting or useful configuration files can be found in
our sister repository:
`ETUDE Engine Configs <https://github.com/MUSC-TBIC/etude-engine-configs>`_
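
For illustration, a minimal span pattern section might look like the
stanza below. The section name and values are invented for the example,
and the `XPath` key is an assumption based on the engine's XPath-driven
extraction; `Begin Attr`, `End Attr`, and `Text Attr` are the keys
documented above:

.. code:: ini

   [ Sample Annotation ]
   Short Name:  Sample
   XPath:       ./annotations/Sample
   Begin Attr:  begin
   End Attr:    end
   Text Attr:   text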

Dependencies
============

@@ -320,7 +381,8 @@ rather than directly:
python -m pytest tests/
## You can also generate a coverage report in HTML format
python -m pytest --cov-report html --cov=./ tests/
python2.7 -m pytest --cov-report html:cov_html_py2.7 --cov=./ tests/
python3.7 -m pytest --cov-report html:cov_html_py3.7 --cov=./ tests/
## The junit file is helpful for automated systems or CI pipelines
python -m pytest --junitxml=junit.xml tests
25 changes: 21 additions & 4 deletions args_and_configs.py
@@ -64,7 +64,7 @@ def initialize_arg_parser():

parser.add_argument( '--empty-value' ,
dest = 'empty_value' ,
default = '' ,
default = None ,
help = "Value to print when metrics are undefined or values are null" )

parser.add_argument( "--fuzzy-match-flags" , nargs = "+" ,
@@ -99,11 +99,11 @@ def initialize_arg_parser():
parser.add_argument( '--by-type-and-file' , dest = 'by_type_and_file' ,
help = "Print metrics by file nested within annotation type" ,
action = "store_true" )

parser.add_argument( '--by-attribute' , dest = 'by_attribute' ,
help = "Print metrics by annotation attribute" ,
action = "store_true" )

parser.add_argument( "--reference-config",
dest = 'reference_config' ,
default = 'config/i2b2_2016_track-1.conf' ,
@@ -208,6 +208,14 @@ def initialize_arg_parser():
dest = 'print_metrics' ,
help = "Suppress the metrics (provided via --metrics-list) scored" ,
action = "store_false" )

## TODO - make it easy to load / reference these special print functions
## from separate files
parser.add_argument( "--print-custom" , nargs = '+' ,
dest = 'print_custom' ,
default = [ ] ,
choices = [ '2018 n2c2 track 1' ] ,
help = "Use one of any custom output print functions. Usually, these are created to replicate the output of a different tool." )

parser.add_argument( '--align-tokens' ,
dest = 'align_tokens' ,
@@ -356,8 +364,17 @@ def extract_xpath_spanless_patterns( annotations ,
short_name = config.get( sect ,
'Short Name' ) ,
pivot_attr = config.get( sect ,
'Pivot Attr' ) ,
'Pivot Attr' ) ,
parity = config.get( sect ,
'Parity' ) ,
optional_attributes = [] )
if( pattern_entry[ 'parity' ] not in [ 'First' , 'Last' ,
'Unique' , 'Any' ] ):
log.warn( '{} {} ( {} , {} )'.format(
'Unexpected setting for annotation parity.' ,
'This may have unpredictable consequences:' ,
pattern_entry[ 'long_name' ] ,
pattern_entry[ 'parity' ] ) )
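## For example, a config section might declare 'Parity: Unique';
## First, Last, Unique, and Any are the accepted values, per the check above.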
if( config.has_option( sect , 'Opt Attr' ) ):
optional_attributes = config.get( sect , 'Opt Attr' )
pattern_entry[ 'optional_attributes' ] = \
41 changes: 32 additions & 9 deletions etude.py
@@ -430,14 +430,20 @@ def score_ref_set( reference_ns , reference_dd , reference_patterns , reference_
# file_mapping ,
# reference_patterns , test_patterns ,
# args = args )
scoring_metrics.print_confusion_matrix_shell( confusion_matrix ,
file_mapping ,
reference_patterns , test_patterns ,
args = args )
scoring_metrics.print_score_summary_shell( score_card ,
file_mapping ,
reference_patterns , test_patterns ,
args = args )
if( args.print_confusion_matrix ):
scoring_metrics.print_confusion_matrix_shell( confusion_matrix ,
file_mapping ,
reference_patterns , test_patterns ,
args = args )
if( args.print_metrics ):
scoring_metrics.print_score_summary_shell( score_card ,
file_mapping ,
reference_patterns , test_patterns ,
args = args )
if( '2018 n2c2 track 1' in args.print_custom ):
scoring_metrics.print_2018_n2c2_track1( score_card ,
file_mapping ,
args = args )
#########
log.debug( "-- Leaving '{}'".format( sys._getframe().f_code.co_name ) )

@@ -487,6 +493,21 @@ def init_args():
if( 'F{}'.format( common_beta ) in args.metrics_list ):
if( common_beta not in args.f_beta_values ):
args.f_beta_values.append( common_beta )
## The command line parameters are always initially cast as strings.
## That works fine for some empty values. Sometimes we want to use
## 0 (int) or 0.0 (float) or -1 as empty values. In this case,
## it's best to cast the string to the appropriate numerical
## type for formatting later.
if( args.empty_value is not None and
args.empty_value != '' ):
try:
args.empty_value = int( args.empty_value )
except ValueError:
log.debug( 'Default empty_value is not an int' )
try:
args.empty_value = float( args.empty_value )
except ValueError:
log.debug( 'Default empty_value is not a float' )
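## For example, '--empty-value 0' yields the int 0, '--empty-value 0.5'
## yields the float 0.5, and '--empty-value n/a' stays the string 'n/a'.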
## Resolve conflicts between --ignore-whitespace, --heed-whitespace,
## and --ignore-regex flags. Essentially, if we set something in
## skip_chars, use that. Otherwise, if we tripped --ignore_whitespace
@@ -717,7 +738,9 @@ def init_args():
log.error( 'Uncaught exception in count_ref_set for system output corpus: {}'.format( e ) )

##
if( args.print_confusion_matrix or args.print_metrics ):
if( args.print_confusion_matrix or
args.print_metrics or
len( args.print_custom ) > 0 ):
try:
score_ref_set( reference_ns = reference_ns ,
reference_dd = reference_dd ,
