Commit 9275280

Grace Muzny (Stanford NLP) authored and committed
merge master
1 parent f0b13fc commit 9275280

File tree

170 files changed: +85250 -86078 lines


README.md

Lines changed: 21 additions & 7 deletions

@@ -5,25 +5,39 @@ Stanford CoreNLP provides a set of natural language analysis tools written in Ja
 
 The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.
 
-#### How To Compile (with ant)
+#### Build Instructions
 
-1. cd CoreNLP ; ant
+Several times a year we distribute a new version of the software, which corresponds to a stable commit.
 
-#### How To Create A Jar
+During the time between releases, one can always use the latest, under development version of our code.
 
-1. compile the code
-2. cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu
+Here are some helpful instructions to use the latest code:
+
+1. Make sure you have ant installed.
+2. Compile the code with this command: `cd CoreNLP ; ant`
+3. Then run this command to build a jar with the latest version of the code: `cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu`
+4. This will create a new jar called stanford-corenlp.jar in the CoreNLP folder which contains the latest code.
+5. The dependencies that work with the latest code are in CoreNLP/lib and CoreNLP/liblocal, so make sure to include those in your CLASSPATH.
+6. Also make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar)
+and [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and include them in your CLASSPATH. If you
+are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.
 
 You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#browse%7C11864822).
 
 You can find more explanation and documentation on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
 
 The most recent models associated with the code in the HEAD of this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
 
-Some of the larger (English) models -- like the shift-reduce parser and WikiDict -- are not distributed with our default models jar.
+Some of the larger (English) models -- like the shift-reduce parser and WikiDict -- are not distributed with our default models jar.
 The most recent version of these models can be found [here](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar).
 
+We distribute resources for other languages as well, including [Arabic models](http://nlp.stanford.edu/software/stanford-arabic-corenlp-models-current.jar),
+[Chinese models](http://nlp.stanford.edu/software/stanford-chinese-corenlp-models-current.jar),
+[French models](http://nlp.stanford.edu/software/stanford-french-corenlp-models-current.jar),
+[German models](http://nlp.stanford.edu/software/stanford-german-corenlp-models-current.jar),
+and [Spanish models](http://nlp.stanford.edu/software/stanford-spanish-corenlp-models-current.jar).
+
 For information about making contributions to Stanford CoreNLP, see the file [CONTRIBUTING.md](CONTRIBUTING.md).
 
-Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](http://stackoverflow.com/questions/tagged/stanford-nlp),
+Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](http://stackoverflow.com/questions/tagged/stanford-nlp),
 or on the [mailing lists](http://nlp.stanford.edu/software/corenlp.shtml#Mail).
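The CLASSPATH assembly in steps 5 and 6 of the new instructions is where most setup mistakes happen. As a quick illustration (not part of the repo; `corenlp_classpath` and the exact layout it assumes are hypothetical), one way to collect the freshly built jar plus every dependency jar under lib/ and liblocal/ is:

```python
import os
from pathlib import Path

def corenlp_classpath(repo="CoreNLP"):
    """Build a CLASSPATH string covering the jar built in step 3 plus
    every dependency jar under lib/ and liblocal/ (step 5)."""
    repo = Path(repo)
    jars = [repo / "stanford-corenlp.jar"]  # built in step 3
    for d in ("lib", "liblocal"):
        jars.extend(sorted((repo / d).glob("*.jar")))
    # join with ':' on Unix, ';' on Windows
    return os.pathsep.join(str(j) for j in jars)
```

The models jars from step 6 would be appended to the same string in exactly the same way.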

build.xml

Lines changed: 1 addition & 1 deletion

@@ -160,7 +160,7 @@
 <target name="itest" depends="classpath,compile"
 description="Run core integration tests">
 <echo message="${ant.project.name}" />
-<junit fork="yes" maxmemory="8g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="true">
+<junit fork="yes" maxmemory="10g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="true">
 <classpath refid="classpath"/>
 <classpath path="${build.path}"/>
 <classpath path="${data.path}"/>

data/edu/stanford/nlp/upos/ENUniversalPOS.tsurgeon

Lines changed: 5 additions & 0 deletions

@@ -98,6 +98,11 @@ NN=target <... {/\\%/}
 
 relabel target SYM
 
+% fused det-noun pronouns -> PRON
+NN=target < (/^(?i:(somebody|something|someone|anybody|anything|anyone|everybody|everything|everyone|nobody|nothing))$/)
+
+relabel target PRON
+
 % NN -> NOUN (otherwise)
 NN=target <... {/.*/}
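The new Tsurgeon rule keys on a single case-insensitive alternation over fused det-noun pronouns. A quick sanity check of that alternation in plain Python `re` (outside Tsurgeon, with `re.IGNORECASE` standing in for the `(?i:...)` inline flag; `upos_for_nn` is just an illustrative stand-in for the two NN rules):

```python
import re

# The word list from the new fused det-noun pronoun rule, verbatim
FUSED_PRONOUNS = re.compile(
    r"^(somebody|something|someone|anybody|anything|anyone|"
    r"everybody|everything|everyone|nobody|nothing)$",
    re.IGNORECASE,
)

def upos_for_nn(word):
    """Mirror the two NN rules: fused pronouns relabel to PRON,
    anything else falls through to the catch-all NOUN rule."""
    return "PRON" if FUSED_PRONOUNS.match(word) else "NOUN"
```

Note the anchors: without `^...$`, a token like "anybodys" would also match via the "anybody" alternative.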

doc/corenlp/README.txt

Lines changed: 3 additions & 0 deletions

@@ -42,6 +42,9 @@ LICENSE
 CHANGES
 ---------------------------------
 
+2016-10-30    3.7.0     KBP Annotator, improved coreference, Arabic
+                        pipeline
+
 2015-12-09    3.6.0     Improved coreference, OpenIE integration,
                         Stanford CoreNLP server

doc/corenlp/pom-full.xml

Lines changed: 4 additions & 4 deletions

@@ -2,7 +2,7 @@
 <modelVersion>4.0.0</modelVersion>
 <groupId>edu.stanford.nlp</groupId>
 <artifactId>stanford-corenlp</artifactId>
-<version>3.6.0</version>
+<version>3.7.0</version>
 <packaging>jar</packaging>
 <name>Stanford CoreNLP</name>
 <description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
 </license>
 </licenses>
 <scm>
-<url>http://nlp.stanford.edu/software/stanford-corenlp-2015-12-06.zip</url>
-<connection>http://nlp.stanford.edu/software/stanford-corenlp-2015-12-06.zip</connection>
+<url>http://nlp.stanford.edu/software/stanford-corenlp-2016-10-30.zip</url>
+<connection>http://nlp.stanford.edu/software/stanford-corenlp-2016-10-30.zip</connection>
 </scm>
 <developers>
 <developer>
@@ -88,7 +88,7 @@
 <configuration>
 <artifacts>
 <artifact>
-<file>${project.basedir}/stanford-corenlp-3.6.0-models.jar</file>
+<file>${project.basedir}/stanford-corenlp-3.7.0-models.jar</file>
 <type>jar</type>
 <classifier>models</classifier>
 </artifact>

doc/tagger/README-Models.txt

Lines changed: 1 addition & 5 deletions

@@ -105,15 +105,11 @@ University of Stuttgart and the Seminar für Sprachwissenschaft of the
 University of Tübingen. See:
 http://www.ims.uni-stuttgart.de/projekte/CQPDemos/Bundestag/help-tagset.html
 This model uses features from the distributional similarity clusters
-built over the HGC.
+built over the HGC (Huge German Corpus).
 Performance:
 96.90% on the first half of the remaining 20% of the Negra corpus (dev set)
 (90.33% on unknown words)
 
-german-dewac.tagger
-This model uses features from the distributional similarity clusters
-built from the deWac web corpus.
-
 german-fast.tagger
 Lacks distributional similarity features, but is several times faster
 than the other alternatives.

itest/src/edu/stanford/nlp/coref/hybrid/ChineseCorefBenchmarkSlowITest.java

Lines changed: 14 additions & 14 deletions

@@ -47,9 +47,9 @@ private static String runCorefTest(boolean deleteOnExit) throws Exception {
 String currentDir = System.getProperty("user.dir");
 System.err.println("Current dir using System:" +currentDir);
 
-String[] corefArgs = { "-props", "edu/stanford/nlp/coref/hybrid/properties/zh-conll.properties",
-'-' + CorefProperties.LOG_PROP, baseLogFile,
-'-' + CorefProperties.PATH_OUTPUT_PROP, WORK_DIR_FILE.toString()+File.separator };
+String[] corefArgs = { "-props", "edu/stanford/nlp/coref/hybrid/properties/zh-dcoref-conll.properties",
+'-' + HybridCorefProperties.LOG_PROP, baseLogFile,
+'-' + CorefProperties.OUTPUT_PATH_PROP, WORK_DIR_FILE.toString()+File.separator };
 
 Properties props = StringUtils.argsToProperties(corefArgs);
 System.err.println("Running coref with arguments:");
@@ -107,24 +107,24 @@ public void testChineseDcoref() throws Exception {
 Counter<String> highResults = new ClassicCounter<String>();
 Counter<String> expectedResults = new ClassicCounter<String>();
 
-setLowHighExpected(lowResults, highResults, expectedResults, MENTION_TP, 12550, 12700, 12600); // In 2015 was: 12370
+setLowHighExpected(lowResults, highResults, expectedResults, MENTION_TP, 12550, 12700, 12596); // In 2015 was: 12370
 setLowHighExpected(lowResults, highResults, expectedResults, MENTION_F1, 55.7, 56.0, 55.88); // In 2015 was: 55.59
 
-setLowHighExpected(lowResults, highResults, expectedResults, MUC_TP, 6050, 6100, 6063); // In 2015 was: 5958
-setLowHighExpected(lowResults, highResults, expectedResults, MUC_F1, 58.30, 58.80, 58.48); // In 2015 was: 57.87
+setLowHighExpected(lowResults, highResults, expectedResults, MUC_TP, 6050, 6100, 6065); // In 2015 was: 5958
+setLowHighExpected(lowResults, highResults, expectedResults, MUC_F1, 58.30, 58.80, 58.52); // In 2015 was: 57.87
 
-setLowHighExpected(lowResults, highResults, expectedResults, BCUBED_TP, 6990, 7110.00, 7100.92); // In 2015 was: 6936.32
-setLowHighExpected(lowResults, highResults, expectedResults, BCUBED_F1, 51.60, 52.00, 51.86); // In 2015 was: 51.07
+setLowHighExpected(lowResults, highResults, expectedResults, BCUBED_TP, 6990, 7110.00, 7026.39); // In 2015 was: 6936.32
+setLowHighExpected(lowResults, highResults, expectedResults, BCUBED_F1, 51.60, 52.20, 52.11); // In 2015 was: 51.07
 
-setLowHighExpected(lowResults, highResults, expectedResults, CEAFM_TP, 8220, 8260, 8242); // In 2015 was: 8074
-setLowHighExpected(lowResults, highResults, expectedResults, CEAFM_F1, 55.50, 56.00, 55.77); // In 2015 was: 55.10
+setLowHighExpected(lowResults, highResults, expectedResults, CEAFM_TP, 8220, 8260, 8224); // In 2015 was: 8074
+setLowHighExpected(lowResults, highResults, expectedResults, CEAFM_F1, 55.40, 56.00, 55.43); // In 2015 was: 55.10
 
-setLowHighExpected(lowResults, highResults, expectedResults, CEAFE_TP, 2250.00, 2300.00, 2272.52); // In 2015 was: 2205.72
-setLowHighExpected(lowResults, highResults, expectedResults, CEAFE_F1, 51.50, 52.00, 51.52); // In 2015 was: 50.62
+setLowHighExpected(lowResults, highResults, expectedResults, CEAFE_TP, 2250.00, 2300.00, 2296.06); // In 2015 was: 2205.72
+setLowHighExpected(lowResults, highResults, expectedResults, CEAFE_F1, 51.30, 52.00, 51.33); // In 2015 was: 50.62
 
-setLowHighExpected(lowResults, highResults, expectedResults, BLANC_F1, 46.75, 47.25, 47.00); // In 2015 was: 46.19
+setLowHighExpected(lowResults, highResults, expectedResults, BLANC_F1, 46.00, 47.25, 46.68); // In 2015 was: 46.19
 
-setLowHighExpected(lowResults, highResults, expectedResults, CONLL_SCORE, 53.75, 54.00, 53.95); // In 2015 was: 53.19
+setLowHighExpected(lowResults, highResults, expectedResults, CONLL_SCORE, 53.75, 54.10, 54.01); // In 2015 was: 53.19
 
 BenchmarkingHelper.benchmarkResults(results, lowResults, highResults, expectedResults);
 }
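Every updated line in this test follows the same low/high/expected pattern, so the acceptance logic reduces to a per-metric range check. A minimal sketch of that idea (hypothetical `benchmark_results`, standing in for the setLowHighExpected triples plus BenchmarkingHelper.benchmarkResults; here the `expected` values are treated as reference points only):

```python
def benchmark_results(actual, low, high):
    """Fail the benchmark if any observed metric leaves its
    [low, high] acceptance window."""
    out_of_range = {
        metric: value
        for metric, value in actual.items()
        if not (low[metric] <= value <= high[metric])
    }
    if out_of_range:
        raise AssertionError(f"metrics out of range: {out_of_range}")
```

For example, the updated CONLL_SCORE window accepts an observed 54.01 because 53.75 <= 54.01 <= 54.10, while a MUC_F1 of 59.0 would fail against its 58.30..58.80 window.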

itest/src/edu/stanford/nlp/ie/crf/TestThreadedCRFClassifier.java

Lines changed: 22 additions & 20 deletions

@@ -12,6 +12,7 @@
 import edu.stanford.nlp.util.Timing;
 
 public class TestThreadedCRFClassifier {
+
 TestThreadedCRFClassifier(Properties props) {
 inputEncoding = props.getProperty("inputEncoding", "UTF-8");
 }
@@ -23,8 +24,8 @@ public class TestThreadedCRFClassifier {
 
 private final String inputEncoding;
 
-CRFClassifier loadClassifier(String loadPath, Properties props) {
-CRFClassifier crf = new CRFClassifier(props);
+static CRFClassifier loadClassifier(String loadPath, Properties props) {
+CRFClassifier crf = new CRFClassifier(props);
 crf.loadClassifierNoExceptions(loadPath, props);
 return crf;
 }
@@ -58,9 +59,9 @@ public void run() {
 Timing t = new Timing();
 resultsString = runClassifier(crf, filename);
 long millis = t.stop();
-System.out.println("Thread " + threadName + " took " + millis +
+System.out.println("Thread " + threadName + " took " + millis +
 "ms to tag file " + filename);
-}
+}
 }
 
 /**
@@ -71,7 +72,7 @@ public void run() {
 * -crf2 ../stanford-releases/stanford-ner-models/dewac_175m_600.ser.gz
 * -testFile ../data/german-ner/deu.testa -inputEncoding iso-8859-1
 */
-static public void main(String[] args) {
+public static void main(String[] args) {
 try {
 System.setOut(new PrintStream(System.out, true, "UTF-8"));
 System.setErr(new PrintStream(System.err, true, "UTF-8"));
@@ -81,10 +82,10 @@ static public void main(String[] args) {
 
 runTest(StringUtils.argsToProperties(args));
 }
-
+
 static public void runTest(Properties props) {
 TestThreadedCRFClassifier test = new TestThreadedCRFClassifier(props);
-test.runThreadedTest(props);
+test.runThreadedTest(props);
 }
 
 
@@ -95,7 +96,7 @@ void runThreadedTest(Properties props) {
 ArrayList<String> modelNames = new ArrayList<String>();
 ArrayList<CRFClassifier> classifiers = new ArrayList<CRFClassifier>();
 
-for (int i = 1;
+for (int i = 1;
 props.getProperty("crf" + Integer.toString(i)) != null; ++i) {
 String model = props.getProperty("crf" + Integer.toString(i));
 CRFClassifier crf = loadClassifier(model, props);
@@ -107,7 +108,7 @@ void runThreadedTest(Properties props) {
 // must run twice to account for "transductive learning"
 results = runClassifier(crf, testFile);
 baseResults.add(results);
-System.out.println("Stored base results for " + model +
+System.out.println("Stored base results for " + model +
 "; length " + results.length());
 }
 
@@ -121,13 +122,13 @@ void runThreadedTest(Properties props) {
 String repeated = runClassifier(crf, testFile);
 if (!base.equals(repeated)) {
 throw new RuntimeException("Repeated unthreaded results " +
-"not the same for " + model +
+"not the same for " + model +
 " run on file " + testFile);
 }
 }
 
 // test the first classifier in several simultaneous threads
-int numThreads = PropertiesUtils.getInt(props, "simThreads",
+int numThreads = PropertiesUtils.getInt(props, "simThreads",
 DEFAULT_SIM_THREADS);
 
 ArrayList<CRFThread> threads = new ArrayList<CRFThread>();
@@ -148,11 +149,11 @@ void runThreadedTest(Properties props) {
 System.out.println("Yay!");
 } else {
 throw new RuntimeException("Results not equal when running " +
-modelNames.get(0) + " under " +
+modelNames.get(0) + " under " +
 numThreads + " simultaneous threads");
 }
 }
-
+
 // test multiple classifiers (if given) in multiple threads each
 if (classifiers.size() > 1) {
 numThreads = PropertiesUtils.getInt(props, "multipleThreads",
@@ -162,11 +163,11 @@ void runThreadedTest(Properties props) {
 int classifierNum = i % classifiers.size();
 int repeatNum = i / classifiers.size();
 threads.add(new CRFThread(classifiers.get(classifierNum), testFile,
-("Simultaneous-" + classifierNum +
+("Simultaneous-" + classifierNum +
 "-" + repeatNum)));
 }
-for (int i = 0; i < threads.size(); ++i) {
-threads.get(i).start();
+for (CRFThread thread : threads) {
+thread.start();
 }
 for (int i = 0; i < threads.size(); ++i) {
 int classifierNum = i % classifiers.size();
@@ -182,16 +183,17 @@ void runThreadedTest(Properties props) {
 System.out.println("Yay!");
 } else {
 throw new RuntimeException("Results not equal when running " +
-modelNames.get(classifierNum) +
-" under " + numThreads +
+modelNames.get(classifierNum) +
+" under " + numThreads +
 " threads with " +
-classifiers.size() +
+classifiers.size() +
 " total classifiers");
 }
-}
+}
 }
 
 // if no exceptions thrown, great success
 System.out.println("Everything worked!");
 }
+
 }
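The invariant this test enforces is simple: the same classifier, run on the same file from several threads, must produce results identical to the single-threaded baseline. A stripped-down sketch of that check (hypothetical `assert_threads_agree`, with any pure function standing in for the CRF tagger):

```python
import threading

def assert_threads_agree(tag, text, num_threads=4):
    """Run `tag` concurrently on the same input and require every
    thread's result to equal the single-threaded baseline."""
    baseline = tag(text)
    results = [None] * num_threads

    def worker(i):
        results[i] = tag(text)

    threads = [threading.Thread(target=worker, args=(i,))
               for i in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    if any(r != baseline for r in results):
        raise RuntimeError("results not equal across threads")
```

The real test adds a second dimension, running several different classifiers in interleaved threads, but each comparison is still against that classifier's own unthreaded result.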

itest/src/edu/stanford/nlp/ie/crf/ThreadedCRFClassifierITest.java

Lines changed: 21 additions & 17 deletions

@@ -4,30 +4,33 @@
 
 import java.util.Properties;
 
-/**
+/**
 * Test that the CRFClassifier works when multiple classifiers are run
 * in multiple threads.
 *
 * @author John Bauer
 */
 public class ThreadedCRFClassifierITest extends TestCase {
+
 Properties props;
 
-private String german1 =
-"/u/nlp/data/ner/goodClassifiers/german.hgc_175m_600.crf.ser.gz";
-private String german2 =
+private static final String german1 =
+"edu/stanford/nlp/models/ner/german.conll.hgc_175m_600.crf.ser.gz";
+/** -- We're no longer supporting this one
+private String german2 =
 "/u/nlp/data/ner/goodClassifiers/german.dewac_175m_600.crf.ser.gz";
-private String germanTestFile = "/u/nlp/data/german/ner/deu.testa";
+*/
+private static final String germanTestFile = "/u/nlp/data/german/ner/2016/deu.utf8.testa";
 
-private String english1 =
+private static final String english1 =
 "/u/nlp/data/ner/goodClassifiers/english.all.3class.nodistsim.crf.ser.gz";
-private String english2 =
-"/u/nlp/data/ner/goodClassifiers/english.all.3class.distsim.crf.ser.gz";
-private String englishTestFile = "/u/nlp/data/ner/column_data/conll.testa";
+private static final String english2 =
+"/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz";
+private static final String englishTestFile = "/u/nlp/data/ner/column_data/conll.4class.testa";
+
+private static final String germanEncoding = "utf-8";
+private static final String englishEncoding = "utf-8";
 
-private String germanEncoding = "iso-8859-1";
-private String englishEncoding = "utf-8";
-
 @Override
 public void setUp() {
 props = new Properties();
@@ -47,12 +50,13 @@ public void testOneGermanCRF() {
 TestThreadedCRFClassifier.runTest(props);
 }
 
-public void testTwoGermanCRFs() {
-props.setProperty("crf1", german1);
-props.setProperty("crf2", german2);
-props.setProperty("testFile", germanTestFile);
-props.setProperty("inputEncoding", germanEncoding);
+public void testTwoEnglishCRFs() {
+props.setProperty("crf1", english1);
+props.setProperty("crf2", english2);
+props.setProperty("testFile", englishTestFile);
+props.setProperty("inputEncoding", englishEncoding);
 TestThreadedCRFClassifier.runTest(props);
 }
+
 }