Skip to content

Commit 732dfd0

Browse files
Jonathan BerantStanford NLP
authored andcommitted
merging master into bioprocess
1 parent 1d616ad commit 732dfd0

File tree

659 files changed

+106234
-25170
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

659 files changed

+106234
-25170
lines changed

JavaNLP-core.eml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<component LANGUAGE_LEVEL="JDK_1_6" inheritJdk="true">
2+
<component inheritJdk="true">
33
<exclude-output/>
44
<contentEntry url="file://$MODULE_DIR$">
55
<testFolder url="file://$MODULE_DIR$/test/src"/>

build.xml

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
<property name="compile.debug" value="true"/>
1616
<property name="compile.deprecation" value="false"/>
1717
<property name="compile.optimize" value="true"/>
18-
<property name="compile.source" value="1.7" />
19-
<property name="compile.target" value="1.7" />
18+
<property name="compile.source" value="1.8" />
19+
<property name="compile.target" value="1.8" />
2020
<property name="compile.encoding" value="utf-8" />
2121

2222
<target name="classpath" description="Sets the classpath">
@@ -305,7 +305,11 @@
305305
<buildjsp webapp.path="edu/stanford/nlp/parser/webapp"
306306
webapp.war="parser.war"
307307
webapp.jar="javanlp-core.jar">
308-
<webapp.lib/> <!-- don't need anything! -->
308+
<webapp.lib>
309+
<lib dir="/u/nlp/data/StanfordCoreNLPModels">
310+
<include name="stanford-spanish-corenlp-models-current.jar"/>
311+
</lib>
312+
</webapp.lib>
309313
<webapp.data>
310314
<zipfileset prefix="WEB-INF/data"
311315
file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>
@@ -317,6 +321,8 @@
317321
file="/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz"/>
318322
<zipfileset prefix="WEB-INF/data/chinesesegmenter"
319323
dir="/u/nlp/data/gale/segtool/stanford-seg/releasedata"/>
324+
<zipfileset prefix="WEB-INF/data"
325+
file="/u/nlp/data/lexparser/spanishPCFG.ser.gz"/>
320326
<zipfileset file="${data.path}/webapps/favicon.ico"/>
321327
</webapp.data>
322328
</buildjsp>
@@ -364,17 +370,15 @@
364370
</lib>
365371
<lib dir="${basedir}/lib">
366372
<include name="commons-lang3-3.1.jar"/>
367-
<include name="xom-1.2.8.jar"/>
368-
<include name="xalan.jar"/>
369-
<include name="serializer.jar"/>
370-
<include name="xercesImpl.jar"/>
373+
<include name="xom-1.2.10.jar"/>
371374
<include name="xml-apis.jar"/>
372375
<include name="joda-time.jar"/>
373376
<include name="jollyday-0.4.7.jar"/>
374377
</lib>
375378
<!-- note for John: c:/Users/John Bauer/nlp/stanford-releases -->
376379
<lib dir="/u/nlp/data/StanfordCoreNLPModels">
377380
<include name="stanford-corenlp-models-current.jar"/>
381+
<include name="stanford-chinese-corenlp-models-current.jar"/>
378382
</lib>
379383
<classes dir="${source.path}/edu/stanford/nlp/pipeline">
380384
<include name="StanfordCoreNLP.properties"/>

data/edu/stanford/nlp/patterns/surface/example.properties

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#### NOTE: for all flags and their description, see the javadoc. Important parameters (in our experience) that you should tune for your dataset are marked with ***
1+
#### NOTE: for all flags and their description, see the javadoc. Important parameters (in our experience) that you should tune for your dataset are marked with ***
22

33
#name for the saved files for the output of the system (useful for comparing results of different experiments with different variables etc.)
44
identifier=useNERRestriction
@@ -30,7 +30,7 @@ file=${DIR}/presidents.txt
3030
#fileFormat=ser
3131
#file= ${DIR}/presidents_sents.ser
3232

33-
#We are learning names of presidential candidates, places, and other names
33+
#We are learning names of presidential candidates, places, and other names
3434
seedWordsFiles=NAME,${DIR}/names.txt;PLACE,${DIR}/places.txt;OTHER,${DIR}/otherpeople.txt
3535

3636
#You can evaluate two ways; both presented here.
@@ -74,13 +74,13 @@ usePOS4Pattern = true
7474
#Ignore words {a, an, the} while matching the patterns to text (advisable true)
7575
useFillerWordsInPat = false
7676

77-
#***Specific allowed tags' initials for the target phrase for each label while creating the patterns (if not specified, every tag is acceptable to create a pattern). Tag initials can be written as N or NN or J or N,J etc. E.g.: NAME,N,J;PLACE,N. If
77+
#***Specific allowed tags' initials for the target phrase for each label while creating the patterns (if not specified, every tag is acceptable to create a pattern). Tag initials can be written as N or NN or J or N,J etc. E.g.: NAME,N,J;PLACE,N. If
7878
targetAllowedTagsInitialsStr=NAME,N;OTHER,N
7979

8080
#You can save all possible patterns for all tokens in the flag allPatternsFile so you wouldn't need to calculate them every time.
8181
computeAllPatterns = true
8282

83-
#Save or read (if computeAllPatterns is false) from here
83+
#Save or read (if computeAllPatterns is false) from here
8484
allPatternsFile= ${DIR}/${identifier}_allpatterns.ser
8585

8686
#***maximum Num of allowed words in the target phrase
@@ -127,7 +127,7 @@ usePatternResultAsLabel=true
127127
#remove common stop words from phrases to get clean phrases (for example, "disease" instead of "some disease")
128128
removeStopWordsFromSelectedPhrases = true
129129

130-
#Do not learn phrases that have any stop word
130+
#Do not learn phrases that have any stop word
131131
removePhrasesWithStopWords = false
132132

133133

@@ -182,10 +182,10 @@ useAvgInsteadofMinPhraseScoring=true
182182
#only if wordClassClusterFile is provided
183183
usePhraseEvalWordClass=false
184184

185-
#tf-idf scoring w.r.t to the domain
185+
#tf-idf scoring w.r.t. the domain
186186
usePhraseEvalDomainNgram=false
187187

188-
#use pattern weights in scoring phrases extracted by them, if usePhraseEvalPatWtByFreq is true. otherwise it's just a tfidf like score
188+
#use pattern weights in scoring phrases extracted by them, if usePhraseEvalPatWtByFreq is true. otherwise it's just a tfidf like score
189189
usePatternWeights=true
190190

191191
#basically patwt/log(freq), patwt = 1 if usePatternWeights is false
@@ -231,8 +231,8 @@ debug = 3
231231
#stop words file
232232
stopWordsPatternFiles=${DIR}/stopwords.txt
233233

234-
englishWordsFiles=${stopWordsPatternFiles}
235-
commonWordsPatternFiles= ${stopWordsPatternFiles}
234+
englishWordsFiles=${stopWordsPatternFiles}
235+
commonWordsPatternFiles= ${stopWordsPatternFiles}
236236
#You can give some common words like this
237237
#commonWordsPatternFiles =${DIR}/lists/commonEngWords1k.txt
238238

doc/classify/README.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Stanford Classifier v3.4 - 2014-06-16
1+
Stanford Classifier v3.4.1 - 2014-08-27
22
-------------------------------------------------
33

44
Copyright (c) 2003-2012 The Board of Trustees of
@@ -76,6 +76,8 @@ LICENSE
7676
CHANGES
7777
-------------------------
7878

79+
2014-08-27 3.4.1 Update for compatibility
80+
7981
2014-06-16 3.4 Update for compatibility
8082

8183
2014-01-04 3.3.1 Bugfix release

doc/corenlp/README.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ LICENSE
4242
CHANGES
4343
---------------------------------
4444

45+
2014-08-27 3.4.1 Add Spanish models
46+
4547
2014-06-16 3.4 Add shift reduce parser
4648

4749
2014-01-04 3.3.1 Bugfix release

doc/corenlp/pom-full.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-corenlp</artifactId>
5-
<version>3.4</version>
5+
<version>3.4.1</version>
66
<packaging>jar</packaging>
77
<name>Stanford CoreNLP</name>
88
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>http://nlp.stanford.edu/software/stanford-corenlp-2014-06-16.zip</url>
18-
<connection>http://nlp.stanford.edu/software/stanford-corenlp-2014-06-16.zip</connection>
17+
<url>http://nlp.stanford.edu/software/stanford-corenlp-2014-08-27.zip</url>
18+
<connection>http://nlp.stanford.edu/software/stanford-corenlp-2014-08-27.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>

doc/lexparser/README.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Stanford Lexicalized Parser v3.4 - 2014-06-16
1+
Stanford Lexicalized Parser v3.4.1 - 2014-08-27
22
-----------------------------------------------
33

44
Copyright (c) 2002-2012 The Board of Trustees of The Leland Stanford Junior
@@ -206,6 +206,8 @@ LICENSE
206206
CHANGES
207207
---------------------------------
208208

209+
2014-08-27 3.4.1 Add Spanish models
210+
209211
2014-06-16 3.4 Shift-reduce parser
210212

211213
2014-01-04 3.3.1 Bugfix release, dependency improvements

doc/lexparser/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-parser</artifactId>
5-
<version>3.4</version>
5+
<version>3.4.1</version>
66
<packaging>jar</packaging>
77
<name>Stanford Parser</name>
88
<description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>http://nlp.stanford.edu/software/stanford-parser-2014-06-16.zip</url>
18-
<connection>http://nlp.stanford.edu/software/stanford-parser-2014-06-16.zip</connection>
17+
<url>http://nlp.stanford.edu/software/stanford-parser-2014-08-27.zip</url>
18+
<connection>http://nlp.stanford.edu/software/stanford-parser-2014-08-27.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>

doc/ner/README.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Stanford NER - v3.4 - 2014-06-16
1+
Stanford NER - v3.4.1 - 2014-08-27
22
----------------------------------------------
33

44
This package provides a high-performance machine learning based named
@@ -165,6 +165,8 @@ PERSON ORGANIZATION LOCATION
165165
CHANGES
166166
--------------------
167167

168+
2014-08-27 3.4.1 Add Spanish models
169+
168170
2014-06-16 3.4 Fix serialization bug
169171

170172
2014-01-04 3.3.1 Bugfix release

0 commit comments

Comments
 (0)