Skip to content

Commit 2420fe4

Browse files
committed
Train WSJ-only models as well
1 parent 4c90eea commit 2420fe4

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

scripts/srparser/Makefile

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,16 @@
1 1

2-
WSJ_TRAIN = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199
3-
WSJ_DEV = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2200-2219
4-
WSJ_TEST = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2300-2399
5-
WSJ_TAGGER = /u/nlp/data/pos-tagger/distrib/wsj-0-18-bidirectional-nodistsim.tagger
2+
ifndef CONSTITUENCY_HOME
3+
WSJ_TRAIN = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199
4+
WSJ_DEV = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2200-2219
5+
WSJ_TEST = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2300-2399
6+
else
7+
WSJ_TRAIN = $(CONSTITUENCY_HOME)/english/train/wsj-train.mrg
8+
WSJ_DEV = $(CONSTITUENCY_HOME)/english/dev/wsj-dev.mrg
9+
WSJ_TEST = $(CONSTITUENCY_HOME)/english/test/wsj-test.mrg
10+
endif
11+
#WSJ_TAGGER = /u/nlp/data/pos-tagger/distrib/wsj-0-18-bidirectional-nodistsim.tagger
12+
# TODO: technically we want a wsj-only tagger here
13+
WSJ_TAGGER = edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger
6 14
WSJ_TLPP = edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams
7 15

8 16

@@ -45,7 +53,6 @@ else
45 53
CHINESE_DEV = $(CONSTITUENCY_HOME)/chinese/ctb7/dev_small.mrg
46 54
CHINESE_TEST = $(CONSTITUENCY_HOME)/chinese/ctb7/test.mrg
47 55
endif
48-
49 56
#CHINESE_TAGGER = /u/nlp/data/pos-tagger/distrib/chinese-nodistsim.tagger
50 57
CHINESE_TAGGER = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger
51 58
CHINESE_TLPP = edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams
@@ -106,7 +113,7 @@ all: wsjSR.ser.gz wsjSR.beam.ser.gz englishSR.ser.gz englishSR.beam.ser.gz frenc
106 113
wsjSR.ser.gz:
107 114
@echo Training $@
108 115
@echo Will test on $(WSJ_TEST)
109-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 100 -stalledIterationLimit 25 -tlpp $(WSJ_TLPP) > $@.out 2>&1
116+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(WSJ_TLPP) > $@.out 2>&1
110 117
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
111 118

112 119
wsjSR.beam.ser.gz:

0 commit comments

Comments
 (0)