Skip to content

Commit c60c7f6

Browse files
committed
Unify & simplify options
Use REORDER_ORACLE for English, since it seems to be better
1 parent 3d76c34 commit c60c7f6

File tree

1 file changed

+32
-24
lines changed

scripts/srparser/Makefile

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -104,92 +104,100 @@ SPANISH_TAGGER= edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger
104104
SPANISH_TLPP = edu.stanford.nlp.parser.lexparser.SpanishTreebankParserParams
105105

106106

107-
TRAIN_BEAM = -trainingMethod BEAM -trainBeamSize 8
108-
CUTOFF = -featureFrequencyCutoff 5
107+
DEFAULT_OPTIONS = -trainingThreads 8 -batchSize 24 -trainingIterations 200 -stalledIterationLimit 40 -featureFrequencyCutoff 5
108+
109+
TRAIN_BEAM = -featureFrequencyCutoff 10 -trainingMethod BEAM -trainBeamSize 8
110+
TRAIN_ORACLE_BEAM = -l1Reg 0.10 -featureFrequencyCutoff 10 -trainingMethod REORDER_BEAM -trainBeamSize 8
111+
112+
TRAIN_ORACLE_10 = -l1Reg 0.10 -featureFrequencyCutoff 10 -trainingMethod REORDER_ORACLE
113+
# The higher -l1Reg and -featureFrequencyCutoff values below (compared with TRAIN_ORACLE_10) are more suitable for larger datasets, such as the English datasets
114+
TRAIN_ORACLE_25 = -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE
115+
116+
SHARDS_5 = -retrainShards 5
109117

110118
all: wsjSR.ser.gz wsjSR.beam.ser.gz englishSR.ser.gz englishSR.beam.ser.gz frenchSR.ser.gz frenchSR.beam.ser.gz chineseSR.ser.gz chineseSR.beam.ser.gz germanSR.ser.gz germanSR.beam.ser.gz arabicSR.ser.gz arabicSR.beam.ser.gz spanishSR.ser.gz spanishSR.beam.ser.gz
111119
.PHONY: all
112120

113121
wsjSR.ser.gz:
114122
@echo Training $@
115123
@echo Will test on $(WSJ_TEST)
116-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(WSJ_TLPP) > $@.out 2>&1
117-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
124+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(WSJ_TAGGER) -tlpp $(WSJ_TLPP) $(TRAIN_ORACLE_25) > $@.out 2>&1
125+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
118126

119127
wsjSR.beam.ser.gz:
120128
@echo Training $@
121129
@echo Will test on $(WSJ_TEST)
122-
java -mx20g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 100 -stalledIterationLimit 25 -tlpp $(WSJ_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
123-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
130+
java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(WSJ_TAGGER) -tlpp $(WSJ_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
131+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
124132

125133
englishSR.ser.gz:
126134
@echo Training $@
127135
@echo Will test on $(ENGLISH_TEST)
128-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(ENGLISH_TLPP) > $@.out 2>&1
136+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -tlpp $(ENGLISH_TLPP) $(TRAIN_ORACLE_25) > $@.out 2>&1
129137
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ENGLISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ENGLISH_TAGGER) >> $@.out 2>&1
130138

131139
englishSR.beam.ser.gz:
132140
@echo Training $@
133141
@echo Will test on $(ENGLISH_TEST)
134-
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ENGLISH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
142+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -tlpp $(ENGLISH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
135143
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ENGLISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ENGLISH_TAGGER) >> $@.out 2>&1
136144

137145
frenchSR.ser.gz:
138146
@echo Training $@
139147
@echo Will test on $(FRENCH_TEST)
140-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(FRENCH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(FRENCH_TLPP) > $@.out 2>&1
141-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
148+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(FRENCH_TAGGER) -tlpp $(FRENCH_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
149+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
142150

143151
frenchSR.beam.ser.gz:
144152
@echo Training $@
145153
@echo Will test on $(FRENCH_TEST)
146-
java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(FRENCH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(FRENCH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
147-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
154+
java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(FRENCH_TAGGER) -tlpp $(FRENCH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
155+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
148156

149157
chineseSR.ser.gz:
150158
@echo Training $@
151159
@echo Will test on $(CHINESE_TEST)
152-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(CHINESE_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(CHINESE_TLPP) > $@.out 2>&1
160+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(CHINESE_TAGGER) -tlpp $(CHINESE_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
153161
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(CHINESE_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(CHINESE_TAGGER) >> $@.out 2>&1
154162

155163
chineseSR.beam.ser.gz:
156164
@echo Training $@
157165
@echo Will test on $(CHINESE_TEST)
158-
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(CHINESE_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(CHINESE_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
166+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(CHINESE_TAGGER) -tlpp $(CHINESE_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
159167
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(CHINESE_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(CHINESE_TAGGER) >> $@.out 2>&1
160168

161169
germanSR.ser.gz:
162170
@echo Training $@
163171
@echo Will test on $(GERMAN_TEST)
164-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(GERMAN_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(GERMAN_TLPP) > $@.out 2>&1
165-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
172+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(GERMAN_TAGGER) -tlpp $(GERMAN_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
173+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
166174

167175
germanSR.beam.ser.gz:
168176
@echo Training $@
169177
@echo Will test on $(GERMAN_TEST)
170-
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(GERMAN_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(GERMAN_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
171-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
178+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(GERMAN_TAGGER) -tlpp $(GERMAN_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
179+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
172180

173181
arabicSR.ser.gz:
174182
@echo Training $@
175183
@echo Will test on $(ARABIC_TEST)
176-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(ARABIC_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ARABIC_TLPP) > $@.out 2>&1
177-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
184+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ARABIC_TAGGER) -tlpp $(ARABIC_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
185+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
178186

179187
arabicSR.beam.ser.gz:
180188
@echo Training $@
181189
@echo Will test on $(ARABIC_TEST)
182-
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(ARABIC_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ARABIC_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
183-
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
190+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ARABIC_TAGGER) -tlpp $(ARABIC_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
191+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
184192

185193
spanishSR.ser.gz:
186194
@echo Training $@
187195
@echo Will test on $(SPANISH_TEST)
188-
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(SPANISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(SPANISH_TLPP) > $@.out 2>&1
196+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(SPANISH_TAGGER) -tlpp $(SPANISH_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
189197
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(SPANISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(SPANISH_TAGGER) >> $@.out 2>&1
190198

191199
spanishSR.beam.ser.gz:
192200
@echo Training $@
193201
@echo Will test on $(SPANISH_TEST)
194-
java -mx20g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(SPANISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(SPANISH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
202+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(SPANISH_TAGGER) -tlpp $(SPANISH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
195203
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(SPANISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(SPANISH_TAGGER) >> $@.out 2>&1

0 commit comments

Comments
 (0)