@@ -104,92 +104,100 @@ SPANISH_TAGGER= edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger
104
104
SPANISH_TLPP = edu.stanford.nlp.parser.lexparser.SpanishTreebankParserParams
105
105
106
106
107
- TRAIN_BEAM = -trainingMethod BEAM -trainBeamSize 8
108
- CUTOFF = -featureFrequencyCutoff 5
107
+ DEFAULT_OPTIONS = -trainingThreads 8 -batchSize 24 -trainingIterations 200 -stalledIterationLimit 40 -featureFrequencyCutoff 5
108
+
109
+ TRAIN_BEAM = -featureFrequencyCutoff 10 -trainingMethod BEAM -trainBeamSize 8
110
+ TRAIN_ORACLE_BEAM = -l1Reg 0.10 -featureFrequencyCutoff 10 -trainingMethod REORDER_BEAM -trainBeamSize 8
111
+
112
+ TRAIN_ORACLE_10 = -l1Reg 0.10 -featureFrequencyCutoff 10 -trainingMethod REORDER_ORACLE
113
+ # This is more suitable for larger datasets, such as the English datasets
114
+ TRAIN_ORACLE_25 = -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE
115
+
116
+ SHARDS_5 = -retrainShards 5
109
117
110
118
all : wsjSR.ser.gz wsjSR.beam.ser.gz englishSR.ser.gz englishSR.beam.ser.gz frenchSR.ser.gz frenchSR.beam.ser.gz chineseSR.ser.gz chineseSR.beam.ser.gz germanSR.ser.gz germanSR.beam.ser.gz arabicSR.ser.gz arabicSR.beam.ser.gz spanishSR.ser.gz spanishSR.beam.ser.gz
111
119
.PHONY : all
112
120
113
121
wsjSR.ser.gz :
114
122
@echo Training $@
115
123
@echo Will test on $(WSJ_TEST)
116
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(WSJ_TLPP) > $@.out 2>&1
117
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
124
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(WSJ_TAGGER) -tlpp $(WSJ_TLPP) $(TRAIN_ORACLE_25) > $@.out 2>&1
125
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
118
126
119
127
wsjSR.beam.ser.gz :
120
128
@echo Training $@
121
129
@echo Will test on $(WSJ_TEST)
122
- java -mx20g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 100 -stalledIterationLimit 25 -tlpp $(WSJ_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
123
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
130
+ java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(WSJ_TAGGER) -tlpp $(WSJ_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
131
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
124
132
125
133
englishSR.ser.gz :
126
134
@echo Training $@
127
135
@echo Will test on $(ENGLISH_TEST)
128
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(ENGLISH_TLPP) > $@.out 2>&1
136
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -tlpp $(ENGLISH_TLPP) $(TRAIN_ORACLE_25) > $@.out 2>&1
129
137
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ENGLISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ENGLISH_TAGGER) >> $@.out 2>&1
130
138
131
139
englishSR.beam.ser.gz :
132
140
@echo Training $@
133
141
@echo Will test on $(ENGLISH_TEST)
134
- java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ENGLISH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
142
+ java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ENGLISH_TRAIN) -devTreebank $(ENGLISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ENGLISH_TAGGER) -tlpp $(ENGLISH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
135
143
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ENGLISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ENGLISH_TAGGER) >> $@.out 2>&1
136
144
137
145
frenchSR.ser.gz :
138
146
@echo Training $@
139
147
@echo Will test on $(FRENCH_TEST)
140
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(FRENCH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(FRENCH_TLPP) > $@.out 2>&1
141
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
148
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(FRENCH_TAGGER) -tlpp $(FRENCH_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
149
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
142
150
143
151
frenchSR.beam.ser.gz :
144
152
@echo Training $@
145
153
@echo Will test on $(FRENCH_TEST)
146
- java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(FRENCH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(FRENCH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
147
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
154
+ java -mx40g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(FRENCH_TRAIN) -devTreebank $(FRENCH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(FRENCH_TAGGER) -tlpp $(FRENCH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
155
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(FRENCH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(FRENCH_TAGGER) >> $@.out 2>&1
148
156
149
157
chineseSR.ser.gz :
150
158
@echo Training $@
151
159
@echo Will test on $(CHINESE_TEST)
152
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(CHINESE_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(CHINESE_TLPP) > $@.out 2>&1
160
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(CHINESE_TAGGER) -tlpp $(CHINESE_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
153
161
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(CHINESE_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(CHINESE_TAGGER) >> $@.out 2>&1
154
162
155
163
chineseSR.beam.ser.gz :
156
164
@echo Training $@
157
165
@echo Will test on $(CHINESE_TEST)
158
- java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(CHINESE_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(CHINESE_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
166
+ java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(CHINESE_TRAIN) -devTreebank $(CHINESE_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(CHINESE_TAGGER) -tlpp $(CHINESE_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
159
167
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(CHINESE_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(CHINESE_TAGGER) >> $@.out 2>&1
160
168
161
169
germanSR.ser.gz :
162
170
@echo Training $@
163
171
@echo Will test on $(GERMAN_TEST)
164
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(GERMAN_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(GERMAN_TLPP) > $@.out 2>&1
165
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
172
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(GERMAN_TAGGER) -tlpp $(GERMAN_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
173
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
166
174
167
175
germanSR.beam.ser.gz :
168
176
@echo Training $@
169
177
@echo Will test on $(GERMAN_TEST)
170
- java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(GERMAN_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(GERMAN_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
171
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
178
+ java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(GERMAN_TRAIN) -devTreebank $(GERMAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(GERMAN_TAGGER) -tlpp $(GERMAN_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
179
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(GERMAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(GERMAN_TAGGER) >> $@.out 2>&1
172
180
173
181
arabicSR.ser.gz :
174
182
@echo Training $@
175
183
@echo Will test on $(ARABIC_TEST)
176
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(ARABIC_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ARABIC_TLPP) > $@.out 2>&1
177
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
184
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ARABIC_TAGGER) -tlpp $(ARABIC_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
185
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
178
186
179
187
arabicSR.beam.ser.gz :
180
188
@echo Training $@
181
189
@echo Will test on $(ARABIC_TEST)
182
- java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(ARABIC_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(ARABIC_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
183
- java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
190
+ java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(ARABIC_TRAIN) -devTreebank $(ARABIC_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(ARABIC_TAGGER) -tlpp $(ARABIC_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
191
+ java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(ARABIC_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(ARABIC_TAGGER) >> $@.out 2>&1
184
192
185
193
spanishSR.ser.gz :
186
194
@echo Training $@
187
195
@echo Will test on $(SPANISH_TEST)
188
- java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(SPANISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(SPANISH_TLPP) > $@.out 2>&1
196
+ java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(SPANISH_TAGGER) -tlpp $(SPANISH_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
189
197
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(SPANISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(SPANISH_TAGGER) >> $@.out 2>&1
190
198
191
199
spanishSR.beam.ser.gz :
192
200
@echo Training $@
193
201
@echo Will test on $(SPANISH_TEST)
194
- java -mx20g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ -trainingThreads 8 -batchSize 24 -preTag -taggerSerializedFile $(SPANISH_TAGGER) -trainingIterations 200 -stalledIterationLimit 25 -tlpp $(SPANISH_TLPP) $(TRAIN_BEAM) $(CUTOFF) > $@.out 2>&1
202
+ java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(SPANISH_TAGGER) -tlpp $(SPANISH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
195
203
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(SPANISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(SPANISH_TAGGER) >> $@.out 2>&1
0 commit comments