Skip to content

Commit 81c1b71

Browse files
committed
Add a block to create a Hungarian constituency parser
1 parent a863b6c commit 81c1b71

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

scripts/srparser/Makefile

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,18 @@ endif
103103
SPANISH_TAGGER= edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger
104104
SPANISH_TLPP = edu.stanford.nlp.parser.lexparser.SpanishTreebankParserParams
105105

106+
# Hungarian treebank splits (train / dev / test).
# All three files live under spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/.
# When CONSTITUENCY_HOME is undefined or empty, that subtree is taken from the
# absolute /u/nlp/data/constituency-parser location; otherwise the same subtree
# is resolved underneath $(CONSTITUENCY_HOME).
ifndef CONSTITUENCY_HOME
107+
HUNGARIAN_TRAIN = /u/nlp/data/constituency-parser/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/train/train.Hungarian.gold.ptb
108+
HUNGARIAN_DEV = /u/nlp/data/constituency-parser/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/dev/dev.Hungarian.gold.ptb
109+
HUNGARIAN_TEST = /u/nlp/data/constituency-parser/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/test/test.Hungarian.gold.ptb
110+
else
111+
HUNGARIAN_TRAIN = $(CONSTITUENCY_HOME)/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/train/train.Hungarian.gold.ptb
112+
HUNGARIAN_DEV = $(CONSTITUENCY_HOME)/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/dev/dev.Hungarian.gold.ptb
113+
HUNGARIAN_TEST = $(CONSTITUENCY_HOME)/spmrl/SPMRL_SHARED_2014/HUNGARIAN_SPMRL/gold/ptb/test/test.Hungarian.gold.ptb
114+
endif
115+
# Alternative tagger model (hungarian-ud), currently disabled; the active
# assignment is the non-UD model below.
#HUNGARIAN_TAGGER = edu/stanford/nlp/models/pos-tagger/hungarian-ud.tagger
116+
# Tagger model handed to -taggerSerializedFile by both Hungarian parser rules.
HUNGARIAN_TAGGER = edu/stanford/nlp/models/pos-tagger/hungarian.tagger
117+
# Class name handed to -tlpp when the Hungarian parsers are trained.
HUNGARIAN_TLPP = edu.stanford.nlp.parser.lexparser.HungarianTreebankParserParams
106118

107119
# Training options shared by the model rules shown here: each training command
# passes $(DEFAULT_OPTIONS) right after -serializedPath, ahead of the
# language-specific tagger / tlpp flags.
DEFAULT_OPTIONS = -trainingThreads 8 -batchSize 24 -trainingIterations 200 -stalledIterationLimit 40 -featureFrequencyCutoff 5
108120

@@ -118,8 +130,8 @@ AUGMENT_LESS = -augmentSubsentences 0.1
118130

119131
# Flags for the evaluation run: the second java command of each model rule
# passes $(TEST_ARGS) before -testTreebank.
TEST_ARGS = -quietEvaluation -recordTransitionTypes
120132

121-
all-beam: wsjSR.beam.ser.gz englishSR.beam.ser.gz frenchSR.beam.ser.gz chineseSR.beam.ser.gz germanSR.beam.ser.gz arabicSR.beam.ser.gz spanishSR.beam.ser.gz
122-
all-nobeam: wsjSR.ser.gz englishSR.ser.gz frenchSR.ser.gz chineseSR.ser.gz germanSR.ser.gz arabicSR.ser.gz spanishSR.ser.gz
133+
# Aggregate targets. None of them has a recipe; each only groups model files.
#   all-beam   -> every *.beam.ser.gz model, Hungarian included
#   all-nobeam -> every non-beam *.ser.gz model, Hungarian included
#   all        -> both groups
all-beam: wsjSR.beam.ser.gz englishSR.beam.ser.gz frenchSR.beam.ser.gz chineseSR.beam.ser.gz germanSR.beam.ser.gz arabicSR.beam.ser.gz spanishSR.beam.ser.gz hungarianSR.beam.ser.gz
134+
all-nobeam: wsjSR.ser.gz englishSR.ser.gz frenchSR.ser.gz chineseSR.ser.gz germanSR.ser.gz arabicSR.ser.gz spanishSR.ser.gz hungarianSR.ser.gz
123135
all: all-beam all-nobeam
124136
# None of the three aggregates names a real file, so all of them are declared
# phony. Otherwise a stray file called all-beam or all-nobeam in this directory
# would make the corresponding aggregate look up to date and skip the models.
.PHONY: all all-beam all-nobeam
125137

@@ -208,3 +220,16 @@ spanishSR.beam.ser.gz:
208220
@echo Will test on $(SPANISH_TEST)
209221
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(SPANISH_TRAIN) -devTreebank $(SPANISH_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(SPANISH_TAGGER) -tlpp $(SPANISH_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
210222
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser $(TEST_ARGS) -testTreebank $(SPANISH_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(SPANISH_TAGGER) >> $@.out 2>&1
223+
224+
# Non-beam Hungarian shift-reduce model.
# Step 1 (java -mx10g) trains on HUNGARIAN_TRAIN with HUNGARIAN_DEV as the dev
#   treebank, adds $(TRAIN_ORACLE_10) (defined outside the lines shown here),
#   and writes the model to $@. Its stdout and stderr are redirected to $@.out,
#   overwriting any previous log.
# Step 2 (java -mx5g) loads the model from $@ and runs it on HUNGARIAN_TEST,
#   appending stdout and stderr to the same $@.out.
# No prerequisites are declared, so make only runs this rule when the target
# file does not exist yet.
hungarianSR.ser.gz:
225+
@echo Training $@
226+
@echo Will test on $(HUNGARIAN_TEST)
227+
java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(HUNGARIAN_TRAIN) -devTreebank $(HUNGARIAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(HUNGARIAN_TAGGER) -tlpp $(HUNGARIAN_TLPP) $(TRAIN_ORACLE_10) > $@.out 2>&1
228+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser $(TEST_ARGS) -testTreebank $(HUNGARIAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(HUNGARIAN_TAGGER) >> $@.out 2>&1
229+
230+
# Beam variant of the Hungarian shift-reduce model.
# Same two-step shape as hungarianSR.ser.gz, with two differences in the
# training command: it runs java with -mx50g and passes $(TRAIN_BEAM) (defined
# outside the lines shown here) instead of $(TRAIN_ORACLE_10).
# Step 1 trains and writes the model to $@, with stdout and stderr redirected
#   to $@.out (overwriting it).
# Step 2 (java -mx5g) loads $@ and runs it on HUNGARIAN_TEST, appending to the
#   same log.
# No prerequisites are declared, so make only runs this rule when the target
# file does not exist yet.
hungarianSR.beam.ser.gz:
231+
@echo Training $@
232+
@echo Will test on $(HUNGARIAN_TEST)
233+
java -mx50g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(HUNGARIAN_TRAIN) -devTreebank $(HUNGARIAN_DEV) -serializedPath $@ $(DEFAULT_OPTIONS) -preTag -taggerSerializedFile $(HUNGARIAN_TAGGER) -tlpp $(HUNGARIAN_TLPP) $(TRAIN_BEAM) > $@.out 2>&1
234+
java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser $(TEST_ARGS) -testTreebank $(HUNGARIAN_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(HUNGARIAN_TAGGER) >> $@.out 2>&1
235+

0 commit comments

Comments
 (0)