|
1 | 1 |
|
2 |
| -WSJ_TRAIN = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 |
3 |
| -WSJ_DEV = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2200-2219 |
4 |
| -WSJ_TEST = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2300-2399 |
5 |
| -WSJ_TAGGER = /u/nlp/data/pos-tagger/distrib/wsj-0-18-bidirectional-nodistsim.tagger |
| 2 | +ifndef CONSTITUENCY_HOME |
| 3 | + WSJ_TRAIN = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 |
| 4 | + WSJ_DEV = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2200-2219 |
| 5 | + WSJ_TEST = /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 2300-2399 |
| 6 | +else |
| 7 | + WSJ_TRAIN = $(CONSTITUENCY_HOME)/english/train/wsj-train.mrg |
| 8 | + WSJ_DEV = $(CONSTITUENCY_HOME)/english/dev/wsj-dev.mrg |
| 9 | + WSJ_TEST = $(CONSTITUENCY_HOME)/english/test/wsj-test.mrg |
| 10 | +endif |
| 11 | +#WSJ_TAGGER = /u/nlp/data/pos-tagger/distrib/wsj-0-18-bidirectional-nodistsim.tagger |
| 12 | +# TODO: technically we want a wsj-only tagger here |
| 13 | +WSJ_TAGGER = edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger |
6 | 14 | WSJ_TLPP = edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams
|
7 | 15 |
|
8 | 16 |
|
|
45 | 53 | CHINESE_DEV = $(CONSTITUENCY_HOME)/chinese/ctb7/dev_small.mrg
|
46 | 54 | CHINESE_TEST = $(CONSTITUENCY_HOME)/chinese/ctb7/test.mrg
|
47 | 55 | endif
|
48 |
| - |
49 | 56 | #CHINESE_TAGGER = /u/nlp/data/pos-tagger/distrib/chinese-nodistsim.tagger
|
50 | 57 | CHINESE_TAGGER = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger
|
51 | 58 | CHINESE_TLPP = edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams
|
@@ -106,7 +113,7 @@ all: wsjSR.ser.gz wsjSR.beam.ser.gz englishSR.ser.gz englishSR.beam.ser.gz frenc
|
106 | 113 | wsjSR.ser.gz:
|
107 | 114 | @echo Training $@
|
108 | 115 | @echo Will test on $(WSJ_TEST)
|
109 |
| - java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 100 -stalledIterationLimit 25 -tlpp $(WSJ_TLPP) > $@.out 2>&1 |
| 116 | + java -mx10g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -trainTreebank $(WSJ_TRAIN) -devTreebank $(WSJ_DEV) -serializedPath $@ -trainingThreads 4 -batchSize 24 -preTag -taggerSerializedFile $(WSJ_TAGGER) -trainingIterations 200 -stalledIterationLimit 40 -l1Reg 0.25 -featureFrequencyCutoff 25 -trainingMethod REORDER_ORACLE -tlpp $(WSJ_TLPP) > $@.out 2>&1 |
110 | 117 | java -mx5g edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser -testTreebank $(WSJ_TEST) -serializedPath $@ -preTag -taggerSerializedFile $(WSJ_TAGGER) >> $@.out 2>&1
|
111 | 118 |
|
112 | 119 | wsjSR.beam.ser.gz:
|
|
0 commit comments