File tree Expand file tree Collapse file tree 1 file changed +2
-0
lines changed
scripts/chinese-segmenter Expand file tree Collapse file tree 1 file changed +2
-0
lines changed Original file line number Diff line number Diff line change @@ -88,6 +88,8 @@ ctb9.train-small.chris6.ser.gz: dict-chris6.ser.gz
88
88
time java -mx5g edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict $(SIGHAN2007_CORPORA_DICT ) -loadClassifier $@ -testFile $(CTB9_TEST ) -inputEncoding UTF-8 -sighanPostProcessing true -serDictionary $+ -keepAllWhitespaces false >> $@ .log 2>> $@ .err
89
89
90
90
# train on train GSD, with all external lexicons, without training lexicon
91
# Stanza provides a script that converts the UD GSD treebank into a segmenter training file:
#   stanza/utils/datasets/corenlp_segmenter_dataset.py
91
93
# Build the GSD segmenter model, then run it over the GSD test file.
#   $@ is the model file being built; $+ is the prerequisite list (the
#   serialized dictionary), passed to -serDictionary in both steps.
# The training step overwrites $@.log / $@.err; the test step appends to the
# same two files, so both runs end up in one log per model.
gsd.ser.gz: dict-chris6.ser.gz
	time java -mx60g edu.stanford.nlp.ie.crf.CRFClassifier -prop ctb9-chris6.prop -serDictionary $+ -sighanCorporaDict $(SIGHAN2007_CORPORA_DICT) -trainFile $(GSD_TRAIN) -serializeTo $@ > $@.log 2> $@.err
	time java -mx5g edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict $(SIGHAN2007_CORPORA_DICT) -loadClassifier $@ -testFile $(GSD_TEST) -inputEncoding UTF-8 -sighanPostProcessing true -serDictionary $+ -keepAllWhitespaces false >> $@.log 2>> $@.err
You can’t perform that action at this time.
0 commit comments