Lower featureDiffThresh from 0.015 to 0.005 for the ctb9 train-small segmenter; this value builds a model of comparable size to the old ctb7 models

AngledLuffa · Stanford NLP · commit 6f5e1ce0d8f8 · 2020-07-17T10:57:55.000-07:00
diff --git a/scripts/chinese-segmenter/Makefile b/scripts/chinese-segmenter/Makefile
@@ -65,7 +65,7 @@ ctb9.train.chris6.ser.gz: dict-chris6.ser.gz
 
 # train on train CTB9 + extras, with all external lexicons, without training lexicon, use the threshold to make it smaller
 ctb9.train-small.chris6.ser.gz: dict-chris6.ser.gz
-	time java -mx60g edu.stanford.nlp.ie.crf.CRFClassifier -prop ctb9-chris6.prop -serDictionary $+ -sighanCorporaDict /u/nlp/data/chinese-segmenter/gale2007/ctb6/ -featureDiffThresh 0.015 -trainFile $(CTB9_ALL) -serializeTo $@ > $@.log 2> $@.err
+	time java -mx60g edu.stanford.nlp.ie.crf.CRFClassifier -prop ctb9-chris6.prop -serDictionary $+ -sighanCorporaDict /u/nlp/data/chinese-segmenter/gale2007/ctb6/ -featureDiffThresh 0.005 -trainFile $(CTB9_ALL) -serializeTo $@ > $@.log 2> $@.err
 
 # train on all CTB7, with all external lexicons, without training lexicon
 bolt.chris6.ser.gz: dict-chris6.ser.gz
diff --git a/scripts/chinese-segmenter/ctb9-chris6.prop b/scripts/chinese-segmenter/ctb9-chris6.prop
@@ -84,4 +84,4 @@ sighanPostProcessing = true
 
 # This would make the resulting model smaller
 # It can also be set as a command line arg, which is what the Makefile does
-# featureDiffThresh=0.015
+# featureDiffThresh=0.005