From e49353b5e929452c3029cebf15eb94687468a165 Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Tue, 20 Feb 2018 17:03:39 +0100 Subject: [PATCH] setting minimum sentences to 5 -.- --- trufflepig/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trufflepig/preprocessing.py b/trufflepig/preprocessing.py index c49abf7..33bec66 100644 --- a/trufflepig/preprocessing.py +++ b/trufflepig/preprocessing.py @@ -42,7 +42,7 @@ def apply_parallel(function, iterable, ncores, chunksize=1000): def preprocess(post_df, ncores=4, chunksize=500, detect_seed=42, detect_max_length=2000, - grammar_max_sentences=10, + grammar_max_sentences=5, min_en_prob=0.9, min_max_body_length=(500, 25000), min_max_letter_ratio=(0.5, 0.85),