From 8f51345a1d88cc23656f4f102ee14d9b0aa75af6 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 28 Mar 2023 16:58:46 +0200 Subject: [PATCH] Add experiment runner tool and got rid of experiments module in processes. --- code/processes/experimental/readme.md | 3 - .../experimental/AdblockTesterTool.java | 57 ----------- .../experimental/ConverterLogicTestTool.java | 97 ------------------- .../experimental/CrawlDataExtractorTool.java | 93 ------------------ .../experiment-runner}/build.gradle | 31 ++++-- code/tools/experiment-runner/readme.md | 7 ++ .../java/nu/marginalia/tools/Experiment.java | 17 ++++ .../tools/ExperimentRunnerMain.java | 49 ++++++++++ .../tools/experiments/AdblockExperiment.java | 45 +++++++++ .../SentenceStatisticsExperiment.java | 74 ++++++++++++++ .../tools/experiments/TestExperiment.java | 16 +++ .../tools/experiments/TopicExperiment.java | 69 +++++++++++++ run/readme.md | 27 +++++- settings.gradle | 2 +- 14 files changed, 326 insertions(+), 261 deletions(-) delete mode 100644 code/processes/experimental/readme.md delete mode 100644 code/processes/experimental/src/main/java/nu/marginalia/experimental/AdblockTesterTool.java delete mode 100644 code/processes/experimental/src/main/java/nu/marginalia/experimental/ConverterLogicTestTool.java delete mode 100644 code/processes/experimental/src/main/java/nu/marginalia/experimental/CrawlDataExtractorTool.java rename code/{processes/experimental => tools/experiment-runner}/build.gradle (67%) create mode 100644 code/tools/experiment-runner/readme.md create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java create mode 100644 code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java diff --git a/code/processes/experimental/readme.md b/code/processes/experimental/readme.md deleted file mode 100644 index d15f09245..000000000 --- a/code/processes/experimental/readme.md +++ /dev/null @@ -1,3 +0,0 @@ -# Experimental - -Contains tools for running classification experiments on crawl data. \ No newline at end of file diff --git a/code/processes/experimental/src/main/java/nu/marginalia/experimental/AdblockTesterTool.java b/code/processes/experimental/src/main/java/nu/marginalia/experimental/AdblockTesterTool.java deleted file mode 100644 index 1473b663a..000000000 --- a/code/processes/experimental/src/main/java/nu/marginalia/experimental/AdblockTesterTool.java +++ /dev/null @@ -1,57 +0,0 @@ -package nu.marginalia.experimental; - -import nu.marginalia.adblock.AdblockSimulator; -import nu.marginalia.converting.processor.DocumentProcessor; -import plan.CrawlPlanLoader; -import plan.CrawlPlan; -import nu.marginalia.crawling.model.CrawledDocument; -import nu.marginalia.crawling.model.CrawledDomain; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; - -import java.io.IOException; -import java.nio.file.Path; - - -public class AdblockTesterTool { - - static AdblockSimulator simulator; - - static { - try { - simulator = new AdblockSimulator(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - - public static void main(String... args) throws IOException { - CrawlPlan plan = new CrawlPlanLoader().load(Path.of(args[0])); - - try (var iterable = plan.domainsIterable()) { - for (var domain : iterable) { - processDomain(domain); - } - } - - } - - private static void processDomain(CrawledDomain domain) { - if (domain.doc == null) return; - for (var doc : domain.doc) { - if (DocumentProcessor.isAcceptedContentType(doc) && "OK".equals(doc.crawlerStatus)) { - processDocument(doc); - } - } - } - - - private static void processDocument(CrawledDocument doc) { - Document parsedDocument = Jsoup.parse(doc.documentBody.decode()); - - if (simulator.hasAds(parsedDocument)) { - System.out.println(doc.url); - } - } -} diff --git a/code/processes/experimental/src/main/java/nu/marginalia/experimental/ConverterLogicTestTool.java b/code/processes/experimental/src/main/java/nu/marginalia/experimental/ConverterLogicTestTool.java deleted file mode 100644 index 465d469d1..000000000 --- a/code/processes/experimental/src/main/java/nu/marginalia/experimental/ConverterLogicTestTool.java +++ /dev/null @@ -1,97 +0,0 @@ -package nu.marginalia.experimental; - -import com.google.inject.Guice; -import com.google.inject.Inject; -import com.google.inject.Injector; -import nu.marginalia.converting.ConverterModule; -import plan.CrawlPlanLoader; -import plan.CrawlPlan; -import nu.marginalia.converting.processor.logic.dom.DomPruningFilter; -import nu.marginalia.language.sentence.SentenceExtractor; -import nu.marginalia.WmsaHome; -import nu.marginalia.converting.processor.DomainProcessor; -import nu.marginalia.adblock.GoogleAnwersSpamDetector; -import nu.marginalia.topic.RecipeDetector; -import nu.marginalia.topic.TextileCraftDetector; -import nu.marginalia.topic.WoodworkingDetector; -import org.jsoup.Jsoup; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.concurrent.ForkJoinPool; - -public class ConverterLogicTestTool { - - private final Logger logger = LoggerFactory.getLogger(getClass()); - - RecipeDetector recipeDetector = new RecipeDetector(); - WoodworkingDetector woodworkingDetector = new WoodworkingDetector(); - TextileCraftDetector textileCraftDetector = new TextileCraftDetector(); - GoogleAnwersSpamDetector spamDetector = new GoogleAnwersSpamDetector(); - - SentenceExtractor se = new SentenceExtractor(WmsaHome.getLanguageModels()); - - public static void main(String... args) throws IOException { - - if (args.length != 1) { - System.err.println("Arguments: crawl-plan.yaml"); - System.exit(0); - } - var plan = new CrawlPlanLoader().load(Path.of(args[0])); - - Injector injector = Guice.createInjector( - new ConverterModule(plan) - ); - - injector.getInstance(ConverterLogicTestTool.class); - } - - @Inject - public ConverterLogicTestTool( - CrawlPlan plan, - DomainProcessor processor - ) throws Exception { - var cp = new ForkJoinPool(16); - - plan.forEachCrawledDomain(domain -> { - if (domain.doc == null) return; - - - for (var doc : domain.doc) { - if (doc.documentBody == null) continue; - - Runnable task = () -> { - var parsed = Jsoup.parse(doc.documentBody.decode()); - - parsed.body().filter(new DomPruningFilter(0.5)); - var dld = se.extractSentences(parsed); - - if (dld.totalNumWords() < 250) - return; - - if (textileCraftDetector.testP(dld) > 0.3) { - System.out.println("textilecraft\t" + doc.url); - } - if (woodworkingDetector.testP(dld) > 0.1) { - System.out.println("woodworking\t" + doc.url); - } - if (recipeDetector.testP(dld) > 0.5) { - System.out.println("recipe\t" + doc.url); - } - if (spamDetector.testP(parsed) > 0.5) { - System.out.println("GA spam\t" + doc.url); - } - }; - - if (cp.getQueuedSubmissionCount() > 32) { - task.run(); - } else { - cp.execute(task); - } - } - }); - } - -} diff --git a/code/processes/experimental/src/main/java/nu/marginalia/experimental/CrawlDataExtractorTool.java b/code/processes/experimental/src/main/java/nu/marginalia/experimental/CrawlDataExtractorTool.java deleted file mode 100644 index 8c13c0db5..000000000 --- a/code/processes/experimental/src/main/java/nu/marginalia/experimental/CrawlDataExtractorTool.java +++ /dev/null @@ -1,93 +0,0 @@ -package nu.marginalia.experimental; - -import lombok.SneakyThrows; -import nu.marginalia.adblock.AdblockSimulator; -import nu.marginalia.converting.processor.DocumentProcessor; -import plan.CrawlPlanLoader; -import plan.CrawlPlan; -import nu.marginalia.crawling.model.CrawledDocument; -import nu.marginalia.crawling.model.CrawledDomain; -import nu.marginalia.service.module.DatabaseModule; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; - -import java.io.IOException; -import java.nio.file.Path; -import java.sql.SQLException; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.*; - - -public class CrawlDataExtractorTool { - private static final AdblockSimulator abs; - - static { - try { - abs = new AdblockSimulator(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private static final Set urls = new HashSet<>(50_000_000); - - @SneakyThrows - public static void main(String... args) throws IOException { - CrawlPlan plan = new CrawlPlanLoader().load(Path.of(args[0])); - DatabaseModule module = new DatabaseModule(); - - try (var ds = module.provideConnection(); - var conn = ds.getConnection(); - var stmt = conn.createStatement()) { - var rsp = stmt.executeQuery("SELECT URL FROM EC_URL_VIEW WHERE TITLE IS NOT NULL"); - while (rsp.next()) { - urls.add(rsp.getString(1)); - } - } - catch (SQLException ex) { - ex.printStackTrace(); - } - - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(10); - ExecutorService pool = new ThreadPoolExecutor(10, 20, 5, TimeUnit.MINUTES, queue); - Semaphore sem = new Semaphore(20); - - try (var iterable = plan.domainsIterable()) { - for (var domain : iterable) { - sem.acquire(); - pool.execute(() -> { - try { processDomain(domain); } - finally { sem.release(); } - }); - } - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - - pool.shutdown(); - - while (!pool.awaitTermination(1, TimeUnit.MINUTES)); - } - - private static void processDomain(CrawledDomain domain) { - if (domain.doc == null) return; - for (var doc : domain.doc) { - if (!urls.contains(doc.url)) - continue; - - if (DocumentProcessor.isAcceptedContentType(doc) && "OK".equals(doc.crawlerStatus)) { - processDocument(doc); - } - } - } - - - private static void processDocument(CrawledDocument doc) { - Document parsedDocument = Jsoup.parse(doc.documentBody.decode()); - - if (abs.hasAds(parsedDocument)) { - System.out.println(doc.url); - } - } -} diff --git a/code/processes/experimental/build.gradle b/code/tools/experiment-runner/build.gradle similarity index 67% rename from code/processes/experimental/build.gradle rename to code/tools/experiment-runner/build.gradle index 33399ae9d..70790c63a 100644 --- a/code/processes/experimental/build.gradle +++ b/code/tools/experiment-runner/build.gradle @@ -1,6 +1,7 @@ plugins { id 'java' id "io.freefair.lombok" version "5.3.3.3" + id 'application' id 'jvm-test-suite' } @@ -11,37 +12,49 @@ java { } } -dependencies { - implementation project(':code:common:process') +application { + mainClass = 'nu.marginalia.tools.ExperimentRunnerMain' + applicationName = 'experiment-runner' +} + +tasks.distZip.enabled = false +dependencies { + implementation project(':third-party:rdrpostagger') + implementation project(':third-party:porterstemmer') + implementation project(':third-party:monkey-patch-opennlp') implementation project(':code:common:model') implementation project(':code:common:config') + implementation project(':code:common:process') implementation project(':code:common:service') - implementation project(':code:libraries:big-string') - implementation project(':code:api:index-api') - implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:libraries:language-processing') - + implementation project(':code:libraries:term-frequency-dict') + implementation project(':code:libraries:big-string') + implementation project(':code:processes:converting-process') implementation project(':code:process-models:crawling-model') - implementation project(':code:processes:converting-process') implementation project(':code:features-convert:adblock') implementation project(':code:features-convert:topic-detection') implementation libs.lombok annotationProcessor libs.lombok implementation libs.bundles.slf4j + implementation libs.notnull implementation libs.guice implementation libs.jsoup - implementation libs.bundles.mariadb + implementation libs.trove + implementation libs.fastutil + + implementation libs.bundles.nlp + implementation libs.commons.lang3 testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito } + test { useJUnitPlatform() } diff --git a/code/tools/experiment-runner/readme.md b/code/tools/experiment-runner/readme.md new file mode 100644 index 000000000..42f1ade5f --- /dev/null +++ b/code/tools/experiment-runner/readme.md @@ -0,0 +1,7 @@ +# Experiment Runner + +This tool is a means of launching crawl data processing experiments, +for interacting with crawl data. + +It's launched with `run/experiment.sh`. New experiments need to be added to +`ExperimentRunnerMain` in order for the script to be able to run them. \ No newline at end of file diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java new file mode 100644 index 000000000..3fffb463d --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java @@ -0,0 +1,17 @@ +package nu.marginalia.tools; + +import nu.marginalia.crawling.model.CrawledDomain; + +public interface Experiment { + + /** The experiment processes the domain here. + * + * @return true to continue, false to terminate. + */ + boolean process(CrawledDomain domain); + + /** Invoked after all domains are processed + * + */ + void onFinish(); +} diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java new file mode 100644 index 000000000..e6ff6db4e --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java @@ -0,0 +1,49 @@ +package nu.marginalia.tools; + +import com.google.inject.Guice; +import com.google.inject.Injector; +import nu.marginalia.service.module.DatabaseModule; +import nu.marginalia.tools.experiments.*; +import plan.CrawlPlanLoader; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; + +public class ExperimentRunnerMain { + + private static Map> experiments = Map.of( + "test", TestExperiment.class, + "adblock", AdblockExperiment.class, + "topic", TopicExperiment.class, + "statistics", SentenceStatisticsExperiment.class + ); + + public static void main(String... args) throws IOException { + if (args.length != 2) { + System.err.println("Expected arguments: plan.yaml experiment-name"); + return; + } + + if (!experiments.containsKey(args[1])) { + System.err.println("Valid experiment names: " + experiments.keySet()); + return; + } + + Injector injector = Guice.createInjector( + new DatabaseModule() + ); + + Experiment experiment = injector.getInstance(experiments.get(args[1])); + + var plan = new CrawlPlanLoader().load(Path.of(args[0])); + + for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine + if (!experiment.process(domain)) { + break; + } + } + experiment.onFinish(); + + } +} diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java new file mode 100644 index 000000000..76500fac5 --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java @@ -0,0 +1,45 @@ +package nu.marginalia.tools.experiments; + +import com.google.inject.Inject; +import nu.marginalia.adblock.AdblockSimulator; +import nu.marginalia.converting.processor.DocumentProcessor; +import nu.marginalia.crawling.model.CrawledDocument; +import nu.marginalia.crawling.model.CrawledDomain; +import nu.marginalia.tools.Experiment; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +public class AdblockExperiment implements Experiment { + + private final AdblockSimulator simulator; + + @Inject + public AdblockExperiment(AdblockSimulator simulator) { + this.simulator = simulator; + } + + @Override + public boolean process(CrawledDomain domain) { + if (domain.doc == null) return true; + + for (var doc : domain.doc) { + if (DocumentProcessor.isAcceptedContentType(doc) && "OK".equals(doc.crawlerStatus)) { + processDocument(doc); + } + } + + return true; + } + + private void processDocument(CrawledDocument doc) { + Document parsedDocument = Jsoup.parse(doc.documentBody.decode()); + + if (simulator.hasAds(parsedDocument)) { + System.out.println(doc.url); + } + } + + @Override + public void onFinish() { + } +} diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java new file mode 100644 index 000000000..21eda1456 --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java @@ -0,0 +1,74 @@ +package nu.marginalia.tools.experiments; + +import com.google.inject.Inject; +import nu.marginalia.WmsaHome; +import nu.marginalia.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.logic.dom.DomPruningFilter; +import nu.marginalia.crawling.model.CrawledDomain; +import nu.marginalia.language.sentence.SentenceExtractor; +import nu.marginalia.tools.Experiment; +import nu.marginalia.topic.RecipeDetector; +import nu.marginalia.topic.TextileCraftDetector; +import nu.marginalia.topic.WoodworkingDetector; +import org.jsoup.Jsoup; + +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Path; + +public class SentenceStatisticsExperiment implements Experiment { + + SentenceExtractor se = new SentenceExtractor(WmsaHome.getLanguageModels()); + Path filename; + PrintWriter writer; + + @Inject + public SentenceStatisticsExperiment() throws IOException { + filename = Files.createTempFile(getClass().getSimpleName(), ".csv"); + System.out.println("Writing to " + filename); + + writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(filename.toFile()))); + } + + private void logLine(String message) { + System.out.printf("\u001b[2K\r%s", message); + } + @Override + public boolean process(CrawledDomain domain) { + if (domain.doc == null) return true; + + logLine("Processing: " + domain.domain); + + for (var doc : domain.doc) { + if (doc.documentBody == null) continue; + + var parsed = Jsoup.parse(doc.documentBody.decode()); + + parsed.body().filter(new DomPruningFilter(0.5)); + + var dld = se.extractSentences(parsed); + + + int numSentences = dld.sentences.length; + if (numSentences == 0) { + continue; + } + + double avgLength = dld.totalNumWords() / (double) numSentences; + if (avgLength < 50) { + writer.printf("%s\t%d\t%f\n", doc.url, dld.totalNumWords(), avgLength); + } + } + + return true; + } + + @Override + public void onFinish() { + logLine("Done!\n"); + writer.close(); + } +} diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java new file mode 100644 index 000000000..ac3ede458 --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java @@ -0,0 +1,16 @@ +package nu.marginalia.tools.experiments; + +import nu.marginalia.crawling.model.CrawledDomain; +import nu.marginalia.tools.Experiment; + +public class TestExperiment implements Experiment { + @Override + public boolean process(CrawledDomain domain) { + return true; + } + + @Override + public void onFinish() { + System.out.println("Tada!"); + } +} diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java new file mode 100644 index 000000000..a11ee7f58 --- /dev/null +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java @@ -0,0 +1,69 @@ +package nu.marginalia.tools.experiments; + +import com.google.inject.Inject; +import nu.marginalia.WmsaHome; +import nu.marginalia.adblock.AdblockSimulator; +import nu.marginalia.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.DocumentProcessor; +import nu.marginalia.converting.processor.logic.dom.DomPruningFilter; +import nu.marginalia.crawling.model.CrawledDocument; +import nu.marginalia.crawling.model.CrawledDomain; +import nu.marginalia.language.sentence.SentenceExtractor; +import nu.marginalia.tools.Experiment; +import nu.marginalia.topic.RecipeDetector; +import nu.marginalia.topic.TextileCraftDetector; +import nu.marginalia.topic.WoodworkingDetector; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +public class TopicExperiment implements Experiment { + + RecipeDetector recipeDetector = new RecipeDetector(); + WoodworkingDetector woodworkingDetector = new WoodworkingDetector(); + TextileCraftDetector textileCraftDetector = new TextileCraftDetector(); + GoogleAnwersSpamDetector spamDetector = new GoogleAnwersSpamDetector(); + + SentenceExtractor se = new SentenceExtractor(WmsaHome.getLanguageModels()); + + @Inject + public TopicExperiment() { + } + + @Override + public boolean process(CrawledDomain domain) { + if (domain.doc == null) return true; + + + for (var doc : domain.doc) { + if (doc.documentBody == null) continue; + + var parsed = Jsoup.parse(doc.documentBody.decode()); + + parsed.body().filter(new DomPruningFilter(0.5)); + var dld = se.extractSentences(parsed); + + if (dld.totalNumWords() < 250) + continue; + + if (textileCraftDetector.testP(dld) > 0.3) { + System.out.println("textilecraft\t" + doc.url); + } + if (woodworkingDetector.testP(dld) > 0.1) { + System.out.println("woodworking\t" + doc.url); + } + if (recipeDetector.testP(dld) > 0.5) { + System.out.println("recipe\t" + doc.url); + } + if (spamDetector.testP(parsed) > 0.5) { + System.out.println("GA spam\t" + doc.url); + } + + } + + return true; + } + + @Override + public void onFinish() { + } +} diff --git a/run/readme.md b/run/readme.md index 0a8f600c2..3249c8d7e 100644 --- a/run/readme.md +++ b/run/readme.md @@ -50,4 +50,29 @@ $ docker-compose up indexes. Wait for the line 'Auto-conversion finished!' When all is done, it should be possible to visit -[http://localhost:8080](http://localhost:8080) and try a few searches! \ No newline at end of file +[http://localhost:8080](http://localhost:8080) and try a few searches! + + +## Other Crawl Data + +By default, `reconvert.sh` will load the medium dataset. This is appropriate for a demo, +but other datasets also exist. + +| Set | Description | +|-----|----------------------------------------------------------------------------| +| s | 1000 domains, suitable for low-end machines | +| m | 2000 domains | +| l | 5000 domains | +| xl | 50,000 domains, basically pre-prod.
Warning: 5h+ processing time | + +To switch datasets, run e.g. + +```shell +$ docker-compose up -d mariadb +$ ./run/reconvert.sh l +``` + +## Experiment Runner + +The script `experiment.sh` is a launcher for the experiment runner, which is useful when +evaluating new algorithms in processing crawl data. \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 1fa43e135..a0908d990 100644 --- a/settings.gradle +++ b/settings.gradle @@ -56,13 +56,13 @@ include 'code:common:process' include 'code:processes:converting-process' include 'code:processes:crawling-process' include 'code:processes:loading-process' -include 'code:processes:experimental' include 'code:process-models:converting-model' include 'code:process-models:crawling-model' include 'code:tools:term-frequency-extractor' include 'code:tools:crawl-job-extractor' +include 'code:tools:experiment-runner' include 'third-party:porterstemmer' include 'third-party:xz'