Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add experiment runner tool and remove the experiments module in processes.
- Loading branch information
Showing
14 changed files
with
326 additions
and
261 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
57 changes: 0 additions & 57 deletions
57
code/processes/experimental/src/main/java/nu/marginalia/experimental/AdblockTesterTool.java
This file was deleted.
Oops, something went wrong.
97 changes: 0 additions & 97 deletions
97
...ocesses/experimental/src/main/java/nu/marginalia/experimental/ConverterLogicTestTool.java
This file was deleted.
Oops, something went wrong.
93 changes: 0 additions & 93 deletions
93
...ocesses/experimental/src/main/java/nu/marginalia/experimental/CrawlDataExtractorTool.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Experiment Runner | ||
|
||
This tool is a means of launching experiments that process and | ||
interact with crawl data. | ||
|
||
It's launched with `run/experiment.sh`. New experiments need to be registered in the | ||
`experiments` map in `ExperimentRunnerMain` in order for the script to be able to run them. |
17 changes: 17 additions & 0 deletions
17
code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package nu.marginalia.tools; | ||
|
||
import nu.marginalia.crawling.model.CrawledDomain; | ||
|
||
public interface Experiment { | ||
|
||
/** The experiment processes the domain here. | ||
* | ||
* @return true to continue, false to terminate. | ||
*/ | ||
boolean process(CrawledDomain domain); | ||
|
||
/** Invoked after all domains are processed | ||
* | ||
*/ | ||
void onFinish(); | ||
} |
49 changes: 49 additions & 0 deletions
49
code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package nu.marginalia.tools; | ||
|
||
import com.google.inject.Guice; | ||
import com.google.inject.Injector; | ||
import nu.marginalia.service.module.DatabaseModule; | ||
import nu.marginalia.tools.experiments.*; | ||
import plan.CrawlPlanLoader; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.Map; | ||
|
||
public class ExperimentRunnerMain { | ||
|
||
private static Map<String, Class<? extends Experiment>> experiments = Map.of( | ||
"test", TestExperiment.class, | ||
"adblock", AdblockExperiment.class, | ||
"topic", TopicExperiment.class, | ||
"statistics", SentenceStatisticsExperiment.class | ||
); | ||
|
||
public static void main(String... args) throws IOException { | ||
if (args.length != 2) { | ||
System.err.println("Expected arguments: plan.yaml experiment-name"); | ||
return; | ||
} | ||
|
||
if (!experiments.containsKey(args[1])) { | ||
System.err.println("Valid experiment names: " + experiments.keySet()); | ||
return; | ||
} | ||
|
||
Injector injector = Guice.createInjector( | ||
new DatabaseModule() | ||
); | ||
|
||
Experiment experiment = injector.getInstance(experiments.get(args[1])); | ||
|
||
var plan = new CrawlPlanLoader().load(Path.of(args[0])); | ||
|
||
for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine | ||
if (!experiment.process(domain)) { | ||
break; | ||
} | ||
} | ||
experiment.onFinish(); | ||
|
||
} | ||
} |
Oops, something went wrong.