diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 9c74ed75f6d..5a188b3cacb 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -4,7 +4,7 @@ Sphinx==7.4.0 sphinx-icon==0.1.2 # Markdown support -myst-parser==2.0.0 +myst-parser==4.0.0 # tabs sphinx-tabs==3.4.5 diff --git a/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java new file mode 100644 index 00000000000..847093433ec --- /dev/null +++ b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java @@ -0,0 +1,64 @@ +package edu.harvard.iq.dataverse.somepackage; + +import edu.harvard.iq.dataverse.util.testing.performance.JpaEntityManagerService; +import edu.harvard.iq.dataverse.util.testing.performance.JpaPerformanceTest; +import net.ttddyy.dsproxy.QueryCount; +import net.ttddyy.dsproxy.QueryCountHolder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import jakarta.persistence.EntityManager; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +// Single annotation for automatic setup of +// 1) basic tags for JUnit groups, +// 2) shared PostgreSQL server via Testcontainers, and +// 3) creation and injection of JPA entity manager service. +@JpaPerformanceTest +class SamplePerformanceIT { + + static JpaEntityManagerService jpa; + + @BeforeAll + static void setUp() { + // A manual start is necessary to allow you to selectively enable service features as necessary + jpa.start(); + + // inTransactionVoid: Use this when you only need to execute database operations + // (e.g., persisting test fixtures) without returning a value. + jpa.inTransactionVoid(em -> { + // EntityManager em is provided here. 
+ // em.persist(myEntity); + }); + } + + @Test + void shouldMeasureOperationPerformance() { + // Clear any previous query statistics + QueryCountHolder.clear(); + Instant start = Instant.now(); + + // inTransaction: Use this when your operation returns a result that needs + // to be asserted or measured. + Object result = jpa.inTransaction(em -> { + // Execute your performance-critical operation using the EntityManager. + // return result; + return null; // Placeholder + }); + + Instant end = Instant.now(); + assertNotNull(result); + + // Retrieve and log ORM statistics + QueryCount count = QueryCountHolder.getGrandTotal(); + System.out.println("Elapsed ms: " + start.until(end, ChronoUnit.MILLIS)); + System.out.println("Total queries: " + count.getTotal()); + System.out.println("Select queries: " + count.getSelect()); + System.out.println("Insert queries: " + count.getInsert()); + System.out.println("Update queries: " + count.getUpdate()); + System.out.println("Delete queries: " + count.getDelete()); + } +} \ No newline at end of file diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 6ecaeebaf54..a0ef9edd31f 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -53,7 +53,10 @@ templates_path = ['_templates'] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} # The encoding of source files. 
#source_encoding = 'utf-8-sig' diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 28b1fbaae82..c829901bead 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -47,4 +47,6 @@ Developer Guide fontcustom classic-dev-env search-services + testing/fixtures.md + testing/performance.md diff --git a/doc/sphinx-guides/source/developers/testing/fixtures.md b/doc/sphinx-guides/source/developers/testing/fixtures.md new file mode 100644 index 00000000000..06f8edfc25a --- /dev/null +++ b/doc/sphinx-guides/source/developers/testing/fixtures.md @@ -0,0 +1,383 @@ +# Fixtures For Tests + +Most Dataverse test fixtures are based on JSON files stored in the test resources of the codebase. + +In addition, (as of Dataverse 6.11) you can use a generator utility to create dataset-centered fixtures programmatically. +This is most useful for local integration and performance tests but may be of use for unit tests as well. + +```{contents} Contents: +:local: +:depth: 3 +``` + +(fixture-generator)= +## Dataset Fixture Generator + +The dataset fixture generator is a test utility for creating connected dataset entity graphs with configurable size and shape. +It is located in the core testing utilities at `edu.harvard.iq.dataverse.util.testing.fixtures` and `edu.harvard.iq.dataverse.util.testing.recipes`. + +The fixture generator is useful when tests need one or more datasets with many files, tabular files, variables, and optional variable metadata, while still keeping the test setup readable. +It is primarily intended for integration and performance tests where hand-building entities would be too verbose, brittle, or too uniform to uncover ORM and serialization issues. + +The generator creates an in-memory entity graph. +Persisting that graph to a database is optional and requires the usual JPA persistence rules to be respected (see below). 
+ + +### Architecture + +The fixture generator is built around three main concepts: a builder, recipes for it, and field populators. +This separation keeps entity graph shape, relationship wiring, and scalar field population independent of each other. + +#### Fixture Builder + +The builder creates the connected *entity graph* by consuming recipes. It is responsible for: + +- Creating the entities +- Wiring relationships +- Keeping both sides of relationships in sync where needed +- Returning a `DatasetFixture` with convenient references to generated objects + +#### Recipes + +Recipes *describe* the *shape* of the fixture's entity graph and should not manually wire entity relationships: + +- How many files should exist? +- Which files are tabular? +- How many variables should a tabular file contain? +- Should variable metadata be created? + +**Available Recipes:** + +Recipes are composable using a fluent API and work together. + +```text +DatasetRecipe + -> DatasetTypeRecipe + -> VersionRecipe + -> FileRecipe + -> VariableSetRecipe + -> VariableMetadataRecipe +``` + +`DatasetRecipe` +Top-level recipe for creating a dataset fixture. It combines a `DatasetTypeRecipe` and a `VersionRecipe`. + +`DatasetTypeRecipe` +Provides the dataset type assigned to the generated dataset. +Can create a dataset type from scalar values or wrap an existing instance. + +Note: the recipe provides the type object but does not persist it. +Tests that persist generated fixtures must ensure the dataset type is managed before the dataset is flushed. + +`VersionRecipe` +Describes the current dataset version. At the moment, this mainly means providing one or more file recipes. + +`FileRecipe` +Describes file populations. A file recipe may create regular files or tabular files. + +`VariableSetRecipe` +Describes how many variables to create for tabular files. It supports uniform and skewed variable populations. 
+ +`VariableMetadataRecipe` +Decides whether a `VariableMetadata` row should be created for a generated `(FileMetadata, DataVariable)` pair. +At most one metadata row is generated for each such pair. + + +#### Fixture Populator + +The populator fills scalar and non-relationship *fields*, which are not primarily about graph shape. + +It sets values such as: + +- Identifiers +- Timestamps +- File labels +- Content types +- Checksums +- Variable names +- Required fields +- Null-sensitive collections + +The default *minimal* populator is conservative. +It creates enough data for serialization and persistence tests, but it does not try to simulate fully realistic production metadata. + + + +### Full Example + +The following example creates a small but non-uniform dataset fixture. It's suitable +- for a smoke test of a serializer, +- for an integration test with assertions on the result, +- for a performance test with benchmarking speed of different implementations, and other scenarios. + +```java +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.regular(20), + FileRecipe.tabular(30, + VariableSetRecipe + .byPredicate(VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() < 5)) + .when(ctx -> ctx.fileIndex() % 10 == 0, 1_000) + .otherwise(25) + )) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .populator(FixturePopulator.minimal()) + .build(); + +JsonArrayBuilder files = Json.createArrayBuilder(); + +for (FileMetadata fileMetadata : fixture.fileMetadatas()) { + files.add(JsonPrinter.json(fileMetadata.getDataFile(), fileMetadata, true)); +} + +var json = files.build(); +``` + +This creates: +- 20 regular files +- 30 tabular files + - some tabular files with 1,000 variables + - other tabular files with 25 variables + - variable metadata only for the first few variables in each tabular file + +This helps exercise code paths that traverse files, file metadata, data tables, data 
variables, and variable metadata. +All of this happens without the need to pre-produce an enormous fixture as a JSON file. +Its deterministic nature allows running the test anywhere without depending on seeded randomness, offering reliable and reproducible results. + + + +### Basic Usage + +#### Small Dataset + +This example creates: + +- one dataset +- one current version +- 10 tabular files +- 10 variables per tabular file +- 1 regular file + +```java +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.tabular(10, VariableSetRecipe.uniform(10)), + FileRecipe.regular(1) + ) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .populator(FixturePopulator.minimal()) + .build(); + +Dataset dataset = fixture.dataset(); +DatasetVersion version = fixture.currentVersion(); +``` + +#### Skewed Variable Populations + +Skewed data is useful for performance testing because real datasets are rarely uniform. +Some files may have only a few variables, while others may be very large. + +This example creates 500 tabular files: + +- one dataset +- one current version +- 500 tabular files + - every 100th file receives 100,000 variables + - every 10th file receives 10,000 variables + - all others receive 250 variables + +```java +var variables = VariableSetRecipe.byPredicate() + .when(ctx -> ctx.fileIndex() % 100 == 0, 100_000) + .when(ctx -> ctx.fileIndex() % 10 == 0, 10_000) + .otherwise(250); + +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.tabular(500, variables) + ) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .build(); +``` + +#### Adding Variable Metadata + +Variable Metadata is optional and controlled by `VariableMetadataRecipe`. +The metadata recipe is evaluated for each generated `(FileMetadata, DataVariable)` pair. +This matters because `VariableMetadata` is versioned indirectly through `FileMetadata`. 
+ +*No variable metadata (default):* + +```java +VariableSetRecipe.uniform(1_000) +- or - +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.noop()) +``` + +*Metadata for every variable:* + +```java +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.always()) +``` + +*Metadata for selected variables:* + +```java +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() % 10 == 0)) +``` + + + +### Persistence Usage + +The generator creates an in-memory entity graph. Persisting that graph is optional and follows normal JPA rules. + +When persisting a generated fixture to a database, remember that not all relationships cascade from `Dataset` to every object. +In particular, `DataFile` instances usually need to be persisted explicitly before persisting the dataset graph. +The `DatasetType` must also be managed, either by persisting the generated type or by looking up an existing one in the same persistence context. + +A typical persistence sequence is: + +```java +jpa.inTransactionVoid(em -> { + em.persist(fixture.datasetType()); + for (DataFile dataFile : fixture.dataFiles()) { + em.persist(dataFile); + } + em.persist(fixture.dataset()); +}); +``` + +The exact order may evolve as the fixture generator grows, may depend on the exact usage scenario, and +is influenced by the evolution of the entity classes themselves, but the important point is: +**Shared/reference entities and non-cascaded entities must be managed (persisted) before the dataset graph is flushed**. + + + +### Discussion and Limitations + +#### Benefits + +1. **Readable scenarios:** tests describe intent at a high level. + For example: `FileRecipe.tabular(500, VariableSetRecipe.uniform(1_000))` is easier to understand than manually creating thousands of entities. +2. **Composable graph shape:** different recipes can be combined to describe mixed datasets. +3. 
**Deterministic output:** the build context carries fixture-wide values such as sequence and timestamp, making generated data easier to debug and compare. +4. **Reduced boilerplate:** relationship wiring and null-sensitive defaults are centralized. +5. **Better performance testing:** skewed fixtures can expose ORM issues that uniform data may hide, such as N+1 query expansion over large variable collections. +6. **Serialization safety:** the minimal populator initializes fields and collections that serializers commonly traverse. + +#### Tradeoffs + +1. **More concepts to learn:** developers need to understand builders, recipes, populators, and resulting fixture objects vs. a static factory. +2. **Not a full production object factory:** the minimal populator creates safe test data, not necessarily realistic production data. +3. **Persistence still requires care:** some entities must be persisted explicitly because the production model does not cascade every relationship. +4. **Hardcoded defaults:** the minimal populator uses deterministic placeholder values; tests that need realistic metadata should provide a custom populator. + +#### Limitations + +1. **Minimalistic:** The current fixture generator is intentionally minimal. +2. **Single dataset version only:** the fixture currently models one current dataset version and does not generate multiple versions. +3. **No version evolution recipes:** there is no support yet for deriving later versions from earlier versions, modeling change over time. +4. **Limited dataset metadata:** dataset fields and metadata blocks are not generated in detail. +5. **Simple dataset type handling:** a `DatasetType` can be generated or supplied, but persistence of shared types is still the responsibility of the test. +6. **No persistence manager:** the fixture system builds graphs, but it does not yet provide a dedicated persister that knows the correct persistence order. +7. 
**One table per tabular file:** tabular files currently get one `DataTable`. The domain model can allow more, but the fixture generator does not expose that yet. +8. **One variable group per tabular file:** each non-empty tabular file currently gets one `VarGroup` containing all variables; there is no `VarGroupRecipe` yet. +9. **Limited variable metadata content:** variable metadata can be present or absent, but the minimal populator only fills basic scalar values. +10. **No category or statistics recipes:** the fixture generator does not yet provide recipes for variable categories, summary statistics, invalid ranges, or category metadata. + +#### Unsupported Usage Scenarios + +The following scenarios are not yet directly expressible: + +- multiple dataset versions sharing the same `DataFile` objects +- metadata-only changes between versions +- version-specific `VariableMetadata` changes across versions +- files added or removed between versions +- multiple `DataTable` objects per file +- different variable group distributions per file +- weighted random or seeded random file populations +- Zipf-like or heavy-tail distributions as first-class recipes +- realistic dataset field metadata +- fixture graphs that mimic a fully published dataset lifecycle + + + +### Extending The Fixture Generator + +When extending the fixture generator, first decide which responsibility your change belongs to. + +#### Add Recipes For Graph Shaping + +Use a new recipe when the test needs to describe what shape should be created. + +Examples: + +- number of var groups +- number of data tables per file +- whether categories should exist +- how many variables receive summary statistics +- how versions evolve over time + +Recipe changes usually belong in the `edu.harvard.iq.dataverse.util.testing.recipes` package. + +#### Add Populator Behavior For Scalar Values + +Use a new or custom populator when entities should be filled differently, but the graph shape is the same. 
+Extend the populator interface if new types of scalar data are required. + +Examples: + +- more realistic file names +- different content types +- richer variable labels +- custom checksums +- realistic variable metadata text + +Populator changes usually belong in the `edu.harvard.iq.dataverse.util.testing.fixtures` package. + +#### Change Builder For Wiring + +Change the builder when new relationships must be created or maintained. + +Examples: + +- adding support for `VariableCategory` +- wiring category metadata +- creating multiple data tables per file +- linking version-evolved file metadata back to shared data files + +Builder changes should be kept small and split into helper methods where possible. + +#### Recommended Extension Path + +A practical roadmap for further evolution is: + +1. Add a `VarGroupRecipe` to control group count and membership. +2. Add category and summary statistic recipes for variable-level enrichment. +3. Add a fixture persister that knows the correct persistence order. +4. Add version evolution recipes for multi-version datasets. +5. Add richer dataset metadata generation. +6. Add (seeded!) random distribution recipes if a deterministic skew is not enough. +7. Add fuzzy testing by generating fixtures with targeted chaos. + +#### Guidelines For Contributions + +1. Keep recipes declarative: recipes should describe shape, not manually wire entity relationships. +2. Keep populators focused: populators should fill fields, not decide how many entities exist. +3. Keep builders responsible for wiring: relationship consistency belongs in the builder. +4. Prefer deterministic generation: deterministic data makes performance tests easier to reproduce and debug. +5. Avoid hiding persistence requirements: if an entity must be persisted before another, document it clearly or add a dedicated persister. +6. Start minimal: add the smallest recipe or populator extension needed for the scenario. 
Avoid making the DSL generic before there is a concrete test need. diff --git a/doc/sphinx-guides/source/developers/testing/performance.md b/doc/sphinx-guides/source/developers/testing/performance.md new file mode 100644 index 00000000000..e981cee5d80 --- /dev/null +++ b/doc/sphinx-guides/source/developers/testing/performance.md @@ -0,0 +1,89 @@ +# Performance Testing + +## Introduction +Performance tests measure how your application behaves under load, focusing on execution time, resource consumption, and database efficiency. +Unlike *unit tests*, which verify isolated logic, or *integration* or *API tests*, which validate component interactions and full request lifecycles, performance tests quantify *how fast* operations complete and *how many* database queries they trigger. + +## Running Performance Tests +Performance tests are excluded from the default test run to save CI/CD time and local resources. +To execute them, use the Maven `verify` lifecycle phase and override the `it.groups` property: + +```shell +mvn verify -Dit.groups=performance +``` + +```{note} +The `it.groups` property accepts a comma-separated list. +You can combine groups (e.g., `-Dit.groups=integration,performance`) as necessary. +However, it is highly recommended to run them in isolation due to their computational intensity and sensitivity to system load. +``` + +## Testing database-bound code +Performance tests for code relying on retrieving entities from a database are essential for catching regressions in ORM efficiency. +They can identify N+1 query problems or ensure that heavy data processing pipelines (e.g., exporting large datasets) remain responsive as the codebase evolves. + +### Prerequisites +Any tests around database-bound code rely on [Testcontainers](https://www.testcontainers.org/) to spin up ephemeral database instances. +Avoiding in-memory databases for such tests allows for more realistic testing as seen in actual deployments. 
+Consequently, you must have **Docker** installed and running, allowing Testcontainers to start a PostgreSQL server. + +- If you use a local Docker daemon, ensure it has sufficient memory allocated (typically 1GB+ is recommended for running Postgres containers alongside your tests). +- If your Docker daemon runs remotely, ensure the `DOCKER_HOST` environment variable is correctly configured in your shell so Testcontainers can locate it. + +The automated testing setup will look up a system property `postgresql.server.version` to determine which container image tag to use. +The property is injected from `pom.xml` by Maven Failsafe and uses a reasonable fallback value if missing. +To test with a different version of PostgreSQL, you may set the Maven property `postgresql.server.version` for a run. + +### Example +Performance test classes must follow specific conventions to be discovered and executed correctly: + +1. **Package Location:** + Place your test class in `src/test/java`, mirroring the package structure of the code you want to test (e.g., `edu.harvard.iq.dataverse.export`). + This placement grants the test class access to package-private members in `src/main/java`, which is often necessary when testing internal services directly without going through the full API layer. +2. **Naming Convention:** + Name the class `*IT.java` so that the Maven Failsafe plugin automatically picks it up during the `integration-test` phase. +3. **Setup Annotation:** + Annotate the class with `@JpaPerformanceTest` to have everything set up automatically for you. + A `JpaEntityManagerService` will be injected into a static class field for you, allowing interaction with a JPA Entity Manager. + +Below is a minimal, generic example [`SamplePerformanceIT`](/_static/developers/testing/SamplePerformanceIT.java) demonstrating the structure and how to run a transaction with or without a return value. 
+ +```{literalinclude} /_static/developers/testing/SamplePerformanceIT.java +:name: sample-performance-test +:language: java +:start-at: // +``` + +### Understanding JpaEntityManagerService +The `JpaEntityManagerService` class abstracts away the boilerplate required to set up a JPA environment for testing. +Here is what it does under the hood: + +1. **Automatic PostgreSQL Server Setup:** + The involved JUnit Test Extension makes sure to create a single server instance to speed up test setups. + Nonetheless, any test class will run within its own database on the server, guaranteeing test database isolation. + +2. **Automatic Schema Generation:** + When you call `.start()` on a `JpaEntityManagerService` instance, it initializes an EclipseLink `EntityManagerFactory` configured to automatically generate the database schema (`schema-generation.database.action=create`). + This guarantees that every test run begins with a pristine database structure derived directly from your current JPA entity mappings. + You do not need to run Flyway migrations or seed the database beforehand. + +3. **Transaction Management:** + The service handles the lifecycle of JPA transactions automatically. + You simply pass a lambda to `inTransaction()` or `inTransactionVoid()`. + The service will: + 1. Create an `EntityManager` and begin a transaction. + 2. Execute your lambda. + 3. Commit the transaction on success, or roll it back if a `RuntimeException` is thrown. + 4. Close the `EntityManager` in a `finally` block to prevent resource leaks. + +4. **Query Statistics via Wrapped DataSource:** + To make it easy to profile ORM behavior, `JpaEntityManagerService` wraps the underlying PostgreSQL `DataSource` using a proxy that intercepts all SQL statements. + + By default, the proxy tracks query counts, which you can retrieve via `QueryCountHolder.getGrandTotal()`. + This provides immediate, programmatic insight into database efficiency without needing to parse verbose SQL logs. 
+ It is particularly useful for: + - Verifying that a batch operation executes in a single query rather than a loop. + - Catching N+1 query problems by asserting on the number of `SELECT` statements. + + *Advanced Usage:* The default service only tracks query counts. + If you need detailed SQL logging (including bound parameters) or custom execution metrics, you can extend `JpaEntityManagerService` and register additional `StatementListener` implementations on the `ProxyDataSourceBuilder` during initialization. \ No newline at end of file diff --git a/pom.xml b/pom.xml index 0ee32227abc..0f3c78ebf5c 100644 --- a/pom.xml +++ b/pom.xml @@ -764,6 +764,18 @@ 3.0.0 test + + net.ttddyy + datasource-proxy + 1.11.0 + test + + + org.apache.commons + commons-dbcp2 + 2.14.0 + test + org.testcontainers testcontainers diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 3ccf616a688..cb4539b685a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1107,6 +1107,16 @@ public static JsonObjectBuilder json(DataVariable dv) { .add("fileEndPosition", dv.getFileEndPosition()) .add("recordSegmentNumber", dv.getRecordSegmentNumber()) .add("numberOfDecimalPoints",dv.getNumberOfDecimalPoints()) + // TODO: This potentially is a design flaw and huge code smell. + // VariableMetadata is versioned by (FileMetadata,DatasetVersion) in the (DataVariable,FileMetadata) pair. + // This is wrong output, as we were only interested in the one version we asked for. + // This is wasteful, as we load unrelated versions of the variable metadata. + // (For datasets with many variables and many versions, this is very bad.) + // This also leads to N+1 query expansions, as in the printing code we look for related entities id's and details. 
+ // There are two code paths leading to this: a) from exporting, b) from api.Files.getFileDataTables(). + // -> For exports, we are only interested in a single dataset / file metadata version. + // -> For the API call we probably want the full details? It seems SPA related - not sure if they should provide a version. + // .add("variableMetadata",jsonVarMetadata(dv.getVariableMetadatas())) .add("invalidRanges", dv.getInvalidRanges().isEmpty() ? null : JsonPrinter.jsonInvalidRanges(dv.getInvalidRanges())) .add("summaryStatistics", dv.getSummaryStatistics().isEmpty() ? null : JsonPrinter.jsonSumStat(dv.getSummaryStatistics())) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java new file mode 100644 index 00000000000..1cf4c17495d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java @@ -0,0 +1,89 @@ +package edu.harvard.iq.dataverse.export; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.util.testing.fixtures.DatasetFixtureBuilder; +import edu.harvard.iq.dataverse.util.testing.performance.JpaEntityManagerService; +import edu.harvard.iq.dataverse.util.testing.performance.JpaPerformanceTest; +import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.DatasetTypeRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe; +import net.ttddyy.dsproxy.QueryCount; +import net.ttddyy.dsproxy.QueryCountHolder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static 
org.junit.jupiter.api.Assumptions.assumeTrue; + +@JpaPerformanceTest +class HugeDatasetExportPerformanceIT { + + static JpaEntityManagerService jpa; + + static Dataset regularFilesDataset; + + @BeforeAll + static void setUp() { + // The manual start is required here in case you need to configure any service features before starting... + jpa.start(); + + DatasetRecipe regularFiles = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.regular(1000) + ) + ); + + // Build the fixture + var regularFixture = DatasetFixtureBuilder.builder().recipe(regularFiles).build(); + + // Some entities need to be present in the database to appropriately let the ORM create the mappings + jpa.inTransactionVoid(em -> em.persist(regularFixture.datasetType())); + + // Persist the actual dataset + regularFilesDataset = regularFixture.dataset(); + jpa.inTransactionVoid(em -> { + // DataFile has no cascade path from Dataset, so each file must be persisted explicitly before + // the dataset graph is flushed. + for (DataFile dataFile : regularFixture.dataFiles()) { + em.persist(dataFile); + } + em.persist(regularFilesDataset); + }); + } + + @Test + void shouldExportLargeDataset() { + Long datasetVersionId = regularFilesDataset.getId(); + + QueryCountHolder.clear(); + Instant start = Instant.now(); + + String json = jpa.inTransaction(em -> { + var datasetVersion = em.find(DatasetVersion.class, datasetVersionId); + assumeTrue(datasetVersion != null, "No dataset version available in DB. 
Check fixtures!"); + + InternalExportDataProvider provider = new InternalExportDataProvider(datasetVersion); + var details = provider.getDatasetFileDetails(); + return details.toString(); + }); + + assertNotNull(json); + + Instant end = Instant.now(); + + QueryCount count = QueryCountHolder.getGrandTotal(); + System.out.println("Elapsed ms: " + start.until(end, ChronoUnit.MILLIS)); + System.out.println("Total queries: " + count.getTotal()); + System.out.println("Select queries: " + count.getSelect()); + System.out.println("Insert queries: " + count.getInsert()); + System.out.println("Update queries: " + count.getUpdate()); + System.out.println("Delete queries: " + count.getDelete()); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java index 22e13f08665..9ba29404c8e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java @@ -5,4 +5,5 @@ public class Tags { public static final String INTEGRATION_TEST = "integration"; public static final String USES_TESTCONTAINERS = "testcontainers"; public static final String DB_MIGRATION_TEST = "migration"; + public static final String PERFORMANCE_TEST = "performance"; } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java new file mode 100644 index 00000000000..f6019d63e9a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java @@ -0,0 +1,30 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import java.sql.Timestamp; +import java.time.Instant; +import java.util.Date; + +/** + * Immutable build context shared across a single fixture build. + * + *

This object exists so populators and helpers do not have to depend on + * builder-internal types. As more cross-cutting build information is needed + * (for example version index, deterministic seed, or builder configuration), + * it can be added here without changing populator method signatures.

+ * + * @param sequence deterministic sequence number for the fixture instance + */ +public record BuildContext( + long sequence, + Instant now +) { + + Timestamp getTimestamp() { + return Timestamp.from(now); + } + + Date getDate() { + return Date.from(now); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java new file mode 100644 index 00000000000..f45ad6f7f91 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataTable; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.datavariable.VarGroup; +import edu.harvard.iq.dataverse.datavariable.VariableMetadata; + +import java.util.List; + +/** + * Immutable holder for a generated dataset fixture graph. + * + *

This object gives tests convenient access not only to the root + * {@link Dataset}, but also to the current {@link DatasetVersion} and all major + * generated child entities. That makes it easier to inspect, persist, or tweak + * the graph after building it.

+ * + *

The fixture currently represents a single dataset version. Multi-version + * support will be added in a later iteration via dedicated evolution recipes.

+ * + * @param dataset root dataset + * @param currentVersion current dataset version + * @param fileMetadatas generated file metadata objects + * @param dataFiles generated data files + * @param dataTables generated data tables + * @param dataVariables generated data variables + * @param varGroups generated variable groups + * @param variableMetadata generated variable metadata rows + */ +public record DatasetFixture( + Dataset dataset, + DatasetType datasetType, + DatasetVersion currentVersion, + List fileMetadatas, + List dataFiles, + List dataTables, + List dataVariables, + List varGroups, + List variableMetadata +) { + + /** + * Compact constructor performing defensive copies of collection components. + */ + public DatasetFixture { + fileMetadatas = List.copyOf(fileMetadatas); + dataFiles = List.copyOf(dataFiles); + dataTables = List.copyOf(dataTables); + dataVariables = List.copyOf(dataVariables); + varGroups = List.copyOf(varGroups); + variableMetadata = List.copyOf(variableMetadata); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java new file mode 100644 index 00000000000..3f6175c30c5 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java @@ -0,0 +1,504 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataTable; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.datavariable.VarGroup; +import edu.harvard.iq.dataverse.datavariable.VariableMetadata; +import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe; 
+import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Builder/wiring layer that consumes fixture recipes and produces a fully wired + * {@link Dataset} graph. + * + *

This class is intentionally responsible for relationship correctness and + * collection initialization, while recipes are responsible for deciding graph + * shape and populators are responsible for scalar-field initialization.

+ * + *

Current scope:

+ *
    + *
  • one dataset
  • + *
  • one (current) dataset version
  • + *
  • files created according to {@link FileRecipe}
  • + *
  • tabular structure created according to {@link VariableSetRecipe}
  • + *
  • variable metadata created according to {@link VariableMetadataRecipe}
  • + *
+ */ +public class DatasetFixtureBuilder { + + /** + * Process-wide deterministic sequence used to identify each built fixture. + * + *

This is intentionally static so values are unique even across multiple + * tests running in the same JVM. It is not meant to be reset between tests.

+ */ + private static final AtomicLong SEQUENCE = new AtomicLong(1); + + /** + * Group index used for the single var group we currently create per tabular file. + *

This will become recipe-driven once a {@code VarGroupRecipe} is introduced.

+ */ + private static final int FIRST_AND_ONLY_VAR_GROUP_INDEX = 0; + + private DatasetRecipe datasetRecipe; + private FixturePopulator populator = FixturePopulator.minimal(); + + /** + * Creates a new builder instance. + * + * @return a fresh fixture builder + */ + public static DatasetFixtureBuilder builder() { + return new DatasetFixtureBuilder(); + } + + /** + * Sets the recipe used to determine the graph shape. + * + * @param datasetRecipe dataset recipe to use + * @return this builder for fluent chaining + */ + public DatasetFixtureBuilder recipe(DatasetRecipe datasetRecipe) { + this.datasetRecipe = Objects.requireNonNull(datasetRecipe); + return this; + } + + /** + * Sets the scalar-field populator policy. + * + * @param populator populator to use + * @return this builder for fluent chaining + */ + public DatasetFixtureBuilder populator(FixturePopulator populator) { + this.populator = Objects.requireNonNull(populator); + return this; + } + + /** + * Builds a dataset fixture graph according to the configured recipe and populator. + * + *

The build process happens in clearly separated phases:

+ *
    + *
  1. create the root {@link Dataset} and its current {@link DatasetVersion}
  2. + *
  3. iterate over the configured file recipes and build each file (and, where applicable, + * its tabular subgraph)
  4. + *
  5. collect everything that was created so the {@link DatasetFixture} can expose it
  6. + *
+ * + * @return generated dataset fixture + */ + public DatasetFixture build() { + if (datasetRecipe == null) { + throw new IllegalStateException("A DatasetRecipe must be configured before building."); + } + Objects.requireNonNull(populator, "populator must not be null"); + + // One context per build, so populators can use deterministic information about this fixture instance. + BuildContext context = new BuildContext(SEQUENCE.getAndIncrement(), Instant.now()); + + // Create the top-level dataset and its current version, then wire them. + Dataset dataset = createEmptyDataset(context); + DatasetType datasetType = datasetRecipe.datasetTypeRecipe().datasetType(); + DatasetVersion currentVersion = createDatasetVersion(context); + wireDatasetAndVersion(dataset, currentVersion); + + // Accumulator collects everything we generate so we can expose it in the fixture. + BuildAccumulator accumulator = new BuildAccumulator(); + + // Walk the file recipes and create files (plus tabular structure where applicable). + buildVersionFiles(currentVersion, context, accumulator); + + return accumulator.toFixture(dataset, datasetType, currentVersion); + } + + /** + * Creates a {@link Dataset} with no implicit versions. + * + *

{@code Dataset} normally creates an initial version automatically. For fixtures we want + * full control over which versions exist, so we wipe that initial version before wiring.

+ * + * @param context fixture build context + * @return a freshly populated dataset with an empty version list + */ + private Dataset createEmptyDataset(BuildContext context) { + Dataset dataset = new Dataset(); + populator.populateDataset(dataset, context); + // DatasetType comes from the recipe, not the populator, because it is a shared + // reference entity that must pre-exist in the database. The recipe either wraps + // a pre-existing instance or builds one from scalar values for this fixture. + dataset.setDatasetType(datasetRecipe.datasetTypeRecipe().datasetType()); + dataset.setVersions(new ArrayList<>()); + return dataset; + } + + /** + * Creates a {@link DatasetVersion} populated by the configured populator. + * + * @param context fixture build context + * @return a freshly populated dataset version + */ + private DatasetVersion createDatasetVersion(BuildContext context) { + DatasetVersion version = new DatasetVersion(); + populator.populateDatasetVersion(version, context); + return version; + } + + /** + * Iterates over all file recipes for the current version and builds each file in order. + * + *

Each file gets a globally unique index across all file recipes in the version. That + * keeps populator-generated values such as labels deterministic and unique across the + * whole version.

+ * + * @param currentVersion current dataset version receiving the files + * @param context fixture build context + * @param accumulator accumulator collecting all generated entities + */ + private void buildVersionFiles( + DatasetVersion currentVersion, + BuildContext context, + BuildAccumulator accumulator + ) { + VersionRecipe versionRecipe = datasetRecipe.currentVersionRecipe(); + List fileRecipes = versionRecipe.fileRecipes(); + + // Files within a single version need globally unique indices, even though each recipe + // describes its own count. We track that separately from the recipe-local index. + int globalFileIndex = 0; + + for (FileRecipe fileRecipe : fileRecipes) { + for (int fileIndex = 0; fileIndex < fileRecipe.fileCount(); fileIndex++, globalFileIndex++) { + FileBuildContext fileContext = new FileBuildContext(fileRecipe, globalFileIndex); + buildFile(currentVersion, fileRecipe, fileContext, context, accumulator); + } + } + } + + /** + * Builds a single file: a {@link DataFile} and its current {@link FileMetadata}, plus its + * tabular subgraph if the recipe says the file is tabular. + * + * @param currentVersion owning dataset version + * @param fileRecipe the file recipe describing this file + * @param fileContext context describing this individual file + * @param context fixture build context + * @param accumulator accumulator collecting all generated entities + */ + private void buildFile( + DatasetVersion currentVersion, + FileRecipe fileRecipe, + FileBuildContext fileContext, + BuildContext context, + BuildAccumulator accumulator + ) { + // Always create the data file plus its current-version file metadata. 
+ DataFile dataFile = new DataFile(); + populator.populateDataFile(dataFile, fileContext, context); + + FileMetadata fileMetadata = new FileMetadata(); + populator.populateFileMetadata(fileMetadata, fileContext, context); + + wireFileMetadata(currentVersion, fileMetadata, dataFile); + accumulator.addDataFile(dataFile); + accumulator.addFileMetadata(fileMetadata); + + // Tabular structure is only created when the recipe says so. + if (fileRecipe instanceof FileRecipe.Tabular tabularRecipe) { + buildTabularStructure(tabularRecipe, fileContext, dataFile, fileMetadata, context, accumulator); + } + } + + /** + * Builds the tabular structure for a file: one {@link DataTable}, its variables, the + * variable metadata, and the var group. + * + * @param tabularRecipe tabular file recipe + * @param fileContext file build context + * @param dataFile owning data file + * @param fileMetadata current-version file metadata of the data file + * @param context fixture build context + * @param accumulator accumulator collecting all generated entities + */ + private void buildTabularStructure( + FileRecipe.Tabular tabularRecipe, + FileBuildContext fileContext, + DataFile dataFile, + FileMetadata fileMetadata, + BuildContext context, + BuildAccumulator accumulator + ) { + DataTable dataTable = new DataTable(); + populator.populateDataTable(dataTable, fileContext, context); + wireDataTable(dataFile, dataTable); + accumulator.addDataTable(dataTable); + + VariableSetBuildContext variableSetContext = + new VariableSetBuildContext(tabularRecipe, fileContext.fileIndex()); + + VariableSetRecipe variableSetRecipe = tabularRecipe.variableSetRecipe(); + int variableCount = variableSetRecipe.variableCount(variableSetContext); + + // Build all variables for this table, then optionally attach variable metadata + // to (FileMetadata, DataVariable) pairs that the recipe says should have it. 
+ List fileVariables = buildVariables( + dataTable, + variableSetContext, + variableCount, + context, + accumulator + ); + + dataTable.setVarQuantity((long) variableCount); + + buildVariableMetadata( + fileMetadata, + fileVariables, + tabularRecipe, + variableSetRecipe.variableMetadataRecipe(), + fileContext.fileIndex(), + accumulator + ); + + // Currently every non-empty tabular file gets exactly one var group with all variables. + // This will become recipe-driven once we introduce a dedicated VarGroupRecipe. + if (!fileVariables.isEmpty()) { + buildVarGroup(fileMetadata, fileVariables, fileContext, context, accumulator); + } + } + + /** + * Creates the requested number of {@link DataVariable} entities and wires them to the table. + * + * @param dataTable owning data table + * @param variableSetContext variable-set build context + * @param variableCount number of variables to create + * @param context fixture build context + * @param accumulator accumulator collecting all generated entities + * @return the variables created for this file/table, in order + */ + private List buildVariables( + DataTable dataTable, + VariableSetBuildContext variableSetContext, + int variableCount, + BuildContext context, + BuildAccumulator accumulator + ) { + List fileVariables = new ArrayList<>(variableCount); + + for (int variableIndex = 0; variableIndex < variableCount; variableIndex++) { + DataVariable dataVariable = new DataVariable(); + populator.populateDataVariable(dataVariable, variableSetContext, variableIndex, context); + wireDataVariable(dataTable, dataVariable); + + fileVariables.add(dataVariable); + accumulator.addDataVariable(dataVariable); + } + + return fileVariables; + } + + /** + * Creates {@link VariableMetadata} rows for the (file metadata, variable) pairs the recipe + * says should receive metadata. + * + *

Each metadata entity links one {@link DataVariable} and one {@link FileMetadata}. + * Because the schema enforces uniqueness on that pair, we create at most one metadata + * row per variable for the given file metadata.

+ * + * @param fileMetadata file metadata for the current version + * @param fileVariables variables in the file's tabular structure + * @param tabularRecipe tabular file recipe + * @param metadataRecipe variable-metadata recipe deciding which pairs get metadata + * @param fileIndex zero-based file index + * @param accumulator accumulator collecting all generated entities + */ + private void buildVariableMetadata( + FileMetadata fileMetadata, + List fileVariables, + FileRecipe.Tabular tabularRecipe, + VariableMetadataRecipe metadataRecipe, + int fileIndex, + BuildAccumulator accumulator + ) { + for (int variableIndex = 0; variableIndex < fileVariables.size(); variableIndex++) { + VariableMetadataBuildContext metadataContext = + new VariableMetadataBuildContext(tabularRecipe, fileIndex, variableIndex); + + if (!metadataRecipe.createFor(metadataContext)) { + continue; + } + + DataVariable variable = fileVariables.get(variableIndex); + VariableMetadata metadata = new VariableMetadata(variable, fileMetadata); + populator.populateVariableMetadata(metadata, metadataContext); + + wireVariableMetadata(fileMetadata, variable, metadata); + accumulator.addVariableMetadata(metadata); + } + } + + /** + * Creates a {@link VarGroup} containing all variables of a tabular file and attaches it + * to the file metadata. 
+ * + * @param fileMetadata file metadata receiving the var group + * @param fileVariables variables in the file's tabular structure + * @param fileContext file build context + * @param context fixture build context + * @param accumulator accumulator collecting all generated entities + */ + private void buildVarGroup( + FileMetadata fileMetadata, + List fileVariables, + FileBuildContext fileContext, + BuildContext context, + BuildAccumulator accumulator + ) { + VarGroup varGroup = new VarGroup(); + populator.populateVarGroup(varGroup, fileContext, FIRST_AND_ONLY_VAR_GROUP_INDEX, context); + wireVarGroup(fileMetadata, varGroup, fileVariables); + accumulator.addVarGroup(varGroup); + } + + /** + * Wires a dataset and its current version together. + * + * @param dataset dataset root + * @param version current dataset version + */ + private void wireDatasetAndVersion(Dataset dataset, DatasetVersion version) { + version.setDataset(dataset); + dataset.getVersions().add(version); + } + + /** + * Wires file metadata to its dataset version and underlying data file. + * + * @param datasetVersion owning dataset version + * @param fileMetadata file metadata to wire + * @param dataFile data file to wire + */ + private void wireFileMetadata(DatasetVersion datasetVersion, FileMetadata fileMetadata, DataFile dataFile) { + fileMetadata.setDatasetVersion(datasetVersion); + fileMetadata.setDataFile(dataFile); + datasetVersion.getFileMetadatas().add(fileMetadata); + dataFile.getFileMetadatas().add(fileMetadata); + } + + /** + * Wires a data table to its data file. + * + * @param dataFile parent data file + * @param dataTable child data table + */ + private void wireDataTable(DataFile dataFile, DataTable dataTable) { + dataTable.setDataFile(dataFile); + dataFile.getDataTables().add(dataTable); + } + + /** + * Wires a data variable to its data table. 
+ * + * @param dataTable parent data table + * @param dataVariable child data variable + */ + private void wireDataVariable(DataTable dataTable, DataVariable dataVariable) { + dataVariable.setDataTable(dataTable); + dataTable.getDataVariables().add(dataVariable); + } + + /** + * Wires a variable group to file metadata and assigns the supplied variables to that group. + * + * @param fileMetadata owning file metadata + * @param varGroup variable group to wire + * @param variables variables to include in the group + */ + private void wireVarGroup(FileMetadata fileMetadata, VarGroup varGroup, List variables) { + varGroup.setFileMetadata(fileMetadata); + varGroup.getVarsInGroup().addAll(variables); + fileMetadata.getVarGroups().add(varGroup); + } + + /** + * Wires a variable metadata row into both inverse collections (file metadata and variable). + * + * @param fileMetadata file metadata side of the pair + * @param variable variable side of the pair + * @param metadata variable metadata to wire + */ + private void wireVariableMetadata(FileMetadata fileMetadata, DataVariable variable, VariableMetadata metadata) { + fileMetadata.getVariableMetadatas().add(metadata); + variable.getVariableMetadatas().add(metadata); + } + + /** + * Internal accumulator collecting all generated entities so the fixture can expose them. + * + *

This keeps the build helper methods compact and avoids passing many lists around.

+ */ + private static final class BuildAccumulator { + + private final List fileMetadatas = new ArrayList<>(); + private final List dataFiles = new ArrayList<>(); + private final List dataTables = new ArrayList<>(); + private final List dataVariables = new ArrayList<>(); + private final List varGroups = new ArrayList<>(); + private final List variableMetadata = new ArrayList<>(); + + void addFileMetadata(FileMetadata fileMetadata) { + fileMetadatas.add(fileMetadata); + } + + void addDataFile(DataFile dataFile) { + dataFiles.add(dataFile); + } + + void addDataTable(DataTable dataTable) { + dataTables.add(dataTable); + } + + void addDataVariable(DataVariable dataVariable) { + dataVariables.add(dataVariable); + } + + void addVarGroup(VarGroup varGroup) { + varGroups.add(varGroup); + } + + void addVariableMetadata(VariableMetadata metadata) { + variableMetadata.add(metadata); + } + + DatasetFixture toFixture(Dataset dataset, DatasetType datasetType, DatasetVersion currentVersion) { + return new DatasetFixture( + dataset, + datasetType, + currentVersion, + fileMetadatas, + dataFiles, + dataTables, + dataVariables, + varGroups, + variableMetadata + ); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java new file mode 100644 index 00000000000..254295d8c3b --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java @@ -0,0 +1,81 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.branding.BrandingUtilTest; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe; +import 
edu.harvard.iq.dataverse.util.testing.recipes.DatasetTypeRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe; +import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.time.Instant; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class DatasetFixtureTest { + + @BeforeAll + static void setUp() { + BrandingUtilTest.setupMocks(); + // Let MPCONFIG init and cache the lookup classes + JvmSettings.PREFIX.lookupOptional(); + } + + @AfterAll + static void tearDown() { + BrandingUtilTest.tearDownMocks(); + } + + @Test + void smoketest() { + + var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.tabular(10, VariableSetRecipe.uniform(10)), + //FileRecipe.tabular(50, VariableSetRecipe.byPredicate()), + //FileRecipe.tabular(50, VariableSetRecipe.byRandom(10, 1000, 12345)), + FileRecipe.regular(1) + ) + ); + + Instant start = Instant.now(); + + var fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .populator(FixturePopulator.minimal()) + .build(); + + Instant finish = Instant.now(); + System.out.println("build: " + Duration.between(start, finish).toMillis() + " msec"); + + start = Instant.now(); + + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (FileMetadata fileMetadata : fixture.fileMetadatas()) { + DataFile dataFile = fileMetadata.getDataFile(); + jab.add(JsonPrinter.json(dataFile, fileMetadata, true)); + } + var result = jab.build(); + + finish = Instant.now(); + System.out.println("convert: " + Duration.between(start, finish).toMillis() + " msec"); + + assertNotNull(result); + + start = Instant.now(); + result.toString(); + finish = Instant.now(); + 
System.out.println("print: " + Duration.between(start, finish).toMillis() + " msec"); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java new file mode 100644 index 00000000000..87d2189c1e9 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java @@ -0,0 +1,124 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataTable; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.datavariable.VarGroup; +import edu.harvard.iq.dataverse.datavariable.VariableMetadata; +import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext; + +/** + * Populator interface responsible for initializing scalar/non-relationship fields of + * generated fixture entities. + * + *

The builder/wiring layer is responsible for graph structure and + * relationship correctness. This population layer is responsible for making sure + * entities are also "safe enough" to serialize and persist by filling required + * or null-sensitive scalar fields and collections.

+ * + *

This separation keeps shape decisions in recipes and scalar defaults here.

+ */ +public interface FixturePopulator { + + /** + * Populates scalar fields and safe defaults for a dataset. + * + * @param dataset dataset being initialized + * @param context fixture build context + */ + void populateDataset(Dataset dataset, BuildContext context); + + /** + * Populates scalar fields and safe defaults for a dataset version. + * + * @param version dataset version being initialized + * @param context fixture build context + */ + void populateDatasetVersion(DatasetVersion version, BuildContext context); + + /** + * Populates scalar fields and safe defaults for file metadata. + * + * @param fileMetadata file metadata being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + void populateFileMetadata(FileMetadata fileMetadata, FileBuildContext fileBuildContext, BuildContext context); + + /** + * Populates scalar fields and safe defaults for a data file. + * + * @param dataFile data file being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, BuildContext context); + + /** + * Populates scalar fields and safe defaults for a data table. + * + * @param dataTable data table being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, BuildContext context); + + /** + * Populates scalar fields and safe defaults for a data variable. 
+ * + * @param dataVariable data variable being initialized + * @param variableBuildContext variable set build context + * @param variableIndex zero-based variable index within the file/table + * @param context fixture build context + */ + void populateDataVariable( + DataVariable dataVariable, + VariableSetBuildContext variableBuildContext, + int variableIndex, + BuildContext context + ); + + /** + * Populates scalar fields and safe defaults for metadata of a variable. + * + * @param metadata variable metadata being initialized + * @param variableMetadataBuildContext variable metadata build context + */ + void populateVariableMetadata( + VariableMetadata metadata, + VariableMetadataBuildContext variableMetadataBuildContext + ); + + /** + * Populates scalar fields and safe defaults for a variable group. + * + * @param varGroup var group being initialized + * @param fileBuildContext file build context + * @param groupIndex zero-based group index within the file + * @param context fixture build context + */ + void populateVarGroup( + VarGroup varGroup, + FileBuildContext fileBuildContext, + int groupIndex, + BuildContext context + ); + + /** + * Returns a deterministic, minimal-safe entity populator. + * + *

This implementation is intentionally conservative. It sets enough fields + * for fixture graphs to be usable in persistence and serialization tests, + * without trying to simulate realistic production content yet.

+ * + * @return standard, minimal, and deterministic field populator + */ + static FixturePopulator minimal() { + return new MinimalPopulator(); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java new file mode 100644 index 00000000000..7add6a5b4b4 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java @@ -0,0 +1,180 @@ +package edu.harvard.iq.dataverse.util.testing.fixtures; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataTable; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.datavariable.VarGroup; +import edu.harvard.iq.dataverse.datavariable.VariableMetadata; +import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext; +import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext; + +import java.util.ArrayList; +import java.util.HashSet; + +public final class MinimalPopulator implements FixturePopulator { + + /** + * Populates basic dataset scalar fields. 
+ * + * @param dataset dataset being initialized + * @param context fixture build context + */ + @Override + public void populateDataset(Dataset dataset, BuildContext context) { + dataset.setProtocol("doi"); + dataset.setAuthority("10.5072"); + dataset.setIdentifier("fixture-dataset-" + context.sequence()); + dataset.setStorageIdentifier("fixture-storage-" + context.sequence()); + + // necessary as DvObject says "not nullable" + dataset.setCreateDate(context.getTimestamp()); + dataset.setModificationTime(context.getTimestamp()); + } + + /** + * Populates basic dataset-version scalar fields, timestamps, and terms. + * + * @param version dataset version being initialized + * @param context fixture build context + */ + @Override + public void populateDatasetVersion(DatasetVersion version, BuildContext context) { + version.setVersionNumber(1L); + version.setMinorVersionNumber(0L); + version.setVersionState(DatasetVersion.VersionState.DRAFT); + version.setVersionNote("fixture-version"); + version.setCreateTime(context.getDate()); + version.setLastUpdateTime(context.getDate()); + + // TermsOfUseAndAccess and DatasetVersion are mutually linked via a OneToOne. + // The validator reads datasetVersion from the terms object, so both sides + // must be wired before the entity graph is persisted. + TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); + terms.setDatasetVersion(version); + version.setTermsOfUseAndAccess(terms); + } + + /** + * Populates basic file-metadata scalar fields. 
+ * + * @param fileMetadata file metadata being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + @Override + public void populateFileMetadata(FileMetadata fileMetadata, FileBuildContext fileBuildContext, BuildContext context) { + fileMetadata.setLabel("file-" + fileBuildContext.fileIndex() + ".tab"); + fileMetadata.setDescription("Fixture file " + fileBuildContext.fileIndex()); + fileMetadata.setVarGroups(new ArrayList<>()); + fileMetadata.setVariableMetadatas(new ArrayList<>()); + } + + /** + * Populates basic data-file scalar fields and null-sensitive defaults. + * + * @param dataFile data file being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + @Override + public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, BuildContext context) { + dataFile.setContentType("text/tab-separated-values"); + dataFile.setChecksumType(DataFile.ChecksumType.SHA1); + dataFile.setChecksumValue("fixture-checksum-" + fileBuildContext.fileIndex()); + dataFile.setFilesize(1024L + fileBuildContext.fileIndex()); + dataFile.setDataTables(new ArrayList<>()); + dataFile.setFileMetadatas(new ArrayList<>()); + dataFile.setTags(new ArrayList<>()); + + // necessary as DvObject says "not nullable" + dataFile.setCreateDate(context.getTimestamp()); + dataFile.setModificationTime(context.getTimestamp()); + } + + /** + * Populates basic data-table scalar fields and variable collection defaults. 
+ * + * @param dataTable data table being initialized + * @param fileBuildContext file build context + * @param context fixture build context + */ + @Override + public void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, BuildContext context) { + dataTable.setVarQuantity(0L); + dataTable.setCaseQuantity(100L); + dataTable.setRecordsPerCase(1L); + dataTable.setUnf("UNF:fixture-table-" + fileBuildContext.fileIndex()); + dataTable.setDataVariables(new ArrayList<>()); + dataTable.setOriginalFileFormat("text/tab-separated-values"); + dataTable.setOriginalFileName("fixture-original-" + fileBuildContext.fileIndex() + ".tab"); + dataTable.setOriginalFileSize(2048L + fileBuildContext.fileIndex()); + } + + /** + * Populates basic data-variable scalar fields and initializes collections + * that are null-sensitive in serialization. + * + * @param dataVariable data variable being initialized + * @param variableSetBuildContext larger context of the data variable being populated + * @param variableIndex zero-based variable index within the file/table + * @param context fixture build context + */ + @Override + public void populateDataVariable( + DataVariable dataVariable, + VariableSetBuildContext variableSetBuildContext, + int variableIndex, + BuildContext context + ) { + dataVariable.setName("var_" + variableSetBuildContext.fileIndex() + "_" + variableIndex); + dataVariable.setLabel("Variable " + variableSetBuildContext.fileIndex() + "/" + variableIndex); + dataVariable.setType(DataVariable.VariableType.NUMERIC); + dataVariable.setFileOrder(variableIndex); + dataVariable.setUnf("UNF:fixture-var-" + variableSetBuildContext.fileIndex() + "-" + variableIndex); + dataVariable.setInvalidRanges(new ArrayList<>()); + dataVariable.setSummaryStatistics(new ArrayList<>()); + dataVariable.setCategories(new ArrayList<>()); + dataVariable.setVariableMetadatas(new ArrayList<>()); + dataVariable.setInvalidRangeItems(new ArrayList<>()); + } + + /** + * Populates 
metadata for a data variable. Updates the label with a unique identifier + * generated based on the provided build context. + * + * @param metadata the variable metadata object to be populated + * @param variableMetadataBuildContext the context containing information about + * the variable, including file and variable indices + */ + @Override + public void populateVariableMetadata(VariableMetadata metadata, VariableMetadataBuildContext variableMetadataBuildContext) { + metadata.setLabel("variable-metadata-" + variableMetadataBuildContext.fileIndex() + + "-" + variableMetadataBuildContext.variableIndex()); + } + + /** + * Populates basic variable-group scalar fields and initializes the backing + * variable set. + * + * @param varGroup var group being initialized + * @param fileBuildContext file build context + * @param groupIndex zero-based group index within the file + * @param context fixture build context + */ + @Override + public void populateVarGroup( + VarGroup varGroup, + FileBuildContext fileBuildContext, + int groupIndex, + BuildContext context + ) { + varGroup.setLabel("group-" + fileBuildContext.fileIndex() + "-" + groupIndex); + varGroup.setVarsInGroup(new HashSet<>()); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java new file mode 100644 index 00000000000..a2b9927638e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java @@ -0,0 +1,135 @@ +package edu.harvard.iq.dataverse.util.testing.performance; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.EntityManagerFactory; +import jakarta.persistence.EntityTransaction; +import jakarta.persistence.Persistence; +import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder; + +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.SQLException; +import 
java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; + +/** + * Service class managing the lifecycle and operations of an {@link EntityManagerFactory} + * for JPA-based persistence. This class is responsible for configuring the persistence + * unit, initializing the factory, and providing utility methods to interact with JPA + * entities within transactions. + * + * Implementation contracts: + * - The service must be explicitly started with the {@code start()} method before usage. + * - Resources are properly released when the service is closed via the {@code close()} method. + * - Transactions are managed and isolated when executing database operations. + * + * Use cases: + * - Configure and initialize an {@link EntityManagerFactory} with a non-JTA datasource. + * - Manage entity operations within transactions, supporting both functional and void work units. + * - Validate the underlying datasource and factory to ensure system integrity. 
+ */
+public class JpaEntityManagerService implements AutoCloseable {
+
+    public static final String PERSISTENCE_UNIT = "VDCNet-ejbPU-test";
+
+    private final DataSource baseDataSource;
+    private DataSource proxiedDataSource;
+    private EntityManagerFactory emf;
+
+    public JpaEntityManagerService(DataSource dataSource) {
+        this.baseDataSource = dataSource;
+    }
+
+    public void start() {
+        if (emf != null) {
+            throw new IllegalStateException("JpaEntityManagerService has already been started.");
+        }
+
+        proxiedDataSource = ProxyDataSourceBuilder.create()
+            .dataSource(baseDataSource)
+            .countQuery()
+            .buildProxy();
+
+        validateDataSource(proxiedDataSource);
+
+        Map<String, Object> properties = new HashMap<>();
+        properties.put("jakarta.persistence.nonJtaDataSource", proxiedDataSource);
+        properties.put("jakarta.persistence.schema-generation.database.action", "create");
+
+        emf = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT, properties);
+
+        validateEntityManagerFactory();
+    }
+
+    public DataSource getDataSource() {
+        ensureStarted();
+        return proxiedDataSource;
+    }
+
+    public EntityManager createEntityManager() {
+        ensureStarted();
+        return emf.createEntityManager();
+    }
+
+    public EntityManagerFactory getEntityManagerFactory() {
+        ensureStarted();
+        return emf;
+    }
+
+    public <T> T inTransaction(Function<EntityManager, T> work) {
+        EntityManager em = createEntityManager();
+        EntityTransaction tx = em.getTransaction();
+        try {
+            tx.begin();
+            T result = work.apply(em);
+            tx.commit();
+            return result;
+        } catch (RuntimeException e) {
+            if (tx.isActive()) {
+                tx.rollback();
+            }
+            throw e;
+        } finally {
+            em.close();
+        }
+    }
+
+    public void inTransactionVoid(Consumer<EntityManager> work) {
+        inTransaction(em -> {
+            work.accept(em);
+            return null;
+        });
+    }
+
+    private void validateDataSource(DataSource dataSource) {
+        try (Connection connection = dataSource.getConnection()) {
+            if (!connection.isValid(5)) {
+                throw new IllegalStateException("DataSource connection is not valid");
+            }
+        } catch (SQLException e) {
+ throw new IllegalStateException("Failed to validate DataSource", e); + } + } + + private void validateEntityManagerFactory() { + EntityManager entityManager = emf.createEntityManager(); + entityManager.close(); + } + + private void ensureStarted() { + if (emf == null) { + throw new IllegalStateException("JpaEntityManagerService has not been started yet - did you run .start()?"); + } + } + + @Override + public void close() { + if (emf != null && emf.isOpen()) { + emf.close(); + } + emf = null; + proxiedDataSource = null; + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java new file mode 100644 index 00000000000..6f11183307a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java @@ -0,0 +1,34 @@ +package edu.harvard.iq.dataverse.util.testing.performance; + +import edu.harvard.iq.dataverse.util.testing.Tags; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Marker annotation for JPA performance tests using Testcontainers. + *
<p>
+ * Applies automatic tags, enforces Testcontainers availability (skips if Docker is missing), + * and registers a custom extension to manage a shared PostgreSQL container and database isolation. + *
<p>
+ * Contract: Test classes using this annotation MUST declare a {@code static JpaEntityManagerService} field. + * Note: Due to the underlying extension's shared container management, test classes annotated with this + * will execute sequentially to prevent container state races. + */ +@Target(ElementType.TYPE) +@Retention(RetentionPolicy.RUNTIME) +@Tag(Tags.PERFORMANCE_TEST) +@Tag(Tags.USES_TESTCONTAINERS) +@Testcontainers(disabledWithoutDocker = true) +@ExtendWith(JpaPerformanceTestExtension.class) +// Make sure the test methods are never run in parallel - this would be bad for a performance test... +@Execution(ExecutionMode.SAME_THREAD) +public @interface JpaPerformanceTest { +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java new file mode 100644 index 00000000000..997da6aab84 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java @@ -0,0 +1,135 @@ +package edu.harvard.iq.dataverse.util.testing.performance; + +import org.apache.commons.dbcp2.BasicDataSource; +import org.junit.jupiter.api.extension.AfterAllCallback; +import org.junit.jupiter.api.extension.BeforeAllCallback; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.testcontainers.postgresql.PostgreSQLContainer; + +import java.lang.reflect.Field; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.UUID; + +import static java.lang.reflect.Modifier.isStatic; + +/** + * JUnit 5 Extension that manages a shared PostgreSQL container for performance tests. + * It ensures a unique database is created for each test class to guarantee isolation. 
+ */
+public class JpaPerformanceTestExtension implements BeforeAllCallback, AfterAllCallback {
+
+    // Global shared container
+    private static PostgreSQLContainer<?> sharedContainer;
+
+    // This lock makes sure all tests using this extension are executed sequentially.
+    // For performance tests, executing test classes in parallel for the same, shared DB instance makes no sense.
+    // There is no JUnit way to express such a "global lock", thus we need to do this manually.
+    // Note: avoiding parallelism of test methods are done by the @JpaPerformanceTest annotation.
+    private static final Object CONTAINER_LOCK = new Object();
+
+    // Store the service instance to close it in AfterAll
+    private static final String SERVICE_FIELD_KEY = "jpa.service.instance";
+
+    @Override
+    public void beforeAll(ExtensionContext context) throws Exception {
+        // 1. Ensure the global container is running
+        ensureSharedContainerRunning();
+
+        // 2. Create a unique database for this test class
+        String uniqueDbName = "perf_test_" + UUID.randomUUID().toString().substring(0, 8);
+        createDatabase(uniqueDbName);
+
+        // 3. Retrieve the JPA Service and inject into the test class field
+        JpaEntityManagerService service = getService(uniqueDbName);
+        injectService(context, service);
+
+        // 4. Store reference for cleanup
+        context.getStore(ExtensionContext.Namespace.GLOBAL).put(SERVICE_FIELD_KEY, service);
+    }
+
+    @Override
+    public void afterAll(ExtensionContext context) {
+        // Close the EntityManagerFactory and connections
+        JpaEntityManagerService service = (JpaEntityManagerService) context.getStore(ExtensionContext.Namespace.GLOBAL).get(SERVICE_FIELD_KEY);
+        if (service != null) {
+            try {
+                service.close();
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+        // Note: We do NOT stop the sharedContainer here.
+        // It stays running for the next test class.
+    }
+
+    // --- Helper Methods ---
+
+    private void ensureSharedContainerRunning() {
+        synchronized (CONTAINER_LOCK) {
+            if (sharedContainer == null || !sharedContainer.isRunning()) {
+                String pgVersion = System.getProperty("postgresql.server.version", "16");
+                sharedContainer = new PostgreSQLContainer<>("postgres:" + pgVersion);
+                sharedContainer.start();
+            }
+        }
+    }
+
+    private void createDatabase(String dbName) {
+        try (Connection conn = DriverManager.getConnection(
+                sharedContainer.getJdbcUrl(),
+                sharedContainer.getUsername(),
+                sharedContainer.getPassword())) {
+
+            // Postgres requires auto-commit to be true for CREATE DATABASE
+            conn.setAutoCommit(true);
+            Statement stmt = conn.createStatement();
+            stmt.execute("CREATE DATABASE " + dbName);
+        } catch (SQLException e) {
+            // Ignore if DB already exists (unlikely with UUID, but safe)
+            if (!e.getMessage().contains("already exists")) {
+                throw new RuntimeException("Failed to create test database: " + dbName, e);
+            }
+        }
+    }
+
+    private void injectService(ExtensionContext context, JpaEntityManagerService service) throws Exception {
+        Class<?> testClass = context.getRequiredTestClass();
+        boolean hasBeenInjected = false;
+
+        // Look for a static field of type JpaService
+        for (Field field : testClass.getDeclaredFields()) {
+            if (field.getType() == JpaEntityManagerService.class) {
+                if (!isStatic(field.getModifiers())) {
+                    throw new RuntimeException("Cannot inject into field '" + field.getName() + "' of class '" + testClass.getName() + "': not a static field");
+                }
+                if (hasBeenInjected) {
+                    throw new RuntimeException("Cannot inject into field '" + field.getName() + "' of class '" + testClass.getName() + "': only one target field allowed");
+                }
+                field.setAccessible(true);
+                field.set(null, service);
+                hasBeenInjected = true;
+            }
+        }
+
+        if (!hasBeenInjected) {
+            throw new RuntimeException("Could not inject into a static field of class '" + testClass.getName() + "': no field found");
+        }
+    }
+
+    private static
JpaEntityManagerService getService(String uniqueDbName) { + // Tune the URL as we need to apply our unique DB name (the container has a default one we override) + String tunedJdbcUrl = sharedContainer.getJdbcUrl() + .replaceFirst("/" + sharedContainer.getDatabaseName(), "/" + uniqueDbName); + + // Configure a pooled (!) DataSource for this unique database + BasicDataSource dataSource = new BasicDataSource(); + dataSource.setUrl(tunedJdbcUrl); + dataSource.setUsername(sharedContainer.getUsername()); + dataSource.setPassword(sharedContainer.getPassword()); + + return new JpaEntityManagerService(dataSource); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java new file mode 100644 index 00000000000..3c01e275ad9 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java @@ -0,0 +1,57 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +import java.util.Objects; + +/** + * Top-level recipe describing how to construct a {@code Dataset} fixture. + * + *
<p>
This is intentionally rooted at the dataset level rather than the dataset + * version level, so the fixture system can later support scenarios involving + * multiple versions, different current-version shapes, and dataset-level + * performance tests.
</p>
+ * + *
<p>
For the initial implementation, a dataset recipe exposes exactly one + * "current version" recipe. This keeps the model simple while leaving room + * to evolve later.
</p>
+ */ +public interface DatasetRecipe { + + /** + * Returns the dataset type recipe providing the type to assign. + * + * @return dataset type recipe + */ + DatasetTypeRecipe datasetTypeRecipe(); + + /** + * Returns the recipe describing the current version of the dataset. + * + * @return recipe for the current dataset version + */ + VersionRecipe currentVersionRecipe(); + + /** + * Creates a dataset recipe with the supplied type and version recipes. + * + * @param datasetTypeRecipe recipe providing the dataset type + * @param currentVersionRecipe recipe for the current dataset version + * @return a dataset recipe + */ + static DatasetRecipe of(DatasetTypeRecipe datasetTypeRecipe, VersionRecipe currentVersionRecipe) { + Objects.requireNonNull(datasetTypeRecipe, "datasetTypeRecipe must not be null"); + Objects.requireNonNull(currentVersionRecipe, "currentVersionRecipe must not be null"); + return new SimpleDatasetRecipe(datasetTypeRecipe, currentVersionRecipe); + } + + /** + * Minimal immutable implementation of {@link DatasetRecipe}. + * + * @param datasetTypeRecipe recipe providing the dataset type + * @param currentVersionRecipe recipe for the current dataset version + */ + record SimpleDatasetRecipe( + DatasetTypeRecipe datasetTypeRecipe, + VersionRecipe currentVersionRecipe + ) implements DatasetRecipe { + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java new file mode 100644 index 00000000000..8a78f37b2b1 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java @@ -0,0 +1,93 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +import edu.harvard.iq.dataverse.dataset.DatasetType; + +/** + * Recipe providing the {@link DatasetType} to assign to a generated dataset fixture. + * + *
<p>
Unlike structural recipes such as {@link VersionRecipe} or {@link FileRecipe}, + * this is not a construction recipe. It is a reference/creation provider — the + * dataset type it produces is expected to be persisted before the dataset fixture + * is committed to the database.
</p>
+ * + *
<p>
Two factory styles are available:
</p>
+ *
    + *
  • {@link #of(String, String, String)} — fluent factory that creates a new + * {@link DatasetType} from scalar values. Use this when generating a single + * dataset fixture and you want a self-contained recipe.
  • + *
  • {@link #of(DatasetType)} — wraps a pre-existing instance. Use this when + * you want to share the same type across multiple dataset recipes, or when + * the type has already been persisted elsewhere.
  • + *
+ */ +public interface DatasetTypeRecipe { + + /** + * Returns the dataset type to assign to the generated dataset. + * + *
<p>
The returned instance may be newly created or pre-existing, depending on + * the implementation. Either way, it must be persisted before the dataset + * fixture is committed to the database.
</p>
+ * + * @return dataset type instance + */ + DatasetType datasetType(); + + /** + * Creates a recipe that builds a new {@link DatasetType} from the supplied + * scalar values. + * + *
<p>
This is the preferred factory for single-dataset fixture scenarios where + * the type does not need to be reused or pre-built externally. The resulting + * type will need to be persisted before the dataset is committed.
</p>
+ * + * @param name machine-readable name used in APIs and stored in the database + * @param displayName human-readable name shown in the UI + * @param description optional description of the dataset type + * @return a dataset type recipe producing a new type from the supplied values + */ + static DatasetTypeRecipe of(String name, String displayName, String description) { + DatasetType datasetType = new DatasetType(); + datasetType.setName(name); + datasetType.setDisplayName(displayName); + datasetType.setDescription(description); + return new FixedDatasetTypeRecipe(datasetType); + } + + /** + * Creates a recipe that wraps a pre-existing {@link DatasetType} instance. + * + *
<p>
Use this when the type has already been persisted, or when you want to + * share the same type instance across multiple dataset recipes.
</p>
+ * + * @param datasetType pre-existing dataset type to use + * @return a dataset type recipe wrapping the supplied instance + */ + static DatasetTypeRecipe of(DatasetType datasetType) { + return new FixedDatasetTypeRecipe(datasetType); + } + + /** + * Creates a recipe using the standard {@value DatasetType#DATASET_TYPE_DATASET} + * dataset type with sensible display defaults. + * + *
<p>
This is a convenience shortcut for the most common fixture scenario, + * where you just need a valid persisted type and do not care about specific + * type semantics.
</p>
+ * + * @return a dataset type recipe for the default dataset type + */ + static DatasetTypeRecipe dataset() { + return of(DatasetType.DATASET_TYPE_DATASET, "Dataset", "Standard dataset type for fixtures"); + } + + /** + * Minimal immutable recipe holding a fixed dataset type instance. + * + * @param datasetType dataset type to return + */ + record FixedDatasetTypeRecipe( + DatasetType datasetType + ) implements DatasetTypeRecipe { + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java new file mode 100644 index 00000000000..cad16f95055 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java @@ -0,0 +1,15 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +/** + * Context object supplied while deciding how to build a file fixture. + * + *
<p>
For now this context only exposes the file index and the recipe which ordered the creation of the file. + * It exists as a dedicated type, so the API can grow later without constantly changing method signatures.
</p>
+ * + * @param fileIndex zero-based index of the file being created within a version + */ +public record FileBuildContext( + FileRecipe fileRecipe, + int fileIndex +) { +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java new file mode 100644 index 00000000000..8a2a5336c9a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java @@ -0,0 +1,28 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +public interface FileRecipe { + + /** + * Returns the total number of files to create. + * + * @return number of files in the generated dataset version + */ + int fileCount(); + + static FileRecipe tabular(int fileCount, VariableSetRecipe recipe) { + return new Tabular(fileCount, recipe); + } + + static FileRecipe regular(int fileCount) { + return new Regular(fileCount); + } + + record Tabular ( + int fileCount, + VariableSetRecipe variableSetRecipe + ) implements FileRecipe {} + + record Regular ( + int fileCount + ) implements FileRecipe {} +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java new file mode 100644 index 00000000000..f2ad493e4f0 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java @@ -0,0 +1,24 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +/** + * Context object supplied while deciding whether a variable should receive + * {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata}. + * + *
<p>
A variable metadata entry belongs to a specific pair of:
</p>
+ *
    + *
  • a file's metadata
  • + *
  • a variable in that file's tabular structure
  • + *
+ * + *
<p>
For now this context only exposes file and variable indices. It can grow + * later as fixture requirements become more sophisticated.
</p>
+ * + * @param fileIndex zero-based file index + * @param variableIndex zero-based variable index within the file/table + */ +public record VariableMetadataBuildContext( + FileRecipe.Tabular tabularRecipe, + int fileIndex, + int variableIndex +) { +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java new file mode 100644 index 00000000000..f71bb670db8 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java @@ -0,0 +1,87 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +import java.util.function.Predicate; + +/** + * Recipe describing whether a {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata} row should be created for + * a generated {@code (FileMetadata, DataVariable)} pair. + * + *
<p>
This is modeled as a yes/no decision because the current schema enforces + * uniqueness for each pair of {@code datavariable_id} and {@code filemetadata_id}. + * As filemetadata is associated with a single dataset version, this makes variable metadata versioned, too.
</p>
+ */ +public interface VariableMetadataRecipe { + + /** + * Returns whether metadata should be created for the supplied pair context. + * + * @param context build context describing the file-variable pair + * @return {@code true} if metadata should be created, otherwise {@code false} + */ + boolean createFor(VariableMetadataBuildContext context); + + /** + * Returns a recipe that never creates metadata. + * + * @return no-op recipe + */ + static VariableMetadataRecipe noop() { + return new Noop(); + } + + /** + * Returns a recipe that always creates metadata. + * + * @return always-on recipe + */ + static VariableMetadataRecipe always() { + return new Always(); + } + + /** + * Returns a predicate-driven metadata recipe. + * + * @param predicate predicate deciding whether metadata should be created + * @return predicate-based recipe + */ + static VariableMetadataRecipe byPredicate(Predicate predicate) { + return new PredicateBased(predicate); + } + + /** + * Recipe that never creates metadata. + */ + record Noop() implements VariableMetadataRecipe { + + @Override + public boolean createFor(VariableMetadataBuildContext context) { + return false; + } + } + + /** + * Recipe that always creates metadata. + */ + record Always() implements VariableMetadataRecipe { + + @Override + public boolean createFor(VariableMetadataBuildContext context) { + return true; + } + } + + /** + * Predicate-based metadata recipe. 
+ * + * @param predicate predicate deciding whether metadata should be created + */ + record PredicateBased( + Predicate predicate + ) implements VariableMetadataRecipe { + + @Override + public boolean createFor(VariableMetadataBuildContext context) { + return predicate.test(context); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java new file mode 100644 index 00000000000..48bdf06901b --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java @@ -0,0 +1,18 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +/** + * Context object supplied while deciding how many variables to create for a + * tabular file or table. + * + *
<p>
At present this only carries the file index. It is intentionally separated + * from {@link FileBuildContext} because variable population decisions may later + * need different context, such as table index, dataset version information, + * recipe seed, or file type details.
</p>
+ * + * @param fileIndex zero-based index of the file for which variables are being created + */ +public record VariableSetBuildContext( + FileRecipe.Tabular tabularRecipe, + int fileIndex +) { +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java new file mode 100644 index 00000000000..3a88eb2db94 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java @@ -0,0 +1,153 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Predicate; + +/** + * Recipe describing how many variables should be created for a tabular file + * or data table, and whether generated file-variable pairs should receive + * {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata}. + */ +public interface VariableSetRecipe { + + /** + * Returns the number of variables to create for the given context. + * + * @param context contextual information about the file/table being populated + * @return variable count to create + */ + int variableCount(VariableSetBuildContext context); + + /** + * Returns the recipe describing whether metadata should be created for a + * generated {@code (FileMetadata, DataVariable)} pair. + * + * @return variable metadata recipe + */ + VariableMetadataRecipe variableMetadataRecipe(); + + /** + * Creates a uniform variable set recipe with no metadata generation. + * + * @param variableCount uniform variable count + * @return uniform variable set recipe + */ + static VariableSetRecipe uniform(int variableCount) { + return new UniformVariableSetRecipe(variableCount, VariableMetadataRecipe.noop()); + } + + /** + * Creates a uniform variable set recipe with the supplied metadata recipe. 
+ * + * @param variableCount uniform variable count + * @param variableMetadataRecipe metadata recipe for generated pairs + * @return uniform variable set recipe + */ + static VariableSetRecipe uniform(int variableCount, VariableMetadataRecipe variableMetadataRecipe) { + return new UniformVariableSetRecipe(variableCount, variableMetadataRecipe); + } + + /** + * Creates a predicate-driven variable set recipe with no metadata generation. + * + * @return predicate-driven variable set recipe + */ + static PredicateVariableSetRecipe byPredicate() { + return new PredicateVariableSetRecipe(VariableMetadataRecipe.noop()); + } + + /** + * Creates a predicate-driven variable set recipe with the supplied metadata recipe. + * + * @param variableMetadataRecipe metadata recipe for generated pairs + * @return predicate-driven variable set recipe + */ + static PredicateVariableSetRecipe byPredicate(VariableMetadataRecipe variableMetadataRecipe) { + return new PredicateVariableSetRecipe(variableMetadataRecipe); + } + + /** + * Uniform variable set recipe. + * + * @param variableCount uniform variable count + * @param variableMetadataRecipe metadata recipe for generated pairs + */ + record UniformVariableSetRecipe( + int variableCount, + VariableMetadataRecipe variableMetadataRecipe + ) implements VariableSetRecipe { + + @Override + public int variableCount(VariableSetBuildContext context) { + return variableCount; + } + } + + /** + * Predicate-driven variable set recipe. + */ + final class PredicateVariableSetRecipe implements VariableSetRecipe { + + private final List rules = new ArrayList<>(); + private final VariableMetadataRecipe variableMetadataRecipe; + private int defaultCount = 0; + + public PredicateVariableSetRecipe(VariableMetadataRecipe variableMetadataRecipe) { + this.variableMetadataRecipe = variableMetadataRecipe; + } + + /** + * Adds a variable-count rule. 
+ * + * @param predicate rule predicate + * @param variableCount variable count to use when matched + * @return this recipe + */ + public PredicateVariableSetRecipe when( + Predicate predicate, + int variableCount + ) { + rules.add(new Rule(predicate, variableCount)); + return this; + } + + /** + * Sets the default variable count. + * + * @param variableCount default variable count + * @return this recipe + */ + public PredicateVariableSetRecipe otherwise(int variableCount) { + this.defaultCount = variableCount; + return this; + } + + @Override + public int variableCount(VariableSetBuildContext context) { + for (Rule rule : rules) { + if (rule.predicate().test(context)) { + return rule.variableCount(); + } + } + return defaultCount; + } + + @Override + public VariableMetadataRecipe variableMetadataRecipe() { + return variableMetadataRecipe; + } + + /** + * Internal immutable predicate rule. + * + * @param predicate match condition + * @param variableCount variable count to use when matched + */ + private record Rule( + Predicate predicate, + int variableCount + ) { + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java new file mode 100644 index 00000000000..450cb6940ca --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java @@ -0,0 +1,49 @@ +package edu.harvard.iq.dataverse.util.testing.recipes; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * Recipe describing how to construct a dataset version fixture. + * + *
<p>
At this stage, a version recipe is mainly responsible for delegating to a + * {@link FileRecipe}, which controls how files in that version are created.
</p>
+ * + *
<p>
Later, this type can be extended with more version-level concerns such as: + * draft/released state, timestamps, version numbering, or version-specific + * metadata enrichment.
</p>
+ */ +public interface VersionRecipe { + + /** + * Returns the file recipes for this dataset version. + * + * @return recipes governing file creation for the version + */ + List fileRecipes(); + + /** + * Creates a version recipe from a number of file recipes. + * + * @param fileRecipes recipes governing file creation + * @return a version recipe + */ + static VersionRecipe of(FileRecipe... fileRecipes) { + Objects.requireNonNull(fileRecipes, "fileRecipes may not be null"); + for (FileRecipe fileRecipe : fileRecipes) { + Objects.requireNonNull(fileRecipe, "fileRecipes must not contain null elements"); + } + return new SimpleVersionRecipe(Arrays.asList(fileRecipes)); + } + + /** + * Minimal immutable implementation of {@link VersionRecipe}. + * + * @param fileRecipe recipe governing file creation + */ + record SimpleVersionRecipe( + List fileRecipes + ) implements VersionRecipe { + } +} diff --git a/src/test/resources/META-INF/persistence.xml b/src/test/resources/META-INF/persistence.xml new file mode 100644 index 00000000000..e317a3c9c07 --- /dev/null +++ b/src/test/resources/META-INF/persistence.xml @@ -0,0 +1,34 @@ + + + + + org.eclipse.persistence.jpa.PersistenceProvider + + + ../classes + + false + + + + + + + + + + + + + + + + \ No newline at end of file