From 9be444eac7388705052860a58c42874ef9723aeb Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 17 Apr 2026 03:23:48 +0200
Subject: [PATCH 01/23] test(fixtures): add DatasetFixture API and builder with
initial support for tabular and standard files #11405
Introduced a new `DatasetFixture` structure and its builder API to simplify test dataset creation. This includes support for datasets, versions, files (tabular and standard), and associated metadata. Added unit tests and minimal populator implementation for scalar field initialization.
---
.../util/testing/fixtures/DatasetFixture.java | 49 ++++
.../fixtures/DatasetFixtureBuilder.java | 255 ++++++++++++++++++
.../testing/fixtures/DatasetFixtureTest.java | 77 ++++++
.../testing/fixtures/FixturePopulator.java | 116 ++++++++
.../testing/fixtures/MinimalPopulator.java | 154 +++++++++++
.../util/testing/recipes/DatasetRecipe.java | 43 +++
.../util/testing/recipes/FileRecipe.java | 28 ++
.../recipes/VariableMetadataBuildContext.java | 24 ++
.../recipes/VariableMetadataRecipe.java | 87 ++++++
.../recipes/VariableSetBuildContext.java | 18 ++
.../testing/recipes/VariableSetRecipe.java | 153 +++++++++++
.../util/testing/recipes/VersionRecipe.java | 49 ++++
12 files changed, 1053 insertions(+)
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
new file mode 100644
index 00000000000..a1c533e389a
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
@@ -0,0 +1,49 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DataTable;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.datavariable.DataVariable;
+import edu.harvard.iq.dataverse.datavariable.VarGroup;
+
+import java.util.List;
+
+/**
+ * Immutable holder for a generated dataset fixture graph.
+ *
+ * This object gives tests convenient access not only to the root
+ * {@link Dataset}, but also to the current {@link DatasetVersion} and all major
+ * generated child entities. That makes it easier to inspect, persist, or tweak
+ * the graph after building it.
+ *
+ * @param dataset root dataset
+ * @param currentVersion current dataset version
+ * @param fileMetadatas generated file metadata objects
+ * @param dataFiles generated data files
+ * @param dataTables generated data tables
+ * @param dataVariables generated data variables
+ * @param varGroups generated var groups
+ */
+public record DatasetFixture(
+ Dataset dataset,
+ DatasetVersion currentVersion,
+ List fileMetadatas,
+ List dataFiles,
+ List dataTables,
+ List dataVariables,
+ List varGroups
+) {
+
+ /**
+ * Compact constructor performing defensive copies of collection components.
+ */
+ public DatasetFixture {
+ fileMetadatas = List.copyOf(fileMetadatas);
+ dataFiles = List.copyOf(dataFiles);
+ dataTables = List.copyOf(dataTables);
+ dataVariables = List.copyOf(dataVariables);
+ varGroups = List.copyOf(varGroups);
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
new file mode 100644
index 00000000000..320e61735d1
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
@@ -0,0 +1,255 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DataTable;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.datavariable.DataVariable;
+import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Builder/wiring layer that consumes fixture recipes and produces a fully wired
+ * {@link Dataset} graph.
+ *
+ * This class is intentionally responsible for relationship correctness and
+ * collection initialization, while recipes are responsible for deciding graph
+ * shape and populators are responsible for scalar-field initialization.
+ *
+ * Current scope:
+ *
+ * - one dataset
+ * - one (current) dataset version
+ * - files created according to {@link FileRecipe}
+ * - tabular structure created according to {@link VariableSetRecipe}
+ *
+ */
+public class DatasetFixtureBuilder {
+
+ private static final AtomicLong SEQUENCE = new AtomicLong(1);
+
+ private DatasetRecipe recipe;
+ private FixturePopulator populator = FixturePopulator.minimal();
+
+ /**
+ * Creates a new builder instance.
+ *
+ * @return a fresh fixture builder
+ */
+ public static DatasetFixtureBuilder builder() {
+ return new DatasetFixtureBuilder();
+ }
+
+ /**
+ * Sets the recipe used to determine the graph shape.
+ *
+ * @param recipe dataset recipe to use
+ * @return this builder for fluent chaining
+ */
+ public DatasetFixtureBuilder recipe(DatasetRecipe recipe) {
+ this.recipe = Objects.requireNonNull(recipe);
+ return this;
+ }
+
+ /**
+ * Sets the scalar-field defaults policy.
+ *
+ * @param populator defaults policy to use
+ * @return this builder for fluent chaining
+ */
+ public DatasetFixtureBuilder populator(FixturePopulator populator) {
+ this.populator = Objects.requireNonNull(populator);
+ return this;
+ }
+
+ /**
+ * Builds a dataset fixture graph according to the configured recipe and populator.
+ *
+ * @return generated dataset fixture
+ */
+ public DatasetFixture build() {
+ if (recipe == null) {
+ throw new IllegalStateException("A DatasetRecipe must be configured before building.");
+ }
+
+ BuildContext context = new BuildContext(SEQUENCE.getAndIncrement());
+
+ Dataset dataset = new Dataset();
+ populator.populateDataset(dataset, context);
+
+ DatasetVersion currentVersion = new DatasetVersion();
+ populator.populateDatasetVersion(currentVersion, context);
+
+        // The constructor of Dataset implicitly creates a new version. Get rid of it before we wire ours.
+ dataset.setVersions(new ArrayList<>());
+ wireDatasetAndVersion(dataset, currentVersion);
+
+ List fileMetadatas = new ArrayList<>();
+ List dataFiles = new ArrayList<>();
+ List dataTables = new ArrayList<>();
+ List dataVariables = new ArrayList<>();
+ List varGroups = new ArrayList<>();
+
+ VersionRecipe versionRecipe = recipe.currentVersionRecipe();
+ List fileRecipes = versionRecipe.fileRecipes();
+
+ int globalFileIndex = 0;
+ for (FileRecipe fileRecipe : fileRecipes) {
+ for (int fileIndex = 0; fileIndex < fileRecipe.fileCount(); fileIndex++, globalFileIndex++) {
+ FileBuildContext fileContext = new FileBuildContext(fileRecipe, globalFileIndex);
+
+ DataFile dataFile = new DataFile();
+ populator.populateDataFile(dataFile, fileContext, context);
+
+ FileMetadata fileMetadata = new FileMetadata();
+ populator.populateFileMetadata(fileMetadata, fileContext, context);
+
+ wireFileMetadata(currentVersion, fileMetadata, dataFile);
+ fileMetadatas.add(fileMetadata);
+ dataFiles.add(dataFile);
+
+
+            // TODO:
+            //
+            // - create and wire in variable metadata
+            // - populate the variable metadata (extend interface, too)
+            // - add lots more inline comments to this method, maybe split some
+            // - add missing java docs for some classes
+            // - create a version evolution, create builders and populators
+            // - make datasetfixture respect versions when retrieving collections. add convenience methods pointing to current version.
+
+ if (fileRecipe instanceof FileRecipe.Tabular tabularRecipe) {
+ DataTable dataTable = new DataTable();
+ populator.populateDataTable(dataTable, fileContext, context);
+ wireDataTable(dataFile, dataTable);
+ dataTables.add(dataTable);
+
+ var variableSetContext = new VariableSetBuildContext(tabularRecipe, globalFileIndex);
+
+ VariableSetRecipe variableSetRecipe = tabularRecipe.variableSetRecipe();
+ int variableCount = variableSetRecipe.variableCount(variableSetContext);
+
+ List fileVariables = new ArrayList<>(variableCount);
+
+ for (int variableIndex = 0; variableIndex < variableCount; variableIndex++) {
+ DataVariable dataVariable = new DataVariable();
+ populator.populateDataVariable(dataVariable, variableSetContext, variableIndex, context);
+ wireDataVariable(dataTable, dataVariable);
+ fileVariables.add(dataVariable);
+ dataVariables.add(dataVariable);
+ }
+
+ dataTable.setVarQuantity((long) variableCount);
+
+ if (!fileVariables.isEmpty()) {
+ VarGroup varGroup = new VarGroup();
+ populator.populateVarGroup(varGroup, fileContext, 0, context);
+ wireVarGroup(fileMetadata, varGroup, fileVariables);
+ varGroups.add(varGroup);
+ }
+ }
+ }
+ }
+
+ return new DatasetFixture(
+ dataset,
+ currentVersion,
+ fileMetadatas,
+ dataFiles,
+ dataTables,
+ dataVariables,
+ varGroups
+ );
+ }
+
+ /**
+ * Wires a dataset and its current version together.
+ *
+ * This method centralizes the relationship setup between dataset and
+ * version. If your concrete {@code Dataset} API maintains versions differently,
+ * this is the place to adapt.
+ *
+ * @param dataset dataset root
+ * @param version current dataset version
+ */
+ private void wireDatasetAndVersion(Dataset dataset, DatasetVersion version) {
+ version.setDataset(dataset);
+
+ if (dataset.getVersions() == null) {
+ dataset.setVersions(new ArrayList<>());
+ }
+ dataset.getVersions().add(version);
+ }
+
+ /**
+ * Wires file metadata to its dataset version and underlying data file.
+ *
+ * @param datasetVersion owning dataset version
+ * @param fileMetadata file metadata to wire
+ * @param dataFile data file to wire
+ */
+ private void wireFileMetadata(DatasetVersion datasetVersion, FileMetadata fileMetadata, DataFile dataFile) {
+ fileMetadata.setDatasetVersion(datasetVersion);
+ fileMetadata.setDataFile(dataFile);
+
+ datasetVersion.getFileMetadatas().add(fileMetadata);
+ dataFile.getFileMetadatas().add(fileMetadata);
+ }
+
+ /**
+ * Wires a data table to its data file.
+ *
+ * @param dataFile parent data file
+ * @param dataTable child data table
+ */
+ private void wireDataTable(DataFile dataFile, DataTable dataTable) {
+ dataTable.setDataFile(dataFile);
+ dataFile.getDataTables().add(dataTable);
+ }
+
+ /**
+ * Wires a data variable to its data table.
+ *
+ * @param dataTable parent data table
+ * @param dataVariable child data variable
+ */
+ private void wireDataVariable(DataTable dataTable, DataVariable dataVariable) {
+ dataVariable.setDataTable(dataTable);
+ dataTable.getDataVariables().add(dataVariable);
+ }
+
+ /**
+ * Wires a variable group to file metadata and assigns the supplied variables
+ * to that group.
+ *
+ * @param fileMetadata owning file metadata
+ * @param varGroup variable group to wire
+ * @param variables variables to include in the group
+ */
+ private void wireVarGroup(FileMetadata fileMetadata, VarGroup varGroup, List variables) {
+ varGroup.setFileMetadata(fileMetadata);
+ varGroup.getVarsInGroup().addAll(variables);
+ fileMetadata.getVarGroups().add(varGroup);
+ }
+
+ /**
+ * Internal immutable build context shared across a single fixture build.
+ *
+ * @param sequence deterministic sequence number for the fixture instance
+ */
+ public record BuildContext(
+ long sequence
+ ) {
+ }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
new file mode 100644
index 00000000000..9bbfa769d39
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
@@ -0,0 +1,77 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.branding.BrandingUtilTest;
+import edu.harvard.iq.dataverse.util.json.JsonPrinter;
+import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
+import jakarta.json.Json;
+import jakarta.json.JsonArrayBuilder;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+class DatasetFixtureTest {
+
+ @BeforeAll
+ static void setUp() {
+ BrandingUtilTest.setupMocks();
+ }
+
+ @AfterAll
+ static void tearDown() {
+ BrandingUtilTest.tearDownMocks();
+ }
+
+ @Test
+ void smoketest() {
+
+ var recipe = DatasetRecipe.of(
+ VersionRecipe.of(
+ FileRecipe.tabular(1000, VariableSetRecipe.uniform(10000)),
+ //FileRecipe.tabular(50, VariableSetRecipe.byPredicate()),
+ //FileRecipe.tabular(50, VariableSetRecipe.byRandom(10, 1000, 12345)),
+ FileRecipe.regular(1)
+ )
+ );
+
+ Instant start = Instant.now();
+
+ var fixture = DatasetFixtureBuilder.builder()
+ .recipe(recipe)
+ .populator(FixturePopulator.minimal())
+ .build();
+
+ Instant finish = Instant.now();
+ System.out.println("build: " + Duration.between(start, finish).toMillis() + " msec");
+
+ start = Instant.now();
+
+ JsonArrayBuilder jab = Json.createArrayBuilder();
+ for (FileMetadata fileMetadata : fixture.fileMetadatas()) {
+ DataFile dataFile = fileMetadata.getDataFile();
+ jab.add(JsonPrinter.json(dataFile, fileMetadata, true));
+ }
+ var result = jab.build();
+
+ finish = Instant.now();
+ System.out.println("convert: " + Duration.between(start, finish).toMillis() + " msec");
+
+ assertNotNull(result);
+
+ start = Instant.now();
+ System.out.println(result);
+ finish = Instant.now();
+ System.out.println("print: " + Duration.between(start, finish).toMillis() + " msec");
+
+ }
+
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
new file mode 100644
index 00000000000..408a9a4ffa2
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
@@ -0,0 +1,116 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DataTable;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
+import edu.harvard.iq.dataverse.datavariable.DataVariable;
+import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+
+/**
+ * Populator interface responsible for initializing scalar/non-relationship fields of
+ * generated fixture entities.
+ *
+ * The builder/wiring layer is responsible for graph structure and
+ * relationship correctness. This population layer is responsible for making sure
+ * entities are also "safe enough" to serialize and persist by filling required
+ * or null-sensitive scalar fields and collections.
+ *
+ * This separation keeps shape decisions in recipes and scalar defaults here.
+ */
+public interface FixturePopulator {
+
+ /**
+ * Populates scalar fields and safe defaults for a dataset.
+ *
+ * @param dataset dataset being initialized
+ * @param context fixture build context
+ */
+ void populateDataset(Dataset dataset, DatasetFixtureBuilder.BuildContext context);
+
+ /**
+ * Populates scalar fields and safe defaults for a dataset version.
+ *
+ * @param version dataset version being initialized
+ * @param context fixture build context
+ */
+ void populateDatasetVersion(DatasetVersion version, DatasetFixtureBuilder.BuildContext context);
+
+ /**
+ * Populates scalar fields and safe defaults for file metadata.
+ *
+ * @param fileMetadata file metadata being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ void populateFileMetadata(FileMetadata fileMetadata, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context);
+
+ /**
+ * Populates scalar fields and safe defaults for a data file.
+ *
+ * @param dataFile data file being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context);
+
+ /**
+ * Populates scalar fields and safe defaults for a data table.
+ *
+ * @param dataTable data table being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context);
+
+ /**
+ * Populates scalar fields and safe defaults for a data variable.
+ *
+ * @param dataVariable data variable being initialized
+     * @param variableBuildContext variable-set build context carrying the tabular recipe and zero-based file index
+ * @param variableIndex zero-based variable index within the file/table
+ * @param context fixture build context
+ */
+ void populateDataVariable(
+ DataVariable dataVariable,
+ VariableSetBuildContext variableBuildContext,
+ int variableIndex,
+ DatasetFixtureBuilder.BuildContext context
+ );
+
+ /**
+ * Populates scalar fields and safe defaults for a variable group.
+ *
+ * @param varGroup var group being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param groupIndex zero-based group index within the file
+ * @param context fixture build context
+ */
+ void populateVarGroup(
+ VarGroup varGroup,
+ FileBuildContext fileBuildContext,
+ int groupIndex,
+ DatasetFixtureBuilder.BuildContext context
+ );
+
+ /**
+ * Returns a deterministic, minimal-safe entity populator.
+ *
+ * This implementation is intentionally conservative. It sets enough fields
+ * for fixture graphs to be usable in persistence and serialization tests,
+ * without trying to simulate realistic production content yet.
+ *
+ * @return standard, minimalized, and deterministic field populator
+ */
+ static FixturePopulator minimal() {
+ return new MinimalPopulator();
+ }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
new file mode 100644
index 00000000000..12ff2b300b7
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
@@ -0,0 +1,154 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DataTable;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
+import edu.harvard.iq.dataverse.dataset.DatasetType;
+import edu.harvard.iq.dataverse.datavariable.DataVariable;
+import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+
+public final class MinimalPopulator implements FixturePopulator {
+
+ /**
+ * Populates basic dataset scalar fields.
+ *
+ * @param dataset dataset being initialized
+ * @param context fixture build context
+ */
+ @Override
+ public void populateDataset(Dataset dataset, DatasetFixtureBuilder.BuildContext context) {
+ dataset.setProtocol("doi");
+ dataset.setAuthority("10.5072");
+ dataset.setIdentifier("fixture-dataset-" + context.sequence());
+ dataset.setStorageIdentifier("fixture-storage-" + context.sequence());
+ dataset.setDatasetType(new DatasetType());
+ }
+
+ /**
+ * Populates basic dataset-version scalar fields, timestamps, and terms.
+ *
+ * @param version dataset version being initialized
+ * @param context fixture build context
+ */
+ @Override
+ public void populateDatasetVersion(DatasetVersion version, DatasetFixtureBuilder.BuildContext context) {
+ Date now = new Date();
+ version.setVersionNumber(1L);
+ version.setMinorVersionNumber(0L);
+ version.setVersionState(DatasetVersion.VersionState.DRAFT);
+ version.setVersionNote("fixture-version");
+ version.setCreateTime(now);
+ version.setLastUpdateTime(now);
+ version.setTermsOfUseAndAccess(new TermsOfUseAndAccess());
+ }
+
+ /**
+ * Populates basic file-metadata scalar fields.
+ *
+ * @param fileMetadata file metadata being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ @Override
+ public void populateFileMetadata(FileMetadata fileMetadata, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context) {
+ fileMetadata.setLabel("file-" + fileBuildContext.fileIndex() + ".tab");
+ fileMetadata.setDescription("Fixture file " + fileBuildContext.fileIndex());
+ fileMetadata.setVarGroups(new ArrayList<>());
+ fileMetadata.setVariableMetadatas(new ArrayList<>());
+ }
+
+ /**
+ * Populates basic data-file scalar fields and null-sensitive defaults.
+ *
+ * @param dataFile data file being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ @Override
+ public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context) {
+ dataFile.setContentType("text/tab-separated-values");
+ dataFile.setChecksumType(DataFile.ChecksumType.SHA1);
+ dataFile.setChecksumValue("fixture-checksum-" + fileBuildContext.fileIndex());
+ dataFile.setFilesize(1024L + fileBuildContext.fileIndex());
+ dataFile.setDataTables(new ArrayList<>());
+ dataFile.setFileMetadatas(new ArrayList<>());
+ dataFile.setTags(new ArrayList<>());
+ }
+
+ /**
+ * Populates basic data-table scalar fields and variable collection defaults.
+ *
+ * @param dataTable data table being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param context fixture build context
+ */
+ @Override
+ public void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context) {
+ dataTable.setVarQuantity(0L);
+ dataTable.setCaseQuantity(100L);
+ dataTable.setRecordsPerCase(1L);
+ dataTable.setUnf("UNF:fixture-table-" + fileBuildContext.fileIndex());
+ dataTable.setDataVariables(new ArrayList<>());
+ dataTable.setOriginalFileFormat("text/tab-separated-values");
+ dataTable.setOriginalFileName("fixture-original-" + fileBuildContext.fileIndex() + ".tab");
+ dataTable.setOriginalFileSize(2048L + fileBuildContext.fileIndex());
+ }
+
+ /**
+ * Populates basic data-variable scalar fields and initializes collections
+ * that are null-sensitive in serialization.
+ *
+ * @param dataVariable data variable being initialized
+     * @param variableSetBuildContext variable-set build context carrying the tabular recipe and zero-based file index
+ * @param variableIndex zero-based variable index within the file/table
+ * @param context fixture build context
+ */
+ @Override
+ public void populateDataVariable(
+ DataVariable dataVariable,
+ VariableSetBuildContext variableSetBuildContext,
+ int variableIndex,
+ DatasetFixtureBuilder.BuildContext context
+ ) {
+ dataVariable.setName("var_" + variableSetBuildContext.fileIndex() + "_" + variableIndex);
+ dataVariable.setLabel("Variable " + variableSetBuildContext.fileIndex() + "/" + variableIndex);
+ dataVariable.setType(DataVariable.VariableType.NUMERIC);
+ dataVariable.setFileOrder(variableIndex);
+ dataVariable.setUnf("UNF:fixture-var-" + variableSetBuildContext.fileIndex() + "-" + variableIndex);
+ dataVariable.setInvalidRanges(new ArrayList<>());
+ dataVariable.setSummaryStatistics(new ArrayList<>());
+ dataVariable.setCategories(new ArrayList<>());
+ dataVariable.setVariableMetadatas(new ArrayList<>());
+ dataVariable.setInvalidRangeItems(new ArrayList<>());
+ }
+
+ /**
+ * Populates basic variable-group scalar fields and initializes the backing
+ * variable set.
+ *
+ * @param varGroup var group being initialized
+     * @param fileBuildContext file build context carrying the file recipe and zero-based file index
+ * @param groupIndex zero-based group index within the file
+ * @param context fixture build context
+ */
+ @Override
+ public void populateVarGroup(
+ VarGroup varGroup,
+ FileBuildContext fileBuildContext,
+ int groupIndex,
+ DatasetFixtureBuilder.BuildContext context
+ ) {
+ varGroup.setLabel("group-" + fileBuildContext.fileIndex() + "-" + groupIndex);
+ varGroup.setVarsInGroup(new HashSet<>());
+ }
+
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
new file mode 100644
index 00000000000..61d88e5c085
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
@@ -0,0 +1,43 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+/**
+ * Top-level recipe describing how to construct a {@code Dataset} fixture.
+ *
+ * This is intentionally rooted at the dataset level rather than the dataset
+ * version level, so the fixture system can later support scenarios involving
+ * multiple versions, different current-version shapes, and dataset-level
+ * performance tests.
+ *
+ * For the initial implementation, a dataset recipe exposes exactly one
+ * "current version" recipe. This keeps the model simple while leaving room
+ * to evolve later.
+ */
+public interface DatasetRecipe {
+
+ /**
+ * Returns the recipe describing the current version of the dataset.
+ *
+ * @return recipe for the current dataset version
+ */
+ VersionRecipe currentVersionRecipe();
+
+ /**
+ * Creates a dataset recipe with a single current version recipe.
+ *
+ * @param currentVersionRecipe the recipe for the current dataset version
+ * @return a dataset recipe
+ */
+ static DatasetRecipe of(VersionRecipe currentVersionRecipe) {
+ return new SimpleDatasetRecipe(currentVersionRecipe);
+ }
+
+ /**
+ * Minimal immutable implementation of {@link DatasetRecipe}.
+ *
+ * @param currentVersionRecipe the recipe for the current dataset version
+ */
+ record SimpleDatasetRecipe(
+ VersionRecipe currentVersionRecipe
+ ) implements DatasetRecipe {
+ }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java
new file mode 100644
index 00000000000..8a2a5336c9a
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileRecipe.java
@@ -0,0 +1,28 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+public interface FileRecipe {
+
+ /**
+ * Returns the total number of files to create.
+ *
+ * @return number of files in the generated dataset version
+ */
+ int fileCount();
+
+ static FileRecipe tabular(int fileCount, VariableSetRecipe recipe) {
+ return new Tabular(fileCount, recipe);
+ }
+
+ static FileRecipe regular(int fileCount) {
+ return new Regular(fileCount);
+ }
+
+ record Tabular (
+ int fileCount,
+ VariableSetRecipe variableSetRecipe
+ ) implements FileRecipe {}
+
+ record Regular (
+ int fileCount
+ ) implements FileRecipe {}
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
new file mode 100644
index 00000000000..dc39bcb04fb
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
@@ -0,0 +1,24 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+/**
+ * Context object supplied while deciding whether a variable should receive
+ * {@code VariableMetadata}.
+ *
+ * A variable metadata entry belongs to a specific pair of:
+ *
+ * - a file's metadata
+ * - a variable in that file's tabular structure
+ *
+ *
+ * For now this context mainly exposes the owning recipe plus file and
+ * variable indices. It can grow later as fixture requirements become more sophisticated.
+ * @param tabularRecipe tabular file recipe this file-variable pair belongs to
+ * @param fileIndex zero-based file index
+ * @param variableIndex zero-based variable index within the file/table
+ */
+public record VariableMetadataBuildContext(
+ FileRecipe.Tabular tabularRecipe,
+ int fileIndex,
+ int variableIndex
+) {
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
new file mode 100644
index 00000000000..52d628c9761
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
@@ -0,0 +1,87 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+import java.util.function.Predicate;
+
+/**
+ * Recipe describing whether a {@code VariableMetadata} row should be created for
+ * a generated {@code (FileMetadata, DataVariable)} pair.
+ *
+ * This is modeled as a yes/no decision because the current schema enforces
+ * uniqueness for each pair of {@code datavariable_id} and {@code filemetadata_id}.
+ * As filemetadata is associated with a single dataset version, this makes variable metadata versioned, too.
+ */
+public interface VariableMetadataRecipe {
+
+ /**
+ * Returns whether metadata should be created for the supplied pair context.
+ *
+ * @param context build context describing the file-variable pair
+ * @return {@code true} if metadata should be created, otherwise {@code false}
+ */
+ boolean createFor(VariableMetadataBuildContext context);
+
+ /**
+ * Returns a recipe that never creates metadata.
+ *
+ * @return no-op recipe
+ */
+ static VariableMetadataRecipe noop() {
+ return new Noop();
+ }
+
+ /**
+ * Returns a recipe that always creates metadata.
+ *
+ * @return always-on recipe
+ */
+ static VariableMetadataRecipe always() {
+ return new Always();
+ }
+
+ /**
+ * Returns a predicate-driven metadata recipe.
+ *
+ * @param predicate predicate deciding whether metadata should be created
+ * @return predicate-based recipe
+ */
+ static VariableMetadataRecipe byPredicate(Predicate predicate) {
+ return new PredicateBased(predicate);
+ }
+
+ /**
+ * Recipe that never creates metadata.
+ */
+ record Noop() implements VariableMetadataRecipe {
+
+ @Override
+ public boolean createFor(VariableMetadataBuildContext context) {
+ return false;
+ }
+ }
+
+ /**
+ * Recipe that always creates metadata.
+ */
+ record Always() implements VariableMetadataRecipe {
+
+ @Override
+ public boolean createFor(VariableMetadataBuildContext context) {
+ return true;
+ }
+ }
+
+ /**
+ * Predicate-based metadata recipe.
+ *
+ * @param predicate predicate deciding whether metadata should be created
+ */
+ record PredicateBased(
+ Predicate predicate
+ ) implements VariableMetadataRecipe {
+
+ @Override
+ public boolean createFor(VariableMetadataBuildContext context) {
+ return predicate.test(context);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java
new file mode 100644
index 00000000000..48bdf06901b
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetBuildContext.java
@@ -0,0 +1,18 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+/**
+ * Context object supplied while deciding how many variables to create for a
+ * tabular file or table.
+ *
+ * At present this carries the originating recipe and the file index. It is
+ * intentionally separated from {@link FileBuildContext} because variable
+ * population decisions may later need different context, such as table index,
+ * dataset version information, recipe seed, or file type details.
+ *
+ * @param tabularRecipe the tabular file recipe for which variables are being created
+ * @param fileIndex zero-based index of the file for which variables are being created
+ */
+public record VariableSetBuildContext(
+ FileRecipe.Tabular tabularRecipe,
+ int fileIndex
+) {
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
new file mode 100644
index 00000000000..46920e980c0
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
@@ -0,0 +1,153 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Predicate;
+
+/**
+ * Recipe describing how many variables should be created for a tabular file
+ * or data table, and whether generated file-variable pairs should receive
+ * {@code VariableMetadata}.
+ */
+public interface VariableSetRecipe {
+
+ /**
+ * Returns the number of variables to create for the given context.
+ *
+ * @param context contextual information about the file/table being populated
+ * @return variable count to create
+ */
+ int variableCount(VariableSetBuildContext context);
+
+ /**
+ * Returns the recipe describing whether metadata should be created for a
+ * generated {@code (FileMetadata, DataVariable)} pair.
+ *
+ * @return variable metadata recipe
+ */
+ VariableMetadataRecipe variableMetadataRecipe();
+
+ /**
+ * Creates a uniform variable set recipe with no metadata generation.
+ *
+ * @param variableCount uniform variable count
+ * @return uniform variable set recipe
+ */
+ static VariableSetRecipe uniform(int variableCount) {
+ return new UniformVariableSetRecipe(variableCount, VariableMetadataRecipe.noop());
+ }
+
+ /**
+ * Creates a uniform variable set recipe with the supplied metadata recipe.
+ *
+ * @param variableCount uniform variable count
+ * @param variableMetadataRecipe metadata recipe for generated pairs
+ * @return uniform variable set recipe
+ */
+ static VariableSetRecipe uniform(int variableCount, VariableMetadataRecipe variableMetadataRecipe) {
+ return new UniformVariableSetRecipe(variableCount, variableMetadataRecipe);
+ }
+
+ /**
+ * Creates a predicate-driven variable set recipe with no metadata generation.
+ *
+ * @return predicate-driven variable set recipe
+ */
+ static PredicateVariableSetRecipe byPredicate() {
+ return new PredicateVariableSetRecipe(VariableMetadataRecipe.noop());
+ }
+
+ /**
+ * Creates a predicate-driven variable set recipe with the supplied metadata recipe.
+ *
+ * @param variableMetadataRecipe metadata recipe for generated pairs
+ * @return predicate-driven variable set recipe
+ */
+ static PredicateVariableSetRecipe byPredicate(VariableMetadataRecipe variableMetadataRecipe) {
+ return new PredicateVariableSetRecipe(variableMetadataRecipe);
+ }
+
+ /**
+ * Uniform variable set recipe.
+ *
+ * @param variableCount uniform variable count
+ * @param variableMetadataRecipe metadata recipe for generated pairs
+ */
+ record UniformVariableSetRecipe(
+ int variableCount,
+ VariableMetadataRecipe variableMetadataRecipe
+ ) implements VariableSetRecipe {
+
+ @Override
+ public int variableCount(VariableSetBuildContext context) {
+ return variableCount;
+ }
+ }
+
+ /**
+ * Predicate-driven variable set recipe.
+ *
+ * Rules are evaluated in insertion order; the first match wins, otherwise
+ * the default count (initially 0) applies.
+ */
+ final class PredicateVariableSetRecipe implements VariableSetRecipe {
+
+ private final List<Rule> rules = new ArrayList<>();
+ private final VariableMetadataRecipe variableMetadataRecipe;
+ private int defaultCount = 0;
+
+ public PredicateVariableSetRecipe(VariableMetadataRecipe variableMetadataRecipe) {
+ this.variableMetadataRecipe = variableMetadataRecipe;
+ }
+
+ /**
+ * Adds a variable-count rule.
+ *
+ * @param predicate rule predicate
+ * @param variableCount variable count to use when matched
+ * @return this recipe
+ */
+ public PredicateVariableSetRecipe when(
+ Predicate<VariableSetBuildContext> predicate,
+ int variableCount
+ ) {
+ rules.add(new Rule(predicate, variableCount));
+ return this;
+ }
+
+ /**
+ * Sets the default variable count.
+ *
+ * @param variableCount default variable count
+ * @return this recipe
+ */
+ public PredicateVariableSetRecipe otherwise(int variableCount) {
+ this.defaultCount = variableCount;
+ return this;
+ }
+
+ @Override
+ public int variableCount(VariableSetBuildContext context) {
+ for (Rule rule : rules) {
+ if (rule.predicate().test(context)) {
+ return rule.variableCount();
+ }
+ }
+ return defaultCount;
+ }
+
+ @Override
+ public VariableMetadataRecipe variableMetadataRecipe() {
+ return variableMetadataRecipe;
+ }
+
+ /**
+ * Internal immutable predicate rule.
+ *
+ * @param predicate match condition
+ * @param variableCount variable count to use when matched
+ */
+ private record Rule(
+ Predicate<VariableSetBuildContext> predicate,
+ int variableCount
+ ) {
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java
new file mode 100644
index 00000000000..450cb6940ca
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VersionRecipe.java
@@ -0,0 +1,49 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Recipe describing how to construct a dataset version fixture.
+ *
+ * At this stage, a version recipe is mainly responsible for delegating to a
+ * {@link FileRecipe}, which controls how files in that version are created.
+ *
+ * Later, this type can be extended with more version-level concerns such as:
+ * draft/released state, timestamps, version numbering, or version-specific
+ * metadata enrichment.
+ */
+public interface VersionRecipe {
+
+ /**
+ * Returns the file recipes for this dataset version.
+ *
+ * @return recipes governing file creation for the version
+ */
+ List fileRecipes();
+
+ /**
+ * Creates a version recipe from a number of file recipes.
+ *
+ * @param fileRecipes recipes governing file creation
+ * @return a version recipe
+ */
+ static VersionRecipe of(FileRecipe... fileRecipes) {
+ Objects.requireNonNull(fileRecipes, "fileRecipes may not be null");
+ for (FileRecipe fileRecipe : fileRecipes) {
+ Objects.requireNonNull(fileRecipe, "fileRecipes must not contain null elements");
+ }
+ return new SimpleVersionRecipe(Arrays.asList(fileRecipes));
+ }
+
+ /**
+ * Minimal immutable implementation of {@link VersionRecipe}.
+ *
+ * @param fileRecipe recipe governing file creation
+ */
+ record SimpleVersionRecipe(
+ List fileRecipes
+ ) implements VersionRecipe {
+ }
+}
From e005295b3a7323a26b5f6945efca24840c40eec1 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 17 Apr 2026 03:25:52 +0200
Subject: [PATCH 02/23] test(performance): introduce integration tests for
large dataset export with JpaTestBootstrap setup #11405
Introduced `HugeDatasetExportPerformanceIT` for testing large dataset export performance. Added `JpaTestBootstrap` to streamline test setup with Testcontainers and JPA. Updated dependencies to include `datasource-proxy` for query tracking and enhanced test tooling.
Note: this doesn't work yet, as we need a fixture generator first.
---
pom.xml | 6 +
.../db/performance/JpaTestBootstrap.java | 131 ++++++++++++++++++
.../HugeDatasetExportPerformanceIT.java | 57 ++++++++
.../iq/dataverse/util/testing/Tags.java | 1 +
4 files changed, 195 insertions(+)
create mode 100644 src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
diff --git a/pom.xml b/pom.xml
index 0ee32227abc..a2742fce95e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -764,6 +764,12 @@
                <version>3.0.0</version>
                <scope>test</scope>
+            <dependency>
+                <groupId>net.ttddyy</groupId>
+                <artifactId>datasource-proxy</artifactId>
+                <version>1.11.0</version>
+                <scope>test</scope>
+            </dependency>
org.testcontainers
testcontainers
diff --git a/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java b/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
new file mode 100644
index 00000000000..9c380703444
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
@@ -0,0 +1,131 @@
+package edu.harvard.iq.dataverse.db.performance;
+
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.EntityManagerFactory;
+import jakarta.persistence.EntityTransaction;
+import jakarta.persistence.Persistence;
+import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder;
+import org.postgresql.ds.PGSimpleDataSource;
+import org.testcontainers.postgresql.PostgreSQLContainer;
+
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+public class JpaTestBootstrap implements AutoCloseable {
+
+ public static final String PERSISTENCE_UNIT = "VDCNet-ejbPU-test";
+
+ private final PostgreSQLContainer postgres;
+
+ private DataSource dataSource;
+ private EntityManagerFactory emf;
+
+ public JpaTestBootstrap(PostgreSQLContainer postgres) {
+ this.postgres = postgres;
+ }
+
+ public void start() {
+ if (emf != null) {
+ throw new IllegalStateException("JpaTestBootstrap has already been started.");
+ }
+
+ DataSource baseDataSource = createDataSource();
+ dataSource = ProxyDataSourceBuilder.create()
+ .dataSource(baseDataSource)
+ .countQuery()
+ .buildProxy();
+
+ validateDataSource(dataSource);
+
+ Map properties = new HashMap<>();
+ properties.put("jakarta.persistence.nonJtaDataSource", dataSource);
+ properties.put("jakarta.persistence.schema-generation.database.action", "create");
+
+ emf = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT, properties);
+
+ validateEntityManagerFactory();
+ }
+
+ public DataSource getDataSource() {
+ ensureStarted();
+ return dataSource;
+ }
+
+ public EntityManager createEntityManager() {
+ ensureStarted();
+ return emf.createEntityManager();
+ }
+
+ public EntityManagerFactory getEntityManagerFactory() {
+ ensureStarted();
+ return emf;
+ }
+
+ public T inTransaction(Function work) {
+ EntityManager em = createEntityManager();
+ EntityTransaction tx = em.getTransaction();
+ try {
+ tx.begin();
+ T result = work.apply(em);
+ tx.commit();
+ return result;
+ } catch (RuntimeException e) {
+ if (tx.isActive()) {
+ tx.rollback();
+ }
+ throw e;
+ } finally {
+ em.close();
+ }
+ }
+
+ public void inTransactionVoid(Consumer work) {
+ inTransaction(em -> {
+ work.accept(em);
+ return null;
+ });
+ }
+
+ private DataSource createDataSource() {
+ PGSimpleDataSource pgDataSource = new PGSimpleDataSource();
+ pgDataSource.setURL(postgres.getJdbcUrl());
+ pgDataSource.setUser(postgres.getUsername());
+ pgDataSource.setPassword(postgres.getPassword());
+ return pgDataSource;
+ }
+
+ private void validateDataSource(DataSource dataSource) {
+ try (Connection connection = dataSource.getConnection()) {
+ if (!connection.isValid(5)) {
+ throw new IllegalStateException("DataSource connection is not valid.");
+ }
+ } catch (SQLException e) {
+ throw new IllegalStateException("Failed to validate DataSource.", e);
+ }
+ }
+
+ private void validateEntityManagerFactory() {
+ EntityManager entityManager = emf.createEntityManager();
+ entityManager.close();
+ }
+
+ private void ensureStarted() {
+ if (emf == null) {
+ throw new IllegalStateException("JpaTestBootstrap has not been started yet.");
+ }
+ }
+
+ @Override
+ public void close() {
+ if (emf != null && emf.isOpen()) {
+ emf.close();
+ }
+ emf = null;
+ dataSource = null;
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
new file mode 100644
index 00000000000..85f09f24197
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
@@ -0,0 +1,57 @@
+package edu.harvard.iq.dataverse.export;
+
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.db.performance.JpaTestBootstrap;
+import edu.harvard.iq.dataverse.util.testing.Tags;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.postgresql.PostgreSQLContainer;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
+/**
+ * Performance test for exporting a very large dataset version via
+ * {@code InternalExportDataProvider}, backed by a Testcontainers PostgreSQL
+ * database bootstrapped through {@link JpaTestBootstrap}.
+ *
+ * NOTE(review): container lifecycle is managed manually in setUp/tearDown, so
+ * no {@code @Container} annotation is used despite {@code @Testcontainers}.
+ */
+@Tag(Tags.USES_TESTCONTAINERS)
+@Tag(Tags.PERFORMANCE_TEST)
+@Testcontainers(disabledWithoutDocker = true)
+class HugeDatasetExportPerformanceIT {
+
+ static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:16");
+ static JpaTestBootstrap jpa;
+
+ @BeforeAll
+ static void setUp() {
+ postgres.start();
+ jpa = new JpaTestBootstrap(postgres);
+ jpa.start();
+
+ // TODO: run schema migration / load fixture here
+
+
+ }
+
+ @AfterAll
+ static void tearDown() {
+ // jpa may be null if JpaTestBootstrap construction/start failed in setUp.
+ if (jpa != null) {
+ jpa.close();
+ }
+ postgres.stop();
+ }
+
+ @Test
+ void shouldExportLargeDataset() {
+ // Placeholder id: no fixture loader exists yet, so this id will not
+ // resolve and the test is aborted (not failed) via assumeTrue below.
+ Long datasetVersionId = 123L;
+
+ String json = jpa.inTransaction(em -> {
+ var datasetVersion = em.find(DatasetVersion.class, datasetVersionId);
+ assumeTrue(datasetVersion != null, "No dataset version available in DB. Check fixtures!");
+
+ InternalExportDataProvider provider = new InternalExportDataProvider(datasetVersion);
+ return provider.getDatasetFileDetails().toString();
+ });
+
+ assertNotNull(json);
+ }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
index 22e13f08665..9ba29404c8e 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
@@ -5,4 +5,5 @@ public class Tags {
public static final String INTEGRATION_TEST = "integration";
public static final String USES_TESTCONTAINERS = "testcontainers";
public static final String DB_MIGRATION_TEST = "migration";
+ public static final String PERFORMANCE_TEST = "performance";
}
From b8b28472d4a470c9d0a8262fcf3b1afdece0c7a6 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 17 Apr 2026 03:27:02 +0200
Subject: [PATCH 03/23] chore(util): add comments highlighting design flaws in
variable metadata handling in JsonPrinter #11405
---
.../harvard/iq/dataverse/util/json/JsonPrinter.java | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index 3ccf616a688..cb4539b685a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -1107,6 +1107,16 @@ public static JsonObjectBuilder json(DataVariable dv) {
.add("fileEndPosition", dv.getFileEndPosition())
.add("recordSegmentNumber", dv.getRecordSegmentNumber())
.add("numberOfDecimalPoints",dv.getNumberOfDecimalPoints())
+ // TODO: This potentially is a design flaw and huge code smell.
+ // VariableMetadata is versioned by (FileMetadata,DatasetVersion) in the (DataVariable,FileMetadata) pair.
+ // This is wrong output, as we were only interested in the one version we asked for.
+ // This is wasteful, as we load unrelated versions of the variable metadata.
+ // (For datasets with many variables and many versions, this is very bad.)
+ // This also leads to N+1 query expansions, as in the printing code we look up related entities' ids and details.
+ // There are two code paths leading to this: a) from exporting, b) from api.Files.getFileDataTables().
+ // -> For exports, we only are interested in a single dataset / file metadata version.
+ // -> For the API call we probably want the full details? It seems SPA related - not sure if they should provide a version.
+ //
.add("variableMetadata",jsonVarMetadata(dv.getVariableMetadatas()))
.add("invalidRanges", dv.getInvalidRanges().isEmpty() ? null : JsonPrinter.jsonInvalidRanges(dv.getInvalidRanges()))
.add("summaryStatistics", dv.getSummaryStatistics().isEmpty() ? null : JsonPrinter.jsonSumStat(dv.getSummaryStatistics()))
From c14802af9a71ffb45e3b68b2daef7cdb6ea23e51 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:12:33 +0200
Subject: [PATCH 04/23] docs(testing): replace `VariableMetadata` references
with proper Javadoc links
---
.../util/testing/recipes/VariableMetadataBuildContext.java | 2 +-
.../dataverse/util/testing/recipes/VariableMetadataRecipe.java | 2 +-
.../iq/dataverse/util/testing/recipes/VariableSetRecipe.java | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
index dc39bcb04fb..f2ad493e4f0 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataBuildContext.java
@@ -2,7 +2,7 @@
/**
* Context object supplied while deciding whether a variable should receive
- * {@code VariableMetadata}.
+ * {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata}.
*
* A variable metadata entry belongs to a specific pair of:
*
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
index 52d628c9761..f71bb670db8 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableMetadataRecipe.java
@@ -3,7 +3,7 @@
import java.util.function.Predicate;
/**
- * Recipe describing whether a {@code VariableMetadata} row should be created for
+ * Recipe describing whether a {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata} row should be created for
* a generated {@code (FileMetadata, DataVariable)} pair.
*
* This is modeled as a yes/no decision because the current schema enforces
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
index 46920e980c0..3a88eb2db94 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/VariableSetRecipe.java
@@ -7,7 +7,7 @@
/**
* Recipe describing how many variables should be created for a tabular file
* or data table, and whether generated file-variable pairs should receive
- * {@code VariableMetadata}.
+ * {@link edu.harvard.iq.dataverse.datavariable.VariableMetadata}.
*/
public interface VariableSetRecipe {
From 22983d8c7667aadadc1c035d6667a6b0342e3e4b Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:15:59 +0200
Subject: [PATCH 05/23] test(fixtures): add method to populate
`VariableMetadata` in fixture builders
---
.../testing/fixtures/FixturePopulator.java | 18 +++++++++++++-----
.../testing/fixtures/MinimalPopulator.java | 16 ++++++++++++++++
2 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
index 408a9a4ffa2..168ae666b10 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/FixturePopulator.java
@@ -5,16 +5,13 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
-import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext;
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashSet;
-
/**
* Populator interface responsible for initializing scalar/non-relationship fields of
* generated fixture entities.
@@ -85,6 +82,17 @@ void populateDataVariable(
int variableIndex,
DatasetFixtureBuilder.BuildContext context
);
+
+ /**
+ * Populates scalar fields and safe defaults for metadata of a variable.
+ *
+ * @param metadata variable metadata being initialized
+ * @param variableMetadataBuildContext variable metadata build context
+ */
+ void populateVariableMetadata(
+ VariableMetadata metadata,
+ VariableMetadataBuildContext variableMetadataBuildContext
+ );
/**
* Populates scalar fields and safe defaults for a variable group.
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
index 12ff2b300b7..0d8c07c53ae 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
@@ -9,7 +9,9 @@
import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext;
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
import java.util.ArrayList;
@@ -131,6 +133,20 @@ public void populateDataVariable(
dataVariable.setInvalidRangeItems(new ArrayList<>());
}
+ /**
+ * Populates metadata for a data variable. Updates the label with a unique identifier
+ * generated based on the provided build context.
+ *
+ * @param metadata the variable metadata object to be populated
+ * @param variableMetadataBuildContext the context containing information about
+ * the variable, including file and variable indices
+ */
+ @Override
+ public void populateVariableMetadata(VariableMetadata metadata, VariableMetadataBuildContext variableMetadataBuildContext) {
+ metadata.setLabel("variable-metadata-" + variableMetadataBuildContext.fileIndex() +
+ "-" + variableMetadataBuildContext.variableIndex());
+ }
+
/**
* Populates basic variable-group scalar fields and initializes the backing
* variable set.
From 68c8b9f09c03a6fb3e265d822dd580136b178def Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:18:42 +0200
Subject: [PATCH 06/23] refactor(fixtures): extract `BuildContext` and
`FileBuildContext` into dedicated classes
Simplified method signatures and improved maintainability by decoupling builder-internal types from fixture populators. Updated references across the codebase to use the new context classes.
---
.../util/testing/fixtures/BuildContext.java | 16 +++++++++++++
.../fixtures/DatasetFixtureBuilder.java | 4 ----
.../testing/fixtures/FixturePopulator.java | 24 +++++++++----------
.../testing/fixtures/MinimalPopulator.java | 24 +++++++++----------
.../testing/recipes/FileBuildContext.java | 15 ++++++++++++
5 files changed, 55 insertions(+), 28 deletions(-)
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
new file mode 100644
index 00000000000..26dfe342ad9
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
@@ -0,0 +1,16 @@
+package edu.harvard.iq.dataverse.util.testing.fixtures;
+
+/**
+ * Immutable build context shared across a single fixture build.
+ *
+ * This object exists so populators and helpers do not have to depend on
+ * builder-internal types. As more cross-cutting build information is needed
+ * (for example version index, deterministic seed, or builder configuration),
+ * it can be added here without changing populator method signatures.
+ *
+ * @param sequence deterministic sequence number for the fixture instance
+ */
+public record BuildContext(
+ long sequence
+) {
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
index 320e61735d1..245d6d286de 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
@@ -248,8 +248,4 @@ private void wireVarGroup(FileMetadata fileMetadata, VarGroup varGroup, List());
@@ -72,11 +72,11 @@ public void populateFileMetadata(FileMetadata fileMetadata, FileBuildContext fil
* Populates basic data-file scalar fields and null-sensitive defaults.
*
* @param dataFile data file being initialized
- * @param fileIndex zero-based file index
+ * @param fileBuildContext file build context
* @param context fixture build context
*/
@Override
- public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context) {
+ public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContext, BuildContext context) {
dataFile.setContentType("text/tab-separated-values");
dataFile.setChecksumType(DataFile.ChecksumType.SHA1);
dataFile.setChecksumValue("fixture-checksum-" + fileBuildContext.fileIndex());
@@ -90,11 +90,11 @@ public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContex
* Populates basic data-table scalar fields and variable collection defaults.
*
* @param dataTable data table being initialized
- * @param fileIndex zero-based file index
+ * @param fileBuildContext file build context
* @param context fixture build context
*/
@Override
- public void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, DatasetFixtureBuilder.BuildContext context) {
+ public void populateDataTable(DataTable dataTable, FileBuildContext fileBuildContext, BuildContext context) {
dataTable.setVarQuantity(0L);
dataTable.setCaseQuantity(100L);
dataTable.setRecordsPerCase(1L);
@@ -110,7 +110,7 @@ public void populateDataTable(DataTable dataTable, FileBuildContext fileBuildCon
* that are null-sensitive in serialization.
*
* @param dataVariable data variable being initialized
- * @param fileIndex zero-based file index
+ * @param variableSetBuildContext larger context of the data variable being populated
* @param variableIndex zero-based variable index within the file/table
* @param context fixture build context
*/
@@ -119,7 +119,7 @@ public void populateDataVariable(
DataVariable dataVariable,
VariableSetBuildContext variableSetBuildContext,
int variableIndex,
- DatasetFixtureBuilder.BuildContext context
+ BuildContext context
) {
dataVariable.setName("var_" + variableSetBuildContext.fileIndex() + "_" + variableIndex);
dataVariable.setLabel("Variable " + variableSetBuildContext.fileIndex() + "/" + variableIndex);
@@ -152,7 +152,7 @@ public void populateVariableMetadata(VariableMetadata metadata, VariableMetadata
* variable set.
*
* @param varGroup var group being initialized
- * @param fileIndex zero-based file index
+ * @param fileBuildContext file build context
* @param groupIndex zero-based group index within the file
* @param context fixture build context
*/
@@ -161,7 +161,7 @@ public void populateVarGroup(
VarGroup varGroup,
FileBuildContext fileBuildContext,
int groupIndex,
- DatasetFixtureBuilder.BuildContext context
+ BuildContext context
) {
varGroup.setLabel("group-" + fileBuildContext.fileIndex() + "-" + groupIndex);
varGroup.setVarsInGroup(new HashSet<>());
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java
new file mode 100644
index 00000000000..cad16f95055
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/FileBuildContext.java
@@ -0,0 +1,15 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+/**
+ * Context object supplied while deciding how to build a file fixture.
+ *
+ * For now this context only exposes the file index and the recipe that requested the creation of the file.
+ * It exists as a dedicated type so that the API can grow later without constantly changing method signatures.
+ *
+ * @param fileIndex zero-based index of the file being created within a version
+ */
+public record FileBuildContext(
+ FileRecipe fileRecipe,
+ int fileIndex
+) {
+}
From 188848a038baae596e440d48088bf17bb2e9b464 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:19:55 +0200
Subject: [PATCH 07/23] test(fixtures): replace `System.out.println` with
`toString` in `DatasetFixtureTest`
Avoid measuring the speed of writing to the stream, since only the speed of converting the JSON in-memory model to a String is of interest.
---
.../iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
index 9bbfa769d39..b9a23cdb64c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
@@ -68,7 +68,7 @@ void smoketest() {
assertNotNull(result);
start = Instant.now();
- System.out.println(result);
+ result.toString();
finish = Instant.now();
System.out.println("print: " + Duration.between(start, finish).toMillis() + " msec");
From 1ef86a40e7a80a5c0aaa86874c4e5a8f2f74a8ec Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:20:26 +0200
Subject: [PATCH 08/23] feat(fixtures): extend `DatasetFixture` to include
`VariableMetadata` support
---
.../util/testing/fixtures/DatasetFixture.java | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
index a1c533e389a..50bc3dbc60f 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
@@ -7,6 +7,7 @@
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import java.util.List;
@@ -18,13 +19,17 @@
* generated child entities. That makes it easier to inspect, persist, or tweak
* the graph after building it.
*
+ * The fixture currently represents a single dataset version. Multi-version
+ * support will be added in a later iteration via dedicated evolution recipes.
+ *
* @param dataset root dataset
* @param currentVersion current dataset version
* @param fileMetadatas generated file metadata objects
* @param dataFiles generated data files
* @param dataTables generated data tables
* @param dataVariables generated data variables
- * @param varGroups generated var groups
+ * @param varGroups generated variable groups
+ * @param variableMetadata generated variable metadata rows
*/
public record DatasetFixture(
Dataset dataset,
@@ -33,7 +38,8 @@ public record DatasetFixture(
List dataFiles,
List dataTables,
List dataVariables,
- List varGroups
+ List varGroups,
+ List variableMetadata
) {
/**
@@ -45,5 +51,6 @@ public record DatasetFixture(
dataTables = List.copyOf(dataTables);
dataVariables = List.copyOf(dataVariables);
varGroups = List.copyOf(varGroups);
+ variableMetadata = List.copyOf(variableMetadata);
}
}
\ No newline at end of file
From 69e2dd7bc479a08eccadff72cb13dbd18ba2db4e Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 01:22:02 +0200
Subject: [PATCH 09/23] refactor(fixtures): restructure `DatasetFixtureBuilder`
to improve modularity and add robust variable metadata handling
---
.../fixtures/DatasetFixtureBuilder.java | 463 +++++++++++++-----
1 file changed, 354 insertions(+), 109 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
index 245d6d286de..f27af7f396c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
@@ -7,9 +7,12 @@
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
+import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.FileBuildContext;
import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataBuildContext;
+import edu.harvard.iq.dataverse.util.testing.recipes.VariableMetadataRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
@@ -33,15 +36,28 @@
* - one (current) dataset version
* - files created according to {@link FileRecipe}
* - tabular structure created according to {@link VariableSetRecipe}
+ * - variable metadata created according to {@link VariableMetadataRecipe}
*
*/
public class DatasetFixtureBuilder {
-
+
+ /**
+ * Process-wide deterministic sequence used to identify each built fixture.
+ *
+ * This is intentionally static so values are unique even across multiple
+ * tests running in the same JVM. It is not meant to be reset between tests.
+ */
private static final AtomicLong SEQUENCE = new AtomicLong(1);
-
- private DatasetRecipe recipe;
+
+ /**
+ * Group index used for the single var group we currently create per tabular file.
+ * This will become recipe-driven once a {@code VarGroupRecipe} is introduced.
+ */
+ private static final int FIRST_AND_ONLY_VAR_GROUP_INDEX = 0;
+
+ private DatasetRecipe datasetRecipe;
private FixturePopulator populator = FixturePopulator.minimal();
-
+
/**
* Creates a new builder instance.
*
@@ -50,148 +66,321 @@ public class DatasetFixtureBuilder {
public static DatasetFixtureBuilder builder() {
return new DatasetFixtureBuilder();
}
-
+
/**
* Sets the recipe used to determine the graph shape.
*
- * @param recipe dataset recipe to use
+ * @param datasetRecipe dataset recipe to use
* @return this builder for fluent chaining
*/
- public DatasetFixtureBuilder recipe(DatasetRecipe recipe) {
- this.recipe = Objects.requireNonNull(recipe);
+ public DatasetFixtureBuilder recipe(DatasetRecipe datasetRecipe) {
+ this.datasetRecipe = Objects.requireNonNull(datasetRecipe);
return this;
}
-
+
/**
- * Sets the scalar-field defaults policy.
+ * Sets the scalar-field populator policy.
*
- * @param populator defaults policy to use
+ * @param populator populator to use
* @return this builder for fluent chaining
*/
public DatasetFixtureBuilder populator(FixturePopulator populator) {
this.populator = Objects.requireNonNull(populator);
return this;
}
-
+
/**
* Builds a dataset fixture graph according to the configured recipe and populator.
*
+ * The build process happens in clearly separated phases:
+ *
+ * - create the root {@link Dataset} and its current {@link DatasetVersion}
+ * - iterate over the configured file recipes and build each file (and, where applicable,
+ * its tabular subgraph)
+ * - collect everything that was created so the {@link DatasetFixture} can expose it
+ *
+ *
* @return generated dataset fixture
*/
public DatasetFixture build() {
- if (recipe == null) {
+ if (datasetRecipe == null) {
throw new IllegalStateException("A DatasetRecipe must be configured before building.");
}
-
+ Objects.requireNonNull(populator, "populator must not be null");
+
+ // One context per build, so populators can use deterministic information about this fixture instance.
BuildContext context = new BuildContext(SEQUENCE.getAndIncrement());
-
+
+ // Create the top-level dataset and its current version, then wire them.
+ Dataset dataset = createEmptyDataset(context);
+ DatasetVersion currentVersion = createDatasetVersion(context);
+ wireDatasetAndVersion(dataset, currentVersion);
+
+ // Accumulator collects everything we generate so we can expose it in the fixture.
+ BuildAccumulator accumulator = new BuildAccumulator();
+
+ // Walk the file recipes and create files (plus tabular structure where applicable).
+ buildVersionFiles(currentVersion, context, accumulator);
+
+ return accumulator.toFixture(dataset, currentVersion);
+ }
+
+ /**
+ * Creates a {@link Dataset} with no implicit versions.
+ *
+ * {@code Dataset} normally creates an initial version automatically. For fixtures we want
+ * full control over which versions exist, so we wipe that initial version before wiring.
+ *
+ * @param context fixture build context
+ * @return a freshly populated dataset with an empty version list
+ */
+ private Dataset createEmptyDataset(BuildContext context) {
Dataset dataset = new Dataset();
populator.populateDataset(dataset, context);
-
- DatasetVersion currentVersion = new DatasetVersion();
- populator.populateDatasetVersion(currentVersion, context);
-
- // The constructor of Dataset implicitely creates a new version. Get rid of it before we wire ours.
dataset.setVersions(new ArrayList<>());
- wireDatasetAndVersion(dataset, currentVersion);
-
- List fileMetadatas = new ArrayList<>();
- List dataFiles = new ArrayList<>();
- List dataTables = new ArrayList<>();
- List dataVariables = new ArrayList<>();
- List varGroups = new ArrayList<>();
-
- VersionRecipe versionRecipe = recipe.currentVersionRecipe();
+ return dataset;
+ }
+
+ /**
+ * Creates a {@link DatasetVersion} populated by the configured populator.
+ *
+ * @param context fixture build context
+ * @return a freshly populated dataset version
+ */
+ private DatasetVersion createDatasetVersion(BuildContext context) {
+ DatasetVersion version = new DatasetVersion();
+ populator.populateDatasetVersion(version, context);
+ return version;
+ }
+
+ /**
+ * Iterates over all file recipes for the current version and builds each file in order.
+ *
+ * Each file gets a globally unique index across all file recipes in the version. That
+ * keeps populator-generated values such as labels deterministic and unique across the
+ * whole version.
+ *
+ * @param currentVersion current dataset version receiving the files
+ * @param context fixture build context
+ * @param accumulator accumulator collecting all generated entities
+ */
+ private void buildVersionFiles(
+ DatasetVersion currentVersion,
+ BuildContext context,
+ BuildAccumulator accumulator
+ ) {
+ VersionRecipe versionRecipe = datasetRecipe.currentVersionRecipe();
List fileRecipes = versionRecipe.fileRecipes();
+ // Files within a single version need globally unique indices, even though each recipe
+ // describes its own count. We track that separately from the recipe-local index.
int globalFileIndex = 0;
+
for (FileRecipe fileRecipe : fileRecipes) {
for (int fileIndex = 0; fileIndex < fileRecipe.fileCount(); fileIndex++, globalFileIndex++) {
FileBuildContext fileContext = new FileBuildContext(fileRecipe, globalFileIndex);
-
- DataFile dataFile = new DataFile();
- populator.populateDataFile(dataFile, fileContext, context);
-
- FileMetadata fileMetadata = new FileMetadata();
- populator.populateFileMetadata(fileMetadata, fileContext, context);
-
- wireFileMetadata(currentVersion, fileMetadata, dataFile);
- fileMetadatas.add(fileMetadata);
- dataFiles.add(dataFile);
-
-
- TODO
-
- - create and wire in variable metadata
- - populate the variable metadata (extend interface, too)
- - add lots more inline comments to this method, maybe split some
- - add missing java docs for some classes
- - create a version evolution, create builders and populators
- - make datasetfixture respect versions when retrieving collections. add convenience methods pointing to current version.
-
- if (fileRecipe instanceof FileRecipe.Tabular tabularRecipe) {
- DataTable dataTable = new DataTable();
- populator.populateDataTable(dataTable, fileContext, context);
- wireDataTable(dataFile, dataTable);
- dataTables.add(dataTable);
-
- var variableSetContext = new VariableSetBuildContext(tabularRecipe, globalFileIndex);
-
- VariableSetRecipe variableSetRecipe = tabularRecipe.variableSetRecipe();
- int variableCount = variableSetRecipe.variableCount(variableSetContext);
-
- List fileVariables = new ArrayList<>(variableCount);
-
- for (int variableIndex = 0; variableIndex < variableCount; variableIndex++) {
- DataVariable dataVariable = new DataVariable();
- populator.populateDataVariable(dataVariable, variableSetContext, variableIndex, context);
- wireDataVariable(dataTable, dataVariable);
- fileVariables.add(dataVariable);
- dataVariables.add(dataVariable);
- }
-
- dataTable.setVarQuantity((long) variableCount);
-
- if (!fileVariables.isEmpty()) {
- VarGroup varGroup = new VarGroup();
- populator.populateVarGroup(varGroup, fileContext, 0, context);
- wireVarGroup(fileMetadata, varGroup, fileVariables);
- varGroups.add(varGroup);
- }
- }
+ buildFile(currentVersion, fileRecipe, fileContext, context, accumulator);
}
}
-
- return new DatasetFixture(
- dataset,
- currentVersion,
- fileMetadatas,
- dataFiles,
- dataTables,
- dataVariables,
- varGroups
+ }
+
+ /**
+ * Builds a single file: a {@link DataFile} and its current {@link FileMetadata}, plus its
+ * tabular subgraph if the recipe says the file is tabular.
+ *
+ * @param currentVersion owning dataset version
+ * @param fileRecipe the file recipe describing this file
+ * @param fileContext context describing this individual file
+ * @param context fixture build context
+ * @param accumulator accumulator collecting all generated entities
+ */
+ private void buildFile(
+ DatasetVersion currentVersion,
+ FileRecipe fileRecipe,
+ FileBuildContext fileContext,
+ BuildContext context,
+ BuildAccumulator accumulator
+ ) {
+ // Always create the data file plus its current-version file metadata.
+ DataFile dataFile = new DataFile();
+ populator.populateDataFile(dataFile, fileContext, context);
+
+ FileMetadata fileMetadata = new FileMetadata();
+ populator.populateFileMetadata(fileMetadata, fileContext, context);
+
+ wireFileMetadata(currentVersion, fileMetadata, dataFile);
+ accumulator.addDataFile(dataFile);
+ accumulator.addFileMetadata(fileMetadata);
+
+ // Tabular structure is only created when the recipe says so.
+ if (fileRecipe instanceof FileRecipe.Tabular tabularRecipe) {
+ buildTabularStructure(tabularRecipe, fileContext, dataFile, fileMetadata, context, accumulator);
+ }
+ }
+
+ /**
+ * Builds the tabular structure for a file: one {@link DataTable}, its variables, the
+ * variable metadata, and the var group.
+ *
+ * @param tabularRecipe tabular file recipe
+ * @param fileContext file build context
+ * @param dataFile owning data file
+ * @param fileMetadata current-version file metadata of the data file
+ * @param context fixture build context
+ * @param accumulator accumulator collecting all generated entities
+ */
+ private void buildTabularStructure(
+ FileRecipe.Tabular tabularRecipe,
+ FileBuildContext fileContext,
+ DataFile dataFile,
+ FileMetadata fileMetadata,
+ BuildContext context,
+ BuildAccumulator accumulator
+ ) {
+ DataTable dataTable = new DataTable();
+ populator.populateDataTable(dataTable, fileContext, context);
+ wireDataTable(dataFile, dataTable);
+ accumulator.addDataTable(dataTable);
+
+ VariableSetBuildContext variableSetContext =
+ new VariableSetBuildContext(tabularRecipe, fileContext.fileIndex());
+
+ VariableSetRecipe variableSetRecipe = tabularRecipe.variableSetRecipe();
+ int variableCount = variableSetRecipe.variableCount(variableSetContext);
+
+ // Build all variables for this table, then optionally attach variable metadata
+ // to (FileMetadata, DataVariable) pairs that the recipe says should have it.
+ List fileVariables = buildVariables(
+ dataTable,
+ variableSetContext,
+ variableCount,
+ context,
+ accumulator
+ );
+
+ dataTable.setVarQuantity((long) variableCount);
+
+ buildVariableMetadata(
+ fileMetadata,
+ fileVariables,
+ tabularRecipe,
+ variableSetRecipe.variableMetadataRecipe(),
+ fileContext.fileIndex(),
+ accumulator
);
+
+ // Currently every non-empty tabular file gets exactly one var group with all variables.
+ // This will become recipe-driven once we introduce a dedicated VarGroupRecipe.
+ if (!fileVariables.isEmpty()) {
+ buildVarGroup(fileMetadata, fileVariables, fileContext, context, accumulator);
+ }
}
-
+
/**
- * Wires a dataset and its current version together.
+ * Creates the requested number of {@link DataVariable} entities and wires them to the table.
+ *
+ * @param dataTable owning data table
+ * @param variableSetContext variable-set build context
+ * @param variableCount number of variables to create
+ * @param context fixture build context
+ * @param accumulator accumulator collecting all generated entities
+ * @return the variables created for this file/table, in order
+ */
+ private List buildVariables(
+ DataTable dataTable,
+ VariableSetBuildContext variableSetContext,
+ int variableCount,
+ BuildContext context,
+ BuildAccumulator accumulator
+ ) {
+ List fileVariables = new ArrayList<>(variableCount);
+
+ for (int variableIndex = 0; variableIndex < variableCount; variableIndex++) {
+ DataVariable dataVariable = new DataVariable();
+ populator.populateDataVariable(dataVariable, variableSetContext, variableIndex, context);
+ wireDataVariable(dataTable, dataVariable);
+
+ fileVariables.add(dataVariable);
+ accumulator.addDataVariable(dataVariable);
+ }
+
+ return fileVariables;
+ }
+
+ /**
+ * Creates {@link VariableMetadata} rows for the (file metadata, variable) pairs the recipe
+ * says should receive metadata.
*
- * This method centralizes the relationship setup between dataset and
- * version. If your concrete {@code Dataset} API maintains versions differently,
- * this is the place to adapt.
+ * Each metadata entity links one {@link DataVariable} and one {@link FileMetadata}.
+ * Because the schema enforces uniqueness on that pair, we create at most one metadata
+ * row per variable for the given file metadata.
+ *
+ * @param fileMetadata file metadata for the current version
+ * @param fileVariables variables in the file's tabular structure
+ * @param tabularRecipe tabular file recipe
+ * @param metadataRecipe variable-metadata recipe deciding which pairs get metadata
+ * @param fileIndex zero-based file index
+ * @param accumulator accumulator collecting all generated entities
+ */
+ private void buildVariableMetadata(
+ FileMetadata fileMetadata,
+ List fileVariables,
+ FileRecipe.Tabular tabularRecipe,
+ VariableMetadataRecipe metadataRecipe,
+ int fileIndex,
+ BuildAccumulator accumulator
+ ) {
+ for (int variableIndex = 0; variableIndex < fileVariables.size(); variableIndex++) {
+ VariableMetadataBuildContext metadataContext =
+ new VariableMetadataBuildContext(tabularRecipe, fileIndex, variableIndex);
+
+ if (!metadataRecipe.createFor(metadataContext)) {
+ continue;
+ }
+
+ DataVariable variable = fileVariables.get(variableIndex);
+ VariableMetadata metadata = new VariableMetadata(variable, fileMetadata);
+ populator.populateVariableMetadata(metadata, metadataContext);
+
+ wireVariableMetadata(fileMetadata, variable, metadata);
+ accumulator.addVariableMetadata(metadata);
+ }
+ }
+
+ /**
+ * Creates a {@link VarGroup} containing all variables of a tabular file and attaches it
+ * to the file metadata.
+ *
+ * @param fileMetadata file metadata receiving the var group
+ * @param fileVariables variables in the file's tabular structure
+ * @param fileContext file build context
+ * @param context fixture build context
+ * @param accumulator accumulator collecting all generated entities
+ */
+ private void buildVarGroup(
+ FileMetadata fileMetadata,
+ List fileVariables,
+ FileBuildContext fileContext,
+ BuildContext context,
+ BuildAccumulator accumulator
+ ) {
+ VarGroup varGroup = new VarGroup();
+ populator.populateVarGroup(varGroup, fileContext, FIRST_AND_ONLY_VAR_GROUP_INDEX, context);
+ wireVarGroup(fileMetadata, varGroup, fileVariables);
+ accumulator.addVarGroup(varGroup);
+ }
+
+ /**
+ * Wires a dataset and its current version together.
*
* @param dataset dataset root
* @param version current dataset version
*/
private void wireDatasetAndVersion(Dataset dataset, DatasetVersion version) {
version.setDataset(dataset);
-
- if (dataset.getVersions() == null) {
- dataset.setVersions(new ArrayList<>());
- }
dataset.getVersions().add(version);
}
-
+
/**
* Wires file metadata to its dataset version and underlying data file.
*
@@ -202,11 +391,10 @@ private void wireDatasetAndVersion(Dataset dataset, DatasetVersion version) {
private void wireFileMetadata(DatasetVersion datasetVersion, FileMetadata fileMetadata, DataFile dataFile) {
fileMetadata.setDatasetVersion(datasetVersion);
fileMetadata.setDataFile(dataFile);
-
datasetVersion.getFileMetadatas().add(fileMetadata);
dataFile.getFileMetadatas().add(fileMetadata);
}
-
+
/**
* Wires a data table to its data file.
*
@@ -217,7 +405,7 @@ private void wireDataTable(DataFile dataFile, DataTable dataTable) {
dataTable.setDataFile(dataFile);
dataFile.getDataTables().add(dataTable);
}
-
+
/**
* Wires a data variable to its data table.
*
@@ -228,10 +416,9 @@ private void wireDataVariable(DataTable dataTable, DataVariable dataVariable) {
dataVariable.setDataTable(dataTable);
dataTable.getDataVariables().add(dataVariable);
}
-
+
/**
- * Wires a variable group to file metadata and assigns the supplied variables
- * to that group.
+ * Wires a variable group to file metadata and assigns the supplied variables to that group.
*
* @param fileMetadata owning file metadata
* @param varGroup variable group to wire
@@ -242,10 +429,68 @@ private void wireVarGroup(FileMetadata fileMetadata, VarGroup varGroup, ListThis keeps the build helper methods compact and avoids passing many lists around.
*/
-}
+ private static final class BuildAccumulator {
+
+ private final List fileMetadatas = new ArrayList<>();
+ private final List dataFiles = new ArrayList<>();
+ private final List dataTables = new ArrayList<>();
+ private final List dataVariables = new ArrayList<>();
+ private final List varGroups = new ArrayList<>();
+ private final List variableMetadata = new ArrayList<>();
+
+ void addFileMetadata(FileMetadata fileMetadata) {
+ fileMetadatas.add(fileMetadata);
+ }
+
+ void addDataFile(DataFile dataFile) {
+ dataFiles.add(dataFile);
+ }
+
+ void addDataTable(DataTable dataTable) {
+ dataTables.add(dataTable);
+ }
+
+ void addDataVariable(DataVariable dataVariable) {
+ dataVariables.add(dataVariable);
+ }
+
+ void addVarGroup(VarGroup varGroup) {
+ varGroups.add(varGroup);
+ }
+
+ void addVariableMetadata(VariableMetadata metadata) {
+ variableMetadata.add(metadata);
+ }
+
+ DatasetFixture toFixture(Dataset dataset, DatasetVersion currentVersion) {
+ return new DatasetFixture(
+ dataset,
+ currentVersion,
+ fileMetadatas,
+ dataFiles,
+ dataTables,
+ dataVariables,
+ varGroups,
+ variableMetadata
+ );
+ }
+ }
+}
\ No newline at end of file
From 5e2589c9b47e77da5484e2e39cba8b7de888751a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 10:41:25 +0200
Subject: [PATCH 10/23] test(fixtures): initialize MPCONFIG for `JvmSettings`
in `DatasetFixtureTest` setup
Otherwise the initialization will be counted towards the execution time of building the POJOs.
---
.../dataverse/util/testing/fixtures/DatasetFixtureTest.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
index b9a23cdb64c..4c5da17e3a3 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
@@ -3,6 +3,7 @@
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.branding.BrandingUtilTest;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.json.JsonPrinter;
import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
@@ -24,6 +25,8 @@ class DatasetFixtureTest {
@BeforeAll
static void setUp() {
BrandingUtilTest.setupMocks();
+ // Let MPCONFIG init and cache the lookup classes
+ JvmSettings.PREFIX.lookupOptional();
}
@AfterAll
@@ -71,7 +74,6 @@ void smoketest() {
result.toString();
finish = Instant.now();
System.out.println("print: " + Duration.between(start, finish).toMillis() + " msec");
-
}
}
From 847c92387f8c30b398a328f2d2eb22e08e5d3651 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 10:42:26 +0200
Subject: [PATCH 11/23] test(fixtures): adjust smoketest size to be more
manageable
We are only interested in the smoke. We will create more distinct performance related tests.
---
.../iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
index 4c5da17e3a3..fdb5f674de6 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
@@ -39,7 +39,7 @@ void smoketest() {
var recipe = DatasetRecipe.of(
VersionRecipe.of(
- FileRecipe.tabular(1000, VariableSetRecipe.uniform(10000)),
+ FileRecipe.tabular(10, VariableSetRecipe.uniform(10)),
//FileRecipe.tabular(50, VariableSetRecipe.byPredicate()),
//FileRecipe.tabular(50, VariableSetRecipe.byRandom(10, 1000, 12345)),
FileRecipe.regular(1)
From 278b7ca9bdc58b4e9ec00b3d178ccbce91ff1266 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 16:32:38 +0200
Subject: [PATCH 12/23] fix(fixtures): ensure bidirectional linkage between
`DatasetVersion` and `TermsOfUseAndAccess` in `MinimalPopulator`
---
.../dataverse/util/testing/fixtures/MinimalPopulator.java | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
index 01e23eb75bb..2b158a64c8a 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
@@ -50,7 +50,13 @@ public void populateDatasetVersion(DatasetVersion version, BuildContext context)
version.setVersionNote("fixture-version");
version.setCreateTime(now);
version.setLastUpdateTime(now);
- version.setTermsOfUseAndAccess(new TermsOfUseAndAccess());
+
+ // TermsOfUseAndAccess and DatasetVersion are mutually linked via a OneToOne.
+ // The validator reads datasetVersion from the terms object, so both sides
+ // must be wired before the entity graph is persisted.
+ TermsOfUseAndAccess terms = new TermsOfUseAndAccess();
+ terms.setDatasetVersion(version);
+ version.setTermsOfUseAndAccess(terms);
}
/**
From 2636065be5ca8efba0c2c54479f5b9c552aff15c Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 16:35:15 +0200
Subject: [PATCH 13/23] fix(fixtures): ensure timestamps are explicitly set for
required `DvObject` properties in fixture builders (Dataset and DataFile)
---
.../util/testing/fixtures/BuildContext.java | 16 +++++++++++++++-
.../testing/fixtures/DatasetFixtureBuilder.java | 3 ++-
.../util/testing/fixtures/MinimalPopulator.java | 14 ++++++++++----
3 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
index 26dfe342ad9..f6019d63e9a 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/BuildContext.java
@@ -1,5 +1,9 @@
package edu.harvard.iq.dataverse.util.testing.fixtures;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.util.Date;
+
/**
* Immutable build context shared across a single fixture build.
*
@@ -11,6 +15,16 @@
* @param sequence deterministic sequence number for the fixture instance
*/
public record BuildContext(
- long sequence
+ long sequence,
+ Instant now
) {
+
+ Timestamp getTimestamp() {
+ return Timestamp.from(now);
+ }
+
+ Date getDate() {
+ return Date.from(now);
+ }
+
}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
index f27af7f396c..46984953431 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
@@ -17,6 +17,7 @@
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -109,7 +110,7 @@ public DatasetFixture build() {
Objects.requireNonNull(populator, "populator must not be null");
// One context per build, so populators can use deterministic information about this fixture instance.
- BuildContext context = new BuildContext(SEQUENCE.getAndIncrement());
+ BuildContext context = new BuildContext(SEQUENCE.getAndIncrement(), Instant.now());
// Create the top-level dataset and its current version, then wire them.
Dataset dataset = createEmptyDataset(context);
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
index 2b158a64c8a..9ced493d6ad 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
@@ -15,7 +15,6 @@
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetBuildContext;
import java.util.ArrayList;
-import java.util.Date;
import java.util.HashSet;
public final class MinimalPopulator implements FixturePopulator {
@@ -33,6 +32,10 @@ public void populateDataset(Dataset dataset, BuildContext context) {
dataset.setIdentifier("fixture-dataset-" + context.sequence());
dataset.setStorageIdentifier("fixture-storage-" + context.sequence());
dataset.setDatasetType(new DatasetType());
+
+ // necessary as DvObject says "not nullable"
+ dataset.setCreateDate(context.getTimestamp());
+ dataset.setModificationTime(context.getTimestamp());
}
/**
@@ -43,13 +46,12 @@ public void populateDataset(Dataset dataset, BuildContext context) {
*/
@Override
public void populateDatasetVersion(DatasetVersion version, BuildContext context) {
- Date now = new Date();
version.setVersionNumber(1L);
version.setMinorVersionNumber(0L);
version.setVersionState(DatasetVersion.VersionState.DRAFT);
version.setVersionNote("fixture-version");
- version.setCreateTime(now);
- version.setLastUpdateTime(now);
+ version.setCreateTime(context.getDate());
+ version.setLastUpdateTime(context.getDate());
// TermsOfUseAndAccess and DatasetVersion are mutually linked via a OneToOne.
// The validator reads datasetVersion from the terms object, so both sides
@@ -90,6 +92,10 @@ public void populateDataFile(DataFile dataFile, FileBuildContext fileBuildContex
dataFile.setDataTables(new ArrayList<>());
dataFile.setFileMetadatas(new ArrayList<>());
dataFile.setTags(new ArrayList<>());
+
+ // necessary as DvObject says "not nullable"
+ dataFile.setCreateDate(context.getTimestamp());
+ dataFile.setModificationTime(context.getTimestamp());
}
/**
From cedaab353e9af081bcea7063856b77637738f3ae Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 27 Apr 2026 16:36:06 +0200
Subject: [PATCH 14/23] feat(fixtures): add support for `DatasetType` in
`DatasetRecipe` and its integration across fixture builders and tests using a
new recipe
---
.../util/testing/fixtures/DatasetFixture.java | 2 +
.../fixtures/DatasetFixtureBuilder.java | 11 ++-
.../testing/fixtures/DatasetFixtureTest.java | 2 +
.../testing/fixtures/MinimalPopulator.java | 2 -
.../util/testing/recipes/DatasetRecipe.java | 32 +++++--
.../testing/recipes/DatasetTypeRecipe.java | 93 +++++++++++++++++++
6 files changed, 129 insertions(+), 13 deletions(-)
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
index 50bc3dbc60f..f45ad6f7f91 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixture.java
@@ -5,6 +5,7 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
@@ -33,6 +34,7 @@
*/
public record DatasetFixture(
Dataset dataset,
+ DatasetType datasetType,
DatasetVersion currentVersion,
List fileMetadatas,
List dataFiles,
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
index 46984953431..3f6175c30c5 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureBuilder.java
@@ -5,6 +5,7 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
@@ -114,6 +115,7 @@ public DatasetFixture build() {
// Create the top-level dataset and its current version, then wire them.
Dataset dataset = createEmptyDataset(context);
+ DatasetType datasetType = datasetRecipe.datasetTypeRecipe().datasetType();
DatasetVersion currentVersion = createDatasetVersion(context);
wireDatasetAndVersion(dataset, currentVersion);
@@ -123,7 +125,7 @@ public DatasetFixture build() {
// Walk the file recipes and create files (plus tabular structure where applicable).
buildVersionFiles(currentVersion, context, accumulator);
- return accumulator.toFixture(dataset, currentVersion);
+ return accumulator.toFixture(dataset, datasetType, currentVersion);
}
/**
@@ -138,6 +140,10 @@ public DatasetFixture build() {
private Dataset createEmptyDataset(BuildContext context) {
Dataset dataset = new Dataset();
populator.populateDataset(dataset, context);
+ // DatasetType comes from the recipe, not the populator, because it is a shared
+ // reference entity that must pre-exist in the database. The recipe either wraps
+ // a pre-existing instance or builds one from scalar values for this fixture.
+ dataset.setDatasetType(datasetRecipe.datasetTypeRecipe().datasetType());
dataset.setVersions(new ArrayList<>());
return dataset;
}
@@ -481,9 +487,10 @@ void addVariableMetadata(VariableMetadata metadata) {
variableMetadata.add(metadata);
}
- DatasetFixture toFixture(Dataset dataset, DatasetVersion currentVersion) {
+ DatasetFixture toFixture(Dataset dataset, DatasetType datasetType, DatasetVersion currentVersion) {
return new DatasetFixture(
dataset,
+ datasetType,
currentVersion,
fileMetadatas,
dataFiles,
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
index fdb5f674de6..254295d8c3b 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/DatasetFixtureTest.java
@@ -6,6 +6,7 @@
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.json.JsonPrinter;
import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.DatasetTypeRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VariableSetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
@@ -38,6 +39,7 @@ static void tearDown() {
void smoketest() {
var recipe = DatasetRecipe.of(
+ DatasetTypeRecipe.dataset(),
VersionRecipe.of(
FileRecipe.tabular(10, VariableSetRecipe.uniform(10)),
//FileRecipe.tabular(50, VariableSetRecipe.byPredicate()),
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
index 9ced493d6ad..7add6a5b4b4 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/fixtures/MinimalPopulator.java
@@ -6,7 +6,6 @@
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
-import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VarGroup;
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
@@ -31,7 +30,6 @@ public void populateDataset(Dataset dataset, BuildContext context) {
dataset.setAuthority("10.5072");
dataset.setIdentifier("fixture-dataset-" + context.sequence());
dataset.setStorageIdentifier("fixture-storage-" + context.sequence());
- dataset.setDatasetType(new DatasetType());
// necessary as DvObject says "not nullable"
dataset.setCreateDate(context.getTimestamp());
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
index 61d88e5c085..3c01e275ad9 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetRecipe.java
@@ -1,5 +1,7 @@
package edu.harvard.iq.dataverse.util.testing.recipes;
+import java.util.Objects;
+
/**
* Top-level recipe describing how to construct a {@code Dataset} fixture.
*
@@ -13,31 +15,43 @@
* to evolve later.
*/
public interface DatasetRecipe {
-
+
+ /**
+ * Returns the dataset type recipe providing the type to assign.
+ *
+ * @return dataset type recipe
+ */
+ DatasetTypeRecipe datasetTypeRecipe();
+
/**
* Returns the recipe describing the current version of the dataset.
*
* @return recipe for the current dataset version
*/
VersionRecipe currentVersionRecipe();
-
+
/**
- * Creates a dataset recipe with a single current version recipe.
+ * Creates a dataset recipe with the supplied type and version recipes.
*
- * @param currentVersionRecipe the recipe for the current dataset version
+ * @param datasetTypeRecipe recipe providing the dataset type
+ * @param currentVersionRecipe recipe for the current dataset version
* @return a dataset recipe
*/
- static DatasetRecipe of(VersionRecipe currentVersionRecipe) {
- return new SimpleDatasetRecipe(currentVersionRecipe);
+ static DatasetRecipe of(DatasetTypeRecipe datasetTypeRecipe, VersionRecipe currentVersionRecipe) {
+ Objects.requireNonNull(datasetTypeRecipe, "datasetTypeRecipe must not be null");
+ Objects.requireNonNull(currentVersionRecipe, "currentVersionRecipe must not be null");
+ return new SimpleDatasetRecipe(datasetTypeRecipe, currentVersionRecipe);
}
-
+
/**
* Minimal immutable implementation of {@link DatasetRecipe}.
*
- * @param currentVersionRecipe the recipe for the current dataset version
+ * @param datasetTypeRecipe recipe providing the dataset type
+ * @param currentVersionRecipe recipe for the current dataset version
*/
record SimpleDatasetRecipe(
- VersionRecipe currentVersionRecipe
+ DatasetTypeRecipe datasetTypeRecipe,
+ VersionRecipe currentVersionRecipe
) implements DatasetRecipe {
}
}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java
new file mode 100644
index 00000000000..8a78f37b2b1
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/recipes/DatasetTypeRecipe.java
@@ -0,0 +1,93 @@
+package edu.harvard.iq.dataverse.util.testing.recipes;
+
+import edu.harvard.iq.dataverse.dataset.DatasetType;
+
+/**
+ * Recipe providing the {@link DatasetType} to assign to a generated dataset fixture.
+ *
+ * Unlike structural recipes such as {@link VersionRecipe} or {@link FileRecipe},
+ * this is not a construction recipe. It is a reference/creation provider — the
+ * dataset type it produces is expected to be persisted before the dataset fixture
+ * is committed to the database.
+ *
+ * Two factory styles are available:
+ *
+ * - {@link #of(String, String, String)} — fluent factory that creates a new
+ * {@link DatasetType} from scalar values. Use this when generating a single
+ * dataset fixture and you want a self-contained recipe.
+ * - {@link #of(DatasetType)} — wraps a pre-existing instance. Use this when
+ * you want to share the same type across multiple dataset recipes, or when
+ * the type has already been persisted elsewhere.
+ *
+ */
+public interface DatasetTypeRecipe {
+
+ /**
+ * Returns the dataset type to assign to the generated dataset.
+ *
+ * The returned instance may be newly created or pre-existing, depending on
+ * the implementation. Either way, it must be persisted before the dataset
+ * fixture is committed to the database.
+ *
+ * @return dataset type instance
+ */
+ DatasetType datasetType();
+
+ /**
+ * Creates a recipe that builds a new {@link DatasetType} from the supplied
+ * scalar values.
+ *
+ * This is the preferred factory for single-dataset fixture scenarios where
+ * the type does not need to be reused or pre-built externally. The resulting
+ * type will need to be persisted before the dataset is committed.
+ *
+ * @param name machine-readable name used in APIs and stored in the database
+ * @param displayName human-readable name shown in the UI
+ * @param description optional description of the dataset type
+ * @return a dataset type recipe producing a new type from the supplied values
+ */
+ static DatasetTypeRecipe of(String name, String displayName, String description) {
+ DatasetType datasetType = new DatasetType();
+ datasetType.setName(name);
+ datasetType.setDisplayName(displayName);
+ datasetType.setDescription(description);
+ return new FixedDatasetTypeRecipe(datasetType);
+ }
+
+ /**
+ * Creates a recipe that wraps a pre-existing {@link DatasetType} instance.
+ *
+ * Use this when the type has already been persisted, or when you want to
+ * share the same type instance across multiple dataset recipes.
+ *
+ * @param datasetType pre-existing dataset type to use
+ * @return a dataset type recipe wrapping the supplied instance
+ */
+ static DatasetTypeRecipe of(DatasetType datasetType) {
+ return new FixedDatasetTypeRecipe(datasetType);
+ }
+
+ /**
+ * Creates a recipe using the standard {@value DatasetType#DATASET_TYPE_DATASET}
+ * dataset type with sensible display defaults.
+ *
+ * This is a convenience shortcut for the most common fixture scenario,
+ * where you just need a valid persisted type and do not care about specific
+ * type semantics.
+ *
+ * @return a dataset type recipe for the default dataset type
+ */
+ static DatasetTypeRecipe dataset() {
+ return of(DatasetType.DATASET_TYPE_DATASET, "Dataset", "Standard dataset type for fixtures");
+ }
+
+ /**
+ * Minimal immutable recipe holding a fixed dataset type instance.
+ *
+ * @param datasetType dataset type to return
+ */
+ record FixedDatasetTypeRecipe(
+ DatasetType datasetType
+ ) implements DatasetTypeRecipe {
+ }
+}
\ No newline at end of file
From 4215b926b2322858f65840f697d6e8326a3cf169 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 28 Apr 2026 22:11:34 +0200
Subject: [PATCH 15/23] test(performance): add fixtures to large dataset export
testing
---
.../HugeDatasetExportPerformanceIT.java | 54 +++++++++++++++++--
1 file changed, 51 insertions(+), 3 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
index 85f09f24197..44cc6371f36 100644
--- a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
@@ -1,8 +1,17 @@
package edu.harvard.iq.dataverse.export;
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.db.performance.JpaTestBootstrap;
import edu.harvard.iq.dataverse.util.testing.Tags;
+import edu.harvard.iq.dataverse.util.testing.fixtures.DatasetFixtureBuilder;
+import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.DatasetTypeRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
+import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
+import net.ttddyy.dsproxy.QueryCount;
+import net.ttddyy.dsproxy.QueryCountHolder;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
@@ -10,6 +19,9 @@
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.postgresql.PostgreSQLContainer;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
+
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
@@ -21,15 +33,37 @@ class HugeDatasetExportPerformanceIT {
static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:16");
static JpaTestBootstrap jpa;
+ static Dataset regularFilesDataset;
+
@BeforeAll
static void setUp() {
postgres.start();
jpa = new JpaTestBootstrap(postgres);
jpa.start();
- // TODO: run schema migration / load fixture here
+ DatasetRecipe regularFiles = DatasetRecipe.of(
+ DatasetTypeRecipe.dataset(),
+ VersionRecipe.of(
+ FileRecipe.regular(1000)
+ )
+ );
+ // Build the fixture
+ var regularFixture = DatasetFixtureBuilder.builder().recipe(regularFiles).build();
+ // Some entities need to be present in the database to appropriately let the ORM create the mappings
+ jpa.inTransactionVoid(em -> em.persist(regularFixture.datasetType()));
+
+ // Persist the actual dataset
+ regularFilesDataset = regularFixture.dataset();
+ jpa.inTransactionVoid(em -> {
+ // DataFile has no cascade path from Dataset, so each file must be persisted explicitly before
+ // the dataset graph is flushed.
+ for (DataFile dataFile : regularFixture.dataFiles()) {
+ em.persist(dataFile);
+ }
+ em.persist(regularFilesDataset);
+ });
}
@AfterAll
@@ -42,16 +76,30 @@ static void tearDown() {
@Test
void shouldExportLargeDataset() {
- Long datasetVersionId = 123L;
+ Long datasetVersionId = regularFilesDataset.getId();
+
+ QueryCountHolder.clear();
+ Instant start = Instant.now();
String json = jpa.inTransaction(em -> {
var datasetVersion = em.find(DatasetVersion.class, datasetVersionId);
assumeTrue(datasetVersion != null, "No dataset version available in DB. Check fixtures!");
InternalExportDataProvider provider = new InternalExportDataProvider(datasetVersion);
- return provider.getDatasetFileDetails().toString();
+ var details = provider.getDatasetFileDetails();
+ return details.toString();
});
assertNotNull(json);
+
+ Instant end = Instant.now();
+
+ QueryCount count = QueryCountHolder.getGrandTotal();
+ System.out.println("Elapsed ms: " + start.until(end, ChronoUnit.MILLIS));
+ System.out.println("Total queries: " + count.getTotal());
+ System.out.println("Select queries: " + count.getSelect());
+ System.out.println("Insert queries: " + count.getInsert());
+ System.out.println("Update queries: " + count.getUpdate());
+ System.out.println("Delete queries: " + count.getDelete());
}
}
From bfccabe06570a72a856ff066e293bb00f8bd9ae1 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Thu, 30 Apr 2026 22:00:25 +0200
Subject: [PATCH 16/23] docs(fixtures): add detailed documentation for test
fixture generator and recipes
Provides comprehensive guides and examples for using the dataset fixture generator, recipes, and populators. Includes usage scenarios, architecture overview, persistence guidance, and extension recommendations to support test development.
---
.../source/developers/testing/fixtures.md | 383 ++++++++++++++++++
1 file changed, 383 insertions(+)
create mode 100644 doc/sphinx-guides/source/developers/testing/fixtures.md
diff --git a/doc/sphinx-guides/source/developers/testing/fixtures.md b/doc/sphinx-guides/source/developers/testing/fixtures.md
new file mode 100644
index 00000000000..06f8edfc25a
--- /dev/null
+++ b/doc/sphinx-guides/source/developers/testing/fixtures.md
@@ -0,0 +1,383 @@
+# Fixtures For Tests
+
+Most Dataverse test fixtures are based on JSON files stored in the test resources of the codebase.
+
+In addition, (as of Dataverse 6.11) you can use a generator utility to create dataset-centered fixtures programmatically.
+This is most useful for local integration and performance tests but may be of use for unit tests as well.
+
+```{contents} Contents:
+:local:
+:depth: 3
+```
+
+(fixture-generator)=
+## Dataset Fixture Generator
+
+The dataset fixture generator is a test utility for creating connected dataset entity graphs with configurable size and shape.
+It is located in the core testing utilities at `edu.harvard.iq.dataverse.util.testing.fixtures` and `edu.harvard.iq.dataverse.util.testing.recipes`.
+
+The fixture generator is useful when tests need one or more datasets with many files, tabular files, variables, and optional variable metadata, while still keeping the test setup readable.
+It is primarily intended for integration and performance tests where hand-building entities would be too verbose, brittle, or too uniform to uncover ORM and serialization issues.
+
+The generator creates an in-memory entity graph.
+Persisting that graph to a database is optional and requires the usual JPA persistence rules to be respected (see below).
+
+
+### Architecture
+
+The fixture generator is built around three main concepts: a builder, recipes for it, and field populators.
+This separation keeps entity graph shape, relationship wiring, and scalar field population independent of each other.
+
+#### Fixture Builder
+
+The builder creates the connected *entity graph* by consuming recipes. It is responsible for:
+
+- Creating the entities
+- Wiring relationships
+- Keeping both sides of relationships in sync where needed
+- Returning a `DatasetFixture` with convenient references to generated objects
+
+#### Recipes
+
+Recipes *describe* the *shape* of the fixture's entity graph and should not manually wire entity relationships:
+
+- How many files should exist?
+- Which files are tabular?
+- How many variables should a tabular file contain?
+- Should variable metadata be created?
+
+**Available Recipes:**
+
+Recipes are composable using a fluent API and work together.
+
+```text
+DatasetRecipe
+ -> DatasetTypeRecipe
+ -> VersionRecipe
+ -> FileRecipe
+ -> VariableSetRecipe
+ -> VariableMetadataRecipe
+```
+
+`DatasetRecipe`
+Top-level recipe for creating a dataset fixture. It combines a `DatasetTypeRecipe` and a `VersionRecipe`.
+
+`DatasetTypeRecipe`
+Provides the dataset type assigned to the generated dataset.
+Can create a dataset type from scalar values or wrap an existing instance.
+
+Note: the recipe provides the type object but does not persist it.
+Tests that persist generated fixtures must ensure the dataset type is managed before the dataset is flushed.
+
+`VersionRecipe`
+Describes the current dataset version. At the moment, this mainly means providing one or more file recipes.
+
+`FileRecipe`
+Describes file populations. A file recipe may create regular files or tabular files.
+
+`VariableSetRecipe`
+Describes how many variables to create for tabular files. It supports uniform and skewed variable populations.
+
+`VariableMetadataRecipe`
+Decides whether a `VariableMetadata` row should be created for a generated `(FileMetadata, DataVariable)` pair.
+At most one metadata row is generated for each such pair.
+
+
+#### Fixture Populator
+
+The populator fills scalar and non-relationship *fields*, which are not primarily about graph shape.
+
+It sets values such as:
+
+- Identifiers
+- Timestamps
+- File labels
+- Content types
+- Checksums
+- Variable names
+- Required fields
+- Null-sensitive collections
+
+The default *minimal* populator is conservative.
+It creates enough data for serialization and persistence tests, but it does not try to simulate fully realistic production metadata.
+
+
+
+### Full Example
+
+The following example creates a small but non-uniform dataset fixture. It's suitable
+- for a smoke test of a serializer,
+- for an integration test with assertions on the result,
+- for a performance test with benchmarking speed of different implementations, and other scenarios.
+
+```java
+var recipe = DatasetRecipe.of(
+ DatasetTypeRecipe.dataset(),
+ VersionRecipe.of(
+ FileRecipe.regular(20),
+ FileRecipe.tabular(30,
+ VariableSetRecipe
+ .byPredicate(VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() < 5))
+ .when(ctx -> ctx.fileIndex() % 10 == 0, 1_000)
+ .otherwise(25)
+ ))
+);
+
+DatasetFixture fixture = DatasetFixtureBuilder.builder()
+ .recipe(recipe)
+ .populator(FixturePopulator.minimal())
+ .build();
+
+JsonArrayBuilder files = Json.createArrayBuilder();
+
+for (FileMetadata fileMetadata : fixture.fileMetadatas()) {
+ files.add(JsonPrinter.json(fileMetadata.getDataFile(), fileMetadata, true));
+}
+
+var json = files.build();
+```
+
+This creates:
+- 20 regular files
+- 30 tabular files
+ - some tabular files with 1,000 variables
+ - other tabular files with 25 variables
+ - variable metadata only for the first few variables in each tabular file
+
+This helps exercise code paths that traverse files, file metadata, data tables, data variables, variable metadata.
+All of this happens without the need to pre-produce an enormous fixture as a JSON file.
+Its deterministic nature allows running the test anywhere without depending on seeded randomness, offering reliable and reproducible results.
+
+
+
+### Basic Usage
+
+#### Small Dataset
+
+This example creates:
+
+- one dataset
+- one current version
+- 10 tabular files
+- 10 variables per tabular file
+- 1 regular file
+
+```java
+var recipe = DatasetRecipe.of(
+ DatasetTypeRecipe.dataset(),
+ VersionRecipe.of(
+ FileRecipe.tabular(10, VariableSetRecipe.uniform(10)),
+ FileRecipe.regular(1)
+ )
+);
+
+DatasetFixture fixture = DatasetFixtureBuilder.builder()
+ .recipe(recipe)
+ .populator(FixturePopulator.minimal())
+ .build();
+
+Dataset dataset = fixture.dataset();
+DatasetVersion version = fixture.currentVersion();
+```
+
+#### Skewed Variable Populations
+
+Skewed data is useful for performance testing because real datasets are rarely uniform.
+Some files may have only a few variables, while others may be very large.
+
+This example creates 500 tabular files:
+
+- one dataset
+- one current version
+- 500 tabular files
+ - every 100th file receives 100,000 variables
+ - every 10th file receives 10,000 variables
+ - all others receive 250 variables
+
+```java
+var variables = VariableSetRecipe.byPredicate()
+ .when(ctx -> ctx.fileIndex() % 100 == 0, 100_000)
+ .when(ctx -> ctx.fileIndex() % 10 == 0, 10_000)
+ .otherwise(250);
+
+var recipe = DatasetRecipe.of(
+ DatasetTypeRecipe.dataset(),
+ VersionRecipe.of(
+ FileRecipe.tabular(500, variables)
+ )
+);
+
+DatasetFixture fixture = DatasetFixtureBuilder.builder()
+ .recipe(recipe)
+ .build();
+```
+
+#### Adding Variable Metadata
+
+Variable Metadata is optional and controlled by `VariableMetadataRecipe`.
+The metadata recipe is evaluated for each generated `(FileMetadata, DataVariable)` pair.
+This matters because `VariableMetadata` is versioned indirectly through `FileMetadata`.
+
+*No variable metadata (default):*
+
+```java
+VariableSetRecipe.uniform(1_000)
+- or -
+VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.noop())
+```
+
+*Metadata for every variable:*
+
+```java
+VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.always())
+```
+
+*Metadata for selected variables:*
+
+```java
+VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() % 10 == 0))
+```
+
+
+
+### Persistence Usage
+
+The generator creates an in-memory entity graph. Persisting that graph is optional and follows normal JPA rules.
+
+When persisting a generated fixture to a database, remember that not all relationships cascade from `Dataset` to every object.
+In particular, `DataFile` instances usually need to be persisted explicitly before persisting the dataset graph.
+The `DatasetType` must also be managed, either by persisting the generated type or by looking up an existing one in the same persistence context.
+
+A typical persistence sequence is:
+
+```java
+jpa.inTransactionVoid(em -> {
+ em.persist(fixture.datasetType());
+ for (DataFile dataFile : fixture.dataFiles()) {
+ em.persist(dataFile);
+ }
+ em.persist(fixture.dataset());
+});
+```
+
+The exact order may evolve as the fixture generator grows, may depend on the exact usage scenario, and
+is influenced by the evolution of the entity classes themselves, but the important point is:
+**Shared/reference entities and non-cascaded entities must be managed (persisted) before the dataset graph is flushed**.
+
+
+
+### Discussion and Limitations
+
+#### Benefits
+
+1. **Readable scenarios:** tests describe intent at a high level.
+ For example: `FileRecipe.tabular(500, VariableSetRecipe.uniform(1_000))` is easier to understand than manually creating thousands of entities.
+2. **Composable graph shape:** different recipes can be combined to describe mixed datasets.
+3. **Deterministic output:** the build context carries fixture-wide values such as sequence and timestamp, making generated data easier to debug and compare.
+4. **Reduced boilerplate:** relationship wiring and null-sensitive defaults are centralized.
+5. **Better performance testing:** skewed fixtures can expose ORM issues that uniform data may hide, such as N+1 query expansion over large variable collections.
+6. **Serialization safety:** the minimal populator initializes fields and collections that serializers commonly traverse.
+
+#### Tradeoffs
+
+1. **More concepts to learn:** developers need to understand builders, recipes, populators, and resulting fixture objects vs. a static factory.
+2. **Not a full production object factory:** the minimal populator creates safe test data, not necessarily realistic production data.
+3. **Persistence still requires care:** some entities must be persisted explicitly because the production model does not cascade every relationship.
+4. **Hardcoded defaults:** the minimal populator uses deterministic placeholder values; tests that need realistic metadata should provide a custom populator.
+
+#### Limitations
+
+1. **Minimalistic:** The current fixture generator is intentionally minimal.
+2. **Single dataset version only:** the fixture currently models one current dataset version and does not generate multiple versions.
+3. **No version evolution recipes:** there is no support yet for deriving later versions from earlier versions, modeling change over time.
+4. **Limited dataset metadata:** dataset fields and metadata blocks are not generated in detail.
+5. **Simple dataset type handling:** a `DatasetType` can be generated or supplied, but persistence of shared types is still the responsibility of the test.
+6. **No persistence manager:** the fixture system builds graphs, but it does not yet provide a dedicated persister that knows the correct persistence order.
+7. **One table per tabular file:** tabular files currently get one `DataTable`. The domain model can allow more, but the fixture generator does not expose that yet.
+8. **One variable group per tabular file:** each non-empty tabular file currently gets one `VarGroup` containing all variables; there is no `VarGroupRecipe` yet.
+9. **Limited variable metadata content:** variable metadata can be present or absent, but the minimal populator only fills basic scalar values.
+10. **No category or statistics recipes:** the fixture generator does not yet provide recipes for variable categories, summary statistics, invalid ranges, or category metadata.
+
+#### Unsupported Usage Scenarios
+
+The following scenarios are not yet directly expressible:
+
+- multiple dataset versions sharing the same `DataFile` objects
+- metadata-only changes between versions
+- version-specific `VariableMetadata` changes across versions
+- files added or removed between versions
+- multiple `DataTable` objects per file
+- different variable group distributions per file
+- weighted random or seeded random file populations
+- Zipf-like or heavy-tail distributions as first-class recipes
+- realistic dataset field metadata
+- fixture graphs that mimic a fully published dataset lifecycle
+
+
+
+### Extending The Fixture Generator
+
+When extending the fixture generator, first decide which responsibility your change belongs to.
+
+#### Add Recipes For Graph Shaping
+
+Use a new recipe when the test needs to describe what shape should be created.
+
+Examples:
+
+- number of var groups
+- number of data tables per file
+- whether categories should exist
+- how many variables receive summary statistics
+- how versions evolve over time
+
+Recipe changes usually belong in the `edu.harvard.iq.dataverse.util.testing.recipes` package.
+
+#### Add Populator Behavior For Scalar Values
+
+Use a new or custom populator when entities should be filled differently, but the graph shape is the same.
+Extend the populator interface if new types of scalar data are required.
+
+Examples:
+
+- more realistic file names
+- different content types
+- richer variable labels
+- custom checksums
+- realistic variable metadata text
+
+Populator changes usually belong in the `edu.harvard.iq.dataverse.util.testing.fixtures` package.
+
+#### Change Builder For Wiring
+
+Change the builder when new relationships must be created or maintained.
+
+Examples:
+
+- adding support for `VariableCategory`
+- wiring category metadata
+- creating multiple data tables per file
+- linking version-evolved file metadata back to shared data files
+
+Builder changes should be kept small and split into helper methods where possible.
+
+#### Recommended Extension Path
+
+A practical roadmap for further evolution is:
+
+1. Add a `VarGroupRecipe` to control group count and membership.
+2. Add category and summary statistic recipes for variable-level enrichment.
+3. Add a fixture persister that knows the correct persistence order.
+4. Add version evolution recipes for multi-version datasets.
+5. Add richer dataset metadata generation.
+6. Add (seeded!) random distribution recipes if a deterministic skew is not enough.
+7. Add fuzzy testing by generating fixtures with targeted chaos.
+
+#### Guidelines For Contributions
+
+1. Keep recipes declarative: recipes should describe shape, not manually wire entity relationships.
+2. Keep populators focused: populators should fill fields, not decide how many entities exist.
+3. Keep builders responsible for wiring: relationship consistency belongs in the builder.
+4. Prefer deterministic generation: deterministic data makes performance tests easier to reproduce and debug.
+5. Avoid hiding persistence requirements: if an entity must be persisted before another, document it clearly or add a dedicated persister.
+6. Start minimal: add the smallest recipe or populator extension needed for the scenario. Avoid making the DSL generic before there is a concrete test need.
From b7b335eee06d08d89ac35cef3cb134b7d347744f Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:35:27 +0200
Subject: [PATCH 17/23] test(performance): remove `JpaTestBootstrap` utility
from performance tests
This utility is replaced by a more sophisticated JUnit extension
---
.../db/performance/JpaTestBootstrap.java | 131 ------------------
1 file changed, 131 deletions(-)
delete mode 100644 src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
diff --git a/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java b/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
deleted file mode 100644
index 9c380703444..00000000000
--- a/src/test/java/edu/harvard/iq/dataverse/db/performance/JpaTestBootstrap.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package edu.harvard.iq.dataverse.db.performance;
-
-import jakarta.persistence.EntityManager;
-import jakarta.persistence.EntityManagerFactory;
-import jakarta.persistence.EntityTransaction;
-import jakarta.persistence.Persistence;
-import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder;
-import org.postgresql.ds.PGSimpleDataSource;
-import org.testcontainers.postgresql.PostgreSQLContainer;
-
-import javax.sql.DataSource;
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Function;
-
-public class JpaTestBootstrap implements AutoCloseable {
-
- public static final String PERSISTENCE_UNIT = "VDCNet-ejbPU-test";
-
- private final PostgreSQLContainer postgres;
-
- private DataSource dataSource;
- private EntityManagerFactory emf;
-
- public JpaTestBootstrap(PostgreSQLContainer postgres) {
- this.postgres = postgres;
- }
-
- public void start() {
- if (emf != null) {
- throw new IllegalStateException("JpaTestBootstrap has already been started.");
- }
-
- DataSource baseDataSource = createDataSource();
- dataSource = ProxyDataSourceBuilder.create()
- .dataSource(baseDataSource)
- .countQuery()
- .buildProxy();
-
- validateDataSource(dataSource);
-
- Map<String, Object> properties = new HashMap<>();
- properties.put("jakarta.persistence.nonJtaDataSource", dataSource);
- properties.put("jakarta.persistence.schema-generation.database.action", "create");
-
- emf = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT, properties);
-
- validateEntityManagerFactory();
- }
-
- public DataSource getDataSource() {
- ensureStarted();
- return dataSource;
- }
-
- public EntityManager createEntityManager() {
- ensureStarted();
- return emf.createEntityManager();
- }
-
- public EntityManagerFactory getEntityManagerFactory() {
- ensureStarted();
- return emf;
- }
-
- public <T> T inTransaction(Function<EntityManager, T> work) {
- EntityManager em = createEntityManager();
- EntityTransaction tx = em.getTransaction();
- try {
- tx.begin();
- T result = work.apply(em);
- tx.commit();
- return result;
- } catch (RuntimeException e) {
- if (tx.isActive()) {
- tx.rollback();
- }
- throw e;
- } finally {
- em.close();
- }
- }
-
- public void inTransactionVoid(Consumer<EntityManager> work) {
- inTransaction(em -> {
- work.accept(em);
- return null;
- });
- }
-
- private DataSource createDataSource() {
- PGSimpleDataSource pgDataSource = new PGSimpleDataSource();
- pgDataSource.setURL(postgres.getJdbcUrl());
- pgDataSource.setUser(postgres.getUsername());
- pgDataSource.setPassword(postgres.getPassword());
- return pgDataSource;
- }
-
- private void validateDataSource(DataSource dataSource) {
- try (Connection connection = dataSource.getConnection()) {
- if (!connection.isValid(5)) {
- throw new IllegalStateException("DataSource connection is not valid.");
- }
- } catch (SQLException e) {
- throw new IllegalStateException("Failed to validate DataSource.", e);
- }
- }
-
- private void validateEntityManagerFactory() {
- EntityManager entityManager = emf.createEntityManager();
- entityManager.close();
- }
-
- private void ensureStarted() {
- if (emf == null) {
- throw new IllegalStateException("JpaTestBootstrap has not been started yet.");
- }
- }
-
- @Override
- public void close() {
- if (emf != null && emf.isOpen()) {
- emf.close();
- }
- emf = null;
- dataSource = null;
- }
-}
\ No newline at end of file
From 595e8f22a20286c3f1b1de4a0aa442b7d934eef0 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:36:21 +0200
Subject: [PATCH 18/23] test(performance): introduce JPA performance testing
utilities and JUnit extension
Adds `JpaEntityManagerService` for managing JPA entity lifecycle and transaction operations in tests, the `JpaPerformanceTest` annotation to streamline performance test setup with Testcontainers, and the `JpaPerformanceTestExtension` to handle shared PostgreSQL container and database isolation. Updates `pom.xml` with required dependencies for these utilities.
---
pom.xml | 6 +
.../performance/JpaEntityManagerService.java | 135 ++++++++++++++++++
.../performance/JpaPerformanceTest.java | 34 +++++
.../JpaPerformanceTestExtension.java | 135 ++++++++++++++++++
4 files changed, 310 insertions(+)
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java
diff --git a/pom.xml b/pom.xml
index a2742fce95e..0f3c78ebf5c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -770,6 +770,12 @@
1.11.0
test
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-dbcp2</artifactId>
+ <version>2.14.0</version>
+ <scope>test</scope>
+ </dependency>
org.testcontainers
testcontainers
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java
new file mode 100644
index 00000000000..a2b9927638e
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaEntityManagerService.java
@@ -0,0 +1,135 @@
+package edu.harvard.iq.dataverse.util.testing.performance;
+
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.EntityManagerFactory;
+import jakarta.persistence.EntityTransaction;
+import jakarta.persistence.Persistence;
+import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder;
+
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+/**
+ * Service class managing the lifecycle and operations of an {@link EntityManagerFactory}
+ * for JPA-based persistence. This class is responsible for configuring the persistence
+ * unit, initializing the factory, and providing utility methods to interact with JPA
+ * entities within transactions.
+ *
+ * Implementation contracts:
+ * - The service must be explicitly started with the {@code start()} method before usage.
+ * - Resources are properly released when the service is closed via the {@code close()} method.
+ * - Transactions are managed and isolated when executing database operations.
+ *
+ * Use cases:
+ * - Configure and initialize an {@link EntityManagerFactory} with a non-JTA datasource.
+ * - Manage entity operations within transactions, supporting both functional and void work units.
+ * - Validate the underlying datasource and factory to ensure system integrity.
+ */
+public class JpaEntityManagerService implements AutoCloseable {
+
+ public static final String PERSISTENCE_UNIT = "VDCNet-ejbPU-test";
+
+ private final DataSource baseDataSource;
+ private DataSource proxiedDataSource;
+ private EntityManagerFactory emf;
+
+ public JpaEntityManagerService(DataSource dataSource) {
+ this.baseDataSource = dataSource;
+ }
+
+ public void start() {
+ if (emf != null) {
+ throw new IllegalStateException("JpaEntityManagerService has already been started.");
+ }
+
+ proxiedDataSource = ProxyDataSourceBuilder.create()
+ .dataSource(baseDataSource)
+ .countQuery()
+ .buildProxy();
+
+ validateDataSource(proxiedDataSource);
+
+ Map<String, Object> properties = new HashMap<>();
+ properties.put("jakarta.persistence.nonJtaDataSource", proxiedDataSource);
+ properties.put("jakarta.persistence.schema-generation.database.action", "create");
+
+ emf = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT, properties);
+
+ validateEntityManagerFactory();
+ }
+
+ public DataSource getDataSource() {
+ ensureStarted();
+ return proxiedDataSource;
+ }
+
+ public EntityManager createEntityManager() {
+ ensureStarted();
+ return emf.createEntityManager();
+ }
+
+ public EntityManagerFactory getEntityManagerFactory() {
+ ensureStarted();
+ return emf;
+ }
+
+ public <T> T inTransaction(Function<EntityManager, T> work) {
+ EntityManager em = createEntityManager();
+ EntityTransaction tx = em.getTransaction();
+ try {
+ tx.begin();
+ T result = work.apply(em);
+ tx.commit();
+ return result;
+ } catch (RuntimeException e) {
+ if (tx.isActive()) {
+ tx.rollback();
+ }
+ throw e;
+ } finally {
+ em.close();
+ }
+ }
+
+ public void inTransactionVoid(Consumer<EntityManager> work) {
+ inTransaction(em -> {
+ work.accept(em);
+ return null;
+ });
+ }
+
+ private void validateDataSource(DataSource dataSource) {
+ try (Connection connection = dataSource.getConnection()) {
+ if (!connection.isValid(5)) {
+ throw new IllegalStateException("DataSource connection is not valid");
+ }
+ } catch (SQLException e) {
+ throw new IllegalStateException("Failed to validate DataSource", e);
+ }
+ }
+
+ private void validateEntityManagerFactory() {
+ EntityManager entityManager = emf.createEntityManager();
+ entityManager.close();
+ }
+
+ private void ensureStarted() {
+ if (emf == null) {
+ throw new IllegalStateException("JpaEntityManagerService has not been started yet - did you run .start()?");
+ }
+ }
+
+ @Override
+ public void close() {
+ if (emf != null && emf.isOpen()) {
+ emf.close();
+ }
+ emf = null;
+ proxiedDataSource = null;
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java
new file mode 100644
index 00000000000..6f11183307a
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTest.java
@@ -0,0 +1,34 @@
+package edu.harvard.iq.dataverse.util.testing.performance;
+
+import edu.harvard.iq.dataverse.util.testing.Tags;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.parallel.Execution;
+import org.junit.jupiter.api.parallel.ExecutionMode;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marker annotation for JPA performance tests using Testcontainers.
+ *
+ * Applies automatic tags, enforces Testcontainers availability (skips if Docker is missing),
+ * and registers a custom extension to manage a shared PostgreSQL container and database isolation.
+ *
+ * Contract: Test classes using this annotation MUST declare a {@code static JpaEntityManagerService} field.
+ * Note: Due to the underlying extension's shared container management, test classes annotated with this
+ * will execute sequentially to prevent container state races.
+ */
+@Target(ElementType.TYPE)
+@Retention(RetentionPolicy.RUNTIME)
+@Tag(Tags.PERFORMANCE_TEST)
+@Tag(Tags.USES_TESTCONTAINERS)
+@Testcontainers(disabledWithoutDocker = true)
+@ExtendWith(JpaPerformanceTestExtension.class)
+// Make sure the test methods are never run in parallel - this would be bad for a performance test...
+@Execution(ExecutionMode.SAME_THREAD)
+public @interface JpaPerformanceTest {
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java
new file mode 100644
index 00000000000..997da6aab84
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/performance/JpaPerformanceTestExtension.java
@@ -0,0 +1,135 @@
+package edu.harvard.iq.dataverse.util.testing.performance;
+
+import org.apache.commons.dbcp2.BasicDataSource;
+import org.junit.jupiter.api.extension.AfterAllCallback;
+import org.junit.jupiter.api.extension.BeforeAllCallback;
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.testcontainers.postgresql.PostgreSQLContainer;
+
+import java.lang.reflect.Field;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.UUID;
+
+import static java.lang.reflect.Modifier.isStatic;
+
+/**
+ * JUnit 5 Extension that manages a shared PostgreSQL container for performance tests.
+ * It ensures a unique database is created for each test class to guarantee isolation.
+ */
+public class JpaPerformanceTestExtension implements BeforeAllCallback, AfterAllCallback {
+
+ // Global shared container
+ private static PostgreSQLContainer sharedContainer;
+
+ // This lock makes sure all tests using this extension are executed sequentially.
+ // For performance tests, executing test classes in parallel for the same, shared DB instance makes no sense.
+ // There is no JUnit way to express such a "global lock", thus we need to do this manually.
+ // Note: avoiding parallelism of test methods is done by the @JpaPerformanceTest annotation.
+ private static final Object CONTAINER_LOCK = new Object();
+
+ // Store the service instance to close it in AfterAll
+ private static final String SERVICE_FIELD_KEY = "jpa.service.instance";
+
+ @Override
+ public void beforeAll(ExtensionContext context) throws Exception {
+ // 1. Ensure the global container is running
+ ensureSharedContainerRunning();
+
+ // 2. Create a unique database for this test class
+ String uniqueDbName = "perf_test_" + UUID.randomUUID().toString().substring(0, 8);
+ createDatabase(uniqueDbName);
+
+ // 3. Retrieve the JPA Service and inject into the test class field
+ JpaEntityManagerService service = getService(uniqueDbName);
+ injectService(context, service);
+
+ // 4. Store reference for cleanup
+ context.getStore(ExtensionContext.Namespace.GLOBAL).put(SERVICE_FIELD_KEY, service);
+ }
+
+ @Override
+ public void afterAll(ExtensionContext context) {
+ // Close the EntityManagerFactory and connections
+ JpaEntityManagerService service = (JpaEntityManagerService) context.getStore(ExtensionContext.Namespace.GLOBAL).get(SERVICE_FIELD_KEY);
+ if (service != null) {
+ try {
+ service.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ // Note: We do NOT stop the sharedContainer here.
+ // It stays running for the next test class.
+ }
+
+ // --- Helper Methods ---
+
+ private void ensureSharedContainerRunning() {
+ synchronized (CONTAINER_LOCK) {
+ if (sharedContainer == null || !sharedContainer.isRunning()) {
+ String pgVersion = System.getProperty("postgresql.server.version", "16");
+ sharedContainer = new PostgreSQLContainer("postgres:" + pgVersion);
+ sharedContainer.start();
+ }
+ }
+ }
+
+ private void createDatabase(String dbName) {
+ try (Connection conn = DriverManager.getConnection(
+ sharedContainer.getJdbcUrl(),
+ sharedContainer.getUsername(),
+ sharedContainer.getPassword())) {
+
+ // Postgres requires auto-commit to be true for CREATE DATABASE
+ conn.setAutoCommit(true);
+ Statement stmt = conn.createStatement();
+ stmt.execute("CREATE DATABASE " + dbName);
+ } catch (SQLException e) {
+ // Ignore if DB already exists (unlikely with UUID, but safe)
+ if (!e.getMessage().contains("already exists")) {
+ throw new RuntimeException("Failed to create test database: " + dbName, e);
+ }
+ }
+ }
+
+ private void injectService(ExtensionContext context, JpaEntityManagerService service) throws Exception {
+ Class<?> testClass = context.getRequiredTestClass();
+ boolean hasBeenInjected = false;
+
+ // Look for a static field of type JpaEntityManagerService
+ for (Field field : testClass.getDeclaredFields()) {
+ if (field.getType() == JpaEntityManagerService.class) {
+ if (!isStatic(field.getModifiers())) {
+ throw new RuntimeException("Cannot inject into field '" + field.getName() + "' of class '" + testClass.getName() + "': not a static field");
+ }
+ if (hasBeenInjected) {
+ throw new RuntimeException("Cannot inject into field '" + field.getName() + "' of class '" + testClass.getName() + "': only one target field allowed");
+ }
+ field.setAccessible(true);
+ field.set(null, service);
+ hasBeenInjected = true;
+ }
+ }
+
+ if (!hasBeenInjected) {
+ throw new RuntimeException("Could not inject into a static field of class '" + testClass.getName() + "': no field found");
+ }
+ }
+
+ private static JpaEntityManagerService getService(String uniqueDbName) {
+ // Tune the URL as we need to apply our unique DB name (the container has a default one we override)
+ String tunedJdbcUrl = sharedContainer.getJdbcUrl()
+ .replaceFirst("/" + sharedContainer.getDatabaseName(), "/" + uniqueDbName);
+
+ // Configure a pooled (!) DataSource for this unique database
+ BasicDataSource dataSource = new BasicDataSource();
+ dataSource.setUrl(tunedJdbcUrl);
+ dataSource.setUsername(sharedContainer.getUsername());
+ dataSource.setPassword(sharedContainer.getPassword());
+
+ return new JpaEntityManagerService(dataSource);
+ }
+}
From 4ead80646e3170a065c4e023372575c0e37e552e Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:37:13 +0200
Subject: [PATCH 19/23] test(performance): migrate large dataset export test to
`JpaPerformanceTest` utilities
Replaces `JpaTestBootstrap` and manual container management with the `JpaPerformanceTest` annotation and `JpaEntityManagerService`. Removes redundant `@AfterAll` cleanup logic.
---
.../HugeDatasetExportPerformanceIT.java | 26 ++++---------------
1 file changed, 5 insertions(+), 21 deletions(-)
diff --git a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
index 44cc6371f36..1cf4c17495d 100644
--- a/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/export/HugeDatasetExportPerformanceIT.java
@@ -3,21 +3,17 @@
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.db.performance.JpaTestBootstrap;
-import edu.harvard.iq.dataverse.util.testing.Tags;
import edu.harvard.iq.dataverse.util.testing.fixtures.DatasetFixtureBuilder;
+import edu.harvard.iq.dataverse.util.testing.performance.JpaEntityManagerService;
+import edu.harvard.iq.dataverse.util.testing.performance.JpaPerformanceTest;
import edu.harvard.iq.dataverse.util.testing.recipes.DatasetRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.DatasetTypeRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.FileRecipe;
import edu.harvard.iq.dataverse.util.testing.recipes.VersionRecipe;
import net.ttddyy.dsproxy.QueryCount;
import net.ttddyy.dsproxy.QueryCountHolder;
-import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
-import org.testcontainers.junit.jupiter.Testcontainers;
-import org.testcontainers.postgresql.PostgreSQLContainer;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
@@ -25,20 +21,16 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
-@Tag(Tags.USES_TESTCONTAINERS)
-@Tag(Tags.PERFORMANCE_TEST)
-@Testcontainers(disabledWithoutDocker = true)
+@JpaPerformanceTest
class HugeDatasetExportPerformanceIT {
- static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:16");
- static JpaTestBootstrap jpa;
+ static JpaEntityManagerService jpa;
static Dataset regularFilesDataset;
@BeforeAll
static void setUp() {
- postgres.start();
- jpa = new JpaTestBootstrap(postgres);
+ // The manual start is required here in case you need to configure any service features before starting...
jpa.start();
DatasetRecipe regularFiles = DatasetRecipe.of(
@@ -66,14 +58,6 @@ static void setUp() {
});
}
- @AfterAll
- static void tearDown() {
- if (jpa != null) {
- jpa.close();
- }
- postgres.stop();
- }
-
@Test
void shouldExportLargeDataset() {
Long datasetVersionId = regularFilesDataset.getId();
From 5e18459b4888c740bf50eb7031dad68e96949693 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:38:26 +0200
Subject: [PATCH 20/23] test(performance): add `persistence.xml` for test
configuration
Includes resource-local transaction management, entity scanning setup, and matching production-like JPA properties, but tailored for test scenarios.
---
src/test/resources/META-INF/persistence.xml | 34 +++++++++++++++++++++
1 file changed, 34 insertions(+)
create mode 100644 src/test/resources/META-INF/persistence.xml
diff --git a/src/test/resources/META-INF/persistence.xml b/src/test/resources/META-INF/persistence.xml
new file mode 100644
index 00000000000..e317a3c9c07
--- /dev/null
+++ b/src/test/resources/META-INF/persistence.xml
@@ -0,0 +1,34 @@
+
+
+
+
+ org.eclipse.persistence.jpa.PersistenceProvider
+
+
+ ../classes
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From 8095d2f044cd7ef652ad1f3b93859b0288f57c82 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:38:50 +0200
Subject: [PATCH 21/23] docs(performance): add comprehensive guide for
performance testing setup and best practices
Includes instructions for running performance tests, database-bound testing with Testcontainers, and using `JpaEntityManagerService`. Provides example test class, configuration details, and advanced usage for query profiling.
---
.../testing/SamplePerformanceIT.java | 64 +++++++++++++
.../source/developers/testing/performance.md | 89 +++++++++++++++++++
2 files changed, 153 insertions(+)
create mode 100644 doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java
create mode 100644 doc/sphinx-guides/source/developers/testing/performance.md
diff --git a/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java
new file mode 100644
index 00000000000..847093433ec
--- /dev/null
+++ b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java
@@ -0,0 +1,64 @@
+package edu.harvard.iq.dataverse.somepackage;
+
+import edu.harvard.iq.dataverse.util.testing.performance.JpaEntityManagerService;
+import edu.harvard.iq.dataverse.util.testing.performance.JpaPerformanceTest;
+import net.ttddyy.dsproxy.QueryCount;
+import net.ttddyy.dsproxy.QueryCountHolder;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
+import jakarta.persistence.EntityManager;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+// Single annotation for automatic setup of
+// 1) basic tags for JUnit groups,
+// 2) shared PostgreSQL server via Testcontainers, and
+// 3) creation and injection of JPA entity manager service.
+@JpaPerformanceTest
+class SamplePerformanceIT {
+
+ static JpaEntityManagerService jpa;
+
+ @BeforeAll
+ static void setUp() {
+ // A manual start is necessary to allow you to selectively enable service features as necessary
+ jpa.start();
+
+ // inTransactionVoid: Use this when you only need to execute database operations
+ // (e.g., persisting test fixtures) without returning a value.
+ jpa.inTransactionVoid(em -> {
+ // EntityManager em is provided here.
+ // em.persist(myEntity);
+ });
+ }
+
+ @Test
+ void shouldMeasureOperationPerformance() {
+ // Clear any previous query statistics
+ QueryCountHolder.clear();
+ Instant start = Instant.now();
+
+ // inTransaction: Use this when your operation returns a result that needs
+ // to be asserted or measured.
+ Object result = jpa.inTransaction(em -> {
+ // Execute your performance-critical operation using the EntityManager.
+ // return result;
+ return null; // Placeholder
+ });
+
+ Instant end = Instant.now();
+ assertNotNull(result);
+
+ // Retrieve and log ORM statistics
+ QueryCount count = QueryCountHolder.getGrandTotal();
+ System.out.println("Elapsed ms: " + start.until(end, ChronoUnit.MILLIS));
+ System.out.println("Total queries: " + count.getTotal());
+ System.out.println("Select queries: " + count.getSelect());
+ System.out.println("Insert queries: " + count.getInsert());
+ System.out.println("Update queries: " + count.getUpdate());
+ System.out.println("Delete queries: " + count.getDelete());
+ }
+}
\ No newline at end of file
diff --git a/doc/sphinx-guides/source/developers/testing/performance.md b/doc/sphinx-guides/source/developers/testing/performance.md
new file mode 100644
index 00000000000..e981cee5d80
--- /dev/null
+++ b/doc/sphinx-guides/source/developers/testing/performance.md
@@ -0,0 +1,89 @@
+# Performance Testing
+
+## Introduction
+Performance tests measure how your application behaves under load, focusing on execution time, resource consumption, and database efficiency.
+Unlike *unit tests*, which verify isolated logic, or *integration* or *API tests*, which validate component interactions and full request lifecycles, performance tests quantify *how fast* operations complete and *how many* database queries they trigger.
+
+## Running Performance Tests
+Performance tests are excluded from the default test run to save CI/CD time and local resources.
+To execute them, use the Maven `verify` lifecycle phase and override the `it.groups` property:
+
+```shell
+mvn verify -Dit.groups=performance
+```
+
+```{note}
+The `it.groups` property accepts a comma-separated list.
+You can combine groups (e.g., `-Dit.groups=integration,performance`) as necessary.
+However, it is highly recommended to run them in isolation due to their computational intensity and sensitivity to system load.
+```
+
+## Testing database-bound code
+Performance tests for code relying on retrieving entities from a database are essential for catching regressions in ORM efficiency.
+They can identify N+1 query problems or ensure that heavy data processing pipelines (e.g., exporting large datasets) remain responsive as the codebase evolves.
+
+### Prerequisites
+Any tests around database-bound code rely on [Testcontainers](https://www.testcontainers.org/) to spin up ephemeral database instances.
+Avoiding in-memory databases for such tests allows for more realistic testing as seen in actual deployments.
+Consequently, you must have **Docker** installed and running, allowing Testcontainers to start a PostgreSQL server.
+
+- If you use a local Docker daemon, ensure it has sufficient memory allocated (typically 1GB+ is recommended for running Postgres containers alongside your tests).
+- If your Docker daemon runs remotely, ensure the `DOCKER_HOST` environment variable is correctly configured in your shell so Testcontainers can locate it.
+
+The automated testing setup will look up a system property `postgresql.server.version` to determine which container image tag to use.
+The property is injected from `pom.xml` by Maven Failsafe, and a reasonable fallback value is used if it is missing.
+To test with a different version of PostgreSQL, you may set the Maven property `postgresql.server.version` for a run.
+
+### Example
+Performance test classes must follow specific conventions to be discovered and executed correctly:
+
+1. **Package Location:**
+ Place your test class in `src/test/java`, mirroring the package structure of the code you want to test (e.g., `edu.harvard.iq.dataverse.export`).
+ This placement grants the test class access to package private members in `src/main/java`, which is often necessary when testing internal services directly without going through the full API layer.
+2. **Naming Convention:**
+ Name the class `*IT.java` so that the Maven Failsafe plugin automatically picks it up during the `integration-test` phase.
+3. **Setup Annotation:**
+ Annotate the class with `@JpaPerformanceTest` to have everything set up automatically for you.
+ A `JpaEntityManagerService` will be injected into a static class field for you, allowing interaction with a JPA Entity Manager.
+
+Below is a minimal, generic example [`SamplePerformanceIT`](/_static/developers/testing/SamplePerformanceIT.java) demonstrating the structure and how to run a transaction with or without a return value.
+
+```{literalinclude} /_static/developers/testing/SamplePerformanceIT.java
+:name: sample-performance-test
+:language: java
+:start-at: //
+```
+
+### Understanding JpaEntityManagerService
+The `JpaEntityManagerService` class abstracts away the boilerplate required to set up a JPA environment for testing.
+Here is what it does under the hood:
+
+1. **Automatic PostgreSQL Server Setup:**
+ The involved JUnit Test Extension makes sure to create a single server instance to speed up test setups.
+ Nonetheless, any test class will run within its own database on the server, guaranteeing test database isolation.
+
+2. **Automatic Schema Generation:**
+ When you call `.start()` on a `JpaEntityManagerService` instance, it initializes an EclipseLink `EntityManagerFactory` configured to automatically generate the database schema (`schema-generation.database.action=create`).
+ This guarantees that every test run begins with a pristine database structure derived directly from your current JPA entity mappings.
+ You do not need to run Flyway migrations or seed the database beforehand.
+
+3. **Transaction Management:**
+ The service handles the lifecycle of JPA transactions automatically.
+ You simply pass a lambda to `inTransaction()` or `inTransactionVoid()`.
+ The service will:
+ 1. Create an `EntityManager` and begin a transaction.
+ 2. Execute your lambda.
+ 3. Commit the transaction on success, or roll it back if a `RuntimeException` is thrown.
+ 4. Close the `EntityManager` in a `finally` block to prevent resource leaks.
+
+4. **Query Statistics via Wrapped DataSource:**
+ To make it easy to profile ORM behavior, `JpaEntityManagerService` wraps the underlying PostgreSQL `DataSource` using a proxy that intercepts all SQL statements.
+
+ By default, the proxy tracks query counts, which you can retrieve via `QueryCountHolder.getGrandTotal()`.
+ This provides immediate, programmatic insight into database efficiency without needing to parse verbose SQL logs.
+ It is particularly useful for:
+ - Verifying that a batch operation executes in a single query rather than a loop.
+ - Catching N+1 query problems by asserting on the number of `SELECT` statements.
+
+ *Advanced Usage:* The default service only tracks query counts.
+ If you need detailed SQL logging (including bound parameters) or custom execution metrics, you can extend `JpaEntityManagerService` and register additional `QueryExecutionListener` implementations on the `ProxyDataSourceBuilder` during initialization.
\ No newline at end of file
From 4bb2816a9a7ca7c38c2d303b5667423ee3343bb1 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:39:33 +0200
Subject: [PATCH 22/23] docs(develop): add temporary links to testing fixtures
and performance guides
---
doc/sphinx-guides/source/developers/index.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst
index 28b1fbaae82..c829901bead 100755
--- a/doc/sphinx-guides/source/developers/index.rst
+++ b/doc/sphinx-guides/source/developers/index.rst
@@ -47,4 +47,6 @@ Developer Guide
fontcustom
classic-dev-env
search-services
+ testing/fixtures.md
+ testing/performance.md
From 36c9e935848c4a12abaac9a009b889db8b7839c5 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Sat, 2 May 2026 02:39:51 +0200
Subject: [PATCH 23/23] docs: enable Markdown support in Sphinx and update
myst-parser version
Adds support for Markdown files in Sphinx configuration. Upgrades `myst-parser` from `2.0.0` to `4.0.0` in requirements for compatibility.
---
doc/sphinx-guides/requirements.txt | 2 +-
doc/sphinx-guides/source/conf.py | 5 ++++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt
index 9c74ed75f6d..5a188b3cacb 100755
--- a/doc/sphinx-guides/requirements.txt
+++ b/doc/sphinx-guides/requirements.txt
@@ -4,7 +4,7 @@ Sphinx==7.4.0
sphinx-icon==0.1.2
# Markdown support
-myst-parser==2.0.0
+myst-parser==4.0.0
# tabs
sphinx-tabs==3.4.5
diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py
index 6ecaeebaf54..a0ef9edd31f 100755
--- a/doc/sphinx-guides/source/conf.py
+++ b/doc/sphinx-guides/source/conf.py
@@ -53,7 +53,10 @@
templates_path = ['_templates']
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".md": "markdown",
+}
# The encoding of source files.
#source_encoding = 'utf-8-sig'