Commit 0dc5a29

Merge branch 'DSpace:main' into main

hutattedonmyarm committed Apr 19, 2023
2 parents c54dc03 + bcb7142

Showing 20 changed files with 251 additions and 97 deletions.
1 change: 0 additions & 1 deletion .dockerignore
@@ -6,6 +6,5 @@ dspace/modules/*/target/
Dockerfile.*
dspace/src/main/docker/dspace-postgres-pgcrypto
dspace/src/main/docker/dspace-postgres-pgcrypto-curl
dspace/src/main/docker/solr
dspace/src/main/docker/README.md
dspace/src/main/docker-compose/
37 changes: 35 additions & 2 deletions .github/workflows/build.yml
@@ -79,6 +79,39 @@ jobs:
name: ${{ matrix.type }} results
path: ${{ matrix.resultsdir }}

# https://github.com/codecov/codecov-action
# Upload code coverage report to artifact, so that it can be shared with the 'codecov' job (see below)
- name: Upload code coverage report to Artifact
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.type }} coverage report
path: 'dspace/target/site/jacoco-aggregate/jacoco.xml'
retention-days: 14

# Codecov upload is a separate job so that it can be restarted separately from the entire build/test
# job above. This is necessary because Codecov uploads seem to randomly fail at times.
# See https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954
codecov:
# Must run after 'tests' job above
needs: tests
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

# Download artifacts from previous 'tests' job
- name: Download coverage artifacts
uses: actions/download-artifact@v3

# Now attempt upload to Codecov using its action.
# NOTE: We use a retry action to retry the Codecov upload if it fails the first time.
#
# Retry action: https://github.com/marketplace/actions/retry-action
# Codecov action: https://github.com/codecov/codecov-action
- name: Upload coverage to Codecov.io
uses: codecov/codecov-action@v3
uses: Wandalen/wretry.action@v1.0.36
with:
action: codecov/codecov-action@v3
# Try upload 5 times max
attempt_limit: 5
# Run again in 30 seconds
attempt_delay: 30000
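
For context: Wandalen/wretry.action simply re-runs the wrapped codecov/codecov-action until it succeeds or the attempt limit is exhausted, sleeping attempt_delay milliseconds between tries. A minimal Java sketch of those retry semantics, purely illustrative and not part of this commit:

    import java.util.concurrent.Callable;

    public final class Retry {
        // Re-run 'task' until it succeeds or 'attemptLimit' tries have failed.
        // Mirrors the workflow settings above: attemptLimit = 5, attemptDelayMillis = 30000.
        // Requires attemptLimit >= 1.
        public static <T> T withRetry(int attemptLimit, long attemptDelayMillis, Callable<T> task)
                throws Exception {
            Exception last = null;
            for (int attempt = 1; attempt <= attemptLimit; attempt++) {
                try {
                    return task.call();
                } catch (Exception e) {
                    last = e;                             // remember the failure
                    if (attempt < attemptLimit) {
                        Thread.sleep(attemptDelayMillis); // wait before the next try
                    }
                }
            }
            throw last;                                   // all attempts failed
        }
    }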
26 changes: 26 additions & 0 deletions .github/workflows/docker.yml
@@ -170,3 +170,29 @@ jobs:
# Use tags / labels provided by 'docker/metadata-action' above
tags: ${{ steps.meta_build_cli.outputs.tags }}
labels: ${{ steps.meta_build_cli.outputs.labels }}

###########################################
# Build/Push the 'dspace/dspace-solr' image
###########################################
# Get Metadata for docker_build_solr step below
- name: Sync metadata (tags, labels) from GitHub to Docker for 'dspace-solr' image
id: meta_build_solr
uses: docker/metadata-action@v4
with:
images: dspace/dspace-solr
tags: ${{ env.IMAGE_TAGS }}
flavor: ${{ env.TAGS_FLAVOR }}

- name: Build and push 'dspace-solr' image
id: docker_build_solr
uses: docker/build-push-action@v3
with:
context: .
file: ./dspace/src/main/docker/dspace-solr/Dockerfile
platforms: ${{ env.PLATFORMS }}
# For pull requests, we run the Docker build (to ensure no PR changes break the build),
# but we ONLY do an image push to DockerHub if it's NOT a PR
push: ${{ github.event_name != 'pull_request' }}
# Use tags / labels provided by 'docker/metadata-action' above
tags: ${{ steps.meta_build_solr.outputs.tags }}
labels: ${{ steps.meta_build_solr.outputs.labels }}
2 changes: 1 addition & 1 deletion .github/workflows/issue_opened.yml
@@ -16,7 +16,7 @@ jobs:
# Only add to project board if issue is flagged as "needs triage" or has no labels
# NOTE: By default we flag new issues as "needs triage" in our issue template
if: (contains(github.event.issue.labels.*.name, 'needs triage') || join(github.event.issue.labels.*.name) == '')
uses: actions/add-to-project@v0.3.0
uses: actions/add-to-project@v0.5.0
# Note, the authentication token below is an ORG level Secret.
# It must be created/recreated manually via a personal access token with admin:org, project, public_repo permissions
# See: https://docs.github.com/en/actions/configuring-and-managing-workflows/authenticating-with-the-github_token#permissions-for-the-github_token
2 changes: 1 addition & 1 deletion .github/workflows/label_merge_conflicts.yml
@@ -23,7 +23,7 @@ jobs:
steps:
# See: https://github.com/prince-chrismc/label-merge-conflicts-action
- name: Auto-label PRs with merge conflicts
uses: prince-chrismc/label-merge-conflicts-action@v2
uses: prince-chrismc/label-merge-conflicts-action@v3
# Add "merge conflict" label if a merge conflict is detected. Remove it when resolved.
# Note, the authentication token is created automatically
# See: https://docs.github.com/en/actions/configuring-and-managing-workflows/authenticating-with-the-github_token
27 changes: 13 additions & 14 deletions docker-compose.yml
@@ -81,8 +81,12 @@ services:
# DSpace Solr container
dspacesolr:
container_name: dspacesolr
# Uses official Solr image at https://hub.docker.com/_/solr/
image: solr:8.11-slim
image: "${DOCKER_OWNER:-dspace}/dspace-solr:${DSPACE_VER:-dspace-7_x}"
build:
context: .
dockerfile: ./dspace/src/main/docker/dspace-solr/Dockerfile
args:
SOLR_VERSION: "${SOLR_VER:-8.11}"
networks:
dspacenet:
ports:
@@ -92,30 +96,25 @@
tty: true
working_dir: /var/solr/data
volumes:
# Mount our local Solr core configs so that they are available as Solr configsets on container
- ./dspace/solr/authority:/opt/solr/server/solr/configsets/authority
- ./dspace/solr/oai:/opt/solr/server/solr/configsets/oai
- ./dspace/solr/search:/opt/solr/server/solr/configsets/search
- ./dspace/solr/statistics:/opt/solr/server/solr/configsets/statistics
# Keep Solr data directory between reboots
- solr_data:/var/solr/data
# Initialize all DSpace Solr cores using the mounted local configsets (see above), then start Solr
# Initialize all DSpace Solr cores then start Solr:
# * First, run precreate-core to create the core (if it doesn't yet exist). If it already exists, this is a no-op
# * Second, copy updated configs from mounted configsets to this core. If it already existed, this updates core
# to the latest configs. If it's a newly created core, this is a no-op.
# * Second, copy configsets to this core:
# Updates to Solr configs require the container to be rebuilt/restarted: `docker compose -p d7 up -d --build dspacesolr`
entrypoint:
- /bin/bash
- '-c'
- |
init-var-solr
precreate-core authority /opt/solr/server/solr/configsets/authority
cp -r -u /opt/solr/server/solr/configsets/authority/* authority
cp -r /opt/solr/server/solr/configsets/authority/* authority
precreate-core oai /opt/solr/server/solr/configsets/oai
cp -r -u /opt/solr/server/solr/configsets/oai/* oai
cp -r /opt/solr/server/solr/configsets/oai/* oai
precreate-core search /opt/solr/server/solr/configsets/search
cp -r -u /opt/solr/server/solr/configsets/search/* search
cp -r /opt/solr/server/solr/configsets/search/* search
precreate-core statistics /opt/solr/server/solr/configsets/statistics
cp -r -u /opt/solr/server/solr/configsets/statistics/* statistics
cp -r /opt/solr/server/solr/configsets/statistics/* statistics
exec solr -f
volumes:
assetstore:
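Note the two related changes above: the Solr cores are no longer bind-mounted from the local checkout (the configsets are now baked into the dspace-solr image built by the new Dockerfile), and cp -r -u became plain cp -r, so each container start overwrites the core configs unconditionally instead of copying only newer files. That is why the comment now says config updates require rebuilding the container.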
2 changes: 1 addition & 1 deletion dspace-api/pom.xml
@@ -776,7 +776,7 @@
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20180130</version>
<version>20230227</version>
</dependency>

<!-- Useful for testing command-line tools -->
@@ -332,8 +332,8 @@ public void updateLastModified(Context context, Bitstream bitstream) {
}

@Override
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException {
return bitstreamDAO.findDeletedBitstreams(context);
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException {
return bitstreamDAO.findDeletedBitstreams(context, limit, offset);
}

@Override
@@ -29,7 +29,7 @@ public interface BitstreamDAO extends DSpaceObjectLegacySupportDAO<Bitstream> {

public Iterator<Bitstream> findAll(Context context, int limit, int offset) throws SQLException;

public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException;
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException;

public List<Bitstream> findDuplicateInternalIdentifier(Context context, Bitstream bitstream) throws SQLException;

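The interface change above turns BitstreamDAO.findDeletedBitstreams into a paged query. A minimal sketch of how a caller might walk all pages under the new signature: the DAO call is from this diff, the loop around it is illustrative.

    // Assumes this runs inside a method that may throw SQLException.
    int limit = 100;   // page size; the cleanup() change later in this commit also uses 100
    int offset = 0;
    List<Bitstream> page;
    while (!(page = bitstreamDAO.findDeletedBitstreams(context, limit, offset)).isEmpty()) {
        for (Bitstream bitstream : page) {
            // process one "deleted" bitstream
        }
        offset += limit; // only safe while the loop does not expunge rows (see cleanup() below)
    }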
@@ -41,13 +41,14 @@ protected BitstreamDAOImpl() {
}

@Override
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException {
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException {
CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context);
CriteriaQuery criteriaQuery = getCriteriaQuery(criteriaBuilder, Bitstream.class);
Root<Bitstream> bitstreamRoot = criteriaQuery.from(Bitstream.class);
criteriaQuery.select(bitstreamRoot);
criteriaQuery.orderBy(criteriaBuilder.desc(bitstreamRoot.get(Bitstream_.ID)));
criteriaQuery.where(criteriaBuilder.equal(bitstreamRoot.get(Bitstream_.deleted), true));
return list(context, criteriaQuery, false, Bitstream.class, -1, -1);
return list(context, criteriaQuery, false, Bitstream.class, limit, offset);

}

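Two details of the new BitstreamDAOImpl version are easy to miss: the added orderBy(...) keeps page boundaries stable between calls (offset pagination over an unordered result set can skip or repeat rows), and the limit/offset pair presumably reaches Hibernate as standard JPA pagination inside DSpace's list(...) helper, roughly like this sketch (assumed JPA semantics, not the actual helper):

    import java.util.List;
    import javax.persistence.EntityManager;
    import javax.persistence.TypedQuery;
    import javax.persistence.criteria.CriteriaQuery;

    static <T> List<T> page(EntityManager em, CriteriaQuery<T> query, int limit, int offset) {
        TypedQuery<T> typed = em.createQuery(query);
        if (offset > 0) {
            typed.setFirstResult(offset); // skip the first 'offset' rows
        }
        if (limit > 0) {
            typed.setMaxResults(limit);   // cap the page size
        }
        return typed.getResultList();
    }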
@@ -183,7 +183,7 @@ public InputStream retrieve(Context context, Bitstream bitstream)
* @return a list of all bitstreams that have been "deleted"
* @throws SQLException if database error
*/
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException;
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException;


/**
@@ -141,7 +141,6 @@ public void consume(Context ctx, Event event) throws Exception {
+ item.getID() + " and DOI " + doi + ".", ex);
}
}
ctx.commit();
}
}
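(The one-line removal above takes ctx.commit() out of the DOI consumer: committing the shared Context is evidently left to whoever dispatches the events, rather than happening inside an individual consumer.)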

@@ -17,6 +17,7 @@
import java.util.UUID;
import javax.annotation.Nullable;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -224,25 +225,62 @@ public InputStream retrieve(Context context, Bitstream bitstream)
@Override
public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException {
Context context = new Context(Context.Mode.BATCH_EDIT);
int commitCounter = 0;

int offset = 0;
int limit = 100;

int cleanedBitstreamCount = 0;

int deletedBitstreamCount = bitstreamService.countDeletedBitstreams(context);
System.out.println("Found " + deletedBitstreamCount + " deleted bistream to cleanup");

try {
context.turnOffAuthorisationSystem();

List<Bitstream> storage = bitstreamService.findDeletedBitstreams(context);
for (Bitstream bitstream : storage) {
UUID bid = bitstream.getID();
Map wantedMetadata = new HashMap();
wantedMetadata.put("size_bytes", null);
wantedMetadata.put("modified", null);
Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata);
while (cleanedBitstreamCount < deletedBitstreamCount) {

List<Bitstream> storage = bitstreamService.findDeletedBitstreams(context, limit, offset);

if (CollectionUtils.isEmpty(storage)) {
break;
}

for (Bitstream bitstream : storage) {
UUID bid = bitstream.getID();
Map wantedMetadata = new HashMap();
wantedMetadata.put("size_bytes", null);
wantedMetadata.put("modified", null);
Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata);


// Make sure entries which do not exist are removed
if (MapUtils.isEmpty(receivedMetadata)) {
log.debug("bitstore.about is empty, so file is not present");
if (deleteDbRecords) {
log.debug("deleting record");
if (verbose) {
System.out.println(" - Deleting bitstream information (ID: " + bid + ")");
}
checksumHistoryService.deleteByBitstream(context, bitstream);
if (verbose) {
System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")");
}
bitstreamService.expunge(context, bitstream);
}
context.uncacheEntity(bitstream);
continue;
}

// There is a small chance that this is a file which is
// being stored -- get it next time.
if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) {
log.debug("file is recent");
context.uncacheEntity(bitstream);
continue;
}

// Make sure entries which do not exist are removed
if (MapUtils.isEmpty(receivedMetadata)) {
log.debug("bitstore.about is empty, so file is not present");
if (deleteDbRecords) {
log.debug("deleting record");
log.debug("deleting db record");
if (verbose) {
System.out.println(" - Deleting bitstream information (ID: " + bid + ")");
}
@@ -252,64 +290,42 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio
}
bitstreamService.expunge(context, bitstream);
}
context.uncacheEntity(bitstream);
continue;
}

// There is a small chance that this is a file which is
// being stored -- get it next time.
if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) {
log.debug("file is recent");
context.uncacheEntity(bitstream);
continue;
}

if (deleteDbRecords) {
log.debug("deleting db record");
if (verbose) {
System.out.println(" - Deleting bitstream information (ID: " + bid + ")");
if (isRegisteredBitstream(bitstream.getInternalId())) {
context.uncacheEntity(bitstream);
continue; // do not delete registered bitstreams
}
checksumHistoryService.deleteByBitstream(context, bitstream);
if (verbose) {
System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")");


// Since versioning allows for multiple bitstreams, check if the internal
// identifier isn't used in
// another place
if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) {
this.getStore(bitstream.getStoreNumber()).remove(bitstream);

String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId());
if (log.isDebugEnabled()) {
log.debug(message);
}
if (verbose) {
System.out.println(message);
}
}
bitstreamService.expunge(context, bitstream);
}

if (isRegisteredBitstream(bitstream.getInternalId())) {
context.uncacheEntity(bitstream);
continue; // do not delete registered bitstreams
}

// Commit actual changes to DB after dispatch events
System.out.print("Performing incremental commit to the database...");
context.commit();
System.out.println(" Incremental commit done!");

// Since versioning allows for multiple bitstreams, check if the internal identifier isn't used in
// another place
if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) {
this.getStore(bitstream.getStoreNumber()).remove(bitstream);

String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId());
if (log.isDebugEnabled()) {
log.debug(message);
}
if (verbose) {
System.out.println(message);
}
}
cleanedBitstreamCount = cleanedBitstreamCount + storage.size();

// Make sure to commit our outstanding work every 100
// iterations. Otherwise you risk losing the entire transaction
// if we hit an exception, which isn't useful at all for large
// amounts of bitstreams.
commitCounter++;
if (commitCounter % 100 == 0) {
context.dispatchEvents();
// Commit actual changes to DB after dispatch events
System.out.print("Performing incremental commit to the database...");
context.commit();
System.out.println(" Incremental commit done!");
if (!deleteDbRecords) {
offset = offset + limit;
}

context.uncacheEntity(bitstream);
}

System.out.print("Committing changes to the database...");
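One subtlety in the rewritten loop is the offset handling: when deleteDbRecords is true, every expunged row shrinks the underlying result set, so the next page of deleted bitstreams starts over at the same offset, and advancing it would skip rows. The offset therefore only moves when nothing is being removed. The same pattern in isolation, as an illustrative sketch with hypothetical findDeleted/drain stand-ins rather than DSpace API:

    import java.util.ArrayList;
    import java.util.List;

    class ShrinkingPager {
        private final List<String> deleted = new ArrayList<>();

        // Hypothetical paged lookup, standing in for findDeletedBitstreams(context, limit, offset).
        private List<String> findDeleted(int limit, int offset) {
            int from = Math.min(offset, deleted.size());
            int to = Math.min(offset + limit, deleted.size());
            return new ArrayList<>(deleted.subList(from, to));
        }

        // Walk every "deleted" row; advance the offset only when rows are NOT being removed,
        // because removing them shifts all later rows toward the start of the result set.
        void drain(boolean deleteRows) {
            final int limit = 100;
            int offset = 0;
            List<String> page;
            while (!(page = findDeleted(limit, offset)).isEmpty()) {
                if (deleteRows) {
                    deleted.removeAll(page); // rows vanish; the next page starts at the same offset
                } else {
                    offset += limit;         // nothing was removed, so step past the rows just seen
                }
            }
        }
    }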
