Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance for Groups with many EPerson members. Fix pagination on endpoints #9078

Merged
merged 11 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -305,10 +305,13 @@
throw new AuthorizeException(
"You must be an admin to delete an EPerson");
}
// Get all workflow-related groups that the current EPerson belongs to
Set<Group> workFlowGroups = getAllWorkFlowGroups(context, ePerson);
for (Group group: workFlowGroups) {
List<EPerson> ePeople = groupService.allMembers(context, group);
if (ePeople.size() == 1 && ePeople.contains(ePerson)) {
// Get total number of unique EPerson objs who are a member of this group (or subgroup)
int totalMembers = groupService.countAllMembers(context, group);
// If only one EPerson is a member, then we cannot delete the last member of this group.
if (totalMembers == 1) {
throw new EmptyWorkflowGroupException(ePerson.getID(), group.getID());
}
}
Expand Down Expand Up @@ -567,14 +570,29 @@

@Override
public List<EPerson> findByGroups(Context c, Set<Group> groups) throws SQLException {
return findByGroups(c, groups, -1, -1);
}

@Override
public List<EPerson> findByGroups(Context c, Set<Group> groups, int pageSize, int offset) throws SQLException {
//Make sure we at least have one group, if not don't even bother searching.
if (CollectionUtils.isNotEmpty(groups)) {
return ePersonDAO.findByGroups(c, groups);
return ePersonDAO.findByGroups(c, groups, pageSize, offset);
} else {
return new ArrayList<>();
}
}

@Override
public int countByGroups(Context c, Set<Group> groups) throws SQLException {
//Make sure we at least have one group, if not don't even bother counting.
if (CollectionUtils.isNotEmpty(groups)) {
return ePersonDAO.countByGroups(c, groups);
} else {
return 0;

Check warning on line 592 in dspace-api/src/main/java/org/dspace/eperson/EPersonServiceImpl.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/eperson/EPersonServiceImpl.java#L592

Added line #L592 was not covered by tests
}
}

@Override
public List<EPerson> findEPeopleWithSubscription(Context context) throws SQLException {
return ePersonDAO.findAllSubscribers(context);
Expand Down
14 changes: 11 additions & 3 deletions dspace-api/src/main/java/org/dspace/eperson/Group.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ void addMember(EPerson e) {
}

/**
* Return EPerson members of a Group
* Return EPerson members of a Group.
* <P>
* WARNING: This method may have bad performance for Groups with large numbers of EPerson members.
* Therefore, only use this when you need to access every EPerson member. Instead, consider using
* EPersonService.findByGroups() for a paginated list of EPersons.
*
* @return list of EPersons
*/
Expand Down Expand Up @@ -143,9 +147,13 @@ List<Group> getParentGroups() {
}

/**
* Return Group members of a Group.
* Return Group members (i.e. direct subgroups) of a Group.
* <P>
* WARNING: This method may have bad performance for Groups with large numbers of Subgroups.
* Therefore, only use this when you need to access every Subgroup. Instead, consider using
* GroupService.findByParent() for a paginated list of Subgroups.
*
* @return list of groups
* @return list of subgroups
*/
public List<Group> getMemberGroups() {
return groups;
Expand Down
64 changes: 56 additions & 8 deletions dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,13 @@
for (CollectionRole collectionRole : collectionRoles) {
tdonohue marked this conversation as resolved.
Show resolved Hide resolved
if (StringUtils.equals(collectionRole.getRoleId(), role.getId())
&& claimedTask.getWorkflowItem().getCollection() == collectionRole.getCollection()) {
tdonohue marked this conversation as resolved.
Show resolved Hide resolved
List<EPerson> ePeople = allMembers(context, group);
if (ePeople.size() == 1 && ePeople.contains(ePerson)) {
// Count number of EPersons who are *direct* members of this group
int totalDirectEPersons = ePersonService.countByGroups(context, Set.of(group));
// Count number of Groups which have this groupParent as a direct parent
int totalChildGroups = countByParent(context, group);
// If this group has only one direct EPerson and *zero* child groups, then we cannot delete the
// EPerson or we will leave this group empty.
if (totalDirectEPersons == 1 && totalChildGroups == 0) {
tdonohue marked this conversation as resolved.
Show resolved Hide resolved
throw new IllegalStateException(
"Refused to remove user " + ePerson
.getID() + " from workflow group because the group " + group
Expand All @@ -191,8 +196,13 @@
}
}
if (!poolTasks.isEmpty()) {
List<EPerson> ePeople = allMembers(context, group);
if (ePeople.size() == 1 && ePeople.contains(ePerson)) {
// Count number of EPersons who are *direct* members of this group
int totalDirectEPersons = ePersonService.countByGroups(context, Set.of(group));
// Count number of Groups which have this groupParent as a direct parent
int totalChildGroups = countByParent(context, group);
// If this group has only one direct EPerson and *zero* child groups, then we cannot delete the
// EPerson or we will leave this group empty.
if (totalDirectEPersons == 1 && totalChildGroups == 0) {
tdonohue marked this conversation as resolved.
Show resolved Hide resolved
throw new IllegalStateException(
"Refused to remove user " + ePerson
.getID() + " from workflow group because the group " + group
Expand All @@ -212,9 +222,13 @@
if (!collectionRoles.isEmpty()) {
List<PoolTask> poolTasks = poolTaskService.findByGroup(context, groupParent);
if (!poolTasks.isEmpty()) {
List<EPerson> parentPeople = allMembers(context, groupParent);
List<EPerson> childPeople = allMembers(context, childGroup);
if (childPeople.containsAll(parentPeople)) {
// Count number of Groups which have this groupParent as a direct parent
int totalChildGroups = countByParent(context, groupParent);

Check warning on line 226 in dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java#L226

Added line #L226 was not covered by tests
// Count number of EPersons who are *direct* members of this group
int totalDirectEPersons = ePersonService.countByGroups(context, Set.of(groupParent));

Check warning on line 228 in dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java#L228

Added line #L228 was not covered by tests
// If this group has only one childGroup and *zero* direct EPersons, then we cannot delete the
// childGroup or we will leave this group empty.
if (totalChildGroups == 1 && totalDirectEPersons == 0) {
tdonohue marked this conversation as resolved.
Show resolved Hide resolved
throw new IllegalStateException(
"Refused to remove sub group " + childGroup
.getID() + " from workflow group because the group " + groupParent
Expand Down Expand Up @@ -368,7 +382,8 @@

// Get all groups which are a member of this group
List<Group2GroupCache> group2GroupCaches = group2GroupCacheDAO.findByParent(c, g);
Set<Group> groups = new HashSet<>();
// Initialize HashSet based on List size to avoid Set resizing. See https://stackoverflow.com/a/21822273
Set<Group> groups = new HashSet<>((int) (group2GroupCaches.size() / 0.75 + 1));
for (Group2GroupCache group2GroupCache : group2GroupCaches) {
groups.add(group2GroupCache.getChild());
}
Expand All @@ -381,6 +396,23 @@
return new ArrayList<>(childGroupChildren);
}

@Override
public int countAllMembers(Context context, Group group) throws SQLException {
// Get all groups which are a member of this group
List<Group2GroupCache> group2GroupCaches = group2GroupCacheDAO.findByParent(context, group);
// Initialize HashSet based on List size + current 'group' to avoid Set resizing.
// See https://stackoverflow.com/a/21822273
Set<Group> groups = new HashSet<>((int) ((group2GroupCaches.size() + 1) / 0.75 + 1));
for (Group2GroupCache group2GroupCache : group2GroupCaches) {
groups.add(group2GroupCache.getChild());
}
// Append current group as well
groups.add(group);

// Return total number of unique EPerson objects in any of these groups
return ePersonService.countByGroups(context, groups);
}

@Override
public Group find(Context context, UUID id) throws SQLException {
if (id == null) {
Expand Down Expand Up @@ -829,4 +861,20 @@
public String getName(Group dso) {
return dso.getName();
}

@Override
public List<Group> findByParent(Context context, Group parent, int pageSize, int offset) throws SQLException {
if (parent == null) {
return null;

Check warning on line 868 in dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java#L868

Added line #L868 was not covered by tests
}
return groupDAO.findByParent(context, parent, pageSize, offset);
}

@Override
public int countByParent(Context context, Group parent) throws SQLException {
if (parent == null) {
return 0;

Check warning on line 876 in dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java

View check run for this annotation

Codecov / codecov/patch

dspace-api/src/main/java/org/dspace/eperson/GroupServiceImpl.java#L876

Added line #L876 was not covered by tests
}
return groupDAO.countByParent(context, parent);
}
}
24 changes: 23 additions & 1 deletion dspace-api/src/main/java/org/dspace/eperson/dao/EPersonDAO.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,29 @@ public List<EPerson> search(Context context, String query, List<MetadataField> q

public int searchResultCount(Context context, String query, List<MetadataField> queryFields) throws SQLException;

public List<EPerson> findByGroups(Context context, Set<Group> groups) throws SQLException;
/**
* Find all EPersons who are a member of one or more of the listed groups in a paginated fashion. This returns
* EPersons ordered by UUID.
*
* @param context current Context
* @param groups Set of group(s) to check membership in
* @param pageSize number of EPerson objects to load at one time. Set to <=0 to disable pagination
* @param offset number of page to load (starting with 1). Set to <=0 to disable pagination
* @return List of all EPersons who are a member of one or more groups.
* @throws SQLException
*/
List<EPerson> findByGroups(Context context, Set<Group> groups, int pageSize, int offset) throws SQLException;

/**
* Count total number of EPersons who are a member of one or more of the listed groups. This provides the total
* number of results to expect from corresponding findByGroups() for pagination purposes.
*
* @param context current Context
* @param groups Set of group(s) to check membership in
* @return total number of (unique) EPersons who are a member of one or more groups.
* @throws SQLException
*/
int countByGroups(Context context, Set<Group> groups) throws SQLException;

public List<EPerson> findWithPasswordWithoutDigestAlgorithm(Context context) throws SQLException;

Expand Down
24 changes: 24 additions & 0 deletions dspace-api/src/main/java/org/dspace/eperson/dao/GroupDAO.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,28 @@ List<Group> findAll(Context context, List<MetadataField> metadataSortFields, int
*/
Group findByIdAndMembership(Context context, UUID id, EPerson ePerson) throws SQLException;

/**
* Find all groups which are members of a given parent group.
* This provides the same behavior as group.getMemberGroups(), but in a paginated fashion.
*
* @param context The DSpace context
* @param parent Parent Group to search within
* @param pageSize how many results return
* @param offset the position of the first result to return
* @return Groups matching the query
* @throws SQLException if database error
*/
List<Group> findByParent(Context context, Group parent, int pageSize, int offset) throws SQLException;

/**
* Returns the number of groups which are members of a given parent group.
* This provides the same behavior as group.getMemberGroups().size(), but with better performance for large groups.
* This method may be used with findByParent() to perform pagination.
*
* @param context The DSpace context
* @param parent Parent Group to search within
* @return Number of Groups matching the query
* @throws SQLException if database error
*/
int countByParent(Context context, Group parent) throws SQLException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public List<EPerson> findAll(Context context, MetadataField metadataSortField, S
}

@Override
public List<EPerson> findByGroups(Context context, Set<Group> groups) throws SQLException {
public List<EPerson> findByGroups(Context context, Set<Group> groups, int pageSize, int offset)
throws SQLException {
Query query = createQuery(context,
"SELECT DISTINCT e FROM EPerson e " +
"JOIN e.groups g " +
Expand All @@ -122,12 +123,35 @@ public List<EPerson> findByGroups(Context context, Set<Group> groups) throws SQL
for (Group group : groups) {
idList.add(group.getID());
}

query.setParameter("idList", idList);

if (pageSize > 0) {
query.setMaxResults(pageSize);
}
if (offset > 0) {
query.setFirstResult(offset);
}

return list(query);
}

@Override
public int countByGroups(Context context, Set<Group> groups) throws SQLException {
Query query = createQuery(context,
"SELECT count(DISTINCT e) FROM EPerson e " +
"JOIN e.groups g " +
"WHERE g.id IN (:idList) ");

List<UUID> idList = new ArrayList<>(groups.size());
for (Group group : groups) {
idList.add(group.getID());
}

query.setParameter("idList", idList);

return count(query);
}

@Override
public List<EPerson> findWithPasswordWithoutDigestAlgorithm(Context context) throws SQLException {
CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,28 @@ public int countRows(Context context) throws SQLException {
return count(createQuery(context, "SELECT count(*) FROM Group"));
}

@Override
public List<Group> findByParent(Context context, Group parent, int pageSize, int offset) throws SQLException {
Query query = createQuery(context,
"SELECT g FROM Group g JOIN g.parentGroups pg " +
"WHERE pg.id = :parent_id");
query.setParameter("parent_id", parent.getID());
if (pageSize > 0) {
query.setMaxResults(pageSize);
}
if (offset > 0) {
query.setFirstResult(offset);
}
query.setHint("org.hibernate.cacheable", Boolean.TRUE);

return list(query);
}

public int countByParent(Context context, Group parent) throws SQLException {
Query query = createQuery(context, "SELECT count(g) FROM Group g JOIN g.parentGroups pg " +
"WHERE pg.id = :parent_id");
query.setParameter("parent_id", parent.getID());

return count(query);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,42 @@ public EPerson create(Context context) throws SQLException,
public List<String> getDeleteConstraints(Context context, EPerson ePerson) throws SQLException;

/**
* Retrieve all accounts which belong to at least one of the specified groups.
* Retrieve all EPerson accounts which belong to at least one of the specified groups.
* <P>
* WARNING: This method may have bad performance issues for Groups with a very large number of members,
* as it will load all member EPerson objects into memory.
* <P>
* For better performance, use the paginated version of this method.
*
* @param c The relevant DSpace Context.
* @param groups set of eperson groups
* @return a list of epeople
* @throws SQLException An exception that provides information on a database access error or other errors.
*/
public List<EPerson> findByGroups(Context c, Set<Group> groups) throws SQLException;
List<EPerson> findByGroups(Context c, Set<Group> groups) throws SQLException;

/**
* Retrieve all EPerson accounts which belong to at least one of the specified groups, in a paginated fashion.
*
* @param c The relevant DSpace Context.
* @param groups Set of group(s) to check membership in
* @param pageSize number of EPerson objects to load at one time. Set to <=0 to disable pagination
* @param offset number of page to load (starting with 1). Set to <=0 to disable pagination
* @return a list of epeople
* @throws SQLException An exception that provides information on a database access error or other errors.
*/
List<EPerson> findByGroups(Context c, Set<Group> groups, int pageSize, int offset) throws SQLException;

/**
* Count all EPerson accounts which belong to at least one of the specified groups. This provides the total
* number of results to expect from corresponding findByGroups() for pagination purposes.
*
* @param c The relevant DSpace Context.
* @param groups Set of group(s) to check membership in
* @return total number of (unique) EPersons who are a member of one or more groups.
* @throws SQLException An exception that provides information on a database access error or other errors.
*/
int countByGroups(Context c, Set<Group> groups) throws SQLException;

/**
* Retrieve all accounts which are subscribed to receive information about new items.
Expand Down