Skip to content

Commit

Permalink
Fix string bucket boundary specification
Browse files Browse the repository at this point in the history
The current form of the bucket boundary specification was OK for its original
use (within prism properties) but not for prism containers. This commit
brings a container-proof alternative.

This resolves MID-6684.
  • Loading branch information
mederly committed Mar 1, 2021
1 parent 41f2bab commit 0d0a9df
Show file tree
Hide file tree
Showing 13 changed files with 205 additions and 41 deletions.
Expand Up @@ -1602,21 +1602,37 @@
<xsd:complexContent>
<xsd:extension base="tns:AbstractWorkSegmentationType">
<xsd:sequence>
<xsd:element name="boundary" type="tns:BoundarySpecificationType" minOccurs="0" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
Characters that make up the boundaries at given string position(s).
</xsd:documentation>
<xsd:appinfo>
<a:since>4.3</a:since>
</xsd:appinfo>
</xsd:annotation>
</xsd:element>
<xsd:element name="boundaryCharacters" type="xsd:string" minOccurs="0" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
Characters that make up the boundaries. These characters must be sorted.
Reserved characters: '-', '$' (to be implemented later)
Escaping character: '\'

DEPRECATED. Please use boundary instead.
</xsd:documentation>
<xsd:appinfo>
<a:deprecated>true</a:deprecated>
<a:deprecatedSince>4.3</a:deprecatedSince>
</xsd:appinfo>
</xsd:annotation>
</xsd:element>
<xsd:element name="depth" type="xsd:int" minOccurs="0" default="1">
<xsd:annotation>
<xsd:documentation>
If a value N greater than 1 is specified here, boundaryCharacters values are repeated N times
(if values of V1, V2, ..., Vk are specified, the resulting sequence is V1, V2, ..., Vk, V1, V2,
... Vk, etc, with N repetitions - so N*k values in total).
If a value N greater than 1 is specified here, values specified by boundary or boundaryCharacters
are repeated N times (if values of V1, V2, ..., Vk are specified, the resulting sequence
is V1, V2, ..., Vk, V1, V2, ... Vk, etc, with N repetitions - so N*k values in total).
</xsd:documentation>
</xsd:annotation>
</xsd:element>
Expand All @@ -1632,6 +1648,39 @@
</xsd:complexContent>
</xsd:complexType>

<xsd:complexType name="BoundarySpecificationType">
<xsd:annotation>
<xsd:documentation>
Specification of how string-based segments are delineated.
</xsd:documentation>
<xsd:appinfo>
<a:since>4.3</a:since>
<a:container>true</a:container>
</xsd:appinfo>
</xsd:annotation>
<xsd:sequence>
<xsd:element name="position" type="xsd:int" minOccurs="0" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
Position(s) to which the boundary characters apply (zero-based: 0 is the first position).
Should be specified, because (1) the ordering of boundary specifications is undefined,
(2) multiple definitions of the same boundary characters are not possible.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="characters" type="xsd:string" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Characters that make up the boundaries. These characters must be sorted.
Reserved characters: '-', '$' (to be implemented later)
Escaping character: '\'
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
<xsd:element name="boundarySpecification" type="tns:BoundarySpecificationType" />

<xsd:complexType name="OidWorkSegmentationType">
<xsd:annotation>
<xsd:documentation>
Expand Down
Expand Up @@ -39,7 +39,9 @@
<buckets>
<stringSegmentation>
<discriminator>attributes/icfs:name</discriminator>
<boundaryCharacters>abcdefghijklmnopqrstuvwxyz</boundaryCharacters>
<boundary>
<characters>abcdefghijklmnopqrstuvwxyz</characters>
</boundary>
</stringSegmentation>
</buckets>
<workers>
Expand Down
Expand Up @@ -39,7 +39,9 @@
<buckets>
<stringSegmentation>
<discriminator>attributes/icfs:name</discriminator>
<boundaryCharacters>agmt</boundaryCharacters>
<boundary>
<characters>agmt</characters>
</boundary>
</stringSegmentation>
</buckets>
<workers>
Expand Down
Expand Up @@ -37,7 +37,9 @@
<buckets>
<stringSegmentation>
<discriminator>attributes/icfs:name</discriminator>
<boundaryCharacters>abcdefghijklmnopqrstuvwxyz</boundaryCharacters>
<boundary>
<characters>abcdefghijklmnopqrstuvwxyz</characters>
</boundary>
</stringSegmentation>
</buckets>
<workers>
Expand Down
Expand Up @@ -49,8 +49,14 @@
<buckets>
<stringSegmentation>
<c:discriminator>name</c:discriminator>
<boundaryCharacters>0</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundary>
<position>0</position>
<characters>0</characters>
</boundary>
<boundary>
<position>1</position>
<characters>0-9</characters>
</boundary>
</stringSegmentation>
</buckets>
</workManagement>
Expand Down
Expand Up @@ -40,10 +40,19 @@
<buckets>
<stringSegmentation>
<discriminator>name</discriminator>
<boundaryCharacters>r</boundaryCharacters>
<boundaryCharacters>y</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundary>
<position>0</position>
<characters>r</characters>
</boundary>
<boundary>
<position>1</position>
<characters>y</characters>
</boundary>
<boundary>
<position>2</position>
<position>3</position>
<characters>0-9</characters>
</boundary>
<comparisonMethod>prefix</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down
Expand Up @@ -39,10 +39,19 @@
<buckets>
<stringSegmentation>
<discriminator>name</discriminator>
<boundaryCharacters>r</boundaryCharacters>
<boundaryCharacters>x</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundary>
<position>0</position>
<characters>r</characters>
</boundary>
<boundary>
<position>1</position>
<characters>x</characters>
</boundary>
<boundary>
<position>2</position>
<position>3</position>
<characters>0-9</characters>
</boundary>
<comparisonMethod>prefix</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down
Expand Up @@ -37,9 +37,15 @@
<buckets>
<stringSegmentation>
<discriminator>attributes/name</discriminator>
<boundaryCharacters>v</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundary>
<position>0</position>
<characters>v</characters>
</boundary>
<boundary>
<position>1</position>
<position>2</position>
<characters>0-9</characters>
</boundary>
<comparisonMethod>prefix</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down
Expand Up @@ -36,9 +36,15 @@
<buckets>
<stringSegmentation>
<discriminator>attributes/name</discriminator>
<boundaryCharacters>u</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundaryCharacters>0-9</boundaryCharacters>
<boundary>
<position>0</position>
<characters>u</characters>
</boundary>
<boundary>
<position>1</position>
<position>2</position>
<characters>0-9</characters>
</boundary>
<comparisonMethod>prefix</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down
Expand Up @@ -16,11 +16,13 @@
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.ArrayList;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;

import static com.evolveum.midpoint.util.MiscUtil.argCheck;
import static com.evolveum.midpoint.xml.ns._public.common.common_3.StringWorkBucketsBoundaryMarkingType.INTERVAL;

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.apache.commons.lang3.ObjectUtils.defaultIfNull;

Expand Down Expand Up @@ -199,23 +201,30 @@ public Integer estimateNumberOfBuckets(@Nullable TaskWorkStateType workState) {
}

private List<String> processBoundaries() {
List<String> configuredBoundaries;
if (bucketsConfiguration instanceof OidWorkSegmentationType && bucketsConfiguration.getBoundaryCharacters().isEmpty()) {
configuredBoundaries = singletonList(OID_BOUNDARIES);
} else {
configuredBoundaries = bucketsConfiguration.getBoundaryCharacters();
}
int depth = defaultIfNull(bucketsConfiguration.getDepth(), 1);
List<String> expanded = configuredBoundaries.stream()
List<String> expanded = getConfiguredBoundaries().stream()
.map(this::expand)
.collect(Collectors.toList());
int depth = defaultIfNull(bucketsConfiguration.getDepth(), 1);
List<String> rv = new ArrayList<>(expanded.size() * depth);
for (int i = 0; i < depth; i++) {
rv.addAll(expanded);
}
return rv;
}

/**
 * Selects the raw (not yet expanded) boundary definitions, one string per position.
 *
 * Sources are tried in this order:
 * 1. structured "boundary" items, resolved through {@link Boundaries};
 * 2. legacy "boundaryCharacters" values, taken as they are;
 * 3. {@code OID_BOUNDARIES} as the single boundary, if the configuration is
 *    an {@link OidWorkSegmentationType};
 * 4. otherwise an empty list.
 */
private List<String> getConfiguredBoundaries() {
    List<BoundarySpecificationType> structured = bucketsConfiguration.getBoundary();
    if (!structured.isEmpty()) {
        Boundaries resolver = new Boundaries(structured);
        return resolver.getConfiguredBoundaries();
    }

    List<String> legacy = bucketsConfiguration.getBoundaryCharacters();
    if (!legacy.isEmpty()) {
        return legacy;
    }

    if (bucketsConfiguration instanceof OidWorkSegmentationType) {
        return singletonList(OID_BOUNDARIES);
    }
    return emptyList();
}

private static class Scanner {
private final String string;
private int index;
Expand Down Expand Up @@ -297,4 +306,55 @@ private void appendFromTo(StringBuilder sb, char fromExclusive, char toInclusive
/**
 * Returns the {@code boundaries} list held by this object.
 * The list reference itself is returned; no copy is made here.
 */
public List<String> getBoundaries() {
return boundaries;
}

/**
 * Turns a list of {@link BoundarySpecificationType} items into a list of boundary
 * character strings, where the list index is the (zero-based) position.
 *
 * Rules applied:
 * - A specification with one or more positions stores its characters at each of them.
 * - A specification without any position is appended after the entries collected so far.
 * - Each position may be defined only once, and no position may be left undefined
 *   (i.e. there must be no gaps between 0 and the highest position used).
 *
 * All violations are reported via {@code argCheck}.
 */
private static class Boundaries {

    private final List<BoundarySpecificationType> specifications;

    private Boundaries(List<BoundarySpecificationType> specifications) {
        this.specifications = specifications;
    }

    /**
     * Resolves the specifications into the per-position list of boundary characters.
     *
     * The result is built in a fresh list on every call, so calling this method
     * repeatedly is safe and always yields an equal result.
     *
     * @return boundary characters indexed by position; never contains null
     */
    public List<String> getConfiguredBoundaries() {
        List<String> configuredBoundaries = new ArrayList<>();
        for (BoundarySpecificationType specification : specifications) {
            process(specification, configuredBoundaries);
        }
        checkConsistency(configuredBoundaries);
        return configuredBoundaries;
    }

    /** Applies a single specification to the list being built. */
    private void process(BoundarySpecificationType specification, List<String> configuredBoundaries) {
        String characters = specification.getCharacters();
        // Reject missing characters up front: a null stored at a position would look like
        // "not yet defined" and could be silently overwritten by a duplicate definition.
        argCheck(characters != null, "Boundary characters are not specified in %s", specification);

        if (specification.getPosition().isEmpty()) {
            // No explicit position: the characters go right after what we have so far.
            configuredBoundaries.add(characters);
            return;
        }
        for (Integer position : specification.getPosition()) {
            argCheck(position != null, "Position is null in %s", specification);
            // A negative index would otherwise surface as a raw IndexOutOfBoundsException.
            argCheck(position >= 0, "Position %d is negative in %s", position, specification);
            extendIfNeeded(configuredBoundaries, position);
            set(configuredBoundaries, position, characters);
        }
    }

    /** Pads the list with nulls (meaning "undefined") so that the given index exists. */
    private void extendIfNeeded(List<String> configuredBoundaries, int position) {
        while (configuredBoundaries.size() <= position) {
            configuredBoundaries.add(null);
        }
    }

    /** Stores the characters at the given position, refusing to redefine a position. */
    private void set(List<String> configuredBoundaries, int position, String characters) {
        assert configuredBoundaries.size() > position;
        argCheck(configuredBoundaries.get(position) == null,
                "Boundary characters for position %d defined more than once: %s", position, configuredBoundaries);
        configuredBoundaries.set(position, characters);
    }

    /** Verifies that no position was left undefined (null) by the padding above. */
    private void checkConsistency(List<String> configuredBoundaries) {
        for (int i = 0; i < configuredBoundaries.size(); i++) {
            String configuredBoundary = configuredBoundaries.get(i);
            argCheck(configuredBoundary != null, "Boundary characters for position %d are not defined: %s",
                    i, configuredBoundaries);
        }
    }
}
}
Expand Up @@ -21,6 +21,7 @@
<stringSegmentation>
<discriminator>name</discriminator>
<matchingRule>polyStringNorm</matchingRule>
<!-- legacy specification -->
<boundaryCharacters>a</boundaryCharacters>
<boundaryCharacters>\0-\1\a-\c</boundaryCharacters>
<boundaryCharacters>01abc</boundaryCharacters>
Expand Down
Expand Up @@ -7,10 +7,7 @@
-->

<task oid="44444444-0000-0000-0000-125000000000"
xmlns="http://midpoint.evolveum.com/xml/ns/public/common/common-3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:q="http://prism.evolveum.com/xml/ns/public/query-3">
xmlns="http://midpoint.evolveum.com/xml/ns/public/common/common-3">
<name>task-125-c-single</name>
<taskIdentifier>44444444-0000-0000-0000-125000000000</taskIdentifier>
<ownerRef oid="00000000-0000-0000-0000-000000000002"/>
Expand All @@ -21,9 +18,18 @@
<stringSegmentation>
<discriminator>name</discriminator>
<matchingRule>polyStringNorm</matchingRule>
<boundaryCharacters>a</boundaryCharacters>
<boundaryCharacters>\0-\1\a-\c</boundaryCharacters>
<boundaryCharacters>01abc</boundaryCharacters>
<boundary>
<position>0</position>
<characters>a</characters>
</boundary>
<boundary>
<position>1</position>
<characters>\0-\1\a-\c</characters>
</boundary>
<boundary>
<position>2</position>
<characters>01abc</characters>
</boundary>
<comparisonMethod>exactMatch</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down
Expand Up @@ -21,8 +21,14 @@
<stringSegmentation>
<discriminator>name</discriminator>
<matchingRule>polyStringNorm</matchingRule>
<boundaryCharacters>05am</boundaryCharacters>
<boundaryCharacters>0am</boundaryCharacters>
<boundary>
<position>0</position>
<characters>05am</characters>
</boundary>
<boundary>
<position>1</position>
<characters>0am</characters>
</boundary>
<comparisonMethod>interval</comparisonMethod>
</stringSegmentation>
</buckets>
Expand Down

0 comments on commit 0d0a9df

Please sign in to comment.