Backport 2263 and 2268

labkey-martyp committed Jun 15, 2021
1 parent 8101d9f commit 5e21312
Showing 3 changed files with 94 additions and 85 deletions.
135 changes: 67 additions & 68 deletions api/src/org/labkey/api/dataiterator/DataIteratorUtil.java
@@ -53,6 +53,7 @@
 import java.util.Map;
 import java.util.Spliterator;
 import java.util.function.Consumer;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;

@@ -129,58 +130,79 @@ public static Map<String,ColumnInfo> createTableMap(TableInfo target, boolean us
         return targetAliasesMap;
     }
 
-    enum MatchType {propertyuri, name, alias, jdbcname, tsvColumn}
+    // rank of a match of import column NAME matching various properties of target column
+    // MatchType.low is used for matches based on something other than name
+    enum MatchType {propertyuri, name, alias, jdbcname, tsvColumn, low}
 
 
+    /**
+     * NOTE: matchColumns() handles multiple source columns matching the same target column (usually a data file problem
+     * for the user to fix); we don't handle one source column matching multiple target columns (more of an admin design problem).
+     * One idea would be to return MultiValuedMap<String,Pair<>>, or check for duplicate entries of the same MatchType.
+     */
     protected static Map<String,Pair<ColumnInfo,MatchType>> _createTableMap(TableInfo target, boolean useImportAliases)
     {
-        List<ColumnInfo> cols = target.getColumns();
+        /* CONSIDER: move this functionality into a TableInfo method so this map (or maps) can be cached */
+        List<ColumnInfo> cols = target.getColumns().stream()
+                .filter(col -> !col.isMvIndicatorColumn() && !col.isRawValueColumn())
+                .collect(Collectors.toList());
 
         Map<String, Pair<ColumnInfo,MatchType>> targetAliasesMap = new CaseInsensitiveHashMap<>(cols.size()*4);
 
+        // should this be under the useImportAliases flag???
         for (ColumnInfo col : cols)
        {
-            if (col.isMvIndicatorColumn() || col.isRawValueColumn())
-                continue;
-            final String name = col.getName();
-            targetAliasesMap.put(name, new Pair<>(col,MatchType.name));
-            String uri = col.getPropertyURI();
-            if (null != uri)
-            {
-                if (!targetAliasesMap.containsKey(uri))
-                    targetAliasesMap.put(uri, new Pair<>(col, MatchType.propertyuri));
-                String propName = uri.substring(uri.lastIndexOf('#')+1);
-                if (!targetAliasesMap.containsKey(propName))
-                    targetAliasesMap.put(propName, new Pair<>(col, MatchType.alias));
-            }
-            String label = col.getLabel();
-            if (null != label && !targetAliasesMap.containsKey(label))
-                targetAliasesMap.put(label, new Pair<>(col, MatchType.alias));
-            String translatedFieldKey;
-            if (useImportAliases || "folder".equalsIgnoreCase(name))
+            // Issue 21015: Dataset snapshot over flow assay dataset doesn't pick up stat column values
+            // TSVColumnWriter.ColumnHeaderType.queryColumnName format is a FieldKey display value from the column name. Blech.
+            String tsvQueryColumnName = FieldKey.fromString(col.getName()).toDisplayString();
+            targetAliasesMap.put(tsvQueryColumnName, new Pair<>(col, MatchType.tsvColumn));
+        }
+
+        // should this be under the useImportAliases flag???
+        for (ColumnInfo col : cols)
+        {
+            // Jdbc resultset names have substitutions for special characters. If this is such a column, need the substituted name to match on
+            targetAliasesMap.put(col.getJdbcRsName(), new Pair<>(col, MatchType.jdbcname));
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            if (useImportAliases || "folder".equalsIgnoreCase(col.getName()))
             {
                 for (String alias : col.getImportAliasSet())
                 {
-                    if (!targetAliasesMap.containsKey(alias))
-                        targetAliasesMap.put(alias, new Pair<>(col, MatchType.alias));
+                    targetAliasesMap.put(alias, new Pair<>(col, MatchType.alias));
                 }
 
                 // Be sure we have an alias for the column name we generate for TSV exports. See issue 21774
-                translatedFieldKey = FieldKey.fromString(name).toDisplayString();
-                if (!targetAliasesMap.containsKey(translatedFieldKey))
-                {
-                    targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.alias));
-                }
+                String translatedFieldKey = FieldKey.fromString(col.getName()).toDisplayString();
+                targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.alias));
             }
-            // Jdbc resultset names have substitutions for special characters. If this is such a column, need the substituted name to match on
-            translatedFieldKey = col.getJdbcRsName();
-            if (!name.equals(translatedFieldKey))
-            {
-                targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.jdbcname));
-            }
+        }
 
-            // Issue 21015: Dataset snapshot over flow assay dataset doesn't pick up stat column values
-            // TSVColumnWriter.ColumnHeaderType.queryColumnName format is a FieldKey display value from the column name. Blech.
-            String tsvQueryColumnName = FieldKey.fromString(name).toDisplayString();
-            if (!targetAliasesMap.containsKey(tsvQueryColumnName))
-                targetAliasesMap.put(tsvQueryColumnName, new Pair<>(col, MatchType.tsvColumn));
+        for (ColumnInfo col : cols)
+        {
+            String label = col.getLabel();
+            if (null != label)
+                targetAliasesMap.put(label, new Pair<>(col, MatchType.alias));
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            String uri = col.getPropertyURI();
+            if (null != uri)
+            {
+                targetAliasesMap.put(uri, new Pair<>(col, MatchType.propertyuri));
+                String propName = uri.substring(uri.lastIndexOf('#')+1);
+                targetAliasesMap.put(propName, new Pair<>(col, MatchType.alias));
+            }
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            String name = col.getName();
+            targetAliasesMap.put(name, new Pair<>(col,MatchType.name));
+        }
 
         return targetAliasesMap;
     }
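
The rewrite above drops the per-column containsKey guards in favor of a simple convention: the map is filled in several passes, one per kind of match, ordered from weakest (MatchType.tsvColumn) to strongest (MatchType.name), so a later put() for the same key overwrites a weaker earlier entry. A minimal, self-contained sketch of that overwrite behavior — TreeMap with CASE_INSENSITIVE_ORDER stands in for LabKey's CaseInsensitiveHashMap, and the column names are invented for illustration:

    import java.util.Map;
    import java.util.TreeMap;

    public class MatchPriorityDemo
    {
        enum MatchType { propertyuri, name, alias, jdbcname, tsvColumn, low }

        public static void main(String[] args)
        {
            Map<String, MatchType> map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

            map.put("Visit Date", MatchType.tsvColumn); // weakest: TSV-style display name
            map.put("Visit Date", MatchType.alias);     // stronger: an import alias
            map.put("visit date", MatchType.name);      // strongest: exact column name, put last

            // the case-insensitive lookup resolves to the last (strongest) put
            System.out.println(map.get("VISIT DATE")); // prints: name
        }
    }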

@@ -196,7 +218,6 @@ protected static ArrayList<Pair<ColumnInfo,MatchType>> _matchColumns(DataIterato
         ArrayList<Pair<ColumnInfo,MatchType>> matches = new ArrayList<>(input.getColumnCount()+1);
         matches.add(null);
 
-        // match columns to target columninfos (duplicates StandardDataIteratorBuilder, extract shared method?)
         for (int i=1 ; i<=input.getColumnCount() ; i++)
         {
             ColumnInfo from = input.getColumnInfo(i);
@@ -205,34 +226,15 @@ protected static ArrayList<Pair<ColumnInfo,MatchType>> _matchColumns(DataIterato
                 matches.add(null);
                 continue;
             }
-
-            Pair<ColumnInfo,MatchType> to = null;
-            if (isEtl)
+            Pair<ColumnInfo,MatchType> to = targetMap.get(from.getName());
+            if (null == to && null != from.getPropertyURI())
             {
-                // Match by name first
-                to = targetMap.get(from.getName());
-
-                // If name matches, check if property URI matches for higher priority match type
-                if (null != to && null != from.getPropertyURI())
-                {
-                    // Renamed built-in columns (ex. LSID, ParticipantId) in source queries will not match propertyURI with
-                    // target. In that case, just stick with name match. Otherwise check for propertyURI match for higher priority match
-                    // (this is primarily for ParticipantId which can have two columns with same name in the dataiterator)
-                    if (from.getPropertyURI().equals(to.first.getPropertyURI())) {
-                        Pair<ColumnInfo,MatchType> toUri = targetMap.get(from.getPropertyURI());
-                        if (null != toUri)
-                            to = toUri;
-                    }
-                }
-            }
-            else
-            {
-                if (null != from.getPropertyURI())
-                    to = targetMap.get(from.getPropertyURI());
-                if (null == to)
-                    to = targetMap.get(from.getName());
+                // Do we actually rely on this anywhere???
+                // Like maybe ETL from one study to another where subject name does not match? or assay publish?
+                to = targetMap.get(from.getPropertyURI());
+                if (null != to)
+                    to = new Pair<>(to.first, MatchType.low);
             }
-
             if (null == to)
             {
                 // Check to see if the column i.e. propURI has a property descriptor and vocabulary domain is present
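
The hunk above collapses the old isEtl split into one rule: match by name first, and fall back to the propertyURI only when the name finds nothing, demoting that hit to MatchType.low. A standalone sketch of the fallback — toy types only; AbstractMap.SimpleEntry stands in for LabKey's Pair and the URI string is made up:

    import java.util.AbstractMap.SimpleEntry;
    import java.util.HashMap;
    import java.util.Map;

    public class MatchFallbackDemo
    {
        enum MatchType { propertyuri, name, alias, jdbcname, tsvColumn, low }

        // name hit wins outright; a URI-only hit is kept but demoted to `low`
        static SimpleEntry<String, MatchType> resolve(Map<String, SimpleEntry<String, MatchType>> targetMap,
                                                      String name, String propertyUri)
        {
            SimpleEntry<String, MatchType> to = targetMap.get(name);
            if (null == to && null != propertyUri)
            {
                to = targetMap.get(propertyUri);
                if (null != to)
                    to = new SimpleEntry<>(to.getKey(), MatchType.low);
            }
            return to;
        }

        public static void main(String[] args)
        {
            Map<String, SimpleEntry<String, MatchType>> targetMap = new HashMap<>();
            targetMap.put("ParticipantId", new SimpleEntry<>("ParticipantId", MatchType.name));
            targetMap.put("urn:example#ParticipantId", new SimpleEntry<>("ParticipantId", MatchType.propertyuri));

            // renamed source column: no name hit, so the URI match is used at low rank
            System.out.println(resolve(targetMap, "Ptid", "urn:example#ParticipantId").getValue()); // low
        }
    }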
@@ -302,13 +304,10 @@ public static ArrayList<ColumnInfo> matchColumns(DataIterator input, TableInfo t
     }
 
 
-
-
     // NOTE: first consider if using QueryUpdateService is better
     // this is just a point-to-point copy _without_ triggers
     public static int copy(File from, TableInfo to, Container c, User user) throws IOException, BatchValidationException
     {
-
         BatchValidationException errors = new BatchValidationException();
         DataIteratorContext context = new DataIteratorContext(errors);
         DataLoader loader = DataLoaderService.get().createLoader(from, null, true, c, TabLoader.TSV_FILE_TYPE);
9 changes: 7 additions & 2 deletions api/src/org/labkey/api/dataiterator/ExistingRecordDataIterator.java
@@ -39,6 +39,7 @@ public abstract class ExistingRecordDataIterator extends WrapperDataIterator
 {
     public static final String EXISTING_RECORD_COLUMN_NAME = "_" + ExistingRecordDataIterator.class.getName() + "#EXISTING_RECORD_COLUMN_NAME";
 
+    final CachingDataIterator _unwrapped;
     final TableInfo target;
     final ArrayList<ColumnInfo> pkColumns = new ArrayList<>();
     final ArrayList<Supplier<Object>> pkSuppliers = new ArrayList<>();
@@ -52,6 +53,10 @@ public abstract class ExistingRecordDataIterator extends WrapperDataIterator
     ExistingRecordDataIterator(DataIterator in, TableInfo target, @Nullable Set<String> keys, boolean useMark)
     {
         super(in);
+
+        // NOTE it might get wrapped with a LoggingDataIterator, so remember the original DataIterator
+        this._unwrapped = useMark ? (CachingDataIterator)in : null;
+
         this.target = target;
         this.existingColIndex = in.getColumnCount()+1;
         this.useMark = useMark;
@@ -130,7 +135,7 @@ public boolean next() throws BatchValidationException
         {
             // NOTE: we have to call mark() before we call next() if we want the 'next' row to be cached
             if (useMark)
-                ((CachingDataIterator)_delegate).mark();
+                _unwrapped.mark(); // unwrapped _delegate
             boolean ret = super.next();
             if (ret && !pkColumns.isEmpty())
                 prefetchExisting();
@@ -251,7 +256,7 @@ protected void prefetchExisting() throws BatchValidationException
                 existingRecords.put(r,(Map<String,Object>)map);
             });
             // backup to where we started so caller can iterate through them one at a time
-            ((CachingDataIterator)_delegate).reset();
+            _unwrapped.reset(); // unwrapped _delegate
             _delegate.next();
         }
     }
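
Both hunks above depend on CachingDataIterator's mark()/reset() contract: mark the current position, run ahead to batch-fetch existing rows by primary key, then rewind so the caller still sees every row. The new _unwrapped field matters because the incoming iterator may later be wrapped (e.g. by a LoggingDataIterator), after which _delegate can no longer be cast to CachingDataIterator. A toy, self-contained version of that contract — a sketch of the pattern, not LabKey's class (a real implementation would prune the buffer once a mark is released):

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    class MarkableIterator<T> implements Iterator<T>
    {
        private final Iterator<T> source;
        private final List<T> buffer = new ArrayList<>();
        private int pos = 0;    // next index served from the buffer
        private int mark = -1;  // -1 means no active mark

        MarkableIterator(Iterator<T> source) { this.source = source; }

        void mark()  { mark = pos; }

        void reset()
        {
            if (mark < 0) throw new IllegalStateException("no mark set");
            pos = mark; // rewind: rows read since mark() are replayed from the buffer
        }

        @Override
        public boolean hasNext() { return pos < buffer.size() || source.hasNext(); }

        @Override
        public T next()
        {
            if (pos == buffer.size())
                buffer.add(source.next()); // pull a fresh row and remember it
            return buffer.get(pos++);
        }
    }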
35 changes: 20 additions & 15 deletions study/src/org/labkey/study/model/DatasetDataIteratorBuilder.java
@@ -199,6 +199,22 @@ public DataIterator getDataIterator(DataIteratorContext context)
                     continue;
                 }
 
+                if (match == subjectCol)
+                {
+                    try
+                    {
+                        // translate the incoming participant column
+                        // do a conversion for PTID aliasing
+                        it.translatePtid(in, user);
+                        continue;
+                    }
+                    catch (ValidationException e)
+                    {
+                        setupError(e.getMessage());
+                        return it;
+                    }
+                }
+
                 int out;
                 if (DefaultStudyDesignWriter.isColumnNumericForeignKeyToDataspaceTable(match.getFk(), true))
                 {
@@ -250,19 +266,8 @@ else if (match.getPropertyType() == PropertyType.FILE_LINK)
             Integer indexContainer = outputMap.get(containerColumn);
             Integer indexReplace = outputMap.get("replace");
 
-            // do a conversion for PTID aliasing
-            Integer translatedIndexPTID = indexPTID;
-            try
-            {
-                translatedIndexPTID = it.translatePtid(indexPTIDInput, user);
-            }
-            catch (ValidationException e)
-            {
-                context.getErrors().addRowError(e);
-            }
-
             // For now, just specify null for sequence num index... we'll add it below
-            it.setSpecialOutputColumns(translatedIndexPTID, null, indexVisitDate, indexKeyProperty, indexContainer);
+            it.setSpecialOutputColumns(indexPTID, null, indexVisitDate, indexKeyProperty, indexContainer);
             it.setTimepointType(timetype);
 
             /* NOTE: these columns must be added in dependency order
@@ -412,11 +417,11 @@ else if (_datasetDefinition.getUseTimeKeyField())
         {
             Integer indexVisit = timetype.isVisitBased() ? it.indexSequenceNumOutput : indexVisitDate;
             // no point if required columns are missing
-            if (null != translatedIndexPTID && null != indexVisit)
+            if (null != indexPTID && null != indexVisit)
             {
                 ScrollableDataIterator scrollable = DataIteratorUtil.wrapScrollable(ret);
                 _datasetDefinition.checkForDuplicates(scrollable, indexLSID,
-                        translatedIndexPTID, null == indexVisit ? -1 : indexVisit, null == indexKeyProperty ? -1 : indexKeyProperty, null == indexReplace ? -1 : indexReplace,
+                        indexPTID, null == indexVisit ? -1 : indexVisit, null == indexKeyProperty ? -1 : indexKeyProperty, null == indexReplace ? -1 : indexReplace,
                         context, null,
                         checkDuplicates);
                 scrollable.beforeFirst();
@@ -613,7 +618,7 @@ int translateSequenceNum(Integer indexSequenceNumInput, Integer indexVisitDateIn
 
     int translatePtid(Integer indexPtidInput, User user) throws ValidationException
     {
-        ColumnInfo col = new BaseColumnInfo("ParticipantId", JdbcType.VARCHAR);
+        ColumnInfo col = new BaseColumnInfo(_datasetDefinition.getStudy().getSubjectColumnName(), JdbcType.VARCHAR);
         ParticipantIdImportHelper piih = new ParticipantIdImportHelper(_datasetDefinition.getStudy(), user, _datasetDefinition);
         Callable call = piih.getCallable(getInput(), indexPtidInput);
         return addColumn(col, call);
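
The fix above stops hard-coding "ParticipantId" and instead names the output column after the study's configured subject column. The translation itself is a lookup-with-fallback over the study's PTID alias map; roughly like the following sketch — the helper and its shape are hypothetical, not ParticipantIdImportHelper's actual implementation:

    import java.util.Map;
    import java.util.concurrent.Callable;

    class PtidAliasLookup
    {
        // Resolve the incoming value through an alias-to-PTID map,
        // falling back to the raw value when it is not aliased.
        static Callable<Object> aliasingCallable(Map<String, String> aliasToPtid,
                                                 Callable<Object> rawValue)
        {
            return () -> {
                Object v = rawValue.call();
                if (v == null)
                    return null;
                return aliasToPtid.getOrDefault(v.toString(), v.toString());
            };
        }
    }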
