Backport 2263 and 2268

labkey-martyp committed Jun 15, 2021
1 parent 8101d9f commit 5e21312
Showing 3 changed files with 94 additions and 85 deletions.
135 changes: 67 additions & 68 deletions api/src/org/labkey/api/dataiterator/DataIteratorUtil.java
@@ -53,6 +53,7 @@
 import java.util.Map;
 import java.util.Spliterator;
 import java.util.function.Consumer;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;

@@ -129,58 +130,79 @@ public static Map<String,ColumnInfo> createTableMap(TableInfo target, boolean us
         return targetAliasesMap;
     }
 
-    enum MatchType {propertyuri, name, alias, jdbcname, tsvColumn}
+    // rank of a match of import column NAME matching various properties of target column
+    // MatchType.low is used for matches based on something other than name
+    enum MatchType {propertyuri, name, alias, jdbcname, tsvColumn, low}
 
 
+    /**
+     * NOTE: matchColumns() handles multiple source columns matching the same target column (usually a data file problem
+     * for the user to fix); we don't handle one source column matching multiple target columns (more of an admin design problem).
+     * One idea would be to return MultiValuedMap<String,Pair<>>, or check for duplicate entries of the same MatchType.
+     */
     protected static Map<String,Pair<ColumnInfo,MatchType>> _createTableMap(TableInfo target, boolean useImportAliases)
     {
-        List<ColumnInfo> cols = target.getColumns();
+        /* CONSIDER: move this functionality into a TableInfo method so this map (or maps) can be cached */
+        List<ColumnInfo> cols = target.getColumns().stream()
+                .filter(col -> !col.isMvIndicatorColumn() && !col.isRawValueColumn())
+                .collect(Collectors.toList());
 
         Map<String, Pair<ColumnInfo,MatchType>> targetAliasesMap = new CaseInsensitiveHashMap<>(cols.size()*4);
 
+        // should this be under the useImportAliases flag???
         for (ColumnInfo col : cols)
        {
-            if (col.isMvIndicatorColumn() || col.isRawValueColumn())
-                continue;
-            final String name = col.getName();
-            targetAliasesMap.put(name, new Pair<>(col,MatchType.name));
-            String uri = col.getPropertyURI();
-            if (null != uri)
-            {
-                if (!targetAliasesMap.containsKey(uri))
-                    targetAliasesMap.put(uri, new Pair<>(col, MatchType.propertyuri));
-                String propName = uri.substring(uri.lastIndexOf('#')+1);
-                if (!targetAliasesMap.containsKey(propName))
-                    targetAliasesMap.put(propName, new Pair<>(col, MatchType.alias));
-            }
-            String label = col.getLabel();
-            if (null != label && !targetAliasesMap.containsKey(label))
-                targetAliasesMap.put(label, new Pair<>(col, MatchType.alias));
-            String translatedFieldKey;
-            if (useImportAliases || "folder".equalsIgnoreCase(name))
+            // Issue 21015: Dataset snapshot over flow assay dataset doesn't pick up stat column values
+            // TSVColumnWriter.ColumnHeaderType.queryColumnName format is a FieldKey display value from the column name. Blech.
+            String tsvQueryColumnName = FieldKey.fromString(col.getName()).toDisplayString();
+            targetAliasesMap.put(tsvQueryColumnName, new Pair<>(col, MatchType.tsvColumn));
+        }
+
+        // should this be under the useImportAliases flag???
+        for (ColumnInfo col : cols)
+        {
+            // Jdbc resultset names have substitutions for special characters. If this is such a column, need the substituted name to match on
+            targetAliasesMap.put(col.getJdbcRsName(), new Pair<>(col, MatchType.jdbcname));
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            if (useImportAliases || "folder".equalsIgnoreCase(col.getName()))
             {
                 for (String alias : col.getImportAliasSet())
                 {
-                    if (!targetAliasesMap.containsKey(alias))
-                        targetAliasesMap.put(alias, new Pair<>(col, MatchType.alias));
+                    targetAliasesMap.put(alias, new Pair<>(col, MatchType.alias));
                 }
 
                 // Be sure we have an alias for the column name we generate for TSV exports. See issue 21774
-                translatedFieldKey = FieldKey.fromString(name).toDisplayString();
-                if (!targetAliasesMap.containsKey(translatedFieldKey))
-                {
-                    targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.alias));
-                }
+                String translatedFieldKey = FieldKey.fromString(col.getName()).toDisplayString();
+                targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.alias));
             }
-            // Jdbc resultset names have substitutions for special characters. If this is such a column, need the substituted name to match on
-            translatedFieldKey = col.getJdbcRsName();
-            if (!name.equals(translatedFieldKey))
-            {
-                targetAliasesMap.put(translatedFieldKey, new Pair<>(col, MatchType.jdbcname));
-            }
+        }
 
-            // Issue 21015: Dataset snapshot over flow assay dataset doesn't pick up stat column values
-            // TSVColumnWriter.ColumnHeaderType.queryColumnName format is a FieldKey display value from the column name. Blech.
-            String tsvQueryColumnName = FieldKey.fromString(name).toDisplayString();
-            if (!targetAliasesMap.containsKey(tsvQueryColumnName))
-                targetAliasesMap.put(tsvQueryColumnName, new Pair<>(col, MatchType.tsvColumn));
+        for (ColumnInfo col : cols)
+        {
+            String label = col.getLabel();
+            if (null != label)
+                targetAliasesMap.put(label, new Pair<>(col, MatchType.alias));
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            String uri = col.getPropertyURI();
+            if (null != uri)
+            {
+                targetAliasesMap.put(uri, new Pair<>(col, MatchType.propertyuri));
+                String propName = uri.substring(uri.lastIndexOf('#')+1);
+                targetAliasesMap.put(propName, new Pair<>(col, MatchType.alias));
+            }
+        }
+
+        for (ColumnInfo col : cols)
+        {
+            String name = col.getName();
+            targetAliasesMap.put(name, new Pair<>(col,MatchType.name));
+        }
 
         return targetAliasesMap;
     }
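
The rewrite above drops the per-column containsKey guards in favor of a simple convention: the map is filled in several passes, one per kind of match, ordered from weakest (MatchType.tsvColumn) to strongest (MatchType.name), so a later put() for the same key overwrites a weaker earlier entry. A minimal, self-contained sketch of that overwrite behavior — TreeMap with CASE_INSENSITIVE_ORDER stands in for LabKey's CaseInsensitiveHashMap, and the column names are invented for illustration:

    import java.util.Map;
    import java.util.TreeMap;

    public class MatchPriorityDemo
    {
        enum MatchType { propertyuri, name, alias, jdbcname, tsvColumn, low }

        public static void main(String[] args)
        {
            Map<String, MatchType> map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

            map.put("Visit Date", MatchType.tsvColumn); // weakest: TSV-style display name
            map.put("Visit Date", MatchType.alias);     // stronger: an import alias
            map.put("visit date", MatchType.name);      // strongest: exact column name, put last

            // the case-insensitive lookup resolves to the last (strongest) put
            System.out.println(map.get("VISIT DATE")); // prints: name
        }
    }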

@@ -196,7 +218,6 @@ protected static ArrayList<Pair<ColumnInfo,MatchType>> _matchColumns(DataIterato
         ArrayList<Pair<ColumnInfo,MatchType>> matches = new ArrayList<>(input.getColumnCount()+1);
         matches.add(null);
 
-        // match columns to target columninfos (duplicates StandardDataIteratorBuilder, extract shared method?)
         for (int i=1 ; i<=input.getColumnCount() ; i++)
         {
             ColumnInfo from = input.getColumnInfo(i);
@@ -205,34 +226,15 @@ protected static ArrayList<Pair<ColumnInfo,MatchType>> _matchColumns(DataIterato
                 matches.add(null);
                 continue;
             }
-
-            Pair<ColumnInfo,MatchType> to = null;
-            if (isEtl)
+            Pair<ColumnInfo,MatchType> to = targetMap.get(from.getName());
+            if (null == to && null != from.getPropertyURI())
             {
-                // Match by name first
-                to = targetMap.get(from.getName());
-
-                // If name matches, check if property URI matches for higher priority match type
-                if (null != to && null != from.getPropertyURI())
-                {
-                    // Renamed built-in columns (ex. LSID, ParticipantId) in source queries will not match propertyURI with
-                    // target. In that case, just stick with name match. Otherwise check for propertyURI match for higher priority match
-                    // (this is primarily for ParticipantId which can have two columns with same name in the dataiterator)
-                    if (from.getPropertyURI().equals(to.first.getPropertyURI())) {
-                        Pair<ColumnInfo,MatchType> toUri = targetMap.get(from.getPropertyURI());
-                        if (null != toUri)
-                            to = toUri;
-                    }
-                }
-            }
-            else
-            {
-                if (null != from.getPropertyURI())
-                    to = targetMap.get(from.getPropertyURI());
-                if (null == to)
-                    to = targetMap.get(from.getName());
+                // Do we actually rely on this anywhere???
+                // Like maybe ETL from one study to another where subject name does not match? or assay publish?
+                to = targetMap.get(from.getPropertyURI());
+                if (null != to)
+                    to = new Pair<>(to.first, MatchType.low);
             }
-
             if (null == to)
             {
                 // Check to see if the column i.e. propURI has a property descriptor and vocabulary domain is present
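
The hunk above collapses the old isEtl split into one rule: match by name first, and fall back to the propertyURI only when the name finds nothing, demoting that hit to MatchType.low. A standalone sketch of the fallback — toy types only; AbstractMap.SimpleEntry stands in for LabKey's Pair and the URI string is made up:

    import java.util.AbstractMap.SimpleEntry;
    import java.util.HashMap;
    import java.util.Map;

    public class MatchFallbackDemo
    {
        enum MatchType { propertyuri, name, alias, jdbcname, tsvColumn, low }

        // name hit wins outright; a URI-only hit is kept but demoted to `low`
        static SimpleEntry<String, MatchType> resolve(Map<String, SimpleEntry<String, MatchType>> targetMap,
                                                      String name, String propertyUri)
        {
            SimpleEntry<String, MatchType> to = targetMap.get(name);
            if (null == to && null != propertyUri)
            {
                to = targetMap.get(propertyUri);
                if (null != to)
                    to = new SimpleEntry<>(to.getKey(), MatchType.low);
            }
            return to;
        }

        public static void main(String[] args)
        {
            Map<String, SimpleEntry<String, MatchType>> targetMap = new HashMap<>();
            targetMap.put("ParticipantId", new SimpleEntry<>("ParticipantId", MatchType.name));
            targetMap.put("urn:example#ParticipantId", new SimpleEntry<>("ParticipantId", MatchType.propertyuri));

            // renamed source column: no name hit, so the URI match is used at low rank
            System.out.println(resolve(targetMap, "Ptid", "urn:example#ParticipantId").getValue()); // low
        }
    }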
@@ -302,13 +304,10 @@ public static ArrayList<ColumnInfo> matchColumns(DataIterator input, TableInfo t
     }
 
 
-
-
     // NOTE: first consider if using QueryUpdateService is better
     // this is just a point-to-point copy _without_ triggers
     public static int copy(File from, TableInfo to, Container c, User user) throws IOException, BatchValidationException
     {
-
         BatchValidationException errors = new BatchValidationException();
         DataIteratorContext context = new DataIteratorContext(errors);
         DataLoader loader = DataLoaderService.get().createLoader(from, null, true, c, TabLoader.TSV_FILE_TYPE);
9 changes: 7 additions & 2 deletions api/src/org/labkey/api/dataiterator/ExistingRecordDataIterator.java
@@ -39,6 +39,7 @@ public abstract class ExistingRecordDataIterator extends WrapperDataIterator
 {
     public static final String EXISTING_RECORD_COLUMN_NAME = "_" + ExistingRecordDataIterator.class.getName() + "#EXISTING_RECORD_COLUMN_NAME";
 
+    final CachingDataIterator _unwrapped;
     final TableInfo target;
     final ArrayList<ColumnInfo> pkColumns = new ArrayList<>();
     final ArrayList<Supplier<Object>> pkSuppliers = new ArrayList<>();
@@ -52,6 +53,10 @@ public abstract class ExistingRecordDataIterator extends WrapperDataIterator
     ExistingRecordDataIterator(DataIterator in, TableInfo target, @Nullable Set<String> keys, boolean useMark)
     {
         super(in);
+
+        // NOTE it might get wrapped with a LoggingDataIterator, so remember the original DataIterator
+        this._unwrapped = useMark ? (CachingDataIterator)in : null;
+
         this.target = target;
         this.existingColIndex = in.getColumnCount()+1;
         this.useMark = useMark;
@@ -130,7 +135,7 @@ public boolean next() throws BatchValidationException
         {
             // NOTE: we have to call mark() before we call next() if we want the 'next' row to be cached
             if (useMark)
-                ((CachingDataIterator)_delegate).mark();
+                _unwrapped.mark(); // unwrapped _delegate
             boolean ret = super.next();
             if (ret && !pkColumns.isEmpty())
                 prefetchExisting();
@@ -251,7 +256,7 @@ protected void prefetchExisting() throws BatchValidationException
                 existingRecords.put(r,(Map<String,Object>)map);
             });
             // backup to where we started so caller can iterate through them one at a time
-            ((CachingDataIterator)_delegate).reset();
+            _unwrapped.reset(); // unwrapped _delegate
             _delegate.next();
         }
     }
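
Both hunks above depend on CachingDataIterator's mark()/reset() contract: mark the current position, run ahead to batch-fetch existing rows by primary key, then rewind so the caller still sees every row. The new _unwrapped field matters because the incoming iterator may later be wrapped (e.g. by a LoggingDataIterator), after which _delegate can no longer be cast to CachingDataIterator. A toy, self-contained version of that contract — a sketch of the pattern, not LabKey's class (a real implementation would prune the buffer once a mark is released):

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    class MarkableIterator<T> implements Iterator<T>
    {
        private final Iterator<T> source;
        private final List<T> buffer = new ArrayList<>();
        private int pos = 0;    // next index served from the buffer
        private int mark = -1;  // -1 means no active mark

        MarkableIterator(Iterator<T> source) { this.source = source; }

        void mark()  { mark = pos; }

        void reset()
        {
            if (mark < 0) throw new IllegalStateException("no mark set");
            pos = mark; // rewind: rows read since mark() are replayed from the buffer
        }

        @Override
        public boolean hasNext() { return pos < buffer.size() || source.hasNext(); }

        @Override
        public T next()
        {
            if (pos == buffer.size())
                buffer.add(source.next()); // pull a fresh row and remember it
            return buffer.get(pos++);
        }
    }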
35 changes: 20 additions & 15 deletions study/src/org/labkey/study/model/DatasetDataIteratorBuilder.java
@@ -199,6 +199,22 @@ public DataIterator getDataIterator(DataIteratorContext context)
                     continue;
                 }
 
+                if (match == subjectCol)
+                {
+                    try
+                    {
+                        // translate the incoming participant column
+                        // do a conversion for PTID aliasing
+                        it.translatePtid(in, user);
+                        continue;
+                    }
+                    catch (ValidationException e)
+                    {
+                        setupError(e.getMessage());
+                        return it;
+                    }
+                }
+
                 int out;
                 if (DefaultStudyDesignWriter.isColumnNumericForeignKeyToDataspaceTable(match.getFk(), true))
                 {
@@ -250,19 +266,8 @@ else if (match.getPropertyType() == PropertyType.FILE_LINK)
             Integer indexContainer = outputMap.get(containerColumn);
             Integer indexReplace = outputMap.get("replace");
 
-            // do a conversion for PTID aliasing
-            Integer translatedIndexPTID = indexPTID;
-            try
-            {
-                translatedIndexPTID = it.translatePtid(indexPTIDInput, user);
-            }
-            catch (ValidationException e)
-            {
-                context.getErrors().addRowError(e);
-            }
-
             // For now, just specify null for sequence num index... we'll add it below
-            it.setSpecialOutputColumns(translatedIndexPTID, null, indexVisitDate, indexKeyProperty, indexContainer);
+            it.setSpecialOutputColumns(indexPTID, null, indexVisitDate, indexKeyProperty, indexContainer);
             it.setTimepointType(timetype);
 
             /* NOTE: these columns must be added in dependency order
@@ -412,11 +417,11 @@ else if (_datasetDefinition.getUseTimeKeyField())
         {
             Integer indexVisit = timetype.isVisitBased() ? it.indexSequenceNumOutput : indexVisitDate;
             // no point if required columns are missing
-            if (null != translatedIndexPTID && null != indexVisit)
+            if (null != indexPTID && null != indexVisit)
             {
                 ScrollableDataIterator scrollable = DataIteratorUtil.wrapScrollable(ret);
                 _datasetDefinition.checkForDuplicates(scrollable, indexLSID,
-                        translatedIndexPTID, null == indexVisit ? -1 : indexVisit, null == indexKeyProperty ? -1 : indexKeyProperty, null == indexReplace ? -1 : indexReplace,
+                        indexPTID, null == indexVisit ? -1 : indexVisit, null == indexKeyProperty ? -1 : indexKeyProperty, null == indexReplace ? -1 : indexReplace,
                         context, null,
                         checkDuplicates);
                 scrollable.beforeFirst();
@@ -613,7 +618,7 @@ int translateSequenceNum(Integer indexSequenceNumInput, Integer indexVisitDateIn
 
     int translatePtid(Integer indexPtidInput, User user) throws ValidationException
     {
-        ColumnInfo col = new BaseColumnInfo("ParticipantId", JdbcType.VARCHAR);
+        ColumnInfo col = new BaseColumnInfo(_datasetDefinition.getStudy().getSubjectColumnName(), JdbcType.VARCHAR);
         ParticipantIdImportHelper piih = new ParticipantIdImportHelper(_datasetDefinition.getStudy(), user, _datasetDefinition);
         Callable call = piih.getCallable(getInput(), indexPtidInput);
         return addColumn(col, call);
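
The fix above stops hard-coding "ParticipantId" and instead names the output column after the study's configured subject column. The translation itself is a lookup-with-fallback over the study's PTID alias map; roughly like the following sketch — the helper and its shape are hypothetical, not ParticipantIdImportHelper's actual implementation:

    import java.util.Map;
    import java.util.concurrent.Callable;

    class PtidAliasLookup
    {
        // Resolve the incoming value through an alias-to-PTID map,
        // falling back to the raw value when it is not aliased.
        static Callable<Object> aliasingCallable(Map<String, String> aliasToPtid,
                                                 Callable<Object> rawValue)
        {
            return () -> {
                Object v = rawValue.call();
                if (v == null)
                    return null;
                return aliasToPtid.getOrDefault(v.toString(), v.toString());
            };
        }
    }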
