Skip to content

Commit

Permalink
spark: use hashset in column level lineage instead of iterating through linkedlist (#2584)
Browse files Browse the repository at this point in the history

Signed-off-by: Maciej Obuchowski <obuchowski.maciej@gmail.com>
  • Loading branch information
mobuchowski committed Apr 5, 2024
1 parent bc31c80 commit ccf2286
Showing 1 changed file with 3 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
public class ColumnLevelLineageBuilder {

private Map<ExprId, Set<ExprId>> exprDependencies = new HashMap<>();
@Getter private Map<ExprId, List<Pair<DatasetIdentifier, String>>> inputs = new HashMap<>();
@Getter private Map<ExprId, Set<Pair<DatasetIdentifier, String>>> inputs = new HashMap<>();
private Map<OpenLineage.SchemaDatasetFacetFields, ExprId> outputs = new HashMap<>();
private Map<ColumnMeta, ExprId> externalExpressionMappings = new HashMap<>();
private final OpenLineage.SchemaDatasetFacet schema;
Expand All @@ -61,13 +61,8 @@ public ColumnLevelLineageBuilder(
* @param attributeName
*/
public void addInput(ExprId exprId, DatasetIdentifier datasetIdentifier, String attributeName) {
  // computeIfAbsent returns the (possibly freshly created) set, so a single
  // map lookup both creates the bucket on first use and receives the add.
  // HashSet gives O(1) de-duplication; the previous LinkedList required a
  // linear contains() scan before every insert.
  inputs
      .computeIfAbsent(exprId, k -> new HashSet<>())
      .add(Pair.of(datasetIdentifier, attributeName));
}

/**
Expand Down

0 comments on commit ccf2286

Please sign in to comment.