-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Enhancement] Materialized views support refresh granularity splits #12926
Changes from 7 commits
2fecef3
e67e19a
b8539d2
4d9ff47
8442764
7f7d0b3
26a3af4
4d337e5
78cea18
835ac32
4f6ecd0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,11 @@ | |
import com.google.common.collect.Maps; | ||
import com.google.common.collect.Range; | ||
import com.google.common.collect.Sets; | ||
import com.starrocks.analysis.DateLiteral; | ||
import com.starrocks.analysis.Expr; | ||
import com.starrocks.analysis.FunctionCallExpr; | ||
import com.starrocks.analysis.IntLiteral; | ||
import com.starrocks.analysis.LiteralExpr; | ||
import com.starrocks.analysis.SlotRef; | ||
import com.starrocks.analysis.StringLiteral; | ||
import com.starrocks.catalog.Column; | ||
|
@@ -29,6 +32,8 @@ | |
import com.starrocks.common.Pair; | ||
import com.starrocks.common.UserException; | ||
import com.starrocks.common.io.DeepCopy; | ||
import com.starrocks.common.util.DateUtils; | ||
import com.starrocks.common.util.RangeUtils; | ||
import com.starrocks.common.util.UUIDUtil; | ||
import com.starrocks.connector.PartitionUtil; | ||
import com.starrocks.persist.ChangeMaterializedViewRefreshSchemeLog; | ||
|
@@ -64,10 +69,12 @@ | |
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.Iterator; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* Core logic of materialized view refresh task run | ||
|
@@ -87,6 +94,15 @@ public class PartitionBasedMaterializedViewRefreshProcessor extends BaseTaskRunP | |
// table id -> <base table info, snapshot table> | ||
private Map<Long, Pair<MaterializedView.BaseTableInfo, Table>> snapshotBaseTables; | ||
|
||
@VisibleForTesting | ||
public MvTaskRunContext getMvContext() { | ||
return mvContext; | ||
} | ||
@VisibleForTesting | ||
public void setMvContext(MvTaskRunContext mvContext) { | ||
this.mvContext = mvContext; | ||
} | ||
|
||
// Core logics: | ||
// 1. prepare to check some conditions | ||
// 2. sync partitions with base tables(add or drop partitions, which will be optimized by dynamic partition creation later) | ||
|
@@ -129,12 +145,14 @@ public void processTaskRun(TaskRunContext context) throws Exception { | |
// refresh external table meta cache | ||
refreshExternalTable(context); | ||
Set<String> partitionsToRefresh = getPartitionsToRefreshForMaterializedView(context.getProperties()); | ||
LOG.debug("materialized view partitions to refresh:{}", partitionsToRefresh); | ||
if (partitionsToRefresh.isEmpty()) { | ||
LOG.info("no partitions to refresh for materialized view {}", materializedView.getName()); | ||
return; | ||
} | ||
// Only refresh the first partition refresh number partitions, other partitions will generate new tasks | ||
filterPartitionByRefreshNumber(partitionsToRefresh, materializedView); | ||
|
||
LOG.debug("materialized view partitions to refresh:{}", partitionsToRefresh); | ||
Map<String, Set<String>> sourceTablePartitions = getSourceTablePartitions(partitionsToRefresh); | ||
LOG.debug("materialized view:{} source partitions :{}", | ||
materializedView.getName(), sourceTablePartitions); | ||
|
@@ -152,6 +170,86 @@ public void processTaskRun(TaskRunContext context) throws Exception { | |
|
||
// insert execute successfully, update the meta of materialized view according to ExecPlan | ||
updateMeta(execPlan); | ||
|
||
if (mvContext.hasNextBatchPartition()) { | ||
generateNextTaskRun(); | ||
} | ||
} | ||
|
||
@VisibleForTesting | ||
public void filterPartitionByRefreshNumber(Set<String> partitionsToRefresh, MaterializedView materializedView) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. have you test if one source base table partition has two target mv partitions can run successfully? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact, this configuration has nothing to do with the source table, only the number of partitions of the materialized view table. |
||
int partitionRefreshNumber = materializedView.getTableProperty().getPartitionRefreshNumber(); | ||
if (partitionRefreshNumber <= 0) { | ||
return; | ||
} | ||
Map<String, Range<PartitionKey>> rangePartitionMap = materializedView.getRangePartitionMap(); | ||
if (partitionRefreshNumber >= rangePartitionMap.size()) { | ||
return; | ||
} | ||
Map<String, Range<PartitionKey>> mappedPartitionsToRefresh = Maps.newHashMap(); | ||
for (String partitionName : partitionsToRefresh) { | ||
mappedPartitionsToRefresh.put(partitionName, rangePartitionMap.get(partitionName)); | ||
} | ||
LinkedHashMap<String, Range<PartitionKey>> sortedPartition = mappedPartitionsToRefresh.entrySet().stream() | ||
.sorted(Map.Entry.comparingByValue(RangeUtils.RANGE_COMPARATOR)) | ||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new)); | ||
|
||
Iterator<String> partitionNameIter = sortedPartition.keySet().iterator(); | ||
for (int i = 0; i < partitionRefreshNumber; i++) { | ||
if (partitionNameIter.hasNext()) { | ||
partitionNameIter.next(); | ||
} | ||
} | ||
String nextPartitionStart = null; | ||
String endPartitionName = null; | ||
if (partitionNameIter.hasNext()) { | ||
String startPartitionName = partitionNameIter.next(); | ||
Range<PartitionKey> partitionKeyRange = mappedPartitionsToRefresh.get(startPartitionName); | ||
LiteralExpr lowerExpr = partitionKeyRange.lowerEndpoint().getKeys().get(0); | ||
nextPartitionStart = parseLiteralExprToDateString(lowerExpr, 0); | ||
endPartitionName = startPartitionName; | ||
partitionsToRefresh.remove(endPartitionName); | ||
} | ||
while (partitionNameIter.hasNext()) { | ||
endPartitionName = partitionNameIter.next(); | ||
partitionsToRefresh.remove(endPartitionName); | ||
} | ||
|
||
mvContext.setNextPartitionStart(nextPartitionStart); | ||
LiteralExpr upperExpr = mappedPartitionsToRefresh.get(endPartitionName).upperEndpoint().getKeys().get(0); | ||
mvContext.setNextPartitionEnd(parseLiteralExprToDateString(upperExpr, 1)); | ||
} | ||
|
||
private String parseLiteralExprToDateString(LiteralExpr expr, int offset) { | ||
if (expr instanceof DateLiteral) { | ||
DateLiteral lowerDate = (DateLiteral) expr; | ||
return DateUtils.DATE_FORMATTER.format(lowerDate.toLocalDateTime().plusDays(offset)); | ||
} else if (expr instanceof IntLiteral) { | ||
IntLiteral intLiteral = (IntLiteral) expr; | ||
return String.valueOf(intLiteral.getLongValue() + offset); | ||
} else { | ||
return null; | ||
} | ||
} | ||
|
||
private void generateNextTaskRun() { | ||
TaskManager taskManager = GlobalStateMgr.getCurrentState().getTaskManager(); | ||
Map<String, String> properties = mvContext.getProperties(); | ||
long mvId = Long.parseLong(properties.get(MV_ID)); | ||
String taskName = TaskBuilder.getMvTaskName(mvId); | ||
Map<String, String> newProperties = Maps.newHashMap(); | ||
for (Map.Entry<String, String> proEntry : properties.entrySet()) { | ||
if (proEntry.getValue() != null) { | ||
newProperties.put(proEntry.getKey(), proEntry.getValue()); | ||
} | ||
} | ||
newProperties.put(TaskRun.PARTITION_START, mvContext.getNextPartitionStart()); | ||
newProperties.put(TaskRun.PARTITION_END, mvContext.getNextPartitionEnd()); | ||
ExecuteOption option = new ExecuteOption(Constants.TaskRunPriority.LOWEST.value(), | ||
false, newProperties); | ||
taskManager.executeTask(taskName, option); | ||
LOG.info("Submit a generate taskRun for task:{}, partitionStart:{}, partitionEnd:{}", mvId, | ||
mvContext.getNextPartitionStart(), mvContext.getNextPartitionEnd()); | ||
} | ||
|
||
private void refreshExternalTable(TaskRunContext context) { | ||
|
@@ -588,7 +686,7 @@ private boolean checkBaseTablePartitionChange() { | |
} | ||
} | ||
} catch (UserException e) { | ||
LOG.warn("Materialized view compute partition change failed : {}", e); | ||
LOG.warn("Materialized view compute partition change failed", e); | ||
return true; | ||
} finally { | ||
db.readUnlock(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How can the refresh number be unset?
What happens if the user does not specify the refresh number? What is the default logic?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feature will be done in the "Support alter materialized view properties" PR or a later PR.