Skip to content

Commit

Permalink
[opt](hive) save hive table schema in transaction (apache#37008)
Browse files Browse the repository at this point in the history
Save the table schema, reduce the number of HMS calls, and improve write
performance.
  • Loading branch information
wuwenchi committed Jul 1, 2024
1 parent 6c70782 commit c8f1b9f
Showing 1 changed file with 11 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import com.google.common.collect.Maps;
import io.airlift.concurrent.MoreFutures;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
Expand Down Expand Up @@ -88,6 +89,7 @@ public class HMSTransaction implements Transaction {
private final Map<DatabaseTableName, Action<TableAndMore>> tableActions = new HashMap<>();
private final Map<DatabaseTableName, Map<List<String>, Action<PartitionAndMore>>>
partitionActions = new HashMap<>();
private final Map<DatabaseTableName, List<FieldSchema>> tableColumns = new HashMap<>();

private final Executor fileSystemExecutor;
private HmsCommitter hmsCommitter;
Expand Down Expand Up @@ -123,7 +125,7 @@ public int hashCode() {
}
}

private Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = new HashSet<>();
private final Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = new HashSet<>();

public HMSTransaction(HiveMetadataOps hiveOps, FileSystemProvider fileSystemProvider, Executor fileSystemExecutor) {
this.hiveOps = hiveOps;
Expand Down Expand Up @@ -241,7 +243,7 @@ public void finishInsertTable(String dbName, String tbName) {
Maps.newHashMap(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
hiveOps.getClient().getSchema(dbName, tbName)
getTableColumns(dbName, tbName)
);
if (updateMode == TUpdateMode.OVERWRITE) {
dropPartition(dbName, tbName, hivePartition.getPartitionValues(), true);
Expand Down Expand Up @@ -396,7 +398,7 @@ private void convertToInsertExistingPartitionAction(
partition.getParameters(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
hiveOps.getClient().getSchema(dbName, tbName)
getTableColumns(dbName, tbName)
);

partitionActionsForTable.put(
Expand Down Expand Up @@ -913,6 +915,11 @@ public synchronized Table getTable(String databaseName, String tableName) {
throw new RuntimeException("Not Found table: " + databaseName + "." + tableName);
}

/**
 * Returns the column schema of the given table, fetching it from the Hive
 * Metastore on first access and caching it in {@code tableColumns} for the
 * lifetime of this transaction. Subsequent calls for the same table reuse the
 * cached list, avoiding repeated HMS {@code getSchema} round-trips.
 *
 * @param databaseName the Hive database name
 * @param tableName    the Hive table name
 * @return the table's columns as reported by the metastore (cached per table)
 */
public synchronized List<FieldSchema> getTableColumns(String databaseName, String tableName) {
    // Fix: the loader lambda must use this method's own parameters.
    // The original referenced `dbName`/`tbName`, which are not declared in
    // this scope (they were the parameter names at the call sites), so the
    // wrong — or nonexistent — identifiers were used to key the HMS lookup.
    return tableColumns.computeIfAbsent(new DatabaseTableName(databaseName, tableName),
            key -> hiveOps.getClient().getSchema(databaseName, tableName));
}

public synchronized void finishChangingExistingTable(
ActionType actionType,
String databaseName,
Expand Down Expand Up @@ -1276,7 +1283,7 @@ public void prepareAddPartition(PartitionAndMore partitionAndMore) {
Maps.newHashMap(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
hiveOps.getClient().getSchema(dbName, tbName)
getTableColumns(dbName, tbName)
);

HivePartitionWithStatistics partitionWithStats =
Expand Down

0 comments on commit c8f1b9f

Please sign in to comment.