diff --git a/metastor/managedObjectLoader/pom.xml b/metastor/managedObjectLoader/pom.xml index 9a1f5620..fdfc73d5 100644 --- a/metastor/managedObjectLoader/pom.xml +++ b/metastor/managedObjectLoader/pom.xml @@ -19,7 +19,7 @@ metastore org.finra.herd-mdl.metastore - 1.2.19 + 1.2.25 4.0.0 diff --git a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/JobDefinition.java b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/JobDefinition.java index 4d735e48..7432021c 100644 --- a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/JobDefinition.java +++ b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/JobDefinition.java @@ -15,6 +15,7 @@ **/ package org.finra.herd.metastore.managed; +import com.google.common.collect.Lists; import lombok.Getter; import lombok.Setter; import lombok.ToString; @@ -28,6 +29,8 @@ import java.io.StringReader; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.StringJoiner; @@ -44,16 +47,15 @@ public class JobDefinition { boolean subPartitionLevelProcessing = false; - String partitionValue; String correlation; String executionID; String clusterName; String actualObjectName; String tableName; String partitionKey; - String topLevelPartitionValue; - String subPartitionKey; - String subPartitionValue; + String partitionValue; + List partitionValues; + Map partitionsKeyValue; ObjectDefinition objectDefinition; @@ -97,6 +99,15 @@ public JobDefinition(long id, String nameSpace, String objectName, String usageC public JobDefinition() { } + public String getPartitionsSpecForStats(){ + StringJoiner partitionStats = new StringJoiner( "," , "(", ")"); + partitionsKeyValue.forEach( (k, v) -> { + partitionStats.add( String.format( "`%s`='%s'", k, v ) ); + } ); + + return partitionStats.toString(); + } + @Slf4j public static class ObjectDefinitionMapper implements RowMapper { @@ -111,12 +122,12 @@ public JobDefinition mapRow(ResultSet resultSet, int i) throws SQLException { jobDefinition.objectDefinition.usageCode=resultSet.getString("USAGE_CODE"); jobDefinition.objectDefinition.fileType=resultSet.getString("FILE_TYPE"); - jobDefinition.partitionValue=resultSet.getString("PARTITION_VALUES"); + jobDefinition.partitionValue=resultSet.getString("PARTITION_VALUES"); if ( jobDefinition.getPartitionValue().contains( SUB_PARTITION_VAL_SEPARATOR ) ) { - jobDefinition.topLevelPartitionValue = jobDefinition.getPartitionValue().split( SUB_PARTITION_VAL_SEPARATOR )[0]; - jobDefinition.subPartitionValue = jobDefinition.getPartitionValue().split( SUB_PARTITION_VAL_SEPARATOR )[1]; jobDefinition.subPartitionLevelProcessing = true; + jobDefinition.partitionValues = Lists.newArrayList( jobDefinition.getPartitionValue().split( SUB_PARTITION_VAL_SEPARATOR )); } + // Execution ID not being used, other than filenaming .hql files it just have to unique jobDefinition.executionID=resultSet.getString("ID"); jobDefinition.partitionKey=resultSet.getString("PARTITION_KEY"); diff --git a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/datamgmt/DataMgmtSvc.java b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/datamgmt/DataMgmtSvc.java index b563c6dc..8f49f60f 100644 --- a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/datamgmt/DataMgmtSvc.java +++ b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/datamgmt/DataMgmtSvc.java @@ -16,6 +16,7 @@ package org.finra.herd.metastore.managed.datamgmt; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import lombok.extern.slf4j.Slf4j; import org.finra.herd.metastore.managed.JobDefinition; import org.finra.herd.metastore.managed.ObjectProcessor; @@ -32,6 +33,8 @@ import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.List; +import java.util.Map; +import java.util.stream.IntStream; /** * Herd Client @@ -100,38 +103,22 @@ public BusinessObjectDataDdl getBusinessObjectDataDdl( org.finra.herd.metastore. request.setTableName( jd.getTableName() ); List partitionValueFilters = Lists.newArrayList(); - PartitionValueFilter filter = new PartitionValueFilter(); - - filter.setPartitionKey( jd.getPartitionKey() ); if ( jd.getWfType() == ObjectProcessor.WF_TYPE_SINGLETON && !jd.getPartitionKey().equalsIgnoreCase( "PARTITION" ) ) { - Calendar c = Calendar.getInstance(); - c.add( Calendar.DATE, 1 ); - String date = new SimpleDateFormat( "YYYY-MM-dd" ).format( c.getTime() ); - LatestBeforePartitionValue value = new LatestBeforePartitionValue(); - value.setPartitionValue( date ); - filter.setLatestBeforePartitionValue( value ); - - filter.setPartitionValues( null ); - filter.setLatestAfterPartitionValue( null ); - - + addPartitionedSingletonFilter( jd, partitionValueFilters ); } else { log.info( "Partitions: {}", partitions ); if ( jd.getWfType() == ObjectProcessor.WF_TYPE_SINGLETON && jd.getPartitionKey().equalsIgnoreCase( "PARTITION" ) ) { - filter.setPartitionValues( Lists.newArrayList( "none" ) ); + addPartitionFilter( jd.getPartitionKey(), Lists.newArrayList( "none" ), partitionValueFilters ); } else { if ( jd.isSubPartitionLevelProcessing() ) { - log.info( "Top Level Partition: {}, SubPartition: {}", jd.getTopLevelPartitionValue(), jd.getSubPartitionValue() ); - filter.setPartitionValues( Lists.newArrayList( jd.getTopLevelPartitionValue() ) ); addSubPartitionFilter( jd, partitionValueFilters); } else { - filter.setPartitionValues( partitions ); + addPartitionFilter( jd.getPartitionKey(), partitions, partitionValueFilters ); } } } - partitionValueFilters.add( filter ); request.setPartitionValueFilter( null ); request.setPartitionValueFilters( partitionValueFilters ); @@ -141,8 +128,30 @@ public BusinessObjectDataDdl getBusinessObjectDataDdl( org.finra.herd.metastore. return businessObjectDataApi.businessObjectDataGenerateBusinessObjectDataDdl( request ); } + private void addPartitionFilter( String partitionKey, List partitions, List partitionValueFilters ) { + PartitionValueFilter filter = new PartitionValueFilter(); + filter.setPartitionKey( partitionKey ); + filter.setPartitionValues( partitions ); + partitionValueFilters.add( filter ); + } + + private void addPartitionedSingletonFilter( JobDefinition jd, List partitionValueFilters ) { + Calendar c = Calendar.getInstance(); + c.add( Calendar.DATE, 1 ); + String date = new SimpleDateFormat( "YYYY-MM-dd" ).format( c.getTime() ); + LatestBeforePartitionValue value = new LatestBeforePartitionValue(); + value.setPartitionValue( date ); + + PartitionValueFilter filter = new PartitionValueFilter(); + filter.setPartitionKey( jd.getPartitionKey() ); + filter.setLatestBeforePartitionValue( value ); + filter.setPartitionValues( null ); + filter.setLatestAfterPartitionValue( null ); + partitionValueFilters.add( filter ); + } + /** - * To add Sub Partition value filter + * To partition filter with sub partitions * * @param jd * @param partitionValueFilters @@ -150,19 +159,20 @@ public BusinessObjectDataDdl getBusinessObjectDataDdl( org.finra.herd.metastore. */ private void addSubPartitionFilter( JobDefinition jd, List partitionValueFilters ) throws ApiException { List partitionKeys = getDMFormat( jd ).getSchema().getPartitions(); - log.debug( "Partition Keys {} for {}", partitionKeys, jd.getTableName() ); - - if ( partitionKeys.size() >= 2 ) { - PartitionValueFilter subPartitionFilter = new PartitionValueFilter(); - String subPartitionKey = partitionKeys.get( 1 ).getName(); - log.debug( "SubPartition Partition Key: {}", subPartitionKey ); - jd.setSubPartitionKey( subPartitionKey ); - subPartitionFilter.setPartitionKey( subPartitionKey ); - subPartitionFilter.setPartitionValues( Lists.newArrayList( jd.getSubPartitionValue() ) ); - partitionValueFilters.add( subPartitionFilter ); - } else { - log.warn( "Object not partitioned correctly, not enough partition keys to find sub partition" ); - } + log.info( "Partition Keys {} for {}", partitionKeys, jd.getTableName() ); + Map partitionKeyValues = Maps.newLinkedHashMap(); + + IntStream.range( 0, jd.getPartitionValues().size() ) + .forEach( i -> { + String partitionKey = partitionKeys.get( i ).getName(); + String partitionValue = jd.getPartitionValues().get( i ); + log.info( "Partition Key: {}\t Value: {}", partitionKey, partitionValue ); + partitionKeyValues.put( partitionKey, partitionValue ); + + addPartitionFilter( partitionKey, Lists.newArrayList( partitionValue ), partitionValueFilters ); + } ); + + jd.setPartitionsKeyValue( partitionKeyValues ); } public BusinessObjectFormatKeys getBOAllFormatVersions( org.finra.herd.metastore.managed.JobDefinition od, boolean latestBusinessObjectFormatVersion ) throws ApiException { diff --git a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/jobProcessor/HiveHqlGenerator.java b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/jobProcessor/HiveHqlGenerator.java index 4b1fd39b..21de5eb7 100644 --- a/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/jobProcessor/HiveHqlGenerator.java +++ b/metastor/managedObjectLoader/src/main/java/org/finra/herd/metastore/managed/jobProcessor/HiveHqlGenerator.java @@ -269,8 +269,7 @@ protected void addAnalyzeStats( JobDefinition jd, List partitions, List< } } else if (partitions.size() == 1) { if ( jd.isSubPartitionLevelProcessing() ) { - schemaHql.add( String.format( "analyze table %s partition(`%s`='%s', `%s`='%s') compute statistics noscan;" - , jd.getTableName(), jd.getPartitionKey(), jd.getTopLevelPartitionValue(), jd.getSubPartitionKey(), jd.getSubPartitionValue() ) ); + schemaHql.add( String.format( "analyze table %s partition %s compute statistics noscan;", jd.getTableName(), jd.getPartitionsSpecForStats() ) ); } else { schemaHql.add( String.format( "analyze table %s partition(`%s`='%s') compute statistics noscan;" , jd.getTableName(), jd.getPartitionKey(), partitions.get( 0 ) ) diff --git a/metastor/metastoreOperations/pom.xml b/metastor/metastoreOperations/pom.xml index fe37dc19..270d9109 100644 --- a/metastor/metastoreOperations/pom.xml +++ b/metastor/metastoreOperations/pom.xml @@ -20,7 +20,7 @@ metastore org.finra.herd-mdl.metastore - 1.2.19 + 1.2.25 4.0.0 diff --git a/metastor/pom.xml b/metastor/pom.xml index 93eb61f0..75927c7f 100644 --- a/metastor/pom.xml +++ b/metastor/pom.xml @@ -29,7 +29,7 @@ metastore org.finra.herd-mdl.metastore - 1.2.19 + 1.2.25 pom