Skip to content

Commit

Permalink
Merge branch 'GoogleCloudPlatform:main' into sharantej-dataflowTemplates
Browse files Browse the repository at this point in the history
  • Loading branch information
sharan-malyala committed May 8, 2024
2 parents d9916c4 + b910d17 commit 623e467
Show file tree
Hide file tree
Showing 21 changed files with 617 additions and 139 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/java-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ on:
- cron: "0 */12 * * *"
workflow_dispatch:

# Cancel superseded runs of this workflow. Runs are grouped by pull request number
# when the triggering event carries one (github.event.issue is only populated for
# issue / issue_comment events, so it alone never matches a pull_request run).
# Events without any number fall back to run_id, i.e. a group of their own, so
# scheduled and manually dispatched runs never cancel each other.
concurrency:
  group: java-pr-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}
  cancel-in-progress: true

env:
MAVEN_OPTS: -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=error

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,18 @@
import com.google.cloud.bigquery.TableId;
import java.time.format.DateTimeFormatter;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RandomStringUtils;

/** Utilities for {@link BigQueryResourceManager} implementations. */
public final class BigQueryResourceManagerUtils {

private static final int MAX_DATASET_ID_LENGTH = 1024;
private static final Pattern ILLEGAL_DATASET_ID_CHARS = Pattern.compile("[^a-zA-Z0-9_]");
private static final String REPLACE_CHAR = "_";
private static final int MIN_TABLE_ID_LENGTH = 1;
private static final int MAX_TABLE_ID_LENGTH = 1024;
private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_]");
private static final DateTimeFormatter TIME_FORMAT =
DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS");
private static final String TIME_FORMAT = "yyyyMMdd_HHmmss";

private BigQueryResourceManagerUtils() {}

Expand All @@ -46,8 +47,31 @@ private BigQueryResourceManagerUtils() {}
* @return a BigQuery compatible dataset name.
*/
static String generateDatasetId(String datasetName) {

// Take substring of datasetName to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
datasetName =
datasetName.substring(
0,
Math.min(
datasetName.length(),
MAX_DATASET_ID_LENGTH
- REPLACE_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_CHAR.length()
- randomSuffixLength));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
return generateResourceId(
datasetName, ILLEGAL_DATASET_ID_CHARS, "_", MAX_DATASET_ID_LENGTH, TIME_FORMAT);
datasetName,
ILLEGAL_DATASET_ID_CHARS,
REPLACE_CHAR,
MAX_DATASET_ID_LENGTH,
DateTimeFormatter.ofPattern(TIME_FORMAT))
+ REPLACE_CHAR
+ RandomStringUtils.randomAlphanumeric(6).toLowerCase();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.List;
import java.util.regex.Pattern;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
import org.apache.commons.lang3.RandomStringUtils;

/** Utilities for {@link BigtableResourceManager} implementations. */
public final class BigtableResourceManagerUtils {
Expand All @@ -39,8 +40,7 @@ public final class BigtableResourceManagerUtils {
private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_.]");
private static final String REPLACE_TABLE_ID_CHAR = "-";

private static final DateTimeFormatter TIME_FORMAT =
DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS");
private static final String TIME_FORMAT = "yyyyMMdd-HHmmss";

private BigtableResourceManagerUtils() {}

Expand All @@ -57,13 +57,35 @@ private BigtableResourceManagerUtils() {}
static List<BigtableResourceManagerCluster> generateDefaultClusters(
String baseString, String zone, int numNodes, StorageType storageType) {

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
baseString =
baseString
.toLowerCase()
.substring(
0,
Math.min(
baseString.length(),
MAX_CLUSTER_ID_LENGTH
- REPLACE_CLUSTER_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_CLUSTER_CHAR.length()
- randomSuffixLength));

String clusterId =
generateResourceId(
baseString.toLowerCase(),
ILLEGAL_CLUSTER_CHARS,
REPLACE_CLUSTER_CHAR,
MAX_CLUSTER_ID_LENGTH,
TIME_FORMAT);
DateTimeFormatter.ofPattern(TIME_FORMAT));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
clusterId =
clusterId + REPLACE_CLUSTER_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase();

BigtableResourceManagerCluster cluster =
BigtableResourceManagerCluster.create(clusterId, zone, numNodes, storageType);

Expand All @@ -77,12 +99,31 @@ static List<BigtableResourceManagerCluster> generateDefaultClusters(
* @return The instance id string.
*/
static String generateInstanceId(String baseString) {

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
baseString =
baseString.substring(
0,
Math.min(
baseString.length(),
MAX_INSTANCE_ID_LENGTH
- REPLACE_INSTANCE_ID_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_INSTANCE_ID_CHAR.length()
- randomSuffixLength));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
return generateResourceId(
baseString.toLowerCase(),
ILLEGAL_INSTANCE_ID_CHARS,
REPLACE_INSTANCE_ID_CHAR,
MAX_INSTANCE_ID_LENGTH,
TIME_FORMAT);
baseString.toLowerCase(),
ILLEGAL_INSTANCE_ID_CHARS,
REPLACE_INSTANCE_ID_CHAR,
MAX_INSTANCE_ID_LENGTH,
DateTimeFormatter.ofPattern(TIME_FORMAT))
+ REPLACE_INSTANCE_ID_CHAR
+ RandomStringUtils.randomAlphanumeric(6).toLowerCase();
}

/**
Expand All @@ -92,12 +133,31 @@ static String generateInstanceId(String baseString) {
* @return The instance id string.
*/
public static String generateTableId(String baseString) {

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
baseString =
baseString.substring(
0,
Math.min(
baseString.length(),
MAX_TABLE_ID_LENGTH
- REPLACE_TABLE_ID_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_TABLE_ID_CHAR.length()
- randomSuffixLength));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
return generateResourceId(
baseString.toLowerCase(),
ILLEGAL_TABLE_CHARS,
REPLACE_TABLE_ID_CHAR,
MAX_TABLE_ID_LENGTH,
TIME_FORMAT);
baseString.toLowerCase(),
ILLEGAL_TABLE_CHARS,
REPLACE_TABLE_ID_CHAR,
MAX_TABLE_ID_LENGTH,
DateTimeFormatter.ofPattern(TIME_FORMAT))
+ REPLACE_TABLE_ID_CHAR
+ RandomStringUtils.randomAlphanumeric(6).toLowerCase();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@

import java.time.format.DateTimeFormatter;
import java.util.regex.Pattern;
import org.apache.commons.lang3.RandomStringUtils;

public class DatastreamResourceManagerUtils {

private static final int MAX_RESOURCE_ID_LENGTH = 60;
// Every character other than a letter, digit, hyphen or space is treated as illegal.
private static final Pattern ILLEGAL_RESOURCE_ID_CHARS = Pattern.compile("[^a-zA-Z0-9- ]");
// Used both to replace illegal characters and to join the ID parts, so it must be a character
// that ILLEGAL_RESOURCE_ID_CHARS accepts. "_" is rejected by that pattern; "-" is accepted.
private static final String REPLACE_CHAR = "-";
// Second-resolution timestamp pattern. Kept as a String because its length is used when
// trimming the base resource ID.
private static final String TIME_FORMAT = "yyyyMMdd-HHmmss";

private DatastreamResourceManagerUtils() {}

Expand All @@ -41,7 +42,30 @@ private DatastreamResourceManagerUtils() {}
* @return a Datastream compatible resource ID.
*/
static String generateDatastreamId(String resourceId) {

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
resourceId =
resourceId.substring(
0,
Math.min(
resourceId.length(),
MAX_RESOURCE_ID_LENGTH
- REPLACE_CHAR.length()
- TIME_FORMAT.length()
- REPLACE_CHAR.length()
- randomSuffixLength));

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
return generateResourceId(
resourceId, ILLEGAL_RESOURCE_ID_CHARS, "-", MAX_RESOURCE_ID_LENGTH, TIME_FORMAT);
resourceId,
ILLEGAL_RESOURCE_ID_CHARS,
REPLACE_CHAR,
MAX_RESOURCE_ID_LENGTH,
DateTimeFormatter.ofPattern(TIME_FORMAT))
+ REPLACE_CHAR
+ RandomStringUtils.randomAlphanumeric(6).toLowerCase();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.regex.Pattern;
import org.apache.beam.it.gcp.spanner.SpannerResourceManager;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CharMatcher;
import org.apache.commons.lang3.RandomStringUtils;

/** Utilities for {@link SpannerResourceManager} implementations. */
public final class SpannerResourceManagerUtils {
Expand All @@ -34,10 +35,8 @@ public final class SpannerResourceManagerUtils {
private static final String REPLACE_DATABASE_CHAR = "_";
public static final int MAX_INSTANCE_ID_LENGTH = 30;
public static final int MAX_DATABASE_ID_LENGTH = 30;
private static final DateTimeFormatter INSTANCE_TIME_FORMAT =
DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss-SSSSSS");
private static final DateTimeFormatter DATABASE_TIME_FORMAT =
DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss_SSSSSS");
private static final String INSTANCE_TIME_FORMAT = "yyyyMMdd-HHmmss";
private static final String DATABASE_TIME_FORMAT = "yyyyMMdd_HHmmss";

private SpannerResourceManagerUtils() {}

Expand All @@ -50,13 +49,27 @@ private SpannerResourceManagerUtils() {}
public static String generateDatabaseId(String baseString) {
checkArgument(baseString.length() != 0, "baseString cannot be empty!");

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
baseString =
baseString.substring(
0,
Math.min(
baseString.length(),
MAX_DATABASE_ID_LENGTH
- REPLACE_DATABASE_CHAR.length()
- DATABASE_TIME_FORMAT.length()
- REPLACE_DATABASE_CHAR.length()
- randomSuffixLength));

String databaseId =
generateResourceId(
baseString,
ILLEGAL_DATABASE_CHARS,
REPLACE_DATABASE_CHAR,
MAX_DATABASE_ID_LENGTH,
DATABASE_TIME_FORMAT);
DateTimeFormatter.ofPattern(DATABASE_TIME_FORMAT));

// replace hyphen with underscore, so there's no need for backticks
String trimmed = CharMatcher.is('_').trimTrailingFrom(databaseId);
Expand All @@ -71,6 +84,14 @@ public static String generateDatabaseId(String baseString) {
if (!Character.isLetter(trimmed.charAt(0))) {
trimmed = padding + trimmed.substring(1);
}

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
trimmed =
trimmed
+ REPLACE_DATABASE_CHAR
+ RandomStringUtils.randomAlphanumeric(randomSuffixLength).toLowerCase();

return trimmed;
}

Expand All @@ -81,13 +102,28 @@ public static String generateDatabaseId(String baseString) {
* @return The instance id string.
*/
public static String generateInstanceId(String baseString) {

// Take substring of baseString to account for random suffix
// TODO(polber) - remove with Beam 2.57.0
int randomSuffixLength = 6;
baseString =
baseString.substring(
0,
Math.min(
baseString.length(),
MAX_INSTANCE_ID_LENGTH
- REPLACE_INSTANCE_CHAR.length()
- INSTANCE_TIME_FORMAT.length()
- REPLACE_INSTANCE_CHAR.length()
- randomSuffixLength));

String instanceId =
generateResourceId(
baseString,
ILLEGAL_INSTANCE_CHARS,
REPLACE_INSTANCE_CHAR,
MAX_INSTANCE_ID_LENGTH,
INSTANCE_TIME_FORMAT);
DateTimeFormatter.ofPattern(INSTANCE_TIME_FORMAT));

// if first char is not a letter, replace with letter, so it doesn't
// violate spanner's instance naming rules
Expand All @@ -96,6 +132,11 @@ public static String generateInstanceId(String baseString) {
instanceId = padding + instanceId.substring(1);
}

// Add random suffix to avoid collision
// TODO(polber) - remove with Beam 2.57.0
instanceId =
instanceId + REPLACE_INSTANCE_CHAR + RandomStringUtils.randomAlphanumeric(6).toLowerCase();

return instanceId;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ public void testGetProjectIdReturnsCorrectValue() {
public void testGetDatasetIdReturnsCorrectValue() {
  BigQueryResourceManager tm = BigQueryResourceManager.builder(TEST_ID, PROJECT_ID, null).build();

  // Expected shape: <test id with '-' replaced by '_'>_<yyyyMMdd>_<HHmmss>_<6-char suffix>.
  // BigQueryResourceManagerUtils.generateDatasetId lower-cases the random suffix, so upper-case
  // letters are deliberately not accepted here.
  // NOTE(review): assumes the builder derives the dataset ID via generateDatasetId - confirm.
  assertThat(tm.getDatasetId())
      .matches(TEST_ID.replace('-', '_') + "_\\d{8}_\\d{6}_[a-z0-9]{6}");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public void testCreateResourceManagerCreatesCorrectIdValues() throws IOException
BigtableResourceManager.builder(TEST_ID, PROJECT_ID, null),
bigtableResourceManagerClientFactory);

assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}");
assertThat(rm.getInstanceId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}");
assertThat(rm.getProjectId()).matches(PROJECT_ID);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public void testGenerateDefaultClustersShouldWorkWhenAllParametersValid() {
generateDefaultClusters(TEST_ID, ZONE, NUM_NODES, STORAGE_TYPE);
BigtableResourceManagerCluster thisCluster = cluster.iterator().next();

assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}");
assertThat(thisCluster.clusterId()).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}");
assertThat(thisCluster.zone()).isEqualTo(ZONE);
assertThat(thisCluster.numNodes()).isEqualTo(NUM_NODES);
assertThat(thisCluster.storageType()).isEqualTo(STORAGE_TYPE);
Expand All @@ -58,7 +58,8 @@ public void testGenerateDefaultClustersShouldThrowErrorWhenTestIdIsEmpty() {
public void testGenerateDefaultClustersShouldShortenTestIdWhenTooLong() {
  Iterable<BigtableResourceManagerCluster> cluster =
      generateDefaultClusters("longer-id", ZONE, NUM_NODES, STORAGE_TYPE);

  // "longer-id" is expected to be cut down to "longer-" before the separator, timestamp and
  // random suffix are appended. generateDefaultClusters lower-cases the suffix, so the pattern
  // only accepts lower-case letters and digits.
  assertThat(cluster.iterator().next().clusterId())
      .matches("longer--\\d{8}-\\d{6}-[a-z0-9]{6}");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ public void testExecuteDdlStatementShouldWorkWhenSpannerDoesntThrowAnyError()
String actualDatabaseId = databaseIdCaptor.getValue();
Iterable<String> actualStatement = statementCaptor.getValue();

assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-\\d{6}");
assertThat(actualInstanceId).matches(TEST_ID + "-\\d{8}-\\d{6}-[a-zA-Z0-9]{6}");

assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_\\d{6}");
assertThat(actualDatabaseId).matches(TEST_ID + "_\\d{8}_\\d{6}_[a-zA-Z0-9]{6}");
assertThat(actualStatement).containsExactlyElementsIn(ImmutableList.of(statement));
}

Expand Down

0 comments on commit 623e467

Please sign in to comment.