Skip to content

Commit

Permalink
Add groupName attribute to v2 template where it's needed (part 1)
Browse files Browse the repository at this point in the history
  • Loading branch information
akashorabek authored and Amar3tto committed May 13, 2024
1 parent 4f44197 commit b50fa42
Show file tree
Hide file tree
Showing 30 changed files with 232 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ interface BigQueryWriteOptions extends PipelineOptions, DataflowPipelineOptions
@TemplateParameter.BigQueryTable(
order = 1,
description = "BigQuery output table",
groupName = "Target",
optional = true,
helpText =
"The BigQuery table location to write the output to. Use the format `<PROJECT_ID>:<DATASET_NAME>.<TABLE_NAME>`. The table's schema must match the input objects.")
Expand All @@ -45,6 +46,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 1,
description = "Astra token",
groupName = "Source",
helpText = "The token value or secret resource ID.",
example = "AstraCS:abcdefghij")
@Validation.Required
Expand All @@ -57,6 +59,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 2,
description = "Database identifier",
groupName = "Source",
helpText = "The database unique identifier (UUID).",
example = "cf7af129-d33a-498f-ad06-d97a6ee6eb7")
@Validation.Required
Expand All @@ -69,6 +72,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 3,
description = "Cassandra keyspace",
groupName = "Source",
regexes = {"^[a-zA-Z0-9][a-zA-Z0-9_]{0,47}$"},
helpText = "The name of the Cassandra keyspace inside of the Astra database.")
String getAstraKeyspace();
Expand All @@ -79,6 +83,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 4,
description = "Cassandra table",
groupName = "Source",
regexes = {"^[a-zA-Z][a-zA-Z0-9_]*$"},
helpText = "The name of the table inside of the Cassandra database.",
example = "my_table")
Expand All @@ -91,6 +96,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 5,
optional = true,
groupName = "Source",
description = "Cassandra CQL Query",
helpText = "The query to use to filter rows instead of reading the whole table.")
@SuppressWarnings("unused")
Expand All @@ -102,6 +108,7 @@ interface AstraDbSourceOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 6,
optional = true,
groupName = "Source",
description = "Astra Database Region",
helpText =
"If not provided, a default is chosen, which is useful with multi-region databases.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ public interface AzureEventhubToPubsubOptions extends PipelineOptions {
order = 1,
optional = false,
regexes = {"[,\\/:a-zA-Z0-9._-]+"},
groupName = "Source",
description = "Azure Event Hub endpoint",
helpText = "Server IP or DNS for Azure Eventhub Endpoint",
example = "mynamespace.servicebus.windows.net:9093")
Expand All @@ -132,6 +133,7 @@ public interface AzureEventhubToPubsubOptions extends PipelineOptions {
order = 2,
optional = false,
regexes = {"[a-zA-Z0-9._-]+"},
groupName = "Source",
description = "Azure Eventhub topic(s) to read the input from",
helpText = "Azure Eventhub topic(s) to read the input from",
example = "topic")
Expand All @@ -143,6 +145,7 @@ public interface AzureEventhubToPubsubOptions extends PipelineOptions {
@TemplateParameter.PubsubTopic(
order = 3,
description = "Output Pub/Sub topic",
groupName = "Target",
helpText =
"The name of the topic to which data should published, in the format of 'projects/your-project-id/topics/your-topic-name'",
example = "projects/your-project-id/topics/your-topic-name")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
@TemplateParameter.BigQueryTable(
order = 1,
description = "BigQuery table to export",
groupName = "Source",
helpText = "The BigQuery input table location.",
example = "your-project:your-dataset.your-table-name")
@Required
Expand All @@ -156,6 +157,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
@TemplateParameter.GcsWriteFile(
order = 2,
description = "Output Cloud Storage file(s)",
groupName = "Target",
helpText = "The Cloud Storage folder to write the Parquet files to.",
example = "gs://your-bucket/export/")
@Required
Expand All @@ -166,6 +168,7 @@ public interface BigQueryToParquetOptions extends PipelineOptions {
@TemplateParameter.Integer(
order = 3,
optional = true,
groupName = "Target",
description = "Maximum output shards",
helpText = "The number of output file shards. The default value is 1.")
@Default.Integer(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public interface BigtableToHbasePipelineOptions
/** Hbase specific configs. Mirrors configurations on hbase-site.xml. */
@TemplateParameter.Text(
description = "Zookeeper quorum host",
groupName = "Target",
helpText = "Zookeeper quorum host, corresponds to hbase.zookeeper.quorum host")
String getHbaseZookeeperQuorumHost();

Expand All @@ -83,6 +84,7 @@ public interface BigtableToHbasePipelineOptions
@TemplateParameter.Text(
optional = true,
description = "Zookeeper quorum port",
groupName = "Target",
helpText = "Zookeeper quorum port, corresponds to hbase.zookeeper.quorum port")
@Default.String("2181")
String getHbaseZookeeperQuorumPort();
Expand All @@ -91,6 +93,7 @@ public interface BigtableToHbasePipelineOptions

@TemplateParameter.Text(
description = "Hbase root directory",
groupName = "Target",
helpText = "Hbase root directory, corresponds to hbase.rootdir")
String getHbaseRootDir();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Text(
order = 1,
optional = true,
groupName = "Source",
regexes = {"[,a-zA-Z0-9._-]+"},
description = "Pub/Sub topic(s) to read from",
helpText = "Comma-separated list of PubSub topics to where CDC data is being pushed.")
Expand All @@ -106,6 +107,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Text(
order = 2,
regexes = {"[^/]+"},
groupName = "Source",
description = "Input subscriptions to the template",
helpText =
"The comma-separated list of Pub/Sub input subscriptions to read from, in the format `<SUBSCRIPTION_NAME>,<SUBSCRIPTION_NAME>, ...`")
Expand All @@ -116,6 +118,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Text(
order = 3,
regexes = {".+"},
groupName = "Target",
description = "Output BigQuery dataset for Changelog tables",
helpText =
"The BigQuery dataset to store the staging tables in, in the format <DATASET_NAME>.")
Expand All @@ -126,6 +129,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Text(
order = 4,
regexes = {".+"},
groupName = "Target",
description = "Output BigQuery dataset for replica tables",
helpText =
"The location of the BigQuery dataset to store the replica tables in, in the format <DATASET_NAME>.")
Expand All @@ -136,6 +140,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Integer(
order = 5,
optional = true,
groupName = "Target",
description = "Frequency to issue updates to BigQuery tables (seconds).",
helpText =
"The interval at which the pipeline updates the BigQuery table replicating the MySQL database.")
Expand All @@ -146,6 +151,7 @@ public interface CdcApplierOptions extends PipelineOptions, BigQueryStorageApiSt
@TemplateParameter.Boolean(
order = 6,
optional = true,
groupName = "Source",
description = "Whether to use a single topic for all MySQL table changes.",
helpText =
"Set this to true if you have configured your Debezium connector to publish all table"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
public interface BigQueryDeadletterOptions extends PipelineOptions {

@TemplateParameter.BigQueryTable(
groupName = "Target",
order = 4,
optional = true,
description = "Table for messages failed to reach the output table (i.e., Deadletter table)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public interface DataplexBigQueryToGcsOptions
@TemplateParameter.Text(
order = 1,
optional = false,
groupName = "Source",
regexes = {
"^(projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/lakes\\/[^\\n\\r\\/]+\\/zones\\/[^\\n\\r\\/]+\\/assets\\/[^\\n\\r\\/]+|projects\\/[^\\n\\r\\/]+\\/datasets\\/[^\\n\\r\\/]+)$"
},
Expand All @@ -54,6 +55,7 @@ public interface DataplexBigQueryToGcsOptions
order = 2,
optional = true,
regexes = {"^[a-zA-Z0-9_-]+(,[a-zA-Z0-9_-]+)*$"},
groupName = "Source",
description = "Source BigQuery tables to tier.",
helpText =
"A comma-separated list of BigQuery tables to tier. If none specified, all tables will be tiered. Tables should be specified by their name only (no project/dataset prefix). Case-sensitive!")
Expand All @@ -67,6 +69,7 @@ public interface DataplexBigQueryToGcsOptions
regexes = {
"^projects\\/[^\\n\\r\\/]+\\/locations\\/[^\\n\\r\\/]+\\/lakes\\/[^\\n\\r\\/]+\\/zones\\/[^\\n\\r\\/]+\\/assets\\/[^\\n\\r\\/]+$"
},
groupName = "Target",
description = "Dataplex asset name for the destination Cloud Storage bucket.",
helpText =
"Dataplex asset name for the Cloud Storage bucket to tier data to. Format: projects/<name>/locations/<loc>/lakes/<lake-name>/zones/<zone-name>/assets/<asset name>.")
Expand All @@ -90,6 +93,7 @@ public interface DataplexBigQueryToGcsOptions

@TemplateParameter.Integer(
order = 5,
groupName = "Source",
description = "Maximum number of parallel requests.",
helpText =
"The maximum number of parallel requests that will be sent to BigQuery when loading table/partition metadata.")
Expand All @@ -103,6 +107,7 @@ public interface DataplexBigQueryToGcsOptions
order = 6,
enumOptions = {@TemplateEnumOption("AVRO"), @TemplateEnumOption("PARQUET")},
optional = true,
groupName = "Target",
description = "Output file format in Cloud Storage.",
helpText = "Output file format in Cloud Storage. Format: PARQUET or AVRO.")
@Default.Enum("PARQUET")
Expand All @@ -120,6 +125,7 @@ public interface DataplexBigQueryToGcsOptions
@TemplateEnumOption("BZIP2")
},
optional = true,
groupName = "Target",
description = "Output file compression in Cloud Storage.",
helpText =
"Output file compression. Format: UNCOMPRESSED, SNAPPY, GZIP, or BZIP2. BZIP2 not supported for PARQUET files.")
Expand All @@ -146,6 +152,7 @@ public interface DataplexBigQueryToGcsOptions
@TemplateEnumOption("SKIP")
},
optional = true,
groupName = "Target",
description = "Action that occurs if a destination file already exists.",
helpText =
"Specifies the action that occurs if a destination file already exists. Format: OVERWRITE, FAIL, SKIP. If SKIP, only files that don't exist in the destination directory will be processed. If FAIL and at least one file already exists, no data will be processed and an error will be produced.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public interface DataplexJdbcIngestionOptions
regexes = {
"(^jdbc:[a-zA-Z0-9/:@.?_+!*=&-;]+$)|(^([A-Za-z0-9+/]{4}){1,}([A-Za-z0-9+/]{0,3})={0,3})"
},
groupName = "Source",
description = "JDBC connection URL string.",
helpText =
"Url connection string to connect to the JDBC source. Connection string can be passed in"
Expand All @@ -52,6 +53,7 @@ public interface DataplexJdbcIngestionOptions
void setConnectionURL(String connectionURL);

@TemplateParameter.Text(
groupName = "Source",
order = 2,
optional = false,
regexes = {"^.+$"},
Expand All @@ -64,6 +66,7 @@ public interface DataplexJdbcIngestionOptions
void setDriverClassName(String driverClassName);

@TemplateParameter.Text(
groupName = "Source",
order = 3,
optional = false,
regexes = {"^.+$"},
Expand All @@ -76,6 +79,7 @@ public interface DataplexJdbcIngestionOptions
void setDriverJars(String driverJar);

@TemplateParameter.Text(
groupName = "Source",
order = 4,
optional = true,
regexes = {"^[a-zA-Z0-9_;!*&=@#-:\\/]+$"},
Expand All @@ -90,6 +94,7 @@ public interface DataplexJdbcIngestionOptions
void setConnectionProperties(String connectionProperties);

@TemplateParameter.Text(
groupName = "Source",
order = 5,
optional = true,
regexes = {"^.+$"},
Expand All @@ -103,6 +108,7 @@ public interface DataplexJdbcIngestionOptions
void setUsername(String username);

@TemplateParameter.Password(
groupName = "Source",
order = 6,
optional = true,
description = "JDBC connection password.",
Expand All @@ -115,6 +121,7 @@ public interface DataplexJdbcIngestionOptions
void setPassword(String password);

@TemplateParameter.Text(
groupName = "Source",
order = 7,
optional = false,
regexes = {"^.+$"},
Expand All @@ -127,6 +134,7 @@ public interface DataplexJdbcIngestionOptions
void setQuery(String query);

@TemplateParameter.Text(
groupName = "Target",
order = 8,
optional = false,
regexes = {"^.+$"},
Expand All @@ -144,6 +152,7 @@ public interface DataplexJdbcIngestionOptions
@TemplateParameter.KmsEncryptionKey(
order = 9,
optional = true,
groupName = "Target",
description = "Google Cloud KMS key",
helpText =
"If this parameter is provided, password, user name and connection string should all be"
Expand All @@ -170,6 +179,7 @@ public interface DataplexJdbcIngestionOptions
+ "\\r"
+ "\\/]+$"
},
groupName = "Target",
description = "Dataplex output asset ID",
helpText =
"Dataplex output asset ID to which the results are stored to. Should be in the format of"
Expand All @@ -187,6 +197,7 @@ public interface DataplexJdbcIngestionOptions
@TemplateEnumOption("MONTHLY")
},
optional = true,
groupName = "Target",
description = "The partition scheme when writing the file.",
helpText = "The partition scheme when writing the file. Format: DAILY or MONTHLY or HOURLY.")
@Default.Enum("DAILY")
Expand All @@ -197,6 +208,7 @@ public interface DataplexJdbcIngestionOptions
@TemplateParameter.Text(
order = 12,
optional = true,
groupName = "Target",
description = "The partition column on which the partition is based.",
helpText =
"The partition column on which the partition is based. The column type must be of"
Expand All @@ -218,6 +230,7 @@ public interface DataplexJdbcIngestionOptions
@TemplateEnumOption("WRITE_EMPTY")
},
optional = true,
groupName = "Target",
description = "BigQuery write disposition type",
helpText =
"Strategy to employ if the target file/table exists. If the table exists - should it"
Expand All @@ -232,6 +245,7 @@ public interface DataplexJdbcIngestionOptions
order = 14,
enumOptions = {@TemplateEnumOption("AVRO"), @TemplateEnumOption("PARQUET")},
optional = true,
groupName = "Target",
description = "Output file format in Cloud Storage.",
helpText = "Output file format in Cloud Storage. Format: PARQUET or AVRO.")
@Default.Enum("PARQUET")
Expand All @@ -243,6 +257,7 @@ public interface DataplexJdbcIngestionOptions
order = 15,
optional = true,
description = "Whether to use column alias to map the rows.",
groupName = "Target",
helpText =
"If enabled (set to true) the pipeline will consider column alias (\"AS\") instead of the"
+ " column name to map the rows to BigQuery. Defaults to false.")
Expand All @@ -254,6 +269,7 @@ public interface DataplexJdbcIngestionOptions
@TemplateParameter.Integer(
order = 16,
optional = true,
groupName = "Target",
description = "Set the data size going to be fetched and loaded in memory per Jdbc call.",
helpText =
"It should ONLY be used if the default value throws memory errors. If not set, using Beam's default "
Expand Down
Loading

0 comments on commit b50fa42

Please sign in to comment.