Modified parameters for Dataflow templates #1502

Merged
Changes from 7 commits (19 commits total)
21fd114
Modified parameters for testing
tamannakakkar93 Apr 26, 2024
0c3e37e
Modified parameters for Dataflow templates
tamannakakkar93 May 7, 2024
b2a3b84
Merge branch 'main' into tamannakakkar-dataflowTemplates
tamannakakkar93 May 13, 2024
e84c30a
Apply suggestions from code review
tamannakakkar93 May 15, 2024
d539607
Added changes based on comments
tamannakakkar93 May 15, 2024
cb89886
spotless apply
tamannakakkar93 May 15, 2024
8ffccb3
fixing spaces
tamannakakkar93 May 15, 2024
aa3bf9f
Update v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableTo…
tamannakakkar93 May 16, 2024
279b28e
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
c33f5b7
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
de33b1b
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
3daa6bb
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
5f5561d
Update v2/elasticsearch-common/src/main/java/com/google/cloud/telepor…
tamannakakkar93 May 16, 2024
0efa344
Update v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/…
tamannakakkar93 May 16, 2024
4c271be
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
6f8d8e1
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
5a53bed
Update v2/elasticsearch-common/src/main/java/com/google/cloud/telepor…
tamannakakkar93 May 16, 2024
c2994bb
Merge branch 'GoogleCloudPlatform:main' into tamannakakkar-dataflowTe…
sharan-malyala May 20, 2024
cb48c4c
Merge branch 'GoogleCloudPlatform:main' into tamannakakkar-dataflowTe…
sharan-malyala May 21, 2024
@@ -76,8 +76,7 @@ public interface Options extends PipelineOptions {
order = 1,
description = "Project ID",
helpText =
"The ID of the Google Cloud project of the Cloud Bigtable instance that you want to"
+ " read data from")
"The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.")
ValueProvider<String> getBigtableProjectId();

@SuppressWarnings("unused")
@@ -87,7 +86,7 @@ public interface Options extends PipelineOptions {
order = 2,
regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"},
description = "Instance ID",
helpText = "The ID of the Cloud Bigtable instance that contains the table")
helpText = "The ID of the Bigtable instance that contains the table")
ValueProvider<String> getBigtableInstanceId();

@SuppressWarnings("unused")
@@ -97,7 +96,7 @@ public interface Options extends PipelineOptions {
order = 3,
regexes = {"[_a-zA-Z0-9][-_.a-zA-Z0-9]*"},
description = "Table ID",
helpText = "The ID of the Cloud Bigtable table to read")
helpText = "The ID of the Bigtable table to read from.")
ValueProvider<String> getBigtableTableId();

@SuppressWarnings("unused")
@@ -107,7 +106,7 @@ public interface Options extends PipelineOptions {
order = 4,
optional = true,
description = "Cloud Storage directory for storing JSON files",
helpText = "The Cloud Storage path where the output JSON files can be stored.",
helpText = "The Cloud Storage path where the output JSON files are stored.",
example = "gs://your-bucket/your-path/")
ValueProvider<String> getOutputDirectory();

@@ -117,7 +116,8 @@ public interface Options extends PipelineOptions {
@TemplateParameter.Text(
order = 5,
description = "JSON file prefix",
helpText = "The prefix of the JSON file name. For example, \"table1-\"")
helpText =
"The prefix of the JSON file name. For example, \"table1-\". If no value is provided, defaults to `part`.")
@Default.String("part")
ValueProvider<String> getFilenamePrefix();

@@ -130,7 +130,7 @@ public interface Options extends PipelineOptions {
enumOptions = {@TemplateEnumOption("FLATTEN"), @TemplateEnumOption("NONE")},
description = "User option",
helpText =
"User option: `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string.")
"Possible values are `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to `NONE`.")
@Default.String("NONE")
String getUserOption();

@@ -144,12 +144,11 @@ public interface Options extends PipelineOptions {
parentTriggerValues = {"FLATTEN"},
description = "Columns aliases",
helpText =
"Comma separated list of columns which are required for Vertex AI Vector Search Index."
+ " The `id` & `embedding` are required columns for Vertex Vector Search."
+ " You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are"
+ " `rowkey` and `cf:my_embedding`, in which `rowkey` and the embedding column is named differently,"
+ " `cf:my_embedding;embedding` and `rowkey;id` should be specified."
+ " Only used when FLATTEN user option is specified.")
"A comma-separated list of columns that are required for the Vertex AI Vector Search index. The"
+ " columns `id` and `embedding` are required for Vertex AI Vector Search. You can use the notation"
+ " `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, where"
+ " `rowkey` has a different name than the embedding column, specify `cf:my_embedding;embedding` and,"
+ " `rowkey;id`. Only use this option when the value for `userOption` is `FLATTEN`.")
ValueProvider<String> getColumnsAliases();

@SuppressWarnings("unused")
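For context, every string edited in this PR is the `helpText` of a `@TemplateParameter` annotation on a template's `Options` interface. Below is a minimal sketch of that pattern using the reworded strings from the hunks above; the interface name is hypothetical, and the `TemplateParameter` import is assumed to come from the repo's metadata module:

```java
import com.google.cloud.teleport.metadata.TemplateParameter; // assumed package
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;

/** Hypothetical options interface illustrating the helpText pattern edited above. */
public interface ExampleBigtableOptions extends PipelineOptions {

  @TemplateParameter.Text(
      order = 1,
      regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"},
      description = "Instance ID",
      helpText = "The ID of the Bigtable instance that contains the table")
  ValueProvider<String> getBigtableInstanceId();

  void setBigtableInstanceId(ValueProvider<String> value);

  @TemplateParameter.Text(
      order = 2,
      description = "JSON file prefix",
      helpText =
          "The prefix of the JSON file name. For example, \"table1-\". If no value is provided, defaults to `part`.")
  @Default.String("part")
  ValueProvider<String> getFilenamePrefix();

  void setFilenamePrefix(ValueProvider<String> value);
}
```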
@@ -102,7 +102,7 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
@TemplateParameter.Text(
order = 1,
description = "Cloud Storage Input File(s)",
helpText = "Path of the file pattern glob to read from.",
helpText = "The Cloud Storage path to the CSV file that contains the text to process.",
regexes = {"^gs:\\/\\/[^\\n\\r]+$"},
example = "gs://your-bucket/path/*.csv")
ValueProvider<String> getInputFilePattern();
@@ -112,31 +112,7 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
@TemplateParameter.GcsReadFile(
order = 2,
description = "Cloud Storage location of your BigQuery schema file, described as a JSON",
- helpText =
- "JSON file with BigQuery Schema description. JSON Example: {\n"
- + "\t\"BigQuery Schema\": [\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"location\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"name\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"age\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"color\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"coffee\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t}\n"
- + "\t]\n"
- + "}")
+ helpText = "The Cloud Storage path to the JSON file that defines your BigQuery schema.")
ValueProvider<String> getSchemaJSONPath();

void setSchemaJSONPath(ValueProvider<String> value);
@@ -145,16 +121,16 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
order = 3,
description = "BigQuery output table",
helpText =
"BigQuery table location to write the output to. The table's schema must match the "
+ "input objects.")
"The name of the BigQuery table that stores your processed data. If you reuse an existing "
+ "BigQuery table, the data is appended to the destination table.")
ValueProvider<String> getOutputTable();

void setOutputTable(ValueProvider<String> value);

@TemplateParameter.GcsWriteFolder(
order = 4,
description = "Temporary directory for BigQuery loading process",
helpText = "Temporary directory for BigQuery loading process",
helpText = "The temporary directory to use during the BigQuery loading process.",
example = "gs://your-bucket/your-files/temp_dir")
@Validation.Required
ValueProvider<String> getBigQueryLoadingTemporaryDirectory();
@@ -165,7 +141,10 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
order = 5,
description = "BigQuery output table for bad records",
helpText =
"BigQuery table location to write the bad record. The table's schema must match the {RawContent: STRING, ErrorMsg:STRING}")
"The name of the BigQuery table to use to store the rejected data when processing the"
+ " CSV files. If you reuse an existing BigQuery table, the data is appended to the"
+ " destination table. The schema of this table must match the"
+ " error table schema (https://cloud.google.com/dataflow/docs/guides/templates/provided/cloud-storage-csv-to-bigquery#GcsCSVToBigQueryBadRecordsSchema).")
ValueProvider<String> getBadRecordsOutputTable();

void setBadRecordsOutputTable(ValueProvider<String> value);
@@ -108,8 +108,7 @@ public interface Options
order = 2,
description = "Pub/Sub input topic",
helpText =
"Pub/Sub topic to read the input from, in the format of "
+ "'projects/your-project-id/topics/your-topic-name'")
"The Pub/Sub topic to subscribe to for message consumption. The topic name must be in the format projects/<PROJECT_ID>/topics/<TOPIC_NAME>.")
ValueProvider<String> getInputTopic();

void setInputTopic(ValueProvider<String> value);
@@ -126,8 +125,7 @@ public interface Options
order = 4,
description = "Output file directory in Cloud Storage",
helpText =
"The path and filename prefix for writing output files. Must end with a slash. DateTime"
+ " formatting is used to parse directory path for date & time formatters.")
"The output directory where output Avro files are archived. Must contain / at the end. For example: gs://example-bucket/example-directory/")
@Required
ValueProvider<String> getOutputDirectory();

@@ -137,7 +135,7 @@ public interface Options
order = 5,
optional = true,
description = "Output filename prefix of the files to write",
helpText = "The prefix to place on each windowed file.",
helpText = "The output filename prefix for the Avro files.",
regexes = "^[a-zA-Z\\-]+$")
@Default.String("output")
ValueProvider<String> getOutputFilenamePrefix();
@@ -148,9 +146,7 @@ public interface Options
order = 6,
optional = true,
description = "Output filename suffix of the files to write",
- helpText =
- "The suffix to place on each windowed file. Typically a file extension such "
- + "as .txt or .csv.")
+ helpText = "The output filename suffix for the Avro files.")
@Default.String("")
ValueProvider<String> getOutputFilenameSuffix();

@@ -159,7 +155,8 @@ public interface Options
@TemplateParameter.GcsWriteFolder(
order = 7,
description = "Temporary Avro write directory",
helpText = "Directory for temporary Avro files.")
helpText =
"The directory for temporary Avro files. Must contain / at the end. For example: gs://example-bucket/example-directory/.")
@Required
ValueProvider<String> getAvroTempDirectory();

@@ -88,7 +88,7 @@ public interface SpannerToTextOptions
order = 1,
optional = true,
description = "Cloud Storage temp directory for storing CSV files",
helpText = "The Cloud Storage path where the temporary CSV files can be stored.",
helpText = "The Cloud Storage path where temporary CSV files are written.",
example = "gs://your-bucket/your-path")
ValueProvider<String> getCsvTempDirectory();

@@ -105,8 +105,8 @@ public interface SpannerToTextOptions
optional = true,
description = "Priority for Spanner RPC invocations",
helpText =
"The request priority for Cloud Spanner calls. The value must be one of:"
+ " [HIGH,MEDIUM,LOW].")
"The request priority (https://cloud.google.com/spanner/docs/reference/rest/v1/RequestOptions)"
+ " for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.")
ValueProvider<RpcPriority> getSpannerPriority();

void setSpannerPriority(ValueProvider<RpcPriority> value);
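The `HIGH`, `MEDIUM`, and `LOW` strings in the new `spannerPriority` helpText correspond to the Spanner client's `RpcPriority` enum, which is also the `ValueProvider` type in the hunk above. A minimal sketch of that mapping, assuming the template resolves the string this way (illustrative, not code from this PR):

```java
import com.google.cloud.spanner.Options.RpcPriority;

public final class PriorityResolver {
  /** Illustrative: map the spannerPriority parameter onto the client enum. */
  public static RpcPriority resolve(String priority) {
    // Falls back to MEDIUM, the documented default, when the parameter is unset.
    return (priority == null || priority.isEmpty())
        ? RpcPriority.MEDIUM
        : RpcPriority.valueOf(priority);
  }
}
```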
@@ -75,7 +75,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
order = 1,
optional = true,
description = "Whether input CSV files contain a header record.",
helpText = "Input CSV files contain a header record (true/false).")
helpText = "Whether headers are included in the CSV file. Defaults to: false.")
@Default.Boolean(false)
ValueProvider<Boolean> getContainsHeaders();

@@ -84,8 +84,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 2,
description = "Column delimiter of the data files.",
- helpText =
- "The column delimiter of the input text files. Default: use delimiter provided in csvFormat",
+ helpText = "The column delimiter that the CSV file uses.",
example = ",")
ValueProvider<String> getDelimiter();

@@ -94,9 +93,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 3,
description = "CSV Format to use for parsing records.",
- helpText =
- "CSV format specification to use for parsing records. See https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html for more details. Must match format names exactly found at: "
- + "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.Predefined.html")
+ helpText = "The CSV format according to Apache Commons CSV format. Defaults to: Default.")
ValueProvider<String> getCsvFormat();

void setCsvFormat(ValueProvider<String> csvFormat);
@@ -107,8 +104,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
regexes = {"^(US-ASCII|ISO-8859-1|UTF-8|UTF-16)$"},
description = "CSV file encoding",
helpText =
"CSV file character encoding format. Allowed Values are US-ASCII"
+ ", ISO-8859-1, UTF-8, UTF-16")
"The CSV file character encoding format. Allowed Values are US-ASCII, ISO-8859-1, UTF-8, and UTF-16.")
@Default.String("UTF-8")
ValueProvider<String> getCsvFileEncoding();

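The old `csvFormat` helpText linked to Commons CSV's `CSVFormat.Predefined` names, and the new text keeps `Default` as the default value. A minimal sketch of how such a string resolves to a format, assuming the lookup goes through that enum (illustrative):

```java
import org.apache.commons.csv.CSVFormat;

public final class CsvFormatResolver {
  /** Illustrative: resolve predefined names like "Default", "Excel", or "TDF". */
  public static CSVFormat resolve(String csvFormatName) {
    // Throws IllegalArgumentException for names that are not predefined formats.
    return CSVFormat.Predefined.valueOf(csvFormatName).getFormat();
  }
}
```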
@@ -85,7 +85,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 1,
regexes = {"^.+$"},
description = "Spanner Table",
helpText = "Spanner Table to read from")
helpText = "The Spanner table to read the data from.")
ValueProvider<String> getSpannerTable();

@SuppressWarnings("unused")
@@ -95,7 +95,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 2,
description = "Read data from Cloud Spanner Project Id",
helpText =
"The Google Cloud Project Id of the Cloud Spanner database that you want to read data from")
"The ID of the Google Cloud project that contains the Spanner database to read data from.")
ValueProvider<String> getSpannerProjectId();

@SuppressWarnings("unused")
@@ -105,7 +105,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 3,
regexes = {".+"},
description = "Read data from Cloud Spanner Instance",
helpText = "Instance of requested table.")
helpText = "The instance ID of the requested table.")
ValueProvider<String> getSpannerInstanceId();

@SuppressWarnings("unused")
@@ -115,7 +115,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 4,
regexes = {".+"},
description = "Read data from Cloud Spanner Database ",
helpText = "Database of requested table.")
helpText = "The database ID of the requested table.")
ValueProvider<String> getSpannerDatabaseId();

@SuppressWarnings("unused")
@@ -141,10 +141,10 @@ public interface SpannerReadOptions extends PipelineOptions {
},
description = "Snapshot time",
helpText =
"If set, specifies the time when the snapshot must be taken."
+ " String is in the RFC 3339 format in UTC time. "
+ " Timestamp must be in the past and Maximum timestamp staleness applies."
+ "https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness",
"The timestamp that corresponds to the version of the Spanner database that you want to read."
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
+ " The timestamp must be specified as per RFC 3339 (https://tools.ietf.org/html/rfc3339) UTC \"Zulu\" format."
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
+ " The timestamp must be in the past and"
+ " Maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies.",
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
example = "1990-12-31T23:59:60Z")
@Default.String(value = "")
ValueProvider<String> getSpannerSnapshotTime();
@@ -157,9 +157,10 @@ public interface SpannerReadOptions extends PipelineOptions {
optional = true,
description = "Use independent compute resource (Spanner DataBoost).",
helpText =
"Use Spanner on-demand compute so the export job will run on independent compute"
+ " resources and have no impact to current Spanner workloads. This will incur"
+ " additional charges in Spanner.")
"Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero"
+ " impact on Spanner OLTP workflows. When true, requires the `spanner.databases.useDataBoost` Identity and"
+ " Access Management (IAM) permission. For more information, see"
+ " Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).")
@Default.Boolean(false)
ValueProvider<Boolean> getDataBoostEnabled();

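The reworded `spannerSnapshotTime` helpText describes an RFC 3339 timestamp bounded by Spanner's maximum timestamp staleness. A sketch of how an optional snapshot time typically becomes a read bound in the Spanner client (illustrative, assuming the empty default means a strong read; not code from this PR):

```java
import com.google.cloud.Timestamp;
import com.google.cloud.spanner.TimestampBound;

public final class SnapshotTimeToBound {
  /** Illustrative: empty means a strong read; otherwise read at the given time. */
  public static TimestampBound toBound(String snapshotTime) {
    return snapshotTime.isEmpty()
        ? TimestampBound.strong()
        : TimestampBound.ofReadTimestamp(Timestamp.parseTimestamp(snapshotTime));
  }
}
```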
@@ -81,7 +81,7 @@ public interface BigQueryToBigtableOptions
order = 1,
regexes = {"[A-Za-z_][A-Za-z_0-9]*"},
description = "Unique identifier column",
helpText = "Name of the BigQuery column storing the unique identifier of the row")
helpText = "The name of the BigQuery column storing the unique identifier of the row.")
@Required
String getReadIdColumn();

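All of the parameters touched in this PR are `ValueProvider`s, so they resolve when the job runs rather than when the template is built. A minimal sketch of that behavior, reusing the hypothetical `ExampleBigtableOptions` from the first sketch above:

```java
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider;

public final class ResolveAtRuntime {
  public static void main(String[] args) {
    ExampleBigtableOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(ExampleBigtableOptions.class);

    ValueProvider<String> prefix = options.getFilenamePrefix();
    // During template creation the runtime value may not exist yet;
    // isAccessible() guards against calling get() too early.
    if (prefix.isAccessible()) {
      System.out.println("Filename prefix: " + prefix.get());
    }
  }
}
```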