Modified parameters for Dataflow templates #1502

Merged
Changes from 7 commits (19 commits total)
21fd114
Modified parameters for testing
tamannakakkar93 Apr 26, 2024
0c3e37e
Modified parameters for Dataflow templates
tamannakakkar93 May 7, 2024
b2a3b84
Merge branch 'main' into tamannakakkar-dataflowTemplates
tamannakakkar93 May 13, 2024
e84c30a
Apply suggestions from code review
tamannakakkar93 May 15, 2024
d539607
Added changes based on comments
tamannakakkar93 May 15, 2024
cb89886
spotless apply
tamannakakkar93 May 15, 2024
8ffccb3
fixing spaces
tamannakakkar93 May 15, 2024
aa3bf9f
Update v1/src/main/java/com/google/cloud/teleport/bigtable/BigtableTo…
tamannakakkar93 May 16, 2024
279b28e
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
c33f5b7
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
de33b1b
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
3daa6bb
Update v2/common/src/main/java/com/google/cloud/teleport/v2/transform…
tamannakakkar93 May 16, 2024
5f5561d
Update v2/elasticsearch-common/src/main/java/com/google/cloud/telepor…
tamannakakkar93 May 16, 2024
0efa344
Update v2/pubsub-to-redis/src/main/java/com/google/cloud/teleport/v2/…
tamannakakkar93 May 16, 2024
4c271be
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
6f8d8e1
Update v1/src/main/java/com/google/cloud/teleport/templates/common/Sp…
tamannakakkar93 May 16, 2024
5a53bed
Update v2/elasticsearch-common/src/main/java/com/google/cloud/telepor…
tamannakakkar93 May 16, 2024
c2994bb
Merge branch 'GoogleCloudPlatform:main' into tamannakakkar-dataflowTe…
sharan-malyala May 20, 2024
cb48c4c
Merge branch 'GoogleCloudPlatform:main' into tamannakakkar-dataflowTe…
sharan-malyala May 21, 2024
@@ -76,8 +76,7 @@ public interface Options extends PipelineOptions {
order = 1,
description = "Project ID",
helpText =
"The ID of the Google Cloud project of the Cloud Bigtable instance that you want to"
+ " read data from")
"The ID for the Google Cloud project that contains the Bigtable instance that you want to read data from.")
ValueProvider<String> getBigtableProjectId();

@SuppressWarnings("unused")
@@ -87,7 +86,7 @@ public interface Options extends PipelineOptions {
order = 2,
regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"},
description = "Instance ID",
helpText = "The ID of the Cloud Bigtable instance that contains the table")
helpText = "The ID of the Bigtable instance that contains the table")
ValueProvider<String> getBigtableInstanceId();

@SuppressWarnings("unused")
@@ -97,7 +96,7 @@ public interface Options extends PipelineOptions {
order = 3,
regexes = {"[_a-zA-Z0-9][-_.a-zA-Z0-9]*"},
description = "Table ID",
helpText = "The ID of the Cloud Bigtable table to read")
helpText = "The ID of the Bigtable table to read from.")
ValueProvider<String> getBigtableTableId();

@SuppressWarnings("unused")
@@ -107,7 +106,7 @@ public interface Options extends PipelineOptions {
order = 4,
optional = true,
description = "Cloud Storage directory for storing JSON files",
helpText = "The Cloud Storage path where the output JSON files can be stored.",
helpText = "The Cloud Storage path where the output JSON files are stored.",
example = "gs://your-bucket/your-path/")
ValueProvider<String> getOutputDirectory();

@@ -117,7 +116,8 @@ public interface Options extends PipelineOptions {
@TemplateParameter.Text(
order = 5,
description = "JSON file prefix",
helpText = "The prefix of the JSON file name. For example, \"table1-\"")
helpText =
"The prefix of the JSON file name. For example, \"table1-\". If no value is provided, defaults to `part`.")
@Default.String("part")
ValueProvider<String> getFilenamePrefix();

@@ -130,7 +130,7 @@ public interface Options extends PipelineOptions {
enumOptions = {@TemplateEnumOption("FLATTEN"), @TemplateEnumOption("NONE")},
description = "User option",
helpText =
"User option: `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string.")
"Possible values are `FLATTEN` or `NONE`. `FLATTEN` flattens the row to the single level. `NONE` stores the whole row as a JSON string. Defaults to `NONE`.")
@Default.String("NONE")
String getUserOption();

@@ -144,12 +144,11 @@ public interface Options extends PipelineOptions {
parentTriggerValues = {"FLATTEN"},
description = "Columns aliases",
helpText =
"Comma separated list of columns which are required for Vertex AI Vector Search Index."
+ " The `id` & `embedding` are required columns for Vertex Vector Search."
+ " You can use the notation `fromfamily:fromcolumn;to`. For example, if the columns are"
+ " `rowkey` and `cf:my_embedding`, in which `rowkey` and the embedding column is named differently,"
+ " `cf:my_embedding;embedding` and `rowkey;id` should be specified."
+ " Only used when FLATTEN user option is specified.")
"A comma-separated list of columns that are required for the Vertex AI Vector Search index. The"
+ " columns `id` and `embedding` are required for Vertex AI Vector Search. You can use the notation"
+ " `fromfamily:fromcolumn;to`. For example, if the columns are `rowkey` and `cf:my_embedding`, where"
+ " `rowkey` has a different name than the embedding column, specify `cf:my_embedding;embedding` and,"
+ " `rowkey;id`. Only use this option when the value for `userOption` is `FLATTEN`.")
ValueProvider<String> getColumnsAliases();

@SuppressWarnings("unused")
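For context, every string edited in this PR is the `helpText` of a `@TemplateParameter` annotation on a template's `Options` interface. Below is a minimal sketch of that pattern using the reworded strings from the hunks above; the interface name is hypothetical, and the `TemplateParameter` import is assumed to come from the repo's metadata module:

```java
import com.google.cloud.teleport.metadata.TemplateParameter; // assumed package
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;

/** Hypothetical options interface illustrating the helpText pattern edited above. */
public interface ExampleBigtableOptions extends PipelineOptions {

  @TemplateParameter.Text(
      order = 1,
      regexes = {"[a-z][a-z0-9\\-]+[a-z0-9]"},
      description = "Instance ID",
      helpText = "The ID of the Bigtable instance that contains the table")
  ValueProvider<String> getBigtableInstanceId();

  void setBigtableInstanceId(ValueProvider<String> value);

  @TemplateParameter.Text(
      order = 2,
      description = "JSON file prefix",
      helpText =
          "The prefix of the JSON file name. For example, \"table1-\". If no value is provided, defaults to `part`.")
  @Default.String("part")
  ValueProvider<String> getFilenamePrefix();

  void setFilenamePrefix(ValueProvider<String> value);
}
```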
@@ -102,7 +102,7 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
@TemplateParameter.Text(
order = 1,
description = "Cloud Storage Input File(s)",
helpText = "Path of the file pattern glob to read from.",
helpText = "The Cloud Storage path to the CSV file that contains the text to process.",
regexes = {"^gs:\\/\\/[^\\n\\r]+$"},
example = "gs://your-bucket/path/*.csv")
ValueProvider<String> getInputFilePattern();
@@ -112,31 +112,7 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
@TemplateParameter.GcsReadFile(
order = 2,
description = "Cloud Storage location of your BigQuery schema file, described as a JSON",
- helpText =
- "JSON file with BigQuery Schema description. JSON Example: {\n"
- + "\t\"BigQuery Schema\": [\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"location\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"name\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"age\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"color\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t},\n"
- + "\t\t{\n"
- + "\t\t\t\"name\": \"coffee\",\n"
- + "\t\t\t\"type\": \"STRING\"\n"
- + "\t\t}\n"
- + "\t]\n"
- + "}")
+ helpText = "The Cloud Storage path to the JSON file that defines your BigQuery schema.")
ValueProvider<String> getSchemaJSONPath();

void setSchemaJSONPath(ValueProvider<String> value);
@@ -145,16 +121,16 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
order = 3,
description = "BigQuery output table",
helpText =
"BigQuery table location to write the output to. The table's schema must match the "
+ "input objects.")
"The name of the BigQuery table that stores your processed data. If you reuse an existing "
+ "BigQuery table, the data is appended to the destination table.")
ValueProvider<String> getOutputTable();

void setOutputTable(ValueProvider<String> value);

@TemplateParameter.GcsWriteFolder(
order = 4,
description = "Temporary directory for BigQuery loading process",
helpText = "Temporary directory for BigQuery loading process",
helpText = "The temporary directory to use during the BigQuery loading process.",
example = "gs://your-bucket/your-files/temp_dir")
@Validation.Required
ValueProvider<String> getBigQueryLoadingTemporaryDirectory();
@@ -165,7 +141,10 @@ public interface Options extends DataflowPipelineOptions, CsvPipelineOptions {
order = 5,
description = "BigQuery output table for bad records",
helpText =
"BigQuery table location to write the bad record. The table's schema must match the {RawContent: STRING, ErrorMsg:STRING}")
"The name of the BigQuery table to use to store the rejected data when processing the"
+ " CSV files. If you reuse an existing BigQuery table, the data is appended to the"
+ " destination table. The schema of this table must match the"
+ " error table schema (https://cloud.google.com/dataflow/docs/guides/templates/provided/cloud-storage-csv-to-bigquery#GcsCSVToBigQueryBadRecordsSchema).")
ValueProvider<String> getBadRecordsOutputTable();

void setBadRecordsOutputTable(ValueProvider<String> value);
@@ -108,8 +108,7 @@ public interface Options
order = 2,
description = "Pub/Sub input topic",
helpText =
"Pub/Sub topic to read the input from, in the format of "
+ "'projects/your-project-id/topics/your-topic-name'")
"The Pub/Sub topic to subscribe to for message consumption. The topic name must be in the format projects/<PROJECT_ID>/topics/<TOPIC_NAME>.")
ValueProvider<String> getInputTopic();

void setInputTopic(ValueProvider<String> value);
@@ -126,8 +125,7 @@ public interface Options
order = 4,
description = "Output file directory in Cloud Storage",
helpText =
"The path and filename prefix for writing output files. Must end with a slash. DateTime"
+ " formatting is used to parse directory path for date & time formatters.")
"The output directory where output Avro files are archived. Must contain / at the end. For example: gs://example-bucket/example-directory/")
@Required
ValueProvider<String> getOutputDirectory();

@@ -137,7 +135,7 @@ public interface Options
order = 5,
optional = true,
description = "Output filename prefix of the files to write",
helpText = "The prefix to place on each windowed file.",
helpText = "The output filename prefix for the Avro files.",
regexes = "^[a-zA-Z\\-]+$")
@Default.String("output")
ValueProvider<String> getOutputFilenamePrefix();
@@ -148,9 +146,7 @@ public interface Options
order = 6,
optional = true,
description = "Output filename suffix of the files to write",
- helpText =
- "The suffix to place on each windowed file. Typically a file extension such "
- + "as .txt or .csv.")
+ helpText = "The output filename suffix for the Avro files.")
@Default.String("")
ValueProvider<String> getOutputFilenameSuffix();

@@ -159,7 +155,8 @@ public interface Options
@TemplateParameter.GcsWriteFolder(
order = 7,
description = "Temporary Avro write directory",
helpText = "Directory for temporary Avro files.")
helpText =
"The directory for temporary Avro files. Must contain / at the end. For example: gs://example-bucket/example-directory/.")
@Required
ValueProvider<String> getAvroTempDirectory();

@@ -88,7 +88,7 @@ public interface SpannerToTextOptions
order = 1,
optional = true,
description = "Cloud Storage temp directory for storing CSV files",
helpText = "The Cloud Storage path where the temporary CSV files can be stored.",
helpText = "The Cloud Storage path where temporary CSV files are written.",
example = "gs://your-bucket/your-path")
ValueProvider<String> getCsvTempDirectory();

@@ -105,8 +105,8 @@ public interface SpannerToTextOptions
optional = true,
description = "Priority for Spanner RPC invocations",
helpText =
"The request priority for Cloud Spanner calls. The value must be one of:"
+ " [HIGH,MEDIUM,LOW].")
"The request priority (https://cloud.google.com/spanner/docs/reference/rest/v1/RequestOptions)"
+ " for Spanner calls. Possible values are `HIGH`, `MEDIUM`, `LOW`. The default value is `MEDIUM`.")
ValueProvider<RpcPriority> getSpannerPriority();

void setSpannerPriority(ValueProvider<RpcPriority> value);
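The `HIGH`, `MEDIUM`, and `LOW` strings in the new `spannerPriority` helpText correspond to the Spanner client's `RpcPriority` enum, which is also the `ValueProvider` type in the hunk above. A minimal sketch of that mapping, assuming the template resolves the string this way (illustrative, not code from this PR):

```java
import com.google.cloud.spanner.Options.RpcPriority;

public final class PriorityResolver {
  /** Illustrative: map the spannerPriority parameter onto the client enum. */
  public static RpcPriority resolve(String priority) {
    // Falls back to MEDIUM, the documented default, when the parameter is unset.
    return (priority == null || priority.isEmpty())
        ? RpcPriority.MEDIUM
        : RpcPriority.valueOf(priority);
  }
}
```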
@@ -75,7 +75,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
order = 1,
optional = true,
description = "Whether input CSV files contain a header record.",
helpText = "Input CSV files contain a header record (true/false).")
helpText = "Whether headers are included in the CSV file. Defaults to: false.")
@Default.Boolean(false)
ValueProvider<Boolean> getContainsHeaders();

@@ -84,8 +84,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 2,
description = "Column delimiter of the data files.",
- helpText =
- "The column delimiter of the input text files. Default: use delimiter provided in csvFormat",
+ helpText = "The column delimiter that the CSV file uses.",
example = ",")
ValueProvider<String> getDelimiter();

@@ -94,9 +93,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
@TemplateParameter.Text(
order = 3,
description = "CSV Format to use for parsing records.",
- helpText =
- "CSV format specification to use for parsing records. See https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html for more details. Must match format names exactly found at: "
- + "https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.Predefined.html")
+ helpText = "The CSV format according to Apache Commons CSV format. Defaults to: Default.")
ValueProvider<String> getCsvFormat();

void setCsvFormat(ValueProvider<String> csvFormat);
@@ -107,8 +104,7 @@ public interface CsvPipelineOptions extends PipelineOptions {
regexes = {"^(US-ASCII|ISO-8859-1|UTF-8|UTF-16)$"},
description = "CSV file encoding",
helpText =
"CSV file character encoding format. Allowed Values are US-ASCII"
+ ", ISO-8859-1, UTF-8, UTF-16")
"The CSV file character encoding format. Allowed Values are US-ASCII, ISO-8859-1, UTF-8, and UTF-16.")
@Default.String("UTF-8")
ValueProvider<String> getCsvFileEncoding();

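The old `csvFormat` helpText linked to Commons CSV's `CSVFormat.Predefined` names, and the new text keeps `Default` as the default value. A minimal sketch of how such a string resolves to a format, assuming the lookup goes through that enum (illustrative):

```java
import org.apache.commons.csv.CSVFormat;

public final class CsvFormatResolver {
  /** Illustrative: resolve predefined names like "Default", "Excel", or "TDF". */
  public static CSVFormat resolve(String csvFormatName) {
    // Throws IllegalArgumentException for names that are not predefined formats.
    return CSVFormat.Predefined.valueOf(csvFormatName).getFormat();
  }
}
```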
@@ -85,7 +85,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 1,
regexes = {"^.+$"},
description = "Spanner Table",
helpText = "Spanner Table to read from")
helpText = "The Spanner table to read the data from.")
ValueProvider<String> getSpannerTable();

@SuppressWarnings("unused")
@@ -95,7 +95,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 2,
description = "Read data from Cloud Spanner Project Id",
helpText =
"The Google Cloud Project Id of the Cloud Spanner database that you want to read data from")
"The ID of the Google Cloud project that contains the Spanner database to read data from.")
ValueProvider<String> getSpannerProjectId();

@SuppressWarnings("unused")
@@ -105,7 +105,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 3,
regexes = {".+"},
description = "Read data from Cloud Spanner Instance",
helpText = "Instance of requested table.")
helpText = "The instance ID of the requested table.")
ValueProvider<String> getSpannerInstanceId();

@SuppressWarnings("unused")
@@ -115,7 +115,7 @@ public interface SpannerReadOptions extends PipelineOptions {
order = 4,
regexes = {".+"},
description = "Read data from Cloud Spanner Database ",
helpText = "Database of requested table.")
helpText = "The database ID of the requested table.")
ValueProvider<String> getSpannerDatabaseId();

@SuppressWarnings("unused")
@@ -141,10 +141,10 @@ public interface SpannerReadOptions extends PipelineOptions {
},
description = "Snapshot time",
helpText =
"If set, specifies the time when the snapshot must be taken."
+ " String is in the RFC 3339 format in UTC time. "
+ " Timestamp must be in the past and Maximum timestamp staleness applies."
+ "https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness",
"The timestamp that corresponds to the version of the Spanner database that you want to read."
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
+ " The timestamp must be specified as per RFC 3339 (https://tools.ietf.org/html/rfc3339) UTC \"Zulu\" format."
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
+ " The timestamp must be in the past and"
+ " Maximum timestamp staleness (https://cloud.google.com/spanner/docs/timestamp-bounds#maximum_timestamp_staleness) applies.",
tamannakakkar93 marked this conversation as resolved.
Show resolved Hide resolved
example = "1990-12-31T23:59:60Z")
@Default.String(value = "")
ValueProvider<String> getSpannerSnapshotTime();
@@ -157,9 +157,10 @@ public interface SpannerReadOptions extends PipelineOptions {
optional = true,
description = "Use independent compute resource (Spanner DataBoost).",
helpText =
"Use Spanner on-demand compute so the export job will run on independent compute"
+ " resources and have no impact to current Spanner workloads. This will incur"
+ " additional charges in Spanner.")
"Set to `true` to use the compute resources of Spanner Data Boost to run the job with near-zero"
+ " impact on Spanner OLTP workflows. When true, requires the `spanner.databases.useDataBoost` Identity and"
+ " Access Management (IAM) permission. For more information, see"
+ " Data Boost overview (https://cloud.google.com/spanner/docs/databoost/databoost-overview).")
@Default.Boolean(false)
ValueProvider<Boolean> getDataBoostEnabled();

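The reworded `spannerSnapshotTime` helpText describes an RFC 3339 timestamp bounded by Spanner's maximum timestamp staleness. A sketch of how an optional snapshot time typically becomes a read bound in the Spanner client (illustrative, assuming the empty default means a strong read; not code from this PR):

```java
import com.google.cloud.Timestamp;
import com.google.cloud.spanner.TimestampBound;

public final class SnapshotTimeToBound {
  /** Illustrative: empty means a strong read; otherwise read at the given time. */
  public static TimestampBound toBound(String snapshotTime) {
    return snapshotTime.isEmpty()
        ? TimestampBound.strong()
        : TimestampBound.ofReadTimestamp(Timestamp.parseTimestamp(snapshotTime));
  }
}
```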
@@ -81,7 +81,7 @@ public interface BigQueryToBigtableOptions
order = 1,
regexes = {"[A-Za-z_][A-Za-z_0-9]*"},
description = "Unique identifier column",
helpText = "Name of the BigQuery column storing the unique identifier of the row")
helpText = "The name of the BigQuery column storing the unique identifier of the row.")
@Required
String getReadIdColumn();

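All of the parameters touched in this PR are `ValueProvider`s, so they resolve when the job runs rather than when the template is built. A minimal sketch of that behavior, reusing the hypothetical `ExampleBigtableOptions` from the first sketch above:

```java
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider;

public final class ResolveAtRuntime {
  public static void main(String[] args) {
    ExampleBigtableOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(ExampleBigtableOptions.class);

    ValueProvider<String> prefix = options.getFilenamePrefix();
    // During template creation the runtime value may not exist yet;
    // isAccessible() guards against calling get() too early.
    if (prefix.isAccessible()) {
      System.out.println("Filename prefix: " + prefix.get());
    }
  }
}
```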