3 changes: 3 additions & 0 deletions checkstyle.xml
@@ -180,6 +180,9 @@ page at http://checkstyle.sourceforge.net/config.html -->
<property name="allowUndeclaredRTE" value="true"/>
</module>

<!-- Check that paragraph tags are used correctly in Javadoc. -->
<module name="JavadocParagraph"/>

<module name="JavadocType">
<property name="scope" value="protected"/>
<property name="severity" value="error"/>
@@ -106,7 +106,7 @@
* <p>Although most applications consume a single topic, the source can be configured to consume
* multiple topics or even a specific set of {@link TopicPartition}s.
*
* <p> To configure a Kafka source, you must specify at the minimum Kafka <tt>bootstrapServers</tt>
* <p>To configure a Kafka source, you must specify at the minimum Kafka <tt>bootstrapServers</tt>
* and one or more topics to consume. The following example illustrates various options for
* configuring the source:
*
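The class's full read example is elided from this hunk. As a rough sketch only, a minimal source configuration might look like the following; the builder method names (withBootstrapServers, withTopics, withKeyCoder, withValueCoder) and the coders are assumptions based on the surrounding Javadoc, not taken from this revision.

// Hypothetical sketch of a minimal Kafka source; method names are assumptions.
pipeline.apply(KafkaIO.read()
    .withBootstrapServers("broker_1:9092,broker_2:9092")  // required
    .withTopics(ImmutableList.of("topic_a", "topic_b"))   // one or more topics
    .withKeyCoder(BigEndianLongCoder.of())                // coder for the record key
    .withValueCoder(StringUtf8Coder.of()));               // coder for the record value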
@@ -153,7 +153,7 @@
*
* <h3>Writing to Kafka</h3>
*
* KafkaIO sink supports writing key-value pairs to a Kafka topic. Users can also write just the
* <p>KafkaIO sink supports writing key-value pairs to a Kafka topic. Users can also write just the
* values. To configure a Kafka sink, you must specify at the minimum Kafka
* <tt>bootstrapServers</tt> and the topic to write to. The following example illustrates various
* options for configuring the sink:
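The sink example's body is likewise elided (only its closing lines appear in the next hunk). A hedged sketch of the minimal setup described above, with method names again assumed rather than taken from this revision:

// Hypothetical sketch: write KV<K, V> records to a single topic.
kvRecords.apply(KafkaIO.write()
    .withBootstrapServers("broker_1:9092,broker_2:9092")  // required
    .withTopic("results")                                  // topic to write to
    .withKeyCoder(BigEndianLongCoder.of())
    .withValueCoder(StringUtf8Coder.of()));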
@@ -175,7 +175,7 @@
* );
* }</pre>
*
* Often you might want to write just values without any keys to Kafka. Use {@code values()} to
* <p>Often you might want to write just values without any keys to Kafka. Use {@code values()} to
* write records with default empty(null) key:
*
* <pre>{@code
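The body of the example opened above is elided from the diff. A sketch of the values-only variant the preceding paragraph describes; only the values() call is taken from that paragraph, the rest is assumed.

// Hypothetical sketch: write values only; each Kafka record gets a null key.
stringValues.apply(KafkaIO.write()
    .withBootstrapServers("broker_1:9092")
    .withTopic("results")
    .withValueCoder(StringUtf8Coder.of())
    .values());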
@@ -578,7 +578,7 @@ private TypedRead(
}

/**
* Creates an {@link UnboundedSource<KafkaRecord<K, V>, ?>} with the configuration in {@link
* Creates an {@link UnboundedSource} with the configuration in {@link
* TypedRead}. Primary use case is unit tests, should not be used in an application.
*/
@VisibleForTesting
@@ -751,7 +751,7 @@ public int compare(TopicPartition tp1, TopicPartition tp2) {
/**
* Returns one split for each of the Kafka partitions.
*
* <p> It is important to sort the partitions deterministically so that we can support
* <p>It is important to sort the partitions deterministically so that we can support
* resuming a split from last checkpoint. The Kafka partitions are sorted by {@code <topic,
* partition>}.
*/
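A sketch of the deterministic ordering this comment describes, in the shape of the compare(TopicPartition, TopicPartition) method visible in the hunk context; the actual implementation is elided from the diff.

// Order partitions by <topic, partition> so splits match up with earlier checkpoints.
public int compare(TopicPartition tp1, TopicPartition tp2) {
  int byTopic = tp1.topic().compareTo(tp2.topic());
  return byTopic != 0 ? byTopic : Integer.compare(tp1.partition(), tp2.partition());
}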
@@ -66,26 +66,26 @@
* data into {@link Window windows} to be processed, and demonstrates using various kinds of {@link
* Trigger triggers} to control when the results for each window are emitted.
*
* <p> This example uses a portion of real traffic data from San Diego freeways. It contains
* <p>This example uses a portion of real traffic data from San Diego freeways. It contains
* readings from sensor stations set up along each freeway. Each sensor reading includes a
* calculation of the 'total flow' across all lanes in that freeway direction.
*
* <p> Concepts:
* <p>Concepts:
* <pre>
* 1. The default triggering behavior
* 2. Late data with the default trigger
* 3. How to get speculative estimates
* 4. Combining late data and speculative estimates
* </pre>
*
* <p> Before running this example, it will be useful to familiarize yourself with Dataflow triggers
* <p>Before running this example, it will be useful to familiarize yourself with Dataflow triggers
* and understand the concept of 'late data'.
* See: <a href="https://cloud.google.com/dataflow/model/triggers">
* https://cloud.google.com/dataflow/model/triggers </a> and
* <a href="https://cloud.google.com/dataflow/model/windowing#Advanced">
* https://cloud.google.com/dataflow/model/windowing#Advanced </a>
*
* <p> The example pipeline reads data from a Pub/Sub topic. By default, running the example will
* <p>The example pipeline reads data from a Pub/Sub topic. By default, running the example will
* also run an auxiliary pipeline to inject data from the default {@code --input} file to the
* {@code --pubsubTopic}. The auxiliary pipeline puts a timestamp on the injected data so that the
* example pipeline can operate on <i>event time</i> (rather than arrival time). The auxiliary
@@ -94,24 +94,24 @@
* choosing or set {@code --input=""} which will disable the automatic Pub/Sub injection, and allow
* you to use a separate tool to publish to the given topic.
*
* <p> The example is configured to use the default Pub/Sub topic and the default BigQuery table
* <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
* from the example common package (there are no defaults for a general Dataflow pipeline).
* You can override them by using the {@code --pubsubTopic}, {@code --bigQueryDataset}, and
* {@code --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
* the example will try to create them.
*
* <p> The pipeline outputs its results to a BigQuery table.
* <p>The pipeline outputs its results to a BigQuery table.
* Here are some queries you can use to see interesting results:
* Replace {@code <enter_table_name>} in the query below with the name of the BigQuery table.
* Replace {@code <enter_window_interval>} in the query below with the window interval.
*
* <p> To see the results of the default trigger,
* <p>To see the results of the default trigger,
* Note: When you start up your pipeline, you'll initially see results from 'late' data. Wait after
* the window duration, until the first pane of non-late data has been emitted, to see more
* interesting results.
* {@code SELECT * FROM enter_table_name WHERE trigger_type = "default" ORDER BY window DESC}
*
* <p> To see the late data i.e. dropped by the default trigger,
* <p>To see the late data i.e. dropped by the default trigger,
* {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "withAllowedLateness" and
* (timing = "LATE" or timing = "ON_TIME") and freeway = "5" ORDER BY window DESC, processing_time}
*
@@ -120,23 +120,23 @@
* (trigger_type = "withAllowedLateness" or trigger_type = "sequential") and freeway = "5" ORDER BY
* window DESC, processing_time}
*
* <p> To see speculative results every minute,
* <p>To see speculative results every minute,
* {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "speculative" and freeway = "5"
* ORDER BY window DESC, processing_time}
*
* <p> To see speculative results every five minutes after the end of the window
* <p>To see speculative results every five minutes after the end of the window
* {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "sequential" and timing != "EARLY"
* and freeway = "5" ORDER BY window DESC, processing_time}
*
* <p> To see the first and the last pane for a freeway in a window for all the trigger types,
* <p>To see the first and the last pane for a freeway in a window for all the trigger types,
* {@code SELECT * FROM <enter_table_name> WHERE (isFirst = true or isLast = true) ORDER BY window}
*
* <p> To reduce the number of results for each query we can add additional where clauses.
* <p>To reduce the number of results for each query we can add additional where clauses.
* For example, to see the results of the default trigger,
* {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "default" AND freeway = "5" AND
* window = "<enter_window_interval>"}
*
* <p> The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
* <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
* and then exits.
*/

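For reference, a hedged sketch of the windowing and trigger shapes behind the trigger_type values queried above (default watermark trigger, allowed lateness, speculative early firings); the durations, element type, and variable names are illustrative, not the example's actual values.

// Illustrative only: fixed windows with speculative (early) and late firings plus allowed lateness.
flowInfo.apply(
    Window.<TableRow>into(FixedWindows.of(Duration.standardMinutes(30)))
        .triggering(AfterWatermark.pastEndOfWindow()
            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                .plusDelayOf(Duration.standardMinutes(1)))     // speculative panes
            .withLateFirings(AfterProcessingTime.pastFirstElementInPane()
                .plusDelayOf(Duration.standardMinutes(5))))    // panes for late data
        .withAllowedLateness(Duration.standardDays(1))
        .accumulatingFiredPanes());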
@@ -170,13 +170,13 @@ public class TriggerExample {
* 5 | 60 | 10:27:20 | 10:27:25
* 5 | 60 | 10:29:00 | 11:11:00
*
* <p> Dataflow tracks a watermark which records up to what point in event time the data is
* <p>Dataflow tracks a watermark which records up to what point in event time the data is
* complete. For the purposes of the example, we'll assume the watermark is approximately 15m
* behind the current processing time. In practice, the actual value would vary over time based
* on the system's knowledge of the current PubSub delay and contents of the backlog (data
* that has not yet been processed).
*
* <p> If the watermark is 15m behind, then the window [10:00:00, 10:30:00) (in event time) would
* <p>If the watermark is 15m behind, then the window [10:00:00, 10:30:00) (in event time) would
* close at 10:44:59, when the watermark passes 10:30:00.
*/
static class CalculateTotalFlow
@@ -65,21 +65,21 @@
* New concepts: session windows and finding session duration; use of both
* singleton and non-singleton side inputs.
*
* <p> This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business
* <p>This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business
* intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user
* score sum for a window, and uses that information to identify likely spammers/robots. (The robots
* have a higher click rate than the human users). The 'robot' users are then filtered out when
* calculating the team scores.
*
* <p> Additionally, user sessions are tracked: that is, we find bursts of user activity using
* <p>Additionally, user sessions are tracked: that is, we find bursts of user activity using
* session windows. Then, the mean session duration information is recorded in the context of
* subsequent fixed windowing. (This could be used to tell us what games are giving us greater
* user retention).
*
* <p> Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector
* <p>Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector
* documentation provides more detail.
*
* <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
* <p>To execute this pipeline using the Dataflow service, specify the pipeline configuration
* like this:
* <pre>{@code
* --project=YOUR_PROJECT_ID
@@ -46,7 +46,7 @@
* domain, following {@link UserScore}. In addition to the concepts introduced in {@link UserScore},
* new concepts include: windowing and element timestamps; use of {@code Filter.byPredicate()}.
*
* <p> This pipeline processes data collected from gaming events in batch, building on {@link
* <p>This pipeline processes data collected from gaming events in batch, building on {@link
* UserScore} but using fixed windows. It calculates the sum of scores per team, for each window,
* optionally allowing specification of two timestamps before and after which data is filtered out.
* This allows a model where late data collected after the intended analysis window can be included,
@@ -55,7 +55,7 @@
* {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get
* results from plays at the beginning of the batch's time period until the batch is processed.
*
* <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
* <p>To execute this pipeline using the Dataflow service, specify the pipeline configuration
* like this:
* <pre>{@code
* --project=YOUR_PROJECT_ID
@@ -66,7 +66,7 @@
* </pre>
* where the BigQuery dataset you specify must already exist.
*
* <p> Optionally include {@code --input} to specify the batch input file path.
* <p>Optionally include {@code --input} to specify the batch input file path.
* To indicate a time after which the data should be filtered out, include the
* {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data
* timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis.
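A hedged sketch of the cutoff filtering described here, using the Filter.byPredicate() transform named at the top of this comment; the element type, accessor, and variable names are illustrative assumptions.

// Illustrative: drop any event timestamped after the --stopMin cutoff.
gameEvents.apply(Filter.byPredicate(
    (GameActionInfo info) -> info.getTimestamp() < stopMinMillis));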
@@ -57,26 +57,26 @@
* early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late-
* arriving data.
*
* <p> This pipeline processes an unbounded stream of 'game events'. The calculation of the team
* <p>This pipeline processes an unbounded stream of 'game events'. The calculation of the team
* scores uses fixed windowing based on event time (the time of the game play event), not
* processing time (the time that an event is processed by the pipeline). The pipeline calculates
* the sum of scores per team, for each window. By default, the team scores are calculated using
* one-hour windows.
*
* <p> In contrast-- to demo another windowing option-- the user scores are calculated using a
* <p>In contrast-- to demo another windowing option-- the user scores are calculated using a
* global window, which periodically (every ten minutes) emits cumulative user score sums.
*
* <p> In contrast to the previous pipelines in the series, which used static, finite input data,
* <p>In contrast to the previous pipelines in the series, which used static, finite input data,
* here we're using an unbounded data source, which lets us provide speculative results, and allows
* handling of late data, at much lower latency. We can use the early/speculative results to keep a
* 'leaderboard' updated in near-realtime. Our handling of late data lets us generate correct
* results, e.g. for 'team prizes'. We're now outputting window results as they're
* calculated, giving us much lower latency than with the previous batch examples.
*
* <p> Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector
* <p>Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector
* documentation provides more detail on how to do this.
*
* <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
* <p>To execute this pipeline using the Dataflow service, specify the pipeline configuration
* like this:
* <pre>{@code
* --project=YOUR_PROJECT_ID
@@ -49,15 +49,15 @@
* BigQuery; using standalone DoFns; use of the sum by key transform; examples of
* Java 8 lambda syntax.
*
* <p> In this gaming scenario, many users play, as members of different teams, over the course of a
* <p>In this gaming scenario, many users play, as members of different teams, over the course of a
* day, and their actions are logged for processing. Some of the logged game events may be late-
* arriving, if users play on mobile devices and go transiently offline for a period.
*
* <p> This pipeline does batch processing of data collected from gaming events. It calculates the
* <p>This pipeline does batch processing of data collected from gaming events. It calculates the
* sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The
* batch processing will not include any late data that arrives after the day's cutoff point.
*
* <p> To execute this pipeline using the Dataflow service and static example input data, specify
* <p>To execute this pipeline using the Dataflow service and static example input data, specify
* the pipeline configuration like this:
* <pre>{@code
* --project=YOUR_PROJECT_ID
@@ -68,7 +68,7 @@
* </pre>
* where the BigQuery dataset you specify must already exist.
*
* <p> Optionally include the --input argument to specify a batch input file.
* <p>Optionally include the --input argument to specify a batch input file.
* See the --input default value for example batch data file, or use {@link injector.Injector} to
* generate your own batch data.
*/
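A hedged sketch of the "sum by key" step the comment above refers to: extract (user, score) pairs, then sum per user. The element type and accessors are illustrative assumptions; the DoFn shape follows the processElement(ProcessContext) style visible elsewhere in this change.

// Illustrative: per-user score sums over the whole batch.
gameEvents
    .apply(ParDo.of(new DoFn<GameActionInfo, KV<String, Integer>>() {
      @Override
      public void processElement(ProcessContext c) {
        c.output(KV.of(c.element().getUser(), c.element().getScore()));
      }
    }))
    .apply(Sum.<String>integersPerKey());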
@@ -41,22 +41,22 @@
* This is a generator that simulates usage data from a mobile game, and either publishes the data
* to a pubsub topic or writes it to a file.
*
* <p> The general model used by the generator is the following. There is a set of teams with team
* <p>The general model used by the generator is the following. There is a set of teams with team
* members. Each member is scoring points for their team. After some period, a team will dissolve
* and a new one will be created in its place. There is also a set of 'Robots', or spammer users.
* They hop from team to team. The robots are set to have a higher 'click rate' (generate more
* events) than the regular team members.
*
* <p> Each generated line of data has the following form:
* <p>Each generated line of data has the following form:
* username,teamname,score,timestamp_in_ms,readable_time
* e.g.:
* user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
*
* <p> The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
* <p>The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
* specified. It takes the following arguments:
* {@code Injector project-name (topic-name|none) (filename|none)}.
*
* <p> To run the Injector in the mode where it publishes to PubSub, you will need to authenticate
* <p>To run the Injector in the mode where it publishes to PubSub, you will need to authenticate
* locally using project-based service account credentials to avoid running over PubSub
* quota.
* See https://developers.google.com/identity/protocols/application-default-credentials
@@ -75,7 +75,7 @@
* </pre>
* The pubsub topic will be created if it does not exist.
*
* <p> To run the injector in write-to-file-mode, set the topic name to "none" and specify the
* <p>To run the injector in write-to-file-mode, set the topic name to "none" and specify the
* filename:
* <pre>{@code
* Injector <project-name> none <filename>
@@ -158,7 +158,7 @@ public void processElement(ProcessContext c) {
* <p>Inherits standard configuration options and all options defined in
* {@link WordCount.WordCountOptions}.
*/
public static interface WordCountOptions extends WordCount.WordCountOptions {
public interface WordCountOptions extends WordCount.WordCountOptions {

@Description("Regex filter pattern to use in DebuggingWordCount. "
+ "Only words matching this pattern will be counted.")
@@ -178,7 +178,7 @@ private static TableReference getTableReference(Options options) {
* table and the PubSub topic, as well as the {@link WordCount.WordCountOptions} support for
* specification of the input file.
*/
public static interface Options
public interface Options
extends WordCount.WordCountOptions, DataflowExampleUtils.DataflowExampleUtilsOptions {
@Description("Fixed window duration, in minutes")
@Default.Integer(WINDOW_SIZE)
@@ -158,7 +158,7 @@ public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
*
* <p>Inherits standard configuration options.
*/
public static interface WordCountOptions extends PipelineOptions {
public interface WordCountOptions extends PipelineOptions {
@Description("Path of the file to read from")
@Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
String getInputFile();
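For context, an options interface like this is normally bound from command-line arguments when the pipeline starts; a minimal sketch (the surrounding main() is not part of this diff):

// Illustrative: bind flags such as --inputFile to the WordCountOptions interface.
WordCountOptions options =
    PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
Pipeline p = Pipeline.create(options);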
@@ -172,7 +172,7 @@ public static interface WordCountOptions extends PipelineOptions {
/**
* Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.
*/
public static class OutputFactory implements DefaultValueFactory<String> {
class OutputFactory implements DefaultValueFactory<String> {
@Override
public String create(PipelineOptions options) {
DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
@@ -67,7 +67,7 @@ public class DataflowExampleUtils {
/**
* Define an interface that supports the PubSub and BigQuery example options.
*/
public static interface DataflowExampleUtilsOptions
public interface DataflowExampleUtilsOptions
extends DataflowExampleOptions, ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
}
