forked from apache/beam
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.gradle
209 lines (189 loc) · 8.61 KB
/
build.gradle
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import groovy.json.JsonOutput
import java.util.stream.Collectors
// Apply the Beam module plugin.
plugins { id 'org.apache.beam.module' }
// NOTE(review): applyJavaNature, provideIntegrationTestingDependencies and
// enableJavaPerformanceTesting are not defined in this file; presumably they
// are contributed by the org.apache.beam.module plugin — confirm.
// The only option passed here is the automatic module name for this project.
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.hadoop.format',
)
provideIntegrationTestingDependencies()
enableJavaPerformanceTesting()
// Human-readable project description and a one-sentence summary of the module.
description = "Apache Beam :: SDKs :: Java :: IO :: Hadoop Format"
ext.summary = "IO to read data from sources and to write data to sinks that implement Hadoop MapReduce Format."
// Hadoop releases this module is exercised against. Each key is a compact
// identifier that gets embedded in configuration names (hadoopVersion<key>)
// and task names; each value is the Hadoop version it stands for.
def hadoopVersions = [
  "285" : "2.8.5",
  "292" : "2.9.2",
  "2102": "2.10.2",
  "324" : "3.2.4",
]

// Create one dedicated configuration per Hadoop version.
hadoopVersions.keySet().each { versionKey ->
  configurations.create("hadoopVersion${versionKey}")
}

// Elasticsearch version used by the Elasticsearch test dependencies below.
def elastic_search_version = "7.12.0"
// Ban dependencies from the test runtime classpath.
// log4j-over-slf4j is removed to prevent a StackOverflow caused by the
// LOG4J -> SLF4J -> LOG4J wiring loop.
configurations.testRuntimeClasspath.exclude(group: "org.slf4j", module: "log4j-over-slf4j")
// Exclude the io.github.stephankoelle JAMM artifact from every configuration
// so that the old version of JAMM that cassandra relies on is the one in use.
configurations.all {
  exclude group: "io.github.stephankoelle", module: "jamm"
}
// Dependency declarations for this module, in three groups: main-code
// dependencies, test-only dependencies, and one dependency set per entry of
// hadoopVersions. Declaration order is significant for at least the JAMM
// entry (see the comment on it), so avoid reordering.
dependencies {
implementation project(path: ":sdks:java:core", configuration: "shadow")
implementation library.java.vendored_guava_32_1_2_jre
implementation library.java.slf4j_api
implementation project(":sdks:java:io:hadoop-common")
implementation library.java.joda_time
// Hadoop artifacts are declared in the `provided` configuration.
// NOTE(review): presumably this means they are expected from the runtime
// environment rather than packaged with this module — confirm against the
// Beam module plugin, which defines `provided` and `permitUnusedDeclared`.
provided library.java.hadoop_common
provided library.java.hadoop_hdfs
permitUnusedDeclared library.java.hadoop_hdfs
provided library.java.hadoop_hdfs_client
provided library.java.hadoop_mapreduce_client_core
// Ensure that the older version of JAMM that cassandra relies on appears
// on the classpath before the one provided by :sdks:java:core shadowTest.
testImplementation "com.github.jbellis:jamm:0.3.0"
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:io:common", configuration: "testRuntimeMigration")
testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
testImplementation project(":sdks:java:io:jdbc")
// Elasticsearch test dependencies; the org.elasticsearch artifacts are all
// pinned to elastic_search_version.
testImplementation "org.elasticsearch.plugin:transport-netty4-client:$elastic_search_version"
testImplementation library.java.testcontainers_elasticsearch
testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:$elastic_search_version"
testImplementation "org.elasticsearch:elasticsearch:$elastic_search_version"
testImplementation ("org.elasticsearch:elasticsearch-hadoop:$elastic_search_version") {
// TODO(https://issues.apache.org/jira/browse/BEAM-3715)
// These are all optional deps of elasticsearch-hadoop. Why do they have to be excluded?
exclude group: "cascading", module: "cascading-local"
exclude group: "cascading", module: "cascading-hadoop"
exclude group: "org.apache.hive", module: "hive-service"
exclude group: "org.apache.pig", module: "pig"
exclude group: "org.apache.spark", module: "spark-core_2.10"
exclude group: "org.apache.spark", module: "spark-streaming_2.10"
exclude group: "org.apache.spark", module: "spark-sql_2.10"
exclude group: "org.apache.storm", module: "storm-core"
}
// Same httpclient version that is added to, and forced in, the
// hadoopVersion<key> configurations.
testImplementation "org.apache.httpcomponents:httpclient:4.5.13"
testImplementation library.java.commons_lang3
testImplementation library.java.commons_io
// Cassandra test dependencies.
testImplementation library.java.cassandra_driver_core
testImplementation library.java.cassandra_driver_mapping
testImplementation "org.apache.cassandra:cassandra-all:3.11.8"
// Hadoop is also needed on the test classpath (it is only `provided` above).
testImplementation library.java.hadoop_common
testImplementation library.java.hadoop_hdfs
testImplementation library.java.hadoop_mapreduce_client_core
testImplementation library.java.postgres
testImplementation library.java.junit
testImplementation library.java.hamcrest
testImplementation library.java.testcontainers_postgresql
testImplementation library.java.netty_all
testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
// Populate each hadoopVersion<key> configuration with the Hadoop artifacts at
// the version mapped to that key, plus httpclient 4.5.13.
hadoopVersions.each {kv ->
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-mapreduce-client-core:$kv.value"
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-hdfs:$kv.value"
"hadoopVersion$kv.key" "org.apache.httpcomponents:httpclient:4.5.13"
}
}
// Pin the Hadoop artifacts of every hadoopVersion<key> configuration to the
// Hadoop version mapped to that key, regardless of what other dependencies
// request.
hadoopVersions.each { versionKey, hadoopVersion ->
  configurations.getByName("hadoopVersion${versionKey}").resolutionStrategy.force(
    "org.apache.hadoop:hadoop-common:${hadoopVersion}",
    "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopVersion}",
    "org.apache.hadoop:hadoop-hdfs:${hadoopVersion}",
    // without forcing httpclient httpcore gets below 4.4.9 which has incompatible API
    "org.apache.httpcomponents:httpclient:4.5.13"
  )
}
// Hadoop dependencies require old version of Guava (BEAM-11626)
configurations.all { Configuration cfg ->
  // error-prone requires newer guava, don't override for annotation processing
  // https://github.com/google/error-prone/issues/2745
  def annotationProcessorConfigurations = ["annotationProcessor", "testAnnotationProcessor"]
  if (!(cfg.name in annotationProcessorConfigurations)) {
    cfg.resolutionStrategy.force 'com.google.guava:guava:25.1-jre'
  }
}
// The cassandra.yaml file currently assumes "target/..." exists.
// TODO: Update cassandra.yaml to inject new properties representing
// the root path. Also migrate cassandra.yaml to use any open ports
// instead of a static port.
task createTargetDirectoryForCassandra() {
  doLast {
    // Create <project dir>/target (including parents) only when it is missing.
    def targetDir = project.file("target")
    if (targetDir.exists()) {
      return
    }
    targetDir.mkdirs()
  }
}

// The default test task needs the directory in place before it runs.
test.dependsOn createTargetDirectoryForCassandra
// Aggregate task: depends on every per-Hadoop-version task of each kind
// (unit tests, HadoopFormatIOIT, HadoopFormatIOElasticIT).
task hadoopVersionsTest(group: "Verification") {
  description = "Runs Hadoop format tests with different Hadoop versions"
  ["Test", "IT", "ElasticIT"].each { taskSuffix ->
    dependsOn createTaskNames(hadoopVersions, taskSuffix)
  }
}
// Registers one runHadoopFormatIO<key>ElasticIT task per Hadoop version; each
// runs only HadoopFormatIOElasticIT with testcontainers enabled.
hadoopVersions.each { versionKey, hadoopVersion ->
  task "runHadoopFormatIO${versionKey}ElasticIT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOElasticIT with Hadoop '${hadoopVersion}'"
    // Never considered up to date, so the task executes every time it is requested.
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    // The version-specific Hadoop configuration is placed ahead of the regular
    // test runtime classpath.
    classpath = configurations.getByName("hadoopVersion${versionKey}") + sourceSets.test.runtimeClasspath
    // Pipeline options are handed to the tests as a JSON array in a system property.
    systemProperty "beamTestPipelineOptions", JsonOutput.toJson(["--withTestcontainers=true"])
    include '**/HadoopFormatIOElasticIT.class'
  }
}
// Registers one runHadoopFormatIO<key>IT task per Hadoop version; each runs
// only HadoopFormatIOIT with the Postgres/testcontainers options below.
hadoopVersions.each { versionKey, hadoopVersion ->
  task "runHadoopFormatIO${versionKey}IT"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIOIT with Hadoop '${hadoopVersion}'"
    // Never considered up to date, so the task executes every time it is requested.
    outputs.upToDateWhen { false }
    testClassesDirs = sourceSets.test.output.classesDirs
    // The version-specific Hadoop configuration is placed ahead of the regular
    // test runtime classpath.
    classpath = configurations.getByName("hadoopVersion${versionKey}") + sourceSets.test.runtimeClasspath

    // Pipeline options are handed to the tests as a JSON array in a system property.
    def integrationTestArgs = [
      "--postgresServerName=dummy_value",
      "--postgresUsername=postgres",
      "--postgresDatabaseName=postgres",
      "--postgresPassword=postgres",
      "--numberOfRecords=1000",
      "--withTestcontainers=true",
      "--postgresSsl=false",
    ]
    systemProperty "beamTestPipelineOptions", JsonOutput.toJson(integrationTestArgs)
    include '**/HadoopFormatIOIT.class'
  }
}
// Registers one runHadoopFormatIO<key>Test task per Hadoop version; each runs
// the module's *Test classes against that Hadoop version.
hadoopVersions.each { kv ->
  task "runHadoopFormatIO${kv.key}Test"(type: Test, group: "Verification") {
    description = "Runs HadoopFormatIO tests with Hadoop '${kv.value}'"
    // Never considered up to date, so the task executes every time it is requested.
    outputs.upToDateWhen { false }
    // Point the task at the compiled test classes explicitly, as the sibling
    // IT and ElasticIT tasks do. Relying on the implicit convention for custom
    // Test tasks is deprecated in Gradle 8.x and removed in Gradle 9, where the
    // task would otherwise find no tests to run.
    testClassesDirs = sourceSets.test.output.classesDirs
    // The version-specific Hadoop configuration is placed ahead of the regular
    // test runtime classpath.
    classpath = configurations."hadoopVersion$kv.key" + sourceSets.test.runtimeClasspath
    include '**/*Test.class'
    // Cassandra test cannot run in parallel
    exclude '**/HadoopFormatIOCassandraTest.class'
  }
}
/**
 * Builds the names of the per-Hadoop-version tasks of one kind.
 *
 * @param hadoopVersions map whose keys are the Hadoop version identifiers
 *                       embedded in task names; the values are not used here
 * @param suffix         task-kind suffix appended after the identifier
 * @return a list with one "runHadoopFormatIO<key><suffix>" name per key, in
 *         the iteration order of the map's key set
 */
static def createTaskNames(Map<String, String> hadoopVersions, String suffix) {
  return hadoopVersions.keySet().collect { versionKey -> "runHadoopFormatIO$versionKey$suffix" }
}