 import io.quarkus.test.junit.QuarkusIntegrationTest;
 import java.io.File;
 import java.nio.file.Path;
-import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.io.FileUtils;
 import org.apache.polaris.service.it.env.IntegrationTestsHelper;
-import org.apache.spark.sql.AnalysisException;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -48,28 +39,30 @@ public class SparkHudiIT extends SparkIntegrationBase {
   @Override
   protected SparkSession.Builder withCatalog(SparkSession.Builder builder, String catalogName) {
     return builder
-        .config(
-            "spark.sql.extensions",
-            "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
-        .config(
-            "spark.sql.catalog.spark_catalog", "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
-        .config(
-            String.format("spark.sql.catalog.%s", catalogName),
-            "org.apache.polaris.spark.SparkCatalog")
-        .config("spark.sql.warehouse.dir", warehouseDir.toString())
-        .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest")
-        .config(
-            String.format("spark.sql.catalog.%s.uri", catalogName),
-            endpoints.catalogApiEndpoint().toString())
-        .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName)
-        .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL")
-        .config(
-            String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId())
-        .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken)
-        .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey")
-        .config(
-            String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret")
-        .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2");
+        .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
+        .config(
+            "spark.sql.catalog.spark_catalog", "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
+        .config(
+            String.format("spark.sql.catalog.%s", catalogName),
+            "org.apache.polaris.spark.SparkCatalog")
+        .config("spark.sql.warehouse.dir", warehouseDir.toString())
+        .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest")
+        .config(
+            String.format("spark.sql.catalog.%s.uri", catalogName),
+            endpoints.catalogApiEndpoint().toString())
+        .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName)
+        .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL")
+        .config(
+            String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId())
+        .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken)
+        .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey")
+        .config(
+            String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret")
+        .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2")
+        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+        .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar")
+        // disabled for the initial integration test; revisit enabling in the future
+        .config("hoodie.metadata.enable", "false");
   }

   private String defaultNs;
@@ -85,7 +78,7 @@ private String getTableNameWithRandomSuffix() {

   @BeforeEach
   public void createDefaultResources(@TempDir Path tempDir) {
-    spark.sparkContext().setLogLevel("WARN");
+    spark.sparkContext().setLogLevel("INFO");
     defaultNs = generateName("hudi");
     // create a default namespace
     sql("CREATE NAMESPACE %s", defaultNs);
@@ -110,7 +103,7 @@ public void testBasicTableOperations() {
         "CREATE TABLE %s (id INT, name STRING) USING HUDI LOCATION '%s'",
         huditb1, getTableLocation(huditb1));
     sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", huditb1);
-    List<Object[]> results = sql("SELECT * FROM %s WHERE id > 1 ORDER BY id DESC", huditb1);
+    List<Object[]> results = sql("SELECT id,name FROM %s WHERE id > 1 ORDER BY id DESC", huditb1);
     assertThat(results.size()).isEqualTo(1);
     assertThat(results.get(0)).isEqualTo(new Object[] {2, "bob"});

@@ -145,63 +138,6 @@ public void testBasicTableOperations() {
     assertThat(tables.size()).isEqualTo(0);
   }

-  @Test
-  public void testAlterOperations() {
-    String huditb = getTableNameWithRandomSuffix();
-    sql(
-        "CREATE TABLE %s (id INT, name STRING) USING HUDI LOCATION '%s'",
-        huditb, getTableLocation(huditb));
-    sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", huditb);
-
-    // test alter columns
-    // add two new columns to the table
-    sql("Alter TABLE %s ADD COLUMNS (city STRING, age INT)", huditb);
-    // add one more row to the table
-    sql("INSERT INTO %s VALUES (3, 'john', 'SFO', 20)", huditb);
-    // verify the table now have 4 columns with correct result
-    List<Object[]> results = sql("SELECT * FROM %s ORDER BY id", huditb);
-    assertThat(results.size()).isEqualTo(3);
-    assertThat(results).contains(new Object[] {1, "anna", null, null});
-    assertThat(results).contains(new Object[] {2, "bob", null, null});
-    assertThat(results).contains(new Object[] {3, "john", "SFO", 20});
-
-    // drop and rename column require set the hoodie.keep.max.commits property
-    sql("ALTER TABLE %s SET TBLPROPERTIES ('hoodie.keep.max.commits' = '50')", huditb);
-    // drop column age
-    sql("Alter TABLE %s DROP COLUMN age", huditb);
-    // verify the table now have 3 columns with correct result
-    results = sql("SELECT * FROM %s ORDER BY id", huditb);
-    assertThat(results.size()).isEqualTo(3);
-    assertThat(results).contains(new Object[] {1, "anna", null});
-    assertThat(results).contains(new Object[] {2, "bob", null});
-    assertThat(results).contains(new Object[] {3, "john", "SFO"});
-
-    // rename column city to address
-    sql("Alter TABLE %s RENAME COLUMN city TO address", huditb);
-    // verify column address exists
-    results = sql("SELECT id, address FROM %s ORDER BY id", huditb);
-    assertThat(results.size()).isEqualTo(3);
-    assertThat(results).contains(new Object[] {1, null});
-    assertThat(results).contains(new Object[] {2, null});
-    assertThat(results).contains(new Object[] {3, "SFO"});
-
-    // test alter properties
-    sql(
-        "ALTER TABLE %s SET TBLPROPERTIES ('description' = 'people table', 'test-owner' = 'test-user')",
-        huditb);
-    List<Object[]> tableInfo = sql("DESCRIBE TABLE EXTENDED %s", huditb);
-    // find the table properties result
-    String properties = null;
-    for (Object[] info : tableInfo) {
-      if (info[0].equals("Table Properties")) {
-        properties = (String) info[1];
-        break;
-      }
-    }
-    assertThat(properties).contains("description=people table,test-owner=test-user");
-    sql("DROP TABLE %s", huditb);
-  }
-
   @Test
   public void testUnsupportedAlterTableOperations() {
     String huditb = getTableNameWithRandomSuffix();
@@ -215,7 +151,7 @@ public void testUnsupportedAlterTableOperations() {

     // ALTER TABLE ... SET LOCATION ... fails
     assertThatThrownBy(() -> sql("ALTER TABLE %s SET LOCATION '/tmp/new/path'", huditb))
-        .isInstanceOf(AnalysisException.class);
+        .isInstanceOf(UnsupportedOperationException.class);

     sql("DROP TABLE %s", huditb);
   }
@@ -235,50 +171,4 @@ public void testUnsupportedTableCreateOperations() {
             huditb, getTableLocation(huditb)))
         .isInstanceOf(IllegalArgumentException.class);
   }
-
-  @Test
-  public void testDataframeSaveOperations() {
-    List<Row> data = Arrays.asList(RowFactory.create("Alice", 30), RowFactory.create("Bob", 25));
-    StructType schema =
-        new StructType(
-            new StructField[] {
-              new StructField("name", DataTypes.StringType, false, Metadata.empty()),
-              new StructField("age", DataTypes.IntegerType, false, Metadata.empty())
-            });
-    Dataset<Row> df = spark.createDataFrame(data, schema);
-
-    String huditb = getTableNameWithRandomSuffix();
-    // saveAsTable requires support for hudi requires CTAS support for third party catalog
-    // in hudi catalog, which is currently not supported.
-    assertThatThrownBy(
-            () ->
-                df.write()
-                    .format("hudi")
-                    .option("path", getTableLocation(huditb))
-                    .saveAsTable(huditb))
-        .isInstanceOf(IllegalArgumentException.class);
-
-    // verify regular dataframe saving still works
-    df.write().format("hudi").save(getTableLocation(huditb));
-
-    // verify the partition dir is created
-    List<String> subDirs = listDirs(getTableLocation(huditb));
-    assertThat(subDirs).contains(".hoodie");
-
-    // verify we can create a table out of the exising hudi location
-    sql("CREATE TABLE %s USING HUDI LOCATION '%s'", huditb, getTableLocation(huditb));
-    List<Object[]> tables = sql("SHOW TABLES");
-    assertThat(tables.size()).isEqualTo(1);
-    assertThat(tables).contains(new Object[] {defaultNs, huditb, false});
-
-    sql("INSERT INTO %s VALUES ('Anna', 11)", huditb);
-
-    List<Object[]> results = sql("SELECT * FROM %s ORDER BY name", huditb);
-    assertThat(results.size()).isEqualTo(3);
-    assertThat(results.get(0)).isEqualTo(new Object[] {"Alice", 30});
-    assertThat(results.get(1)).isEqualTo(new Object[] {"Anna", 11});
-    assertThat(results.get(2)).isEqualTo(new Object[] {"Bob", 25});
-
-    sql("DROP TABLE %s", huditb);
-  }
 }
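
For context on the session settings this diff adds: pairing the Kryo serializer with Hudi's registrar is the combination Hudi's Spark integration generally expects, and hoodie.metadata.enable=false mirrors the temporary workaround noted in the patch comment. Below is a minimal standalone sketch of an equivalent session outside the test harness; the class name, the catalog name "polaris", the localhost URI, and the token are placeholder assumptions, not values from this patch (which derives them from the test environment).

import org.apache.spark.sql.SparkSession;

// Hypothetical driver class, for illustration only.
public final class PolarisHudiSessionSketch {
  public static void main(String[] args) {
    SparkSession spark =
        SparkSession.builder()
            .master("local[1]")
            // Hudi SQL support plus a Polaris-backed catalog, as configured in the patch.
            .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
            .config(
                "spark.sql.catalog.spark_catalog", "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
            .config("spark.sql.catalog.polaris", "org.apache.polaris.spark.SparkCatalog")
            .config("spark.sql.catalog.polaris.type", "rest")
            // Placeholder endpoint and credentials; the test derives these from its environment.
            .config("spark.sql.catalog.polaris.uri", "http://localhost:8181/api/catalog")
            .config("spark.sql.catalog.polaris.warehouse", "polaris")
            .config("spark.sql.catalog.polaris.token", "<token>")
            // Kryo settings taken directly from the patch; Hudi's integration expects them.
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar")
            // Mirrors the patch: Hudi's metadata table stays disabled for the initial test.
            .config("hoodie.metadata.enable", "false")
            .getOrCreate();

    // Smoke check: list namespaces through the Polaris catalog.
    spark.sql("SHOW NAMESPACES IN polaris").show();
    spark.stop();
  }
}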