# Demo: Accessing Iceberg tables managed by watsonx.data using Databricks Spark on AWS

In [0]:
# Create the demo table dbr_unity_table (one int column named id) unless it already exists.
# No catalog is named in the statement, so it goes to the session's current (default Unity) catalog.
create_unity_table_sql = "create table if not exists dbr_unity_table (id int)"
spark.sql(create_unity_table_sql);

In [0]:
# List the tables of the current catalog/schema.
# At this point the session still talks to the Databricks Unity catalog, which is the default.
unity_tables_df = spark.sql("show tables")
unity_tables_df.show()

+--------+---------------+-----------+
|database|      tableName|isTemporary|
+--------+---------------+-----------+
| default|dbr_unity_table|      false|
+--------+---------------+-----------+



In [0]:

# Add two rows to the Unity table and read them back.
# The table is addressed by its fully qualified name (the same name the join and
# clean-up cells use), so the statement that is printed is exactly the statement
# that runs, and the cell does not depend on which catalog is currently selected.
# NOTE: the insert is not idempotent -- every run of this cell appends two more rows.
unity_table = "spark_catalog.default.dbr_unity_table"

spark.sql("insert into " + unity_table + " values (1), (2)").show()

select_unity_sql = "SELECT * FROM " + unity_table
print(select_unity_sql)
spark.sql(select_unity_sql).show()

+-----------------+-----------------+
|num_affected_rows|num_inserted_rows|
+-----------------+-----------------+
|                2|                2|
+-----------------+-----------------+

SELECT * FROM spark_catalog.default.dbr_unity_table
+---+
| id|
+---+
|  1|
|  2|
|  1|
|  2|
+---+



#### Set watsonx.data metastore as the metastore for the default catalog

In [0]:
# Set confidential configurations for the watsonx.data catalog.
# These properties must be set under the same catalog name that is registered with
# Spark (spark.sql.catalog.<wxd-catalog>); under any other name they would not be
# applied to that catalog.
# Replace every <...> placeholder before running. Do not commit real values; prefer
# reading them from a secret store over pasting them into the notebook.
wxd_catalog_conf = "spark.sql.catalog.<wxd-catalog>"

# Bearer token sent to the catalog endpoint.
spark.conf.set(wxd_catalog_conf + ".header.Authorization", "Bearer <token>")

# Object-storage endpoint, region and credentials used for the table files.
spark.conf.set(wxd_catalog_conf + ".s3.endpoint", "https://s3.us-south.cloud-object-storage.appdomain.cloud")
spark.conf.set(wxd_catalog_conf + ".client.region", "us-south")
spark.conf.set(wxd_catalog_conf + ".s3.access-key-id", "<access-key>")
spark.conf.set(wxd_catalog_conf + ".s3.secret-access-key", "<secret-key>")

In [0]:
# Register the watsonx.data metastore as an Iceberg REST catalog and make it the
# default catalog for Spark from now on. Replace the <...> placeholders before running.
catalog_key = "spark.sql.catalog.<wxd-catalog>"

# Settings are applied one by one, in the order listed.
wxd_settings = [
    ("spark.sql.iceberg.vectorization.enabled", "false"),
    (catalog_key, "org.apache.iceberg.spark.SparkCatalog"),
    (catalog_key + ".type", "rest"),
    (catalog_key + ".warehouse", "<wxd-catalog>"),
    (catalog_key + ".s3.path-style-access", "true"),
    # Left disabled, as in the original demo:
    # ("spark.sql.catalog.iceberg_data_demo.io-impl", "org.apache.iceberg.aws.s3.S3FileIO"),
    (catalog_key + ".uri", "https://<mds-rest-endpoint>/mds/iceberg"),
    ("spark.sql.defaultCatalog", "<wxd-catalog>"),
]

for conf_key, conf_value in wxd_settings:
    spark.conf.set(conf_key, conf_value)


#### Now whatever you query will come from the watsonx.data catalog

In [0]:
# Switch the session to the watsonx.data catalog and list its schemas.
# Replace <wxd-catalog> with the catalog name before running.
spark.sql("USE <wxd-catalog>")
print("show schemas")
spark.sql("show schemas").show()

# Select the demo schema and list its tables. The printed label is built from the
# statement that is actually executed, so the two cannot drift apart.
use_schema_sql = "USE wxd_demo"
print(use_schema_sql + " and show tables")
spark.sql(use_schema_sql)
spark.sql("show tables").show()

show schemas
+------------+
|databaseName|
+------------+
|    wxd_demo|
|      sports|
+------------+

USE wxd_demo and show tables
+--------+-------------------+-----------+
|database|          tableName|isTemporary|
+--------+-------------------+-----------+
|wxd_demo|merge_on_read_table|      false|
|wxd_demo|copy_on_write_table|      false|
+--------+-------------------+-----------+



**QUERY THE SAME ICEBERG TABLE CREATED IN watsonx.data FROM Z THROUGH Datagate**

In [0]:
# Echo the query, then run it and show every column untruncated.
# Replace <wxd-table> with the table name (the recorded output used merge_on_read_table).
wxd_table_query = "select * from <wxd-table>"
print(wxd_table_query)
spark.sql(wxd_table_query).show(truncate=False)

select * from merge_on_read_table
+---+----------------------+
|id |name                  |
+---+----------------------+
|1  |wxd-merge-on-read-data|
|2  |wxd-merge-on-read-data|
+---+----------------------+



**ACCESSING ICEBERG TABLES IN COPY-ON-WRITE MODE**

In [0]:
# Echo the query against copy_on_write_table, then run it and show every column untruncated.
cow_query = "select * from copy_on_write_table"
print(cow_query)
spark.sql(cow_query).show(truncate=False)

select * from copy_on_write_table
+---+----------------------+
|id |name                  |
+---+----------------------+
|1  |wxd-copy-on-write-data|
|2  |wxd-copy-on-write-data|
+---+----------------------+



**You can also INSERT data into watsonx.data tables using DATABRICKS SPARK**

In [0]:
# Write one row into the table unity_to_wxd from Databricks Spark, then read it back.
# Each statement is echoed before it is executed.
# NOTE(review): no cell visible here creates unity_to_wxd -- presumably it already
# exists in the current watsonx.data schema; confirm before running.
insert_sql = "insert into unity_to_wxd VALUES (1, 'unity-data')"
print(insert_sql)
spark.sql(insert_sql)

read_back_sql = "select * from unity_to_wxd"
print(read_back_sql)
spark.sql(read_back_sql).show()

insert into unity_to_wxd VALUES (1, 'unity-data')
select * from unity_to_wxd
+---+----------+
| id|      name|
+---+----------+
|  1|unity-data|
+---+----------+



#### You can also join tables from watsonx.data with tables in the Databricks Unity catalog!

In [0]:
# The session's default catalog is now the watsonx.data one, so the Unity table has to
# be referenced by its fully qualified name (spark_catalog.default.dbr_unity_table).
# dbr_unity_table is the literal table created at the top of this notebook, not a
# placeholder, so it is written without angle brackets; only the <wxd-...> names
# need to be replaced before running.
join_sql = (
    "select * from <wxd-catalog>.<wxd-schema>.<wxd-table> mor "
    "JOIN spark_catalog.default.dbr_unity_table dbr_unity "
    "ON dbr_unity.id=mor.id"
)
spark.sql(join_sql).show()

+---+--------------------+---+
| id|                name| id|
+---+--------------------+---+
|  1|wxd-merge-on-read...|  1|
|  2|wxd-merge-on-read...|  2|
+---+--------------------+---+



In [0]:
# Clean up the demo: remove the rows from the Unity table used by this notebook.
# The deletes for the watsonx.data tables stay disabled, as in the original demo.
cleanup_statements = [
    # "delete from wxd_demo.unity_to_wxd",
    "delete from spark_catalog.default.dbr_unity_table",
    # "delete from wxd_demo.merge_on_read_table",
    # "delete from wxd_demo.copy_on_write_table",
]

for cleanup_sql in cleanup_statements:
    spark.sql(cleanup_sql)

print("Cleaned up the tables")

Cleaned up the tables
