SQL for creating a Delta table from existing json data and then querying it

In [0]:
spark.sql("USE CATALOG dataops_dev")
spark.sql("USE schema_test")

DataFrame[]

In [0]:
%sql
-- Create a Delta table
CREATE OR REPLACE TABLE sales_delta_json
USING DELTA
AS
SELECT * FROM read_files(
  '/Volumes/dataops_dev/schema_test/volume_test/delta_tables/json/',
  format => 'json',
  multiline => 'true'
);

-- Query the table
SELECT SUM(money) AS total_money
FROM sales_delta_json;

total_money
17600


SQL for creating a Delta table from existing csv data and then querying it

In [0]:
%sql

-- Create a Delta table
CREATE OR REPLACE TABLE sales_delta_csv
USING DELTA
AS
SELECT * FROM read_files(
  '/Volumes/dataops_dev/schema_test/volume_test/delta_tables/csv/',
  format => 'csv',
  header => 'true'
);

-- Query the table
SELECT SUM(money) AS total_money
FROM sales_delta_csv;

total_money
17600


PySpark for creating a Delta table from existing csv data and then querying it

In [0]:
# Read raw csv files
df = (
    spark.read
    .option("header", True)
    .csv("/Volumes/dataops_dev/schema_test/volume_test/delta_tables/csv/")
)

# Write as Delta table
df.write.format("delta").mode("overwrite").option("mergeSchema", "true").saveAsTable("sales_delta_pyspark_csv")

# Query
result = spark.sql("SELECT sum(money) FROM sales_delta_pyspark_csv")
result.show()

+----------+
|sum(money)|
+----------+
|   17600.0|
+----------+



PySpark for creating a Delta table from existing json data and then querying it

In [0]:
# Read raw json files,
df = (
    spark.read
    .option("multiline", True)
    .json("/Volumes/dataops_dev/schema_test/volume_test/delta_tables/json/")
)

# Write as Delta table
df.write.format("delta").mode("overwrite").saveAsTable("sales_delta_pyspark_json")

# Query
result = spark.sql("SELECT sum(money) FROM sales_delta_pyspark_json")
result.show()

+----------+
|sum(money)|
+----------+
|     17600|
+----------+



In [0]:
result = spark.sql("SELECT * FROM sales_delta_pyspark_json")
result.show()

+---+---+-----+-------+-----+-----+
| Id|age|money|   name|sales|units|
+---+---+-----+-------+-----+-----+
|  1| 25| 1200|  Alice| 4500|   45|
|  2| 32|  850|    Bob| 3000|   30|
|  3| 29| 1500|Charlie| 6000|   60|
|  4| 41| 2100|  Diana| 7500|   75|
|  5| 27|  950|  Ethan| 2500|   25|
|  6| 34| 1800|  Fiona| 5000|   50|
|  7| 30| 1100| George| 2200|   22|
|  8| 28| 1600| Hannah| 4000|   40|
|  9| 36| 2000|    Ian| 6500|   65|
| 10| 26| 1300|  Julia| 2800|   28|
| 11| 39| 1750|  Kevin| 5500|   55|
| 12| 31| 1450|  Laura| 3800|   38|
+---+---+-----+-------+-----+-----+

