# Convert Parquet table to Delta table

In [None]:
# Generate dummy data

from pyspark.sql.functions import expr, lit, col
from pyspark.sql.types import *
from datetime import date


# Five sample rows: even ids are tagged 'Open', odd ids 'Close'; each row gets
# a June 2023 date and a device id in [0, 100), both drawn from seeded rand(5).
df = (
    spark.range(5)
    .selectExpr("if(id % 2 = 0, 'Open', 'Close') as action")
    .withColumn(
        "date",
        expr("cast(concat('2023-06-', cast(rand(5) * 30 as int) + 1) as date)"),
    )
    .withColumn("device_id", expr("cast(rand(5) * 100 as int)"))
)

# Remove any previous copy, then save the data as a Parquet table in the catalog.
parquet_table_name = 'demo.device'
spark.sql(f"DROP TABLE IF EXISTS {parquet_table_name}")

(
    df.write
    .format("parquet")
    .mode("overwrite")
    .saveAsTable(parquet_table_name)
)

Let's take a look at the table. In the lakehouse explorer, under the Tables section, you can see that the table has a different icon. Also pay attention to the provider (line 13).

In [None]:
%%sql

-- Inspect demo.device right after it was saved as a Parquet table.
DESCRIBE EXTENDED demo.device


## Converting to Delta table

You can use CONVERT TO DELTA to transform a directory of Parquet files into a Delta table with a single command. Once you have converted a table to Delta Lake, you should stop reading from and writing to the table using Parquet logic.
The code is simple, and because the Parquet files don't need to be rewritten, the conversion requires fewer computational resources than you might imagine.

In [None]:
from delta.tables import *

# Convert the demo.device Parquet table through the Python API and keep the
# value returned by convertToDelta.
deltaTable = DeltaTable.convertToDelta(
    spark,
    "demo.device",
)

In [None]:
%%sql

-- SQL form of the conversion; it targets the same table as the
-- DeltaTable.convertToDelta call in the preceding Python cell.
CONVERT TO DELTA demo.device

Note that the icon in the lakehouse explorer and the provider (line 11) have changed.

In [None]:
%%sql

-- Describe demo.device again so its metadata can be compared after the conversion.
DESCRIBE EXTENDED demo.device


In [None]:
# One hundred sample rows with the same three columns as before:
# action ('Open' for even ids, 'Close' for odd), a June 2023 date, and a device id.
df = (
    spark.range(100)
    .selectExpr("if(id % 2 = 0, 'Open', 'Close') as action")
    .withColumn(
        "date",
        expr("cast(concat('2023-06-', cast(rand(5) * 30 as int) + 1) as date)"),
    )
    .withColumn("device_id", expr("cast(rand(5) * 100 as int)"))
)

parquet_table_name = 'demo.device_partitioned'

# Remove any previous copy, then save the data as a Parquet table partitioned by date.
spark.sql(f"DROP TABLE IF EXISTS {parquet_table_name}")
(
    df.write
    .format("parquet")
    .partitionBy("date")
    .mode("overwrite")
    .saveAsTable(parquet_table_name)
)

In [None]:
%%sql

-- Inspect the date-partitioned Parquet table before it is converted.
DESCRIBE EXTENDED demo.device_partitioned

In [None]:
from delta.tables import *

# Convert the partitioned Parquet table; the third argument declares the
# partition column and its type ("date date").
deltaTable = DeltaTable.convertToDelta(
    spark,
    "demo.device_partitioned",
    "date date",
)

In [None]:
%%sql

-- SQL form of the partitioned conversion; PARTITIONED BY declares the
-- partition column (date) together with its type (date).
CONVERT TO DELTA demo.device_partitioned PARTITIONED BY (date date)

In [None]:
%%sql

-- Describe demo.device_partitioned again so its metadata can be compared after the conversion.
DESCRIBE EXTENDED demo.device_partitioned

# Clean up

In [None]:
# Drop both tables created by this notebook. The names are schema-qualified so
# they match the tables as they were created (demo.device and
# demo.device_partitioned); an unqualified name is resolved against the
# session's current database and could leave the demo tables behind.
for table_name in ("demo.device", "demo.device_partitioned"):
    spark.sql(f"DROP TABLE IF EXISTS {table_name}")