In [1]:
from delta import *
from pyspark.sql.types import *
from delta.tables import *
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
import os

In [2]:
# `sys` is required for sys.executable below. Import it explicitly instead of
# relying on one of the wildcard imports happening to expose the name.
import sys

# Tell PySpark to use the interpreter that is running this kernel for both
# the PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON settings.
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

In [3]:
# Session builder for the "DeltaTutorial" app, with the Delta SQL extension
# and the Delta catalog class set through Spark config keys.
builder = (
    SparkSession.builder
    .appName("DeltaTutorial")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

In [4]:
# Pass the builder through configure_spark_with_delta_pip, then create (or
# reuse) the Spark session from it.
spark = (
    configure_spark_with_delta_pip(builder)
    .getOrCreate()
)

# Only show ERROR-level log messages from Spark.
spark.sparkContext.setLogLevel("ERROR")

In [5]:
# Sample rows, one tuple per person: (firstname, lastname, age).
data = [
    ("Anurag", "Karki", 23),
    ("Anuska", "Karki", 18),
    ("Sujan", "Karki", 48),
]

# Explicit schema for the tuples above; all three columns are nullable.
schema = StructType(
    [
        StructField("firstname", StringType(), nullable=True),
        StructField("lastname", StringType(), nullable=True),
        StructField("age", IntegerType(), nullable=True),
    ]
)

In [6]:
# Build a Spark DataFrame from the sample rows using the explicit schema.
sample_dataframe = spark.createDataFrame(
    data=data,
    schema=schema,
)

In [7]:
# Save the DataFrame in Delta format at the table path, using overwrite mode.
(
    sample_dataframe.write
    .format("delta")
    .mode("overwrite")
    .save("../Delta_Data/delta_table")
)

In [8]:
# Read the Delta table at the path written above back into a DataFrame.
getting_df = (
    spark.read
    .format("delta")
    .load("../Delta_Data/delta_table")
)

In [9]:
# Print the rows that were read back from the Delta table.
# NOTE(review): the rows in the output below are not in insertion order --
# presumably row order is not guaranteed on read; confirm before relying on it.
getting_df.show()

+---------+--------+---+
|firstname|lastname|age|
+---------+--------+---+
|   Anuska|   Karki| 18|
|   Anurag|   Karki| 23|
|    Sujan|   Karki| 48|
+---------+--------+---+



In [26]:
# Overwrite the Delta table with a completely new set of rows. This replaces
# the table contents rather than updating individual rows; the earlier rows
# are still readable through the versionAsOf reads further down.
data = [
    ("Robert", "Baratheon", 49),
    ("Eddard", "Stark", 47),
    ("Jamie", "Lannister", 30),
]

# Same three nullable columns as the first write.
schema = StructType(
    [
        StructField("firstname", StringType(), nullable=True),
        StructField("lastname", StringType(), nullable=True),
        StructField("age", IntegerType(), nullable=True),
    ]
)

sample_dataframe = spark.createDataFrame(
    data=data,
    schema=schema,
)

# Write in overwrite mode to the same path as the first write.
(
    sample_dataframe.write
    .format("delta")
    .mode("overwrite")
    .save("../Delta_Data/delta_table")
)

In [27]:
# Re-read the Delta table after the overwrite.
getting_df = (
    spark.read
    .format("delta")
    .load("../Delta_Data/delta_table")
)

In [28]:
# Print the rows after the overwrite; the output below shows only the new rows.
getting_df.show()

+---------+---------+---+
|firstname| lastname|age|
+---------+---------+---+
|   Robert|Baratheon| 49|
|    Jamie|Lannister| 30|
|   Eddard|    Stark| 47|
+---------+---------+---+



In [31]:
# Retrieving old versions: read the table as of version 0 and print it.
# The output below shows the rows from the first write.
df_ver0 = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load("../Delta_Data/delta_table")
)
df_ver0.show()

+---------+--------+---+
|firstname|lastname|age|
+---------+--------+---+
|   Anuska|   Karki| 18|
|   Anurag|   Karki| 23|
|    Sujan|   Karki| 48|
+---------+--------+---+



In [32]:
# Read the table as of version 1 and print it. The output below shows the
# rows from the second (overwriting) write.
df_ver1 = (
    spark.read
    .format("delta")
    .option("versionAsOf", 1)
    .load("../Delta_Data/delta_table")
)
df_ver1.show()

+---------+---------+---+
|firstname| lastname|age|
+---------+---------+---+
|   Robert|Baratheon| 49|
|    Jamie|Lannister| 30|
|   Eddard|    Stark| 47|
+---------+---------+---+

