In [0]:
# Create the SCD Type 2 target table. IF NOT EXISTS keeps this cell idempotent,
# so re-running the notebook does not fail once the table has been created.
# Columns: id/name carry the tracked attributes; effective_date/end_date bound
# the validity window of each version (the merge cell treats end_date IS NULL
# as "current version").
spark.sql("""
          CREATE TABLE IF NOT EXISTS population_metrics.default.country_regions_scd_2
          (
              id int,
              name string,
              effective_date timestamp,
              end_date timestamp
          )
          """
        )

In [0]:
# Show the current contents of the SCD2 table.
spark.table("population_metrics.default.country_regions_scd_2").display()

In [0]:
# Load the incoming country-region rows from the landing volume.
# The CSV files carry a header row; the schema is given explicitly
# rather than inferred.
df_changes = (
    spark.read
    .format("csv")
    .schema("id int, name string")
    .option("header", True)
    .load("/Volumes/population_metrics/landing/datasets/countries_dataset/csv_data/country_regions/")
)

df_changes.display()

In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import lit, current_timestamp
from pyspark.sql.types import TimestampType, IntegerType, StringType

# Apply `df_changes` to the SCD Type 2 table:
#   1. close (set end_date on) the current version of every id whose name changed
#   2. append a new current version for every new or changed id
# Rows whose name is identical to the current version are left untouched, so
# re-running this cell with the same input does not create duplicate history.
target_table_name = "population_metrics.default.country_regions_scd_2"
dt = DeltaTable.forName(spark, target_table_name)

# Cast the incoming rows to the target column types up front so that the
# comparison, the merge and the append all see the same types.
df_staged = df_changes.select(
    df_changes["id"].cast(IntegerType()).alias("id"),
    df_changes["name"].cast(StringType()).alias("name"),
)

# Versions that are currently open in the target (end_date IS NULL).
df_current = dt.toDF().where("end_date IS NULL").select("id", "name")

# Keep only source rows that need a new version: ids with no open version, or
# ids whose name differs from the open version. eqNullSafe makes NULL == NULL
# count as "unchanged".
df_new_versions = df_staged.join(
    df_current,
    on=(df_staged["id"] == df_current["id"])
    & df_staged["name"].eqNullSafe(df_current["name"]),
    how="left_anti",
)

# Step 1. Close the open version of every id that has a changed row.
# Brand-new ids simply find no match here.
dt.alias("t").merge(
    source=df_new_versions.alias("s"),
    condition="t.id = s.id AND t.end_date IS NULL",
).whenMatchedUpdate(
    set={
        "t.end_date": current_timestamp()
    }
).execute()

# Step 2. Append the new open versions.
# The write result is intentionally not assigned: saveAsTable() returns None,
# and assigning it to `df_changes` would destroy the change set.
# NOTE: current_timestamp() is evaluated separately by the merge and by this
# append, so a closed row's end_date and its successor's effective_date can
# differ slightly.
(
    df_new_versions
    .withColumn("effective_date", current_timestamp())
    .withColumn("end_date", lit(None).cast(TimestampType()))
    .select("id", "name", "effective_date", "end_date")
    .write
    .mode("append")
    .saveAsTable(target_table_name)
)

In [0]:
# Show the SCD2 table again to inspect the result of the merge/append above.
spark.table("population_metrics.default.country_regions_scd_2").display()

In [0]:
# Hand-built change set with a single row, replacing the CSV-based `df_changes`.
data = [{"id": 10, "name": "USA"}]

# Pass the schema explicitly so the column types match the CSV load and the
# target table (id int, name string); inference from a Python int would
# otherwise produce a bigint `id`.
df_changes = spark.createDataFrame(data, schema="id int, name string")
df_changes.display()