In [0]:
# 1. Read the notebook parameters from the Databricks widgets.
#    The first three identify the target table; the Kaggle pair identifies
#    the source dataset and the file to load from it.
catalog, schema, dataset = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalog", "schema", "dataset")
)

# Three-part table name: <catalog>.<schema>.<dataset>
path = ".".join([catalog, schema, dataset])
print(f"Iris Dataset Path: {path}")

kaggle_dataset_id, kaggle_dataset_path = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("kaggle_dataset_id", "kaggle_dataset_path")
)

In [0]:
import kagglehub
from kagglehub import KaggleDatasetAdapter
from pyspark.sql.functions import col

# 2. Load the Iris dataset from KaggleHub through the PANDAS adapter,
#    then convert the result into a Spark DataFrame.
raw_df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    kaggle_dataset_id,
    kaggle_dataset_path,
)
iris_df = spark.createDataFrame(raw_df)

# 3. Preprocess: rename the source columns to snake_case, then cast the four
#    measurement columns to double.
column_renames = {
    "Id": "id",
    "SepalLengthCm": "sepal_length",
    "SepalWidthCm": "sepal_width",
    "PetalLengthCm": "petal_length",
    "PetalWidthCm": "petal_width",
    "Species": "species",
}
measurement_columns = ("sepal_length", "sepal_width", "petal_length", "petal_width")

# Start from the untouched Spark frame so re-running this cell is idempotent.
iris_clean = iris_df
for source_name, target_name in column_renames.items():
    iris_clean = iris_clean.withColumnRenamed(source_name, target_name)
for measurement in measurement_columns:
    iris_clean = iris_clean.withColumn(measurement, col(measurement).cast("double"))

# 4. Save table to UC as a Delta table, overwriting what is already at `path`.
iris_clean.write.format("delta").mode("overwrite").saveAsTable(path)

# 5. Best-effort drop of a same-named constraint before re-adding it below.
#    Presumably the usual failure is "constraint does not exist" on a first
#    run (TODO confirm), so a failure must not abort the notebook. It is
#    reported instead of silently swallowed so unexpected causes stay visible.
constraint_name = f"{dataset}_pk"
drop_sql = f"ALTER TABLE {path} DROP CONSTRAINT {constraint_name}"
try:
    spark.sql(drop_sql)
    print(f"Dropped existing constraint {constraint_name}")
except Exception as drop_error:
    # Only the first line of the message: engine errors can span many lines.
    reason = next(iter(str(drop_error).splitlines()), "")
    print(
        f"Constraint {constraint_name} was not dropped "
        f"({type(drop_error).__name__}: {reason}); continuing"
    )

# 6. Enforce NOT NULL on the key column
spark.sql(f"ALTER TABLE {path} ALTER COLUMN id SET NOT NULL")

# 7. Add the primary-key constraint
add_sql = f"ALTER TABLE {path} ADD CONSTRAINT {constraint_name} PRIMARY KEY(id)"
spark.sql(add_sql)
print(f"Added primary‐key constraint {constraint_name}")