# Ingest drivers data

In [0]:
# Declare a text widget named "env" (default "dev", shown as "Environment")
# and read its current value so later cells can branch on the environment.
dbutils.widgets.text(name="env", defaultValue="dev", label="Environment")
env = dbutils.widgets.get("env")

In [0]:
%run ../config $env=$env

In [0]:
# Full path of the drivers CSV; `raw_data_folder_path` is not defined in this
# notebook -- presumably it is provided by the ../config notebook run earlier.
csv_file_path = f"{raw_data_folder_path}drivers.csv"

# Read the CSV using its first row as column names and letting Spark infer
# the column types from the data.
csv_reader = spark.read.format("csv").options(header=True, inferSchema=True)
df = csv_reader.load(csv_file_path)

# Preview the raw frame only when running against the dev environment.
if env == "dev":
    df.display()

In [0]:
import pyspark.sql.functions as F

# Column expressions used by the transformation below.
# Full name: forename and surname joined by a single space.
driver_full_name = F.concat(F.col("forename"), F.lit(" "), F.col("surname"))
# The raw file uses the literal text \N where no number is present; map it to
# null and keep every other value as-is.
driver_number = F.when(
    F.col("number") == r"\N", F.lit(None)
).otherwise(F.col("number"))

# Step 1: rename the camelCase / abbreviated source columns to snake_case.
df_renamed = (
    df
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("driverRef", "driver_ref")
    .withColumnRenamed("dob", "date_of_birth")
)

# Step 2: add the audit timestamp and derived name, normalise `number` to an
# integer column, then drop the columns that are no longer needed.
df_transformed = (
    df_renamed
    .withColumn("ingestion_date", F.current_timestamp())
    .withColumn("name", driver_full_name)
    .withColumn("number", driver_number.cast("integer"))
    .drop("url", "forename", "surname")
)

# Preview the transformed frame only when running against the dev environment.
if env == "dev":
    df_transformed.display()

In [0]:
# Save the transformed drivers data as the Delta table `bronze_tbl_drivers`,
# replacing whatever the table currently holds.
(
    df_transformed.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("bronze_tbl_drivers")
)