# Ingest Lap Times Data

In [0]:
# Declare a notebook text widget named "env" (default "dev", label "Environment")
# so the target environment can be chosen when the notebook is run.
dbutils.widgets.text("env", "dev", "Environment")

# Read the widget's current value; later cells use it to gate dev-only previews.
env = dbutils.widgets.get("env")

In [0]:
%run ../config $env=$env

In [0]:
%run ../utils

### `lap_times` table schema
| Field        | Type         | Null | Key | Default | Extra | Description                       |
|--------------|--------------|------|-----|---------|-------|-----------------------------------|
| raceId       | int(11)      | NO   | PRI | NULL    |       | Foreign key link to races table   |
| driverId     | int(11)      | NO   | PRI | NULL    |       | Foreign key link to drivers table |
| lap          | int(11)      | NO   | PRI | NULL    |       | Lap number                        |
| position     | int(11)      | YES  |     | NULL    |       | Driver race position              |
| time         | varchar(255) | YES  |     | NULL    |       | Lap time e.g. "1:43.762"          |
| milliseconds | int(11)      | YES  |     | NULL    |       | Lap time in milliseconds          |


In [0]:
# Load the raw lap-times CSV into a Spark DataFrame.
csv_file_path = raw_data_folder_path + "lap_times.csv"

# Explicit schema instead of inferSchema: inference costs an extra pass over the
# file and lets column types drift with the data. Names and types mirror the
# lap_times table documented above (int(11) -> INT, varchar(255) -> STRING).
# Source camelCase names are kept here because later cells rename them.
# Identifiers are backtick-quoted since `time` and `position` are SQL keywords.
lap_times_schema = (
    "`raceId` INT, `driverId` INT, `lap` INT, "
    "`position` INT, `time` STRING, `milliseconds` INT"
)

df = (
    spark.read.format("csv")
    .option("header", True)
    # Validate the header row against the schema rather than silently mapping
    # columns by position if the file layout ever changes.
    .option("enforceSchema", False)
    .schema(lap_times_schema)
    .load(csv_file_path)
)

# Preview the raw data only in the dev environment.
if env == "dev":
    df.display()

In [0]:
import pyspark.sql.functions as F

# Step 1: switch the camelCase key columns to snake_case.
df_renamed = (
    df.withColumnRenamed("driverId", "driver_id")
      .withColumnRenamed("raceId", "race_id")
)

# Step 2: add an ingestion_date column holding Spark's current timestamp.
df_transformed = df_renamed.withColumn("ingestion_date", F.current_timestamp())

# Preview the transformed data only in the dev environment.
if env == "dev":
    df_transformed.display()

In [0]:
# Run the shared fillna_str helper with the r"\N" marker.
# NOTE(review): fillna_str is not defined in this notebook; presumably it comes
# from the `%run ../utils` cell. Confirm there how the marker is handled.
df_transformed = fillna_str(df_transformed, r"\N")

# Preview the result only in the dev environment.
if env == "dev":
    df_transformed.display()

In [0]:
# Persist the result as the Delta table "bronze_tbl_lap_times", replacing any
# existing contents of that table on each run.
(
    df_transformed.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("bronze_tbl_lap_times")
)