# Ingest lap times data

In [0]:
%run ../SetUp

### `lap_times` table
| Field        | Type         | Null | Key | Default | Extra | Description                       |
|--------------|--------------|------|-----|---------|-------|-----------------------------------|
| raceId       | int(11)      | NO   | PRI | NULL    |       | Foreign key link to races table   |
| driverId     | int(11)      | NO   | PRI | NULL    |       | Foreign key link to drivers table |
| lap          | int(11)      | NO   | PRI | NULL    |       | Lap number                        |
| position     | int(11)      | YES  |     | NULL    |       | Driver race position              |
| time         | varchar(255) | YES  |     | NULL    |       | Lap time e.g. "1:43.762"          |
| milliseconds | int(11)      | YES  |     | NULL    |       | Lap time in milliseconds          |


In [0]:
from pyspark.sql.functions import current_timestamp
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# Folder holding the raw lap-time CSV files.
# NOTE(review): `session_helper` is presumably provided by the ../SetUp notebook — confirm.
csv_file_path = session_helper.get_storage_account_url(folder="raw/lap_times")

# Explicit schema, mirroring the lap_times table definition documented above:
# the three key columns are declared non-nullable, the remaining ones nullable.
lap_times_schema = (
    StructType()
    .add("raceId", IntegerType(), nullable=False)
    .add("driverId", IntegerType(), nullable=False)
    .add("lap", IntegerType(), nullable=False)
    .add("position", IntegerType(), nullable=True)
    .add("time", StringType(), nullable=True)
    .add("milliseconds", IntegerType(), nullable=True)
)

# The files are read without a header row and without schema inference;
# the explicit schema above supplies column names and types.
lap_times_reader = (
    spark.read
    .format("csv")
    .schema(lap_times_schema)
    .options(header=False, inferSchema=False)
)
lap_times_df = lap_times_reader.load(csv_file_path)

# Uncomment to preview the raw data:
#lap_times_df.display()

In [0]:
import pyspark.sql.functions as F

# Step 1: switch the two camelCase key columns to snake_case.
# The renames are independent of each other and keep each column in place.
renamed_lap_times_df = lap_times_df.withColumnRenamed("raceId", "race_id")
renamed_lap_times_df = renamed_lap_times_df.withColumnRenamed("driverId", "driver_id")

# Step 2: append an audit column holding the current timestamp of this run.
transformed_lap_times_df = renamed_lap_times_df.withColumn(
    "ingestion_date", F.current_timestamp()
)

# Uncomment to preview the transformed data:
#transformed_lap_times_df.display()

In [0]:
# Persist the transformed lap times as the bronze Delta table.
#
# The table is overwritten in place rather than dropped and re-created:
# a Delta overwrite is a single atomic commit, so the table never disappears
# for concurrent readers, a failed write leaves the previous data intact, and
# the table history is retained. `overwriteSchema` lets the overwrite replace
# the table schema as well, which is what the former DROP TABLE allowed.
(
    transformed_lap_times_df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("dev.lap_times_bronze")
)