#### Ingest the set of multiline JSON files in the qualifying folder

- read the multiline JSON files using an explicit schema
- add the ingestion date, rename columns and add the `data_source` column
- merge the output into the silver `qualifying` table

In [0]:
# Declare a "data_source" text widget (default value "testing") so the caller
# can parameterize this notebook, then read the value currently set on it.
dbutils.widgets.text("data_source", "testing")
value_data_source = dbutils.widgets.get("data_source")

In [0]:
%run "../../constants/configuration"

In [0]:
%run "../../utils/common_functions"

In [0]:
from pyspark.sql.functions import current_timestamp, col, concat, lit
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType

In [0]:
# Explicit schema applied when reading the qualifying JSON files.
# Only qualifyId is declared non-nullable; q1/q2/q3 are kept as strings.
qualifying_schema = (
    StructType()
    .add("qualifyId", IntegerType(), False)
    .add("raceId", IntegerType(), True)
    .add("driverId", IntegerType(), True)
    .add("constructorId", IntegerType(), True)
    .add("number", IntegerType(), True)
    .add("position", IntegerType(), True)
    .add("q1", StringType(), True)
    .add("q2", StringType(), True)
    .add("q3", StringType(), True)
)

In [0]:
# Read the "qualifying" folder under the bronze container as JSON, using the
# explicit schema. The multiline option is enabled because a single JSON
# record in these files may span several lines.
# NOTE(review): bronze_container_path is not defined in this notebook —
# presumably it comes from the configuration include; confirm.
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiline", "true")
    .json(f"{bronze_container_path}/qualifying")
)

In [0]:
# Pass the raw DataFrame through the add_ingestion_date helper.
# NOTE(review): add_ingestion_date is not defined in this notebook — presumably
# it comes from the common_functions include and appends an ingestion
# timestamp column; confirm the exact column name it adds.
qualifying_with_ingestion_date_df = add_ingestion_date(qualifying_df)


##### Rename columns and add the `data_source` column

In [0]:
# Convert the camelCase id columns to snake_case and tag every row with the
# data source value read from the notebook widget.
qualifying_final_df = (
    qualifying_with_ingestion_date_df
    .withColumnRenamed("qualifyId", "qualifying_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumn("data_source", lit(value_data_source))
)

In [0]:
# Hand the prepared rows to merge_delta_data for the silver "qualifying"
# target, matching target (tgt) and source (src) rows on qualifying id and
# race id. This replaces an earlier full overwrite of
# motor_dev.silver.qualifying via saveAsTable.
# NOTE(review): merge_delta_data is not defined in this notebook — presumably
# it comes from the common_functions include; confirm what the trailing
# "race_id" argument controls (looks like a partition column).
merge_condition = (
    "tgt.qualifying_id = src.qualifying_id "
    "AND tgt.race_id = src.race_id"
)
merge_delta_data(qualifying_final_df, "silver", "qualifying", merge_condition, "race_id")