#### Ingest the circuits CSV file

In [0]:
# Notebook parameter: declare a text widget named "data_source" (default
# "testing") and read its current value. The value is later written into the
# "data_source" column of the output.
data_source_widget = "data_source"
dbutils.widgets.text(data_source_widget, "testing")
value_data_source = dbutils.widgets.get(data_source_widget)


In [0]:
%run "../../constants/configuration"


In [0]:
%run "../../utils/common_functions"

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType
from pyspark.sql.functions import col, current_timestamp, lit

In [0]:
 # Explicit schema applied when reading circuits.csv, one entry per CSV column:
 # (column name, Spark data type, nullable flag). Only circuitId is declared
 # non-nullable; every other column is declared nullable.
 circuit_columns = [
     ("circuitId", IntegerType(), False),
     ("circuitRef", StringType(), True),
     ("name", StringType(), True),
     ("location", StringType(), True),
     ("country", StringType(), True),
     ("lat", DoubleType(), True),
     ("lng", DoubleType(), True),
     ("alt", IntegerType(), True),
     ("url", StringType(), True),
 ]
 circuit_schema = StructType(
     [
         StructField(column_name, data_type, is_nullable)
         for column_name, data_type, is_nullable in circuit_columns
     ]
 )

In [0]:
# Read the raw circuits file. The first CSV row is treated as a header and the
# explicit circuit_schema is applied rather than inferring column types.
# bronze_container_path is not defined in this notebook; presumably it comes
# from the %run of ../../constants/configuration (TODO confirm).
circuits_csv_path = f"{bronze_container_path}/circuits.csv"
circuit_reader = spark.read.option("header", "true").schema(circuit_schema)
circuit_df = circuit_reader.csv(circuits_csv_path)

##### Select the required columns

In [0]:
# Columns carried forward from the raw frame, in output order. The "url"
# column declared in the schema is intentionally not selected.
required_columns = [
    "circuitId",
    "circuitRef",
    "name",
    "location",
    "country",
    "lat",
    "lng",
    "alt",
]
circuit_selected_df = circuit_df.select(
    *[col(column_name) for column_name in required_columns]
)

##### Rename the required columns

In [0]:
# Source column name -> output column name. Columns not listed here
# (name, location, country) keep their original names.
column_renames = {
    "circuitId": "circuit_id",
    "circuitRef": "circuit_ref",
    "lat": "latitude",
    "lng": "longitude",
    "alt": "altitude",
}

# Start from the selected frame and apply each rename in turn.
circuit_renamed_df = circuit_selected_df
for source_name, target_name in column_renames.items():
    circuit_renamed_df = circuit_renamed_df.withColumnRenamed(source_name, target_name)

# Tag every row with the value read from the "data_source" widget.
circuit_renamed_df = circuit_renamed_df.withColumn("data_source", lit(value_data_source))

##### Add the ingestion date column to the DataFrame

In [0]:
# add_ingestion_date is not defined in this notebook; presumably it is provided
# by the %run of ../../utils/common_functions and returns the input DataFrame
# with an ingestion timestamp column appended (TODO confirm against the helper).
circuit_final_df = add_ingestion_date(circuit_renamed_df)


##### Write the data to the silver layer as a Delta table

In [0]:
# Persist the processed circuits data as a table in Delta format. "overwrite"
# mode replaces whatever the target table currently holds on each run.
silver_circuits_table = "motor_dev.silver.circuits"
circuit_writer = circuit_final_df.write.mode("overwrite").format("delta")
circuit_writer.saveAsTable(silver_circuits_table)