### Read all files

In [0]:
# Notebook parameter "p_file_date" (text widget, default "2021-03-21");
# its current value is read into v_file_date for use by later cells.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Processed drivers table, with generic column names prefixed so they stay
# distinguishable after the joins further down.
drivers_df = (
    spark.read.format("delta")
    .load(f"{processed_folder_path}/drivers")
    .withColumnRenamed("number", "driver_number")
    .withColumnRenamed("name", "driver_name")
    .withColumnRenamed("nationality", "driver_nationality")
)

In [0]:
# Processed constructors table; the constructor "name" column becomes "team".
constructors_df = (
    spark.read.format("delta")
    .load(f"{processed_folder_path}/constructors")
    .withColumnRenamed("name", "team")
)

In [0]:
# Processed circuits table; "location" is renamed to "circuit_location".
circuits_df = (
    spark.read.format("delta")
    .load(f"{processed_folder_path}/circuits")
    .withColumnRenamed("location", "circuit_location")
)

In [0]:
# Processed races table; "name" and "race_timestamp" are renamed to
# "race_name" and "race_date" respectively.
races_df = (
    spark.read.format("delta")
    .load(f"{processed_folder_path}/races")
    .withColumnRenamed("name", "race_name")
    .withColumnRenamed("race_timestamp", "race_date")
)

In [0]:
# Load the processed results, then keep only the rows for the requested file date.
# The widget value is compared as a Column literal rather than being interpolated
# into a SQL predicate string, so an unexpected value (for example one containing
# a single quote) cannot break or change the filter expression.
results_all_df = spark.read.format("delta").load(f"{processed_folder_path}/results")

results_df = (
    results_all_df
    .filter(results_all_df["file_date"] == v_file_date)
    # Renamed so these columns do not clash with same-named columns in the joins below.
    .withColumnRenamed("time", "race_time")
    .withColumnRenamed("race_id", "result_race_id")
    .withColumnRenamed("file_date", "result_file_date")
)

### Join circuits to races

In [0]:
# Inner-join races to circuits on circuit_id and keep only the race
# attributes plus the circuit location.
race_circuits_df = (
    races_df
    .join(circuits_df, races_df.circuit_id == circuits_df.circuit_id, "inner")
    .select(
        races_df.race_id,
        races_df.race_year,
        races_df.race_name,
        races_df.race_date,
        circuits_df.circuit_location,
    )
)

### Join results to the other DataFrames

In [0]:
# Enrich each result row with its race/circuit, driver and constructor details.
# No join type is given, so Spark's default (inner) join applies to all three.
race_results_df = (
    results_df
    .join(race_circuits_df, results_df.result_race_id == race_circuits_df.race_id)
    .join(drivers_df, results_df.driver_id == drivers_df.driver_id)
    .join(constructors_df, results_df.constructor_id == constructors_df.constructor_id)
)

In [0]:
from pyspark.sql.functions import *

In [0]:
# Presentation-layer shape: pick the reporting columns, stamp the load time,
# and expose the results file date under its original name "file_date".
final_df = (
    race_results_df
    .select(
        "race_id",
        "race_year",
        "race_name",
        "race_date",
        "circuit_location",
        "driver_name",
        "driver_number",
        "driver_nationality",
        "team",
        "grid",
        "fastest_lap",
        "race_time",
        "points",
        "position",
        "result_file_date",
    )
    .withColumn("created_date", current_timestamp())
    .withColumnRenamed("result_file_date", "file_date")
)

In [0]:
# Spot check: 2020 Abu Dhabi Grand Prix rows, highest points first.
display(
    final_df
    .filter("race_year == 2020 and race_name == 'Abu Dhabi Grand Prix'")
    .orderBy(desc("points"))
)

race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,file_date,created_date


In [0]:
#overwrite_partition(final_df, "f1_presentation", "race_results", "race_id")

In [0]:
# Match predicate pairing target (tgt) and source (src) rows on driver name and race id.
merge_condition = "tgt.driver_name = src.driver_name AND tgt.race_id = src.race_id"
# Write final_df into f1_presentation.race_results through the merge helper.
# NOTE(review): merge_delta_data and presentation_folder_path are not defined in this
# notebook; presumably they come from the %run includes above. The last argument
# ('race_id') looks like the partition column; confirm against the helper's signature.
merge_delta_data(final_df, 'f1_presentation', 'race_results', presentation_folder_path, merge_condition, 'race_id')

In [0]:
%sql
-- Spot check of the written table for one file date.
-- The date is hard-coded here and is not tied to the p_file_date widget.
select * from f1_presentation.race_results where file_date = '2021-04-18';

race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,file_date,created_date
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Mick Schumacher,,German,Haas F1 Team,18,58.0,\N,0.0,16.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Nikita Mazepin,,Russian,Haas F1 Team,19,55.0,\N,0.0,17.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Yuki Tsunoda,,Japanese,AlphaTauri,20,49.0,+73.184,0.0,12.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Nicholas Latifi,6.0,Canadian,Williams,14,,\N,0.0,,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,George Russell,63.0,British,Williams,12,28.0,\N,0.0,,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Lando Norris,4.0,British,McLaren,7,63.0,+23.702,15.0,3.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Charles Leclerc,16.0,Monegasque,Ferrari,4,60.0,+25.579,12.0,4.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Pierre Gasly,10.0,French,AlphaTauri,5,52.0,+52.818,6.0,7.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Antonio Giovinazzi,99.0,Italian,Alfa Romeo,17,57.0,\N,0.0,14.0,2021-04-18,2025-08-06T05:22:04.534198Z
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Lance Stroll,18.0,Canadian,Aston Martin,10,59.0,+56.909,4.0,8.0,2021-04-18,2025-08-06T05:22:04.534198Z
