In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter: a text widget named p_file_date (default "2021-03-21").
# Its current value selects which file_date is processed below.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank, sum, when, count, col


### Find the race years for which the data is to be processed

In [0]:
# Collect the distinct race_year values that appear in the race_results rows
# whose file_date equals the p_file_date widget value.
# The predicate is built as a Column expression instead of interpolating the
# widget value into a SQL string, so a value containing a quote cannot break
# or alter the filter.
race_results_df_list = (
    spark.read.format("delta")
    .load(f"{presentation_folder_path}/race_results")
    .filter(col("file_date") == v_file_date)
    .select("race_year")
    .distinct()
    .collect()
)

In [0]:
# Flatten the collected Row objects into a plain list of race_year values.
race_year_list = [row.race_year for row in race_results_df_list]

In [0]:
# Re-read race_results, this time keeping every row of the affected years
# (not only the rows of the current file_date).
race_results_df = (
    spark.read.format("delta")
    .load(f"{presentation_folder_path}/race_results")
    .filter(col("race_year").isin(race_year_list))
)

In [0]:
# Aggregate per (race_year, driver_name, driver_nationality, team):
#   total_points - sum of points
#   wins         - number of rows with position == 1 (when() without an
#                  otherwise() yields NULL for other rows, which count() skips)
# Note: `sum` and `count` are the pyspark.sql.functions versions imported
# above, not the Python built-ins.
driver_standings_df = (
    race_results_df
    .groupBy("race_year", "driver_name", "driver_nationality", "team")
    .agg(
        sum("points").alias("total_points"),
        count(when(col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
# Rank within each race_year: highest total_points first, ties broken by
# the higher number of wins.
driver_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(col("total_points").desc(), col("wins").desc())
)
final_df = driver_standings_df.withColumn("rank", rank().over(driver_rank_spec))


### Write to DB

In [0]:
# final_df.write.mode("overwrite").parquet(f"{presentation_folder_path}/driver_standings")

In [0]:
# final_df.write.mode("overwrite").format("parquet").saveAsTable("f1_presentation.driver_standings")

In [0]:
%sql
--DROP TABLE f1_presentation.driver_standings;

In [0]:
# move_column_to_last is not defined in this notebook; presumably it comes from
# the includes run at the top and returns final_df with race_year as the last
# column — TODO confirm against the helper.
final_df_arranged = move_column_to_last(final_df, "race_year")
# Earlier write path, disabled and kept for reference only:
# write_to_database(final_df, "f1_presentation", "driver_standings", "race_year")

In [0]:
# Keep a single row per (driver_name, race_year) — presumably so that the merge
# on those two columns sees at most one source row per key; confirm.
# NOTE(review): the standings are grouped by team as well, so a driver with more
# than one team in a season has several rows at this point. dropDuplicates keeps
# an arbitrary one of them, the points on the discarded rows are lost, and
# "rank" was computed before this dedup. Confirm whether this is intended.
final_df_arranged_to_merge = final_df_arranged.dropDuplicates(["driver_name","race_year"])

In [0]:
# Match target (tgt) and source (src) rows on driver and season, then hand the
# deduplicated standings to the shared merge helper.
merge_condition = (
    "tgt.driver_name = src.driver_name "
    "AND tgt.race_year = src.race_year"
)
merge_delta_data(
    final_df_arranged_to_merge,
    "f1_presentation",
    "driver_standings",
    presentation_folder_path,
    merge_condition,
    "race_year",
)

In [0]:
%sql
-- Inspect the merged standings, newest season first.
SELECT *
FROM f1_presentation.driver_standings
ORDER BY race_year DESC;