# 0. Prepare configurations and race_results table

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter: the file date whose data this run processes.
# The widget supplies '2021-03-21' when no value is passed in.
file_date_widget = 'p_file_date'
dbutils.widgets.text(file_date_widget, '2021-03-21')
param_file_date = dbutils.widgets.get(file_date_widget)

In [0]:
# Standings are season totals, so every race of each affected season is needed,
# not only the races that arrived with this file date. Filtering on file_date
# alone would make an incremental run upsert a season's row with the points and
# wins of just the newly loaded races.
all_race_results_df = spark.read.table('f1_presentation.race_results')

# Seasons that received data in this load. Column comparison is used instead of
# an interpolated SQL string so the widget value is never parsed as SQL.
file_date_results_df = all_race_results_df\
    .filter(all_race_results_df.file_date == param_file_date)
affected_race_years = [
    row.race_year
    for row in file_date_results_df.select('race_year').distinct().collect()
]

# All race results (from any load) belonging to the affected seasons.
# An empty year list yields an empty DataFrame, so nothing is upserted later.
race_results_df = all_race_results_df\
    .filter(all_race_results_df.race_year.isin(affected_race_years))
race_results_df.columns

['race_id',
 'race_year',
 'race_name',
 'race_date',
 'circuit_location',
 'driver_name',
 'driver_number',
 'driver_nationality',
 'team',
 'grid',
 'fastest_lap',
 'race_time',
 'points',
 'position',
 'created_date',
 'file_date']

# 1. Get constructor standings (points, wins and rank) per team and race year

In [0]:
from pyspark.sql.functions import sum, count, when, rank, desc, lit
from pyspark.sql.window import Window

In [0]:
# Ranking is done independently per season: highest points first,
# number of wins breaks ties on points.
constructor_rank_spec = (
    Window
    .partitionBy('race_year')
    .orderBy(desc('points'), desc('wins'))
)

# One row per (team, race_year):
#   points    - sum of the points column
#   wins      - number of rows with position == 1 (when() yields null otherwise,
#               and count() ignores nulls)
#   rank      - rank within the season according to constructor_rank_spec
#   file_date - the file date this run was parameterised with
constructor_standings_df = (
    race_results_df
    .groupBy('team', 'race_year')
    .agg(
        sum('points').alias('points'),
        count(when(race_results_df['position'] == 1, True)).alias('wins'),
    )
    .withColumn('rank', rank().over(constructor_rank_spec))
    .withColumn('file_date', lit(param_file_date))
)

In [0]:
# Spot check: show the computed standings for the 2020 season only.
display(constructor_standings_df.where('race_year = 2020'))

team,race_year,points,wins,rank,file_date


# 2. Upsert into the Delta table in the presentation database

In [0]:
# A standings row is identified by (team, race_year). The condition is built
# with implicit string concatenation so that source-code indentation does not
# end up inside the SQL text (a backslash continuation inside the literal
# embeds the next line's leading spaces in the string).
merge_condition = (
    'target.team = source.team and '
    'target.race_year = source.race_year'
)

# upsert_to_delta_table comes from the common_functions include; presumably it
# merges constructor_standings_df into f1_presentation.constructor_standings
# using merge_condition, with 'race_year' as the partition column - confirm
# against the helper's definition.
upsert_to_delta_table(
    'f1_presentation',
    'constructor_standings',
    presentation_folder_path,
    constructor_standings_df,
    merge_condition,
    'race_year',
)

In [0]:
# Read the saved table back and show how many rows carry this run's file date.
final_df = spark.read.table('f1_presentation.constructor_standings')
rows_for_file_date = final_df.where(f"file_date = '{param_file_date}'").count()
display(rows_for_file_date)

10