# 0. Prepare configurations and race_results table

In [0]:
%run "../includes/configuration"

In [0]:
# Load the race_results dataset from the presentation layer.
# NOTE(review): `presentation_folder_path` is presumably defined by the
# configuration notebook pulled in via %run — confirm.
race_results_df = (
    spark.read
    .parquet(f'{presentation_folder_path}/race_results')
)

# Show the available column names as a quick schema sanity check.
race_results_df.columns

['race_year',
 'race_name',
 'race_date',
 'circuit_location',
 'driver_name',
 'driver_number',
 'driver_nationality',
 'team',
 'grid',
 'fastest_lap',
 'race_time',
 'points',
 'position',
 'created_date']

# 1. Get constructor standings (total points, wins and rank) per team and race year

In [0]:
from pyspark.sql.functions import sum, count, when, rank, desc
from pyspark.sql.window import Window

In [0]:
# Ranking window: within each season, order teams by total points and break
# ties on the number of wins (both descending).
constructor_rank_spec = (
    Window
    .partitionBy('race_year')
    .orderBy(desc('points'), desc('wins'))
)

# One row per (team, season) with total points, number of wins and the
# resulting rank in that season.
# Note: `sum` here is pyspark.sql.functions.sum (it shadows the builtin).
constructor_standings_df = (
    race_results_df
    .groupBy('team', 'race_year')
    .agg(
        sum('points').alias('points'),
        # when() without otherwise() yields NULL for rows that are not a
        # first place, and count() ignores NULLs, so only wins are counted.
        count(when(race_results_df['position'] == 1, True)).alias('wins'),
    )
    .withColumn('rank', rank().over(constructor_rank_spec))
    .select('*')
)

In [0]:
# Spot-check the computed standings for a single season.
(
    constructor_standings_df
    .filter('race_year = 2020')
    .display()
)

team,race_year,points,wins,rank
Mercedes,2020,573.0,13,1
Red Bull,2020,319.0,2,2
Racing Point,2020,210.0,1,3
McLaren,2020,202.0,0,4
Renault,2020,181.0,0,5
Ferrari,2020,131.0,0,6
AlphaTauri,2020,107.0,1,7
Alfa Romeo,2020,8.0,0,8
Haas F1 Team,2020,3.0,0,9
Williams,2020,0.0,0,10


# 2. Save to parquet in presentation folder

In [0]:
# Persist the constructor standings built in this notebook to their own
# presentation-layer dataset. `driver_standings_df` is not defined anywhere in
# this notebook, and writing to the driver_standings path would clobber a
# different dataset. mode('overwrite') replaces any previous output, so the
# cell can be re-run.
constructor_standings_df.write.mode('overwrite').parquet(f'{presentation_folder_path}/constructor_standings')