# 0. Prepare configurations and tables

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
from datetime import datetime

# Expose the file date as a notebook widget so a caller can pass it in; the
# default value is what the widget holds when nothing is supplied.
dbutils.widgets.text('p_file_date', '2021-03-21')
param_file_date = dbutils.widgets.get('p_file_date').strip()

# Fail fast on a malformed date. The value is later used to filter the results
# table and is stamped onto every output row, so a bad value would otherwise
# just select nothing and pass silently.
try:
    datetime.strptime(param_file_date, '%Y-%m-%d')
except ValueError as exc:
    raise ValueError(
        f"p_file_date must be a date in YYYY-MM-DD format, got {param_file_date!r}"
    ) from exc

In [0]:
# Processed races table; the trailing expression lists its columns for reference.
races_df = spark.table('f1_processed.races')
races_df.columns

['race_id',
 'race_year',
 'round',
 'circuit_id',
 'name',
 'ingestion_date',
 'race_timestamp',
 'data_source',
 'file_date']

In [0]:
# Processed circuits table; the trailing expression lists its columns for reference.
circuits_df = spark.table('f1_processed.circuits')
circuits_df.columns

['circuit_id',
 'circuit_ref',
 'name',
 'location',
 'country',
 'latitude',
 'longitude',
 'altitude',
 'data_source',
 'file_date',
 'ingestion_date']

In [0]:
# Processed drivers table; the trailing expression lists its columns for reference.
drivers_df = spark.table('f1_processed.drivers')
drivers_df.columns

['driver_id',
 'driver_ref',
 'number',
 'code',
 'name',
 'dob',
 'nationality',
 'data_source',
 'file_date',
 'ingestion_date']

In [0]:
# Processed constructors table; the trailing expression lists its columns for reference.
constructors_df = spark.table('f1_processed.constructors')
constructors_df.columns

['constructor_id',
 'constructor_ref',
 'name',
 'nationality',
 'data_source',
 'file_date',
 'ingestion_date']

In [0]:
# Only the results belonging to the requested file date are processed.
# The comparison is built as a Column expression rather than an interpolated
# SQL string, so the widget value is always treated as a literal and can never
# change the shape of the predicate.
results_all_df = spark.read.table('f1_processed.results')
results_df = results_all_df.filter(results_all_df['file_date'] == param_file_date)
results_df.columns

['result_id',
 'race_id',
 'driver_id',
 'constructor_id',
 'number',
 'grid',
 'position',
 'position_text',
 'position_order',
 'points',
 'laps',
 'time',
 'milliseconds',
 'fastest_lap',
 'rank',
 'fastest_lap_time',
 'fastest_lap_speed',
 'data_source',
 'file_date',
 'ingestion_date']

# 1. Get the required joined table

In [0]:
from pyspark.sql.functions import current_timestamp, lit

In [0]:
# Enrich the selected results with constructor, driver, race and circuit
# attributes. All joins are inner joins, so a result row whose key is missing
# from any of the other tables is dropped.
joined_df = (
    results_df
    .join(constructors_df, on='constructor_id', how='inner')
    .join(drivers_df, on='driver_id', how='inner')
    .join(races_df, on='race_id', how='inner')
    .join(circuits_df, on='circuit_id', how='inner')
)

# Several inputs share column names (name, number, nationality, ...), so every
# output column is taken from its owning DataFrame explicitly and renamed.
presentation_columns = [
    results_df['race_id'].alias('race_id'),
    races_df['race_year'].alias('race_year'),
    races_df['name'].alias('race_name'),
    races_df['race_timestamp'].alias('race_date'),
    circuits_df['location'].alias('circuit_location'),
    drivers_df['name'].alias('driver_name'),
    drivers_df['number'].alias('driver_number'),
    drivers_df['nationality'].alias('driver_nationality'),
    constructors_df['name'].alias('team'),
    results_df['grid'].alias('grid'),
    results_df['fastest_lap'].alias('fastest_lap'),
    results_df['time'].alias('race_time'),
    results_df['points'].alias('points'),
    results_df['position'].alias('position'),
]

# Stamp each row with the load time and the file date being processed.
final_df = (
    joined_df
    .select(*presentation_columns)
    .withColumn('created_date', current_timestamp())
    .withColumn('file_date', lit(param_file_date))
)

# Preview a handful of rows.
display(final_df.limit(10))


race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,created_date,file_date
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Lewis Hamilton,44,British,Mercedes,1,60,+22.000,19.0,2.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Fernando Alonso,14,Spanish,Alpine F1 Team,15,62,+66.561,1.0,10.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Kimi Räikkönen,7,Finnish,Alfa Romeo,16,62,+94.773,0.0,13.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Sebastian Vettel,5,German,Aston Martin,0,59,\N,0.0,15.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Sergio Pérez,11,Mexican,Red Bull,2,62,+67.151,0.0,11.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Daniel Ricciardo,3,Australian,McLaren,6,54,+51.220,8.0,6.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Valtteri Bottas,77,Finnish,Mercedes,8,30,\N,0.0,,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Max Verstappen,33,Dutch,Red Bull,3,60,2:02:34.598,25.0,1.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Carlos Sainz,55,Spanish,Ferrari,11,60,+27.036,10.0,5.0,2024-09-06T09:18:11.09Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Esteban Ocon,31,French,Alpine F1 Team,9,62,+65.704,2.0,9.0,2024-09-06T09:18:11.09Z,2021-04-18


In [0]:
# Spot check: 2020 Abu Dhabi Grand Prix rows, highest points first.
# The result is empty when the selected file date holds no rows for that race.
abu_dhabi_2020_df = final_df.filter("race_year = 2020 and race_name ='Abu Dhabi Grand Prix'")
display(abu_dhabi_2020_df.orderBy(final_df['points'].desc()))

race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,created_date,file_date


In [0]:
# Spot check: every joined row that belongs to the 2021 season.
season_2021_df = final_df.filter("race_year = 2021")
display(season_2021_df)

race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,created_date,file_date
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Lewis Hamilton,44.0,British,Mercedes,1,60.0,+22.000,19.0,2.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Fernando Alonso,14.0,Spanish,Alpine F1 Team,15,62.0,+66.561,1.0,10.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Kimi Räikkönen,7.0,Finnish,Alfa Romeo,16,62.0,+94.773,0.0,13.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Sebastian Vettel,5.0,German,Aston Martin,0,59.0,\N,0.0,15.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Sergio Pérez,11.0,Mexican,Red Bull,2,62.0,+67.151,0.0,11.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Daniel Ricciardo,3.0,Australian,McLaren,6,54.0,+51.220,8.0,6.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Valtteri Bottas,77.0,Finnish,Mercedes,8,30.0,\N,0.0,,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Max Verstappen,33.0,Dutch,Red Bull,3,60.0,2:02:34.598,25.0,1.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Carlos Sainz,55.0,Spanish,Ferrari,11,60.0,+27.036,10.0,5.0,2024-09-06T09:18:12.73Z,2021-04-18
1053,2021,Emilia Romagna Grand Prix,2021-04-18T13:00:00Z,Imola,Esteban Ocon,31.0,French,Alpine F1 Team,9,62.0,+65.704,2.0,9.0,2024-09-06T09:18:12.73Z,2021-04-18


# 2. Upsert into the Delta table in the presentation folder

In [0]:
# A target row is treated as the same record as a source row when both the
# driver name and the race id match.
# The condition is assembled from adjacent string literals: a backslash
# continuation inside a single literal would embed the next line's indentation
# in the SQL text and would break if any whitespace followed the backslash.
merge_condition = (
    'target.driver_name = source.driver_name '
    'and target.race_id = source.race_id'
)

# upsert_to_delta_table and presentation_folder_path are not defined in this
# notebook; presumably they come from the %run includes at the top (confirm).
# NOTE(review): the final 'race_id' argument looks like a partition column,
# but the helper's signature is not visible here; verify against its definition.
upsert_to_delta_table('f1_presentation', 'race_results', presentation_folder_path, final_df, merge_condition, 'race_id')

In [0]:
# Verify the write: count the rows stored for the processed file date.
# The saved table is read into its own name instead of rebinding final_df, so
# re-running the earlier cells that use final_df still sees the joined frame
# rather than the entire presentation table.
race_results_df = spark.read.table('f1_presentation.race_results')
display(race_results_df.filter(race_results_df['file_date'] == param_file_date).count())

20