## Produce constructor standings 

In [0]:
# Declare a text widget named 'p_file_date' (default '2021-03-21') and read
# its current value; v_file_date selects which load is processed below.
dbutils.widgets.text('p_file_date', '2021-03-21')
v_file_date = dbutils.widgets.get('p_file_date')

In [0]:
%run ../includes/configuration

In [0]:
%run ../includes/common_functions

## Find race years for which the data is to be reprocessed

In [0]:
from pyspark.sql.functions import col

# Load only the race results that belong to the file date being processed.
# The date is compared through a column expression instead of being
# interpolated into a SQL string, so an unexpected widget value (for example
# one containing a quote) cannot break or alter the filter predicate.
race_results_df = (
    spark.read.format('delta')
    .load(f'{presentation_folder_path}/race_results')
    .filter(col('file_date') == v_file_date)
)


In [0]:
# Turn the 'race_year' column of the rows loaded for this file date into a
# Python list; it drives the re-read of complete seasons in the next cell.
# NOTE(review): df_column_to_list is defined outside this notebook (presumably
# in ../includes/common_functions) - confirm that it returns distinct values.
race_year_list = df_column_to_list(race_results_df, 'race_year')

In [0]:
from pyspark.sql.functions import col

# Re-read race results, this time keeping every row whose race_year appears in
# race_year_list, so the standings are computed over all results of each
# affected season and not only over the rows of the current file date.
race_results_df = (
    spark.read.format('delta')
    .load(f'{presentation_folder_path}/race_results')
    .filter(col('race_year').isin(race_year_list))
)

In [0]:
# Aggregate per season (race_year) and constructor (team):
#   * total_points - sum of 'points' over the selected race results
#   * wins         - number of results with position == 1. `when` without an
#                    `otherwise` yields NULL for non-matching rows and `count`
#                    skips NULLs, so only first-place finishes are counted.
# `sum` is imported under an alias so that Python's built-in `sum` is not
# shadowed for the rest of the notebook.
from pyspark.sql.functions import col, count, when
from pyspark.sql.functions import sum as spark_sum

constructor_standings_df = (
    race_results_df
    .groupBy('race_year', 'team')
    .agg(
        spark_sum('points').alias('total_points'),
        count(when(col('position') == 1, True)).alias('wins'),
    )
)
    


In [0]:
from pyspark.sql.functions import asc, desc, rank
from pyspark.sql.window import Window

# Window specification: within each race_year, order constructors by
# total_points descending and break ties on wins descending.
constructor_rank_spec = (
    Window.partitionBy('race_year')
    .orderBy(desc('total_points'), desc('wins'))
)

# Add a 'rank' column holding each constructor's rank within its season.
final_df = constructor_standings_df.withColumn(
    'rank',
    rank().over(constructor_rank_spec),
)



In [0]:
# A target row (tgt) matches a source row (src) when both team and race_year
# are equal.
merge_condition = 'tgt.team = src.team AND tgt.race_year = src.race_year'

# NOTE(review): merge_delta is defined outside this notebook (presumably in
# ../includes/common_functions). From the call it appears to take the source
# DataFrame, database name, table name, storage folder, merge condition and a
# partition column - confirm against the helper's definition.
merge_delta(
    final_df,
    'f1_presentation',
    'constructor_standings',
    presentation_folder_path,
    merge_condition,
    'race_year',
)