# 1. Imports

In [2]:
from src.configuration import Configuration
from src.fetch_data import DataFetcher
from src.dim import Dims
from src.facts import Facts
import pandas as pd
import time
import datetime
from pyspark.sql import functions as f 
from pyspark.sql import  SparkSession, Window
from aggregate_data import DataAggregator



# Render every column of a pandas DataFrame instead of eliding the middle ones.
pd.set_option('display.max_columns', None)



# 2. Variables

In [3]:
# Shared project objects: a single configuration and a single fetcher are passed
# to the dimension, fact and aggregation helpers used in the rest of the notebook.
config = Configuration('src/config.cfg')
fetcher = DataFetcher(config)
DIMS = Dims(config, fetcher)
FACTS = Facts(config, fetcher)
DATAAGGREGATOR = DataAggregator(config, fetcher)
# NOTE(review): this cell's log contains "Using an existing Spark session", so a
# session presumably already exists when this line runs (possibly created by one
# of the helpers above). If so, appName/master given here may not take effect
# -- confirm where the first session is built.
spark = SparkSession.builder.appName("MojaSesja").master("local").getOrCreate()



25/10/14 18:58:51 WARN Utils: Your hostname, MacBook-Pro-Grzegorz.local resolves to a loopback address: 127.0.0.1; using 192.168.0.3 instead (on interface en0)
25/10/14 18:58:51 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/14 18:58:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/10/14 18:58:52 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


# 3. Assigning points throughout the season to the driver.

In [4]:
# Load the session dimension requested with race="Race" and the session-result facts.
dim_session = DIMS.dim_sessions(race="Race")
fact_session_result = FACTS.fact_session_results()

In [5]:
# Fill missing finishing positions with 21 (written back onto the same frame).
# NOTE(review): 21 presumably means "behind a 20-car field / not classified" -- confirm.
position_filled = fact_session_result['position'].fillna(21)
fact_session_result['position'] = position_filled

# Driver-to-team mapping, joined per session and driver further down.
dim_driver_team = DIMS.dim_driver_team()

In [6]:
# Inspect the session dimension loaded with race="Race".
dim_session

Unnamed: 0,session_key,location,date_start,date_end,session_name,country_code,country_name,year,is_current_season,key
1,7953,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023
3,7779,Jeddah,2023-03-19,2023-03-19,Race,KSA,Saudi Arabia,2023,0,Jeddah2023
5,7787,Melbourne,2023-04-02,2023-04-02,Race,AUS,Australia,2023,0,Melbourne2023
9,9070,Baku,2023-04-30,2023-04-30,Race,AZE,Azerbaijan,2023,0,Baku2023
11,9078,Miami,2023-05-07,2023-05-07,Race,USA,United States,2023,0,Miami2023
...,...,...,...,...,...,...,...,...,...,...
149,9928,Budapest,2025-08-03,2025-08-03,Race,HUN,Hungary,2025,1,Budapest2025
151,9920,Zandvoort,2025-08-31,2025-08-31,Race,NED,Netherlands,2025,1,Zandvoort2025
153,9912,Monza,2025-09-07,2025-09-07,Race,ITA,Italy,2025,1,Monza2025
155,9904,Baku,2025-09-21,2025-09-21,Race,AZE,Azerbaijan,2025,1,Baku2025


In [7]:
# Step 1: attach the session attributes to every result row.
# Step 2: attach the team the driver belonged to in that session.
# Both joins are inner joins, so rows without a match on either side are dropped.
results_with_session = pd.merge(
    fact_session_result, dim_session, how="inner", on="session_key"
)
joined_results = pd.merge(
    results_with_session,
    dim_driver_team,
    how="inner",
    on=["session_key", "driver_number"],
)



In [8]:
# Inspect the results joined with session and team attributes.
joined_results

Unnamed: 0,position,driver_number,number_of_laps,dnf,dns,dsq,duration,gap_to_leader,meeting_key,session_key,points,location,date_start,date_end,session_name,country_code,country_name,year,is_current_season,key,team_name
0,1.0,1,57.0,False,False,False,5636.736,0,1141,7953,25.0,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023,Red Bull Racing
1,2.0,11,57.0,False,False,False,5648.723,11.987,1141,7953,18.0,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023,Red Bull Racing
2,3.0,14,57.0,False,False,False,5675.373,38.637,1141,7953,15.0,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023,Aston Martin
3,4.0,55,57.0,False,False,False,5684.788,48.052,1141,7953,12.0,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023,Ferrari
4,5.0,44,57.0,False,False,False,5687.713,50.977,1141,7953,10.0,Sakhir,2023-03-05,2023-03-05,Race,BRN,Bahrain,2023,0,Sakhir2023,Mercedes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1272,21.0,12,23.0,True,False,False,,,1277,9947,0.0,Silverstone,2025-07-06,2025-07-06,Race,GBR,United Kingdom,2025,1,Silverstone2025,Mercedes
1273,21.0,6,17.0,True,False,False,,,1277,9947,0.0,Silverstone,2025-07-06,2025-07-06,Race,GBR,United Kingdom,2025,1,Silverstone2025,Racing Bulls
1274,21.0,5,3.0,True,False,False,,,1277,9947,0.0,Silverstone,2025-07-06,2025-07-06,Race,GBR,United Kingdom,2025,1,Silverstone2025,Kick Sauber
1275,21.0,30,0.0,True,False,False,,,1277,9947,0.0,Silverstone,2025-07-06,2025-07-06,Race,GBR,United Kingdom,2025,1,Silverstone2025,Racing Bulls


In [9]:
# Narrow the frame to the columns used by the points analysis, then hand it to Spark.
points_columns = [
    'position', 'driver_number', 'session_key', 'points', 'year',
    'is_current_season', 'key', 'date_end', 'team_name',
]
joined_results = joined_results[points_columns]
spark_df = spark.createDataFrame(joined_results)
spark_df.show()

                                                                                

+--------+-------------+-----------+------+----+-----------------+----------+----------+---------------+
|position|driver_number|session_key|points|year|is_current_season|       key|  date_end|      team_name|
+--------+-------------+-----------+------+----+-----------------+----------+----------+---------------+
|     1.0|            1|       7953|  25.0|2023|                0|Sakhir2023|2023-03-05|Red Bull Racing|
|     2.0|           11|       7953|  18.0|2023|                0|Sakhir2023|2023-03-05|Red Bull Racing|
|     3.0|           14|       7953|  15.0|2023|                0|Sakhir2023|2023-03-05|   Aston Martin|
|     4.0|           55|       7953|  12.0|2023|                0|Sakhir2023|2023-03-05|        Ferrari|
|     5.0|           44|       7953|  10.0|2023|                0|Sakhir2023|2023-03-05|       Mercedes|
|     6.0|           18|       7953|   8.0|2023|                0|Sakhir2023|2023-03-05|   Aston Martin|
|     7.0|           63|       7953|   6.0|2023|       

In [10]:
# Running points total per driver and season, accumulated in date_end order.
# The ROWS frame stops at the current row, so each row adds only its own points.
driver_season_by_date = Window.partitionBy("driver_number", "year").orderBy("date_end")
window = driver_season_by_date.rowsBetween(Window.unboundedPreceding, Window.currentRow)

cumulative_points = f.sum("points").over(window)
spark_df = spark_df.withColumn("cumulative_points", cumulative_points)

In [11]:
# Show the first 50 rows, now including the per-driver running total.
spark_df.show(50)

+--------+-------------+-----------+------+----+-----------------+--------------------+----------+---------------+-----------------+
|position|driver_number|session_key|points|year|is_current_season|                 key|  date_end|      team_name|cumulative_points|
+--------+-------------+-----------+------+----+-----------------+--------------------+----------+---------------+-----------------+
|     1.0|            1|       7953|  25.0|2023|                0|          Sakhir2023|2023-03-05|Red Bull Racing|             25.0|
|     2.0|            1|       7779|   0.0|2023|                0|          Jeddah2023|2023-03-19|Red Bull Racing|             25.0|
|     1.0|            1|       7787|  25.0|2023|                0|       Melbourne2023|2023-04-02|Red Bull Racing|             50.0|
|     2.0|            1|       9070|  18.0|2023|                0|            Baku2023|2023-04-30|Red Bull Racing|             68.0|
|     1.0|            1|       9078|  26.0|2023|                0|   

In [12]:
# Running points total per team and season, accumulated in date_end order.
# The frame is the RANGE from the start of the partition to the current row, which
# is what Spark applies by default once an ordering is given; it is spelled out
# here on purpose. Rows sharing a date_end are peers under a RANGE frame, so all
# rows of a team for the same date receive the same total.
window2 = (
    Window.partitionBy("year", "team_name")
    .orderBy("date_end")
    .rangeBetween(Window.unboundedPreceding, Window.currentRow)
)

team_points = f.sum("points").over(window2)
test = spark_df.withColumn("team_points", team_points)

In [13]:
# Show the first rows, now including the per-team running total.
test.show()

+--------+-------------+-----------+------+----+-----------------+---------------+----------+----------+-----------------+-----------+
|position|driver_number|session_key|points|year|is_current_season|            key|  date_end| team_name|cumulative_points|team_points|
+--------+-------------+-----------+------+----+-----------------+---------------+----------+----------+-----------------+-----------+
|    16.0|           24|       7953|   0.0|2023|                0|     Sakhir2023|2023-03-05|Alfa Romeo|              0.0|        4.0|
|     8.0|           77|       7953|   4.0|2023|                0|     Sakhir2023|2023-03-05|Alfa Romeo|              4.0|        4.0|
|    13.0|           24|       7779|   0.0|2023|                0|     Jeddah2023|2023-03-19|Alfa Romeo|              0.0|        4.0|
|    18.0|           77|       7779|   0.0|2023|                0|     Jeddah2023|2023-03-19|Alfa Romeo|              4.0|        4.0|
|     9.0|           24|       7787|   2.0|2023|       

In [14]:
# Team-level points from the project aggregator. In the stored output the
# points_gained column appears to hold the running team total -- confirm against the helper.
DATAAGGREGATOR.get_racer_team_points(racer_team="team")

Unnamed: 0,session_key,key,year,driver_number,team_name,date_end,points,points_gained
0,7953,Sakhir2023,2023,77,Alfa Romeo,2023-03-05,4.0,4.0
1,7953,Sakhir2023,2023,24,Alfa Romeo,2023-03-05,0.0,4.0
2,7779,Jeddah2023,2023,24,Alfa Romeo,2023-03-19,0.0,4.0
3,7779,Jeddah2023,2023,77,Alfa Romeo,2023-03-19,0.0,4.0
4,7787,Melbourne2023,2023,24,Alfa Romeo,2023-04-02,2.0,6.0
...,...,...,...,...,...,...,...,...
1272,9912,Monza2025,2025,55,Williams,2025-09-07,0.0,83.0
1273,9904,Baku2025,2025,55,Williams,2025-09-21,15.0,98.0
1274,9904,Baku2025,2025,23,Williams,2025-09-21,0.0,98.0
1275,9896,Marina Bay2025,2025,55,Williams,2025-10-05,1.0,99.0


In [16]:
# Recent race results per driver with measure="position"; the stored output has
# columns last_race_pos_1 .. last_race_pos_5 next to the current position.
DATAAGGREGATOR.get_last_races_result(n_races=5, race_type="Race", measure="position")

Unnamed: 0,driver_number,position,session_key,date_end,key,last_race_pos_1,last_race_pos_2,last_race_pos_3,last_race_pos_4,last_race_pos_5
5,1,1.0,9094,2023-05-28,Monaco2023,1.0,2.0,1.0,2.0,1.0
6,1,1.0,9102,2023-06-04,Barcelona2023,2.0,1.0,2.0,1.0,1.0
7,1,1.0,9110,2023-06-18,Montréal2023,1.0,2.0,1.0,1.0,1.0
8,1,1.0,9118,2023-07-02,Spielberg2023,2.0,1.0,1.0,1.0,1.0
9,1,1.0,9126,2023-07-09,Silverstone2023,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
1272,87,21.0,9928,2025-08-03,Budapest2025,17.0,11.0,11.0,11.0,11.0
1273,87,6.0,9920,2025-08-31,Zandvoort2025,11.0,11.0,11.0,11.0,21.0
1274,87,12.0,9912,2025-09-07,Monza2025,11.0,11.0,11.0,21.0,6.0
1275,87,12.0,9904,2025-09-21,Baku2025,11.0,11.0,21.0,6.0,12.0


In [None]:
# Same helper for race_type="Qualifying" with measure="avg"; the stored output has
# a single avg_last_5_race column per driver and session.
DATAAGGREGATOR.get_last_races_result(n_races=5, race_type="Qualifying", measure="avg")

Unnamed: 0,driver_number,session_key,date_end,key,avg_last_5_race
1,1,7779,2023-03-19,Jeddah2023,1.000000
2,1,7787,2023-04-02,Melbourne2023,1.500000
3,1,9070,2023-04-30,Baku2023,1.333333
4,1,9078,2023-05-07,Miami2023,1.500000
5,1,9094,2023-05-28,Monaco2023,1.400000
...,...,...,...,...,...
1272,87,9928,2025-08-03,Budapest2025,12.200000
1273,87,9920,2025-08-31,Zandvoort2025,13.000000
1274,87,9912,2025-09-07,Monza2025,12.000000
1275,87,9904,2025-09-21,Baku2025,12.200000


In [29]:
# Per-driver points from the aggregator, with the driver's team for each session attached.
test = DATAAGGREGATOR.get_racer_team_points(racer_team="driver").merge(
    DIMS.dim_driver_team(),
    on=["driver_number", "session_key"]
)

# Pair each driver with the other driver(s) of the same team at the same event:
# self-join on event key and team, with "_other" marking the partner's columns.
# (The 10-second sleep that used to sit here was removed: both data calls have
# already returned and what follows is a purely in-memory merge.)
# NOTE(review): `key` looks like location + year in the displayed data; joining on
# session_key would be stricter if a location can host two races in one year -- confirm.
merged = test.merge(
    test,
    on=["key", "team_name"],
    suffixes=["", "_other"]
)

# The self-join also matches every driver with itself; keep only real teammate pairs.
merged = merged[merged["driver_number"] != merged["driver_number_other"]]

In [35]:
# Inspect the teammate pairs (one row per driver / teammate / event).
merged

Unnamed: 0,session_key,key,year,driver_number,date_end,points,points_gained,team_name,session_key_other,year_other,driver_number_other,date_end_other,points_other,points_gained_other
1,7953,Sakhir2023,2023,1,2023-03-05,25.0,25.0,Red Bull Racing,7953,2023,11,2023-03-05,18.0,18.0
3,7779,Jeddah2023,2023,1,2023-03-19,0.0,25.0,Red Bull Racing,7779,2023,11,2023-03-19,25.0,43.0
5,7787,Melbourne2023,2023,1,2023-04-02,25.0,50.0,Red Bull Racing,7787,2023,11,2023-04-02,11.0,54.0
7,9070,Baku2023,2023,1,2023-04-30,18.0,68.0,Red Bull Racing,9070,2023,11,2023-04-30,25.0,79.0
9,9078,Miami2023,2023,1,2023-05-07,26.0,94.0,Red Bull Racing,9078,2023,11,2023-05-07,18.0,97.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2541,9928,Budapest2025,2025,87,2025-08-03,0.0,6.0,Haas F1 Team,9928,2025,31,2025-08-03,0.0,23.0
2543,9920,Zandvoort2025,2025,87,2025-08-31,8.0,14.0,Haas F1 Team,9920,2025,31,2025-08-31,1.0,24.0
2545,9912,Monza2025,2025,87,2025-09-07,0.0,14.0,Haas F1 Team,9912,2025,31,2025-09-07,0.0,24.0
2547,9904,Baku2025,2025,87,2025-09-21,0.0,14.0,Haas F1 Team,9904,2025,31,2025-09-21,0.0,24.0


In [None]:
# NOTE(review): the stored output of this cell lists ten rows for driver 11 only,
# which a bare `merged` would not produce. The cell was presumably edited after
# it was last run; re-run it or restore the filter that produced that output.
merged

Unnamed: 0,session_key,key,year,driver_number,date_end,points,points_gained,team_name,session_key_other,year_other,driver_number_other,date_end_other,points_other,points_gained_other
590,7953,Sakhir2023,2023,11,2023-03-05,18.0,18.0,Red Bull Racing,7953,2023,1,2023-03-05,25.0,25.0
592,7779,Jeddah2023,2023,11,2023-03-19,25.0,43.0,Red Bull Racing,7779,2023,1,2023-03-19,0.0,25.0
594,7787,Melbourne2023,2023,11,2023-04-02,11.0,54.0,Red Bull Racing,7787,2023,1,2023-04-02,25.0,50.0
596,9070,Baku2023,2023,11,2023-04-30,25.0,79.0,Red Bull Racing,9070,2023,1,2023-04-30,18.0,68.0
598,9078,Miami2023,2023,11,2023-05-07,18.0,97.0,Red Bull Racing,9078,2023,1,2023-05-07,26.0,94.0
600,9094,Monaco2023,2023,11,2023-05-28,0.0,97.0,Red Bull Racing,9094,2023,1,2023-05-28,25.0,119.0
602,9102,Barcelona2023,2023,11,2023-06-04,12.0,109.0,Red Bull Racing,9102,2023,1,2023-06-04,26.0,145.0
604,9110,Montréal2023,2023,11,2023-06-18,9.0,118.0,Red Bull Racing,9110,2023,1,2023-06-18,25.0,170.0
606,9118,Spielberg2023,2023,11,2023-07-02,15.0,133.0,Red Bull Racing,9118,2023,1,2023-07-02,26.0,196.0
608,9126,Silverstone2023,2023,11,2023-07-09,8.0,141.0,Red Bull Racing,9126,2023,1,2023-07-09,26.0,222.0
