In [50]:
import fastf1
import pandas as pd

In [60]:
YEARS = [2024]

acc = []

for year in YEARS:
  events = fastf1.events.get_event_schedule(year)

  for i, event in events.iterrows():
    if event.is_testing():
      continue

    if i not in range(1, 3):
      continue


    session = fastf1.get_session(year, event['RoundNumber'], 'Race')
    session.load(telemetry=True)

    for driver in session.drivers:
      df = session.laps.pick_drivers(driver).get_pos_data().add_driver_ahead()

      mask = (df['DriverAhead'].shift() != df['DriverAhead']) & \
            (df['DriverAhead'].notna()) & \
            (df['DriverAhead'].shift().notna() & \
            df['DistanceToDriverAhead'].shift() < 100)

      first_row_mask = df.index == 0
      if len(df) > 0 and pd.notna(df.loc[0, 'DriverAhead']):
          mask = mask | first_row_mask

      filtered_df = df[mask]

      filtered_df['DriverId'] = driver
      filtered_df['DriverName'] = session.get_driver(driver).FullName
      filtered_df['RoundNumber'] = event['RoundNumber']
      filtered_df['Country'] = event['Country']
      filtered_df['Location'] = event['Location']
      filtered_df['OfficialEventName'] = event['OfficialEventName']
      # filtered_df['EventDate'] = event['EventDate']
      filtered_df['EventName'] = event['EventName']
      filtered_df['EventFormat'] = event['EventFormat']
      
      acc.append(filtered_df)

df = pd.concat(acc, ignore_index=True)
df


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
A value is trying to be set on a copy of a slice from a DataFr

Unnamed: 0,Date,Status,X,Y,Z,Source,Time,SessionTime,DriverAhead,DistanceToDriverAhead,DriverId,DriverName,RoundNumber,Country,Location,OfficialEventName,EventName,EventFormat
0,2024-03-02 15:03:42.460,OnTrack,-280.0,3550.0,-157.0,pos,0 days 00:00:00.118000,0 days 01:00:00.029000,,0.000000,1,Max Verstappen,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,Bahrain Grand Prix,conventional
1,2024-03-02 15:03:43.040,OnTrack,-280.0,3558.0,-157.0,pos,0 days 00:00:00.698000,0 days 01:00:00.609000,63,0.000000,1,Max Verstappen,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,Bahrain Grand Prix,conventional
2,2024-03-02 15:03:43.460,OnTrack,-280.0,3574.0,-157.0,pos,0 days 00:00:01.118000,0 days 01:00:01.029000,4,0.000000,1,Max Verstappen,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,Bahrain Grand Prix,conventional
3,2024-03-02 15:03:44.779,OnTrack,-273.0,3744.0,-158.0,pos,0 days 00:00:02.437000,0 days 01:00:02.348000,14,0.077743,1,Max Verstappen,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,Bahrain Grand Prix,conventional
4,2024-03-02 15:03:45.119,OnTrack,-270.0,3824.0,-159.0,pos,0 days 00:00:02.777000,0 days 01:00:02.688000,44,0.222222,1,Max Verstappen,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,Bahrain Grand Prix,conventional
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24928,2024-03-09 17:05:38.811,OnTrack,-2968.0,-3512.0,114.0,pos,0 days 00:01:39.755000,0 days 01:01:23.591000,24,521.019722,10,Pierre Gasly,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,Saudi Arabian Grand Prix,conventional
24929,2024-03-09 17:05:49.751,OnTrack,-203.0,-4050.0,117.0,pos,0 days 00:01:50.695000,0 days 01:01:34.531000,18,852.255278,10,Pierre Gasly,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,Saudi Arabian Grand Prix,conventional
24930,2024-03-09 17:05:53.891,OnTrack,-665.0,-1966.0,122.0,pos,0 days 00:01:54.835000,0 days 01:01:38.671000,3,744.796665,10,Pierre Gasly,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,Saudi Arabian Grand Prix,conventional
24931,2024-03-09 17:05:56.131,OnTrack,-990.0,-1244.0,122.0,pos,0 days 00:01:57.075000,0 days 01:01:40.911000,2,721.242500,10,Pierre Gasly,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,Saudi Arabian Grand Prix,conventional


In [52]:
print(df.to_json())

{"Date":{"0":1709391822460,"1":1709391823040,"2":1709391823460,"3":1709391824779,"4":1709391825119,"5":1709391825699,"6":1709391826899,"7":1709391828139,"8":1709391829520,"9":1709391829960,"10":1709391830639,"11":1709391830999,"12":1709391831179,"13":1709391831439,"14":1709391832099,"15":1709391832299,"16":1709391832679,"17":1709391833059,"18":1709391833259,"19":1709391833599,"20":1709391834979,"21":1709391836759,"22":1709391838759,"23":1709391839399,"24":1709391839999,"25":1709391840419,"26":1709391848880,"27":1709391849740,"28":1709391850379,"29":1709391850979,"30":1709391851899,"31":1709391852159,"32":1709391852419,"33":1709391852859,"34":1709391864439,"35":1709391865079,"36":1709391865419,"37":1709391865679,"38":1709391866219,"39":1709391866479,"40":1709391866659,"41":1709391866879,"42":1709391867979,"43":1709391876299,"44":1709391876999,"45":1709391930499,"46":1709391932799,"47":1709392124319,"48":1709392125859,"49":1709393511179,"50":1709393513079,"51":1709391822460,"52":17093918

In [None]:
import pandas as pd
import numpy as np

# Create a sample DataFrame with DistanceToDriverAhead column
data = {
    'Time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'DriverAhead': ['Hamilton', 'Hamilton', 'Verstappen', 'Verstappen', None, 
                   'Leclerc', 'Leclerc', None, 'Sainz', 'Sainz'],
    'DistanceToDriverAhead': [300, 250, 600, 550, np.nan, 400, 350, np.nan, 700, 650],
    'OtherData': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
}
df = pd.DataFrame(data)

# Display the original DataFrame
# print("Original DataFrame:")
# print(df)
# print("\n")

# Create a mask with all our conditions:
# 1. DriverAhead changes from previous row
# 2. Current row has a driver ahead (not NaN)
# 3. Previous row had a driver ahead (not NaN)
# 4. Previous row had DistanceToDriverAhead < 500
mask = (df['DriverAhead'].shift() != df['DriverAhead']) & \
       (df['DriverAhead'].notna()) & \
       (df['DriverAhead'].shift().notna()) & \
       (df['DistanceToDriverAhead'].shift() < 500)

# Include the first row if it has a driver ahead
first_row_mask = df.index == 0
if len(df) > 0 and pd.notna(df.loc[0, 'DriverAhead']):
    mask = mask | first_row_mask

# Filter the DataFrame
filtered_df = df[mask]
filtered_df = filtered_df.reset_index(drop=True)

# Display the filtered DataFrame
# print("Filtered DataFrame (only qualifying driver-to-driver transitions):")
print(filtered_df)