In [2]:
%pip install fastf1
import fastf1
import pandas as pd

Note: you may need to restart the kernel to use updated packages.


In [10]:
def get_silverstone_results(start_year, end_year):
    all_results = []
    for year in range(start_year, end_year + 1):
        try:
            session = fastf1.get_session(year, 'Silverstone', 'R')
            session.load()
            results = session.results
            results['Year'] = year
            all_results.append(results)
            results['Position'] = results['Position'].astype(int)
        except Exception as e:
            print(f"Could not load data for year {year}: {e}")
    results_df = pd.concat(all_results, ignore_index=True)
    return results_df[(results_df['Status'] == 'Finished')]

    

In [11]:
def pitstop_analysis(start_year, end_year):
    pitstop_data = []
    for year in range(start_year, end_year + 1):
        try:
            session = fastf1.get_session(year, 'Silverstone', 'R')
            session.load()
            lap_data = session.laps
            lap_data[lap_data['PitInTime'].notna()] 
            #lap_data[unique]
            lap_data['PitStopCount'] = lap_data.groupby('Driver')['PitInTime'].transform('count')
            lap_data = lap_data[lap_data['PitStopCount'] > 0]
            lap_data['AveragePitStopDuration'] = lap_data.groupby('Driver')['PitInTime'].transform('mean')
            lap_data['Year'] = year
            lap_data = lap_data.rename(columns={'Driver': 'Abbreviation'})
            lap_data[['Abbreviation', 'PitStopCount', 'AveragePitStopDuration']].drop_duplicates()      
            pitstop_data.append(lap_data[['Abbreviation', 'PitStopCount', 'AveragePitStopDuration', 'Year']].drop_duplicates())
        except Exception as e:
            print(f"Could not load data for year {year}: {e}")
    pitstop_df = pd.concat(pitstop_data, ignore_index=True)
    return pitstop_df

In [13]:
df_pitstop = pitstop_analysis(2018, 2024)
df_general = get_silverstone_results(2018, 2024)

core           INFO 	Loading data for British Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['5', '44', '7', '77', '3', '27', '31', '14', '20', '11', '2', '18', '10', '35', '33', '8', '55', '9', '16', '28']
core           INFO 	Loading data for British Grand Prix - Race 

In [14]:
final_df = pd.merge(df_general, df_pitstop, on=['Abbreviation', 'Year'], how='left')
final_df

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,...,Q1,Q2,Q3,Time,Status,Points,Laps,Year,PitStopCount,AveragePitStopDuration
0,5,S VETTEL,VET,vettel,Ferrari,DC0000,ferrari,Sebastian,Vettel,Sebastian Vettel,...,NaT,NaT,NaT,0 days 01:27:29.784000,Finished,25.0,52.0,2018,2,0 days 00:48:50.431500
1,44,L HAMILTON,HAM,hamilton,Mercedes,00D2BE,mercedes,Lewis,Hamilton,Lewis Hamilton,...,NaT,NaT,NaT,0 days 00:00:02.264000,Finished,18.0,52.0,2018,1,0 days 00:46:37.656000
2,7,K RAIKKONEN,RAI,raikkonen,Ferrari,DC0000,ferrari,Kimi,Räikkönen,Kimi Räikkönen,...,NaT,NaT,NaT,0 days 00:00:03.652000,Finished,15.0,52.0,2018,2,0 days 00:43:39.819500
3,77,V BOTTAS,BOT,bottas,Mercedes,00D2BE,mercedes,Valtteri,Bottas,Valtteri Bottas,...,NaT,NaT,NaT,0 days 00:00:08.883000,Finished,12.0,52.0,2018,1,0 days 00:40:03.335000
4,3,D RICCIARDO,RIC,ricciardo,Red Bull Racing,1E41FF,red_bull,Daniel,Ricciardo,Daniel Ricciardo,...,NaT,NaT,NaT,0 days 00:00:09.500000,Finished,10.0,52.0,2018,2,0 days 00:45:00.863500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,14,F ALONSO,ALO,alonso,Aston Martin,229971,aston_martin,Fernando,Alonso,Fernando Alonso,...,NaT,NaT,NaT,0 days 00:01:03.577000,Finished,4.0,52.0,2024,2,0 days 01:50:30.759000
91,23,A ALBON,ALB,albon,Williams,64C4FF,williams,Alexander,Albon,Alexander Albon,...,NaT,NaT,NaT,0 days 00:01:08.387000,Finished,2.0,52.0,2024,2,0 days 01:50:35.682500
92,22,Y TSUNODA,TSU,tsunoda,RB,6692FF,rb,Yuki,Tsunoda,Yuki Tsunoda,...,NaT,NaT,NaT,0 days 00:01:19.303000,Finished,1.0,52.0,2024,2,0 days 01:50:33.752500
93,2,L SARGEANT,SAR,sargeant,Williams,64C4FF,williams,Logan,Sargeant,Logan Sargeant,...,NaT,NaT,NaT,0 days 00:01:28.960000,Finished,0.0,52.0,2024,2,0 days 01:50:46.114000


In [37]:
df_analysis = final_df[['Position','GridPosition','Time','PitStopCount','AveragePitStopDuration']].copy()
df_analysis['Time'] = pd.to_timedelta(df_analysis['Time']).dt.total_seconds()
df_analysis['AveragePitStopDuration'] = pd.to_timedelta(df_analysis['AveragePitStopDuration']).dt.total_seconds()
#add a delta position change in grid position vs final position
df_analysis['PositionDelta'] = df_analysis['GridPosition'] - df_analysis['Position']
df_analysis['PositionBin'] = pd.cut(df_analysis['Position'], bins=[-1, 3, 10, 20], labels=['T-3', 'T4-T10', 'Other'])
df_analysis = df_analysis.drop(columns=['Position'])
df_analysis

Unnamed: 0,GridPosition,Time,PitStopCount,AveragePitStopDuration,PositionDelta,PositionBin
0,2.0,5249.784,2,2930.4315,1.0,T-3
1,1.0,2.264,1,2797.6560,-1.0,T-3
2,3.0,3.652,2,2619.8195,0.0,T-3
3,4.0,8.883,1,2403.3350,0.0,T4-T10
4,6.0,9.500,2,2700.8635,1.0,T4-T10
...,...,...,...,...,...,...
90,10.0,63.577,2,6630.7590,2.0,T4-T10
91,9.0,68.387,2,6635.6825,0.0,T4-T10
92,13.0,79.303,2,6633.7525,3.0,T4-T10
93,12.0,88.960,2,6646.1140,1.0,Other


In [41]:
#run random forest classifier to predict position
%pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Prepare features and target
X = df_analysis[['GridPosition', 'Time', 'PitStopCount', 'AveragePitStopDuration']]
y = df_analysis['PositionBin']

# Drop rows with missing values
X = X.dropna()
y = y.loc[X.index]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit Random Forest
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Note: you may need to restart the kernel to use updated packages.
              precision    recall  f1-score   support

       Other       1.00      0.60      0.75         5
         T-3       1.00      0.80      0.89         5
      T4-T10       0.75      1.00      0.86         9

    accuracy                           0.84        19
   macro avg       0.92      0.80      0.83        19
weighted avg       0.88      0.84      0.84        19

Accuracy: 0.84
