# **VIRTUAL RACE ENGINEER**

In [107]:
import fastf1 as ff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fastf1.plotting
fastf1.plotting.setup_mpl()
import fastf1.api as fap
fastf1.Cache.enable_cache(r"D:\Prabhu\SEM 7\F1 Data")
import re
import datetime as dt
import fastf1.mvapi as famp


### LOADING SESSION

In [108]:
session = ff.get_session(2023,'Dutch','R')
session.load()

core           INFO 	Loading data for Dutch Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '10', '11', '55', '44', '4', '23', '81', '31', '18', '27', '40', '77', '22', '20', '63', '24', '16', '2']


In [109]:
laps=session.laps
drivers=laps['Driver'].unique()

### FETCHING TELEMETRY DATA 

In [110]:
total_telemetry=[]
for i in drivers:
    drivers_laps=laps.pick_driver(i)
    driver_telemetry=drivers_laps.get_car_data()
    
    driver_telemetry['Driver']=i
    total_telemetry.append(driver_telemetry)
telemetry_data=pd.concat(total_telemetry,ignore_index=True)

### FETCHING WEATHER,TYRE,LAP AND POSITION DATA 

In [111]:
weather_data=pd.DataFrame(fap.weather_data(session.api_path))
tyre_data = laps[['Driver', 'LapNumber', 'Compound', 'FreshTyre','TyreLife']]
lap_data=laps[['Time','Driver','DriverNumber','LapNumber','LapTime','Sector1Time','Sector2Time','Sector3Time','IsAccurate']]
position_data=laps[['Driver','DriverNumber','LapNumber','Position','LapStartTime']]

req            INFO 	Using cached data for weather_data


### FETCHING RACE CONTROL MESSAGES

In [112]:
rcm_data = pd.DataFrame(fap.race_control_messages(session.api_path))
msg=rcm_data[['Time','Status','Message']]

#Filter for Yellow, Red, VSC, SC 
yellow_flags_key=['YELLOW','DOUBLE YELLOW']
red_flags_key=['RED']
vsc_key=['VSC','VIRTUAL SAFETY CAR']
sc_key=['SC','SAFETY CAR']

#Searching using the keyword in REGEX
yellow_flags = msg[msg['Message'].apply(lambda x: any(re.search(r'\b{}\b'.format(keyword), x) for keyword in yellow_flags_key))]
red_flags = msg[msg['Message'].apply(lambda x: any(re.search(r'\b{}\b'.format(keyword), x) for keyword in red_flags_key))]
vsc = msg[msg['Message'].apply(lambda x: any(re.search(r'\b{}\b'.format(keyword), x) for keyword in vsc_key))]
sc = msg[msg['Message'].apply(lambda x: any(re.search(r'\b{}\b'.format(keyword), x) for keyword in sc_key))]


req            INFO 	Using cached data for race_control_messages


### FETCHING LAP DATA (PITS STOPS, DRIVER POSITION ETC.)

In [113]:
lapsdata, streamdata = fap.timing_data(session.api_path)
# pd.set_option('display.max_columns', None)
lapsdata=pd.DataFrame(lapsdata)
lapsdata['DriverNumber'] = lapsdata['Driver']
gapdata=pd.DataFrame(streamdata)
gapdata['DriverNumber'] = gapdata['Driver']
gap_data = gapdata[['Time','DriverNumber','Position','GapToLeader','IntervalToPositionAhead']]
laps_data=lapsdata[['Time','DriverNumber','LapTime','NumberOfPitStops','PitOutTime','PitInTime']]

req            INFO 	Using cached data for _extended_timing_data


### GETTING CORNERS INFO

In [114]:
circuit_info=session.get_circuit_info()
corners_data=circuit_info.corners

### MAKING A MASTER DATA (POSITION DATA, POSITION DATA, LAPS DATA)
###### NOTE:WE ARE KEEPING THE GAP DATA, WEATHER DATA, RACE CONTROL MESSAGES AND THE TELEMETRY DATA SEPERATE BECAUSE EACH DATA IS GENERATED IN A COMPLETELY DIFFERENT TIME

In [115]:
master_data = lap_data.merge(position_data, on=['Driver', 'DriverNumber', 'LapNumber'], how='left')
master_data = master_data.merge(tyre_data, on=['Driver', 'LapNumber'], how='left')
master_data = master_data.merge(laps_data, on=['DriverNumber','Time','LapTime'], how='left')

#### FILLING THE MISSING LAPTIME VALUES WITH BACKWARD FILL

In [116]:
columns_to_fill=['LapTime','Sector1Time','Sector2Time','Sector3Time']
master_data[columns_to_fill]=master_data[columns_to_fill].fillna(method='bfill')
master_data

  master_data[columns_to_fill]=master_data[columns_to_fill].fillna(method='bfill')


Unnamed: 0,Time,Driver,DriverNumber,LapNumber,LapTime,Sector1Time,Sector2Time,Sector3Time,IsAccurate,Position,LapStartTime,Compound,FreshTyre,TyreLife,NumberOfPitStops,PitOutTime,PitInTime
0,0 days 01:03:36.820000,VER,1,1.0,0 days 00:01:31.585000,0 days 00:00:36.330000,0 days 00:00:28.929000,0 days 00:00:30.478000,False,1.0,0 days 01:02:04.960000,SOFT,True,1.0,,NaT,NaT
1,0 days 01:05:26.792000,VER,1,2.0,0 days 00:01:49.972000,0 days 00:00:36.330000,0 days 00:00:36.685000,0 days 00:00:36.957000,False,3.0,0 days 01:03:36.820000,SOFT,True,2.0,0.0,NaT,0 days 01:05:25.519000
2,0 days 01:07:15.450000,VER,1,3.0,0 days 00:01:48.658000,0 days 00:00:49.822000,0 days 00:00:30.905000,0 days 00:00:27.931000,False,5.0,0 days 01:05:26.792000,INTERMEDIATE,True,1.0,1.0,0 days 01:05:45.581000,NaT
3,0 days 01:08:40.701000,VER,1,4.0,0 days 00:01:25.251000,0 days 00:00:29.996000,0 days 00:00:29.479000,0 days 00:00:25.776000,True,4.0,0 days 01:07:15.450000,INTERMEDIATE,True,2.0,1.0,NaT,NaT
4,0 days 01:10:04.162000,VER,1,5.0,0 days 00:01:23.461000,0 days 00:00:28.478000,0 days 00:00:29.064000,0 days 00:00:25.919000,True,4.0,0 days 01:08:40.701000,INTERMEDIATE,True,3.0,1.0,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1338,0 days 03:20:55.371000,PIA,81,68.0,0 days 00:01:25.004000,0 days 00:00:29.890000,0 days 00:00:29.060000,0 days 00:00:26.054000,True,9.0,0 days 03:19:30.367000,INTERMEDIATE,False,9.0,5.0,NaT,NaT
1339,0 days 03:22:18.282000,PIA,81,69.0,0 days 00:01:22.911000,0 days 00:00:28.834000,0 days 00:00:28.632000,0 days 00:00:25.445000,True,9.0,0 days 03:20:55.371000,INTERMEDIATE,False,10.0,5.0,NaT,NaT
1340,0 days 03:23:40.941000,PIA,81,70.0,0 days 00:01:22.659000,0 days 00:00:28.662000,0 days 00:00:28.457000,0 days 00:00:25.540000,True,9.0,0 days 03:22:18.282000,INTERMEDIATE,False,11.0,5.0,NaT,NaT
1341,0 days 03:25:03.237000,PIA,81,71.0,0 days 00:01:22.296000,0 days 00:00:28.423000,0 days 00:00:28.419000,0 days 00:00:25.454000,True,9.0,0 days 03:23:40.941000,INTERMEDIATE,False,12.0,5.0,NaT,NaT


### CONVERTING TIME TO SECONDS

In [117]:
master_data['Sector1Time(s)']=master_data['Sector1Time'].dt.total_seconds()
master_data['Sector2Time(s)']=master_data['Sector2Time'].dt.total_seconds()
master_data['Sector3Time(s)']=master_data['Sector3Time'].dt.total_seconds()
master_data['Cum.SectorTime(s)']=master_data['Sector1Time(s)']+master_data['Sector2Time(s)']+master_data['Sector3Time(s)']
master_data['LapTime(s)']=master_data['LapTime'].dt.total_seconds()
master_data['LapStartTime(s)']=master_data['LapStartTime'].dt.total_seconds()
master_data['PitInTime(s)']=master_data['PitInTime'].dt.total_seconds()
master_data['PitOutTime(s)']=master_data['PitOutTime'].dt.total_seconds().shift(-1)

#### CALCULATING PITSTOP TIME AND LAP DELTA

In [118]:
master_data['PitStopTime(s)']=master_data['PitOutTime(s)']-master_data['PitInTime(s)']
master_data['PrevLapTime(s)']=master_data['LapTime(s)'].shift(1)
master_data['LapΔ(s)']=master_data['LapTime(s)']-master_data['PrevLapTime(s)']
master_data

Unnamed: 0,Time,Driver,DriverNumber,LapNumber,LapTime,Sector1Time,Sector2Time,Sector3Time,IsAccurate,Position,...,Sector2Time(s),Sector3Time(s),Cum.SectorTime(s),LapTime(s),LapStartTime(s),PitInTime(s),PitOutTime(s),PitStopTime(s),PrevLapTime(s),LapΔ(s)
0,0 days 01:03:36.820000,VER,1,1.0,0 days 00:01:31.585000,0 days 00:00:36.330000,0 days 00:00:28.929000,0 days 00:00:30.478000,False,1.0,...,28.929,30.478,95.737,91.585,3724.960,,,,,
1,0 days 01:05:26.792000,VER,1,2.0,0 days 00:01:49.972000,0 days 00:00:36.330000,0 days 00:00:36.685000,0 days 00:00:36.957000,False,3.0,...,36.685,36.957,109.972,109.972,3816.820,3925.519,3945.581,20.062,91.585,18.387
2,0 days 01:07:15.450000,VER,1,3.0,0 days 00:01:48.658000,0 days 00:00:49.822000,0 days 00:00:30.905000,0 days 00:00:27.931000,False,5.0,...,30.905,27.931,108.658,108.658,3926.792,,,,109.972,-1.314
3,0 days 01:08:40.701000,VER,1,4.0,0 days 00:01:25.251000,0 days 00:00:29.996000,0 days 00:00:29.479000,0 days 00:00:25.776000,True,4.0,...,29.479,25.776,85.251,85.251,4035.450,,,,108.658,-23.407
4,0 days 01:10:04.162000,VER,1,5.0,0 days 00:01:23.461000,0 days 00:00:28.478000,0 days 00:00:29.064000,0 days 00:00:25.919000,True,4.0,...,29.064,25.919,83.461,83.461,4120.701,,,,85.251,-1.790
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1338,0 days 03:20:55.371000,PIA,81,68.0,0 days 00:01:25.004000,0 days 00:00:29.890000,0 days 00:00:29.060000,0 days 00:00:26.054000,True,9.0,...,29.060,26.054,85.004,85.004,11970.367,,,,88.541,-3.537
1339,0 days 03:22:18.282000,PIA,81,69.0,0 days 00:01:22.911000,0 days 00:00:28.834000,0 days 00:00:28.632000,0 days 00:00:25.445000,True,9.0,...,28.632,25.445,82.911,82.911,12055.371,,,,85.004,-2.093
1340,0 days 03:23:40.941000,PIA,81,70.0,0 days 00:01:22.659000,0 days 00:00:28.662000,0 days 00:00:28.457000,0 days 00:00:25.540000,True,9.0,...,28.457,25.540,82.659,82.659,12138.282,,,,82.911,-0.252
1341,0 days 03:25:03.237000,PIA,81,71.0,0 days 00:01:22.296000,0 days 00:00:28.423000,0 days 00:00:28.419000,0 days 00:00:25.454000,True,9.0,...,28.419,25.454,82.296,82.296,12220.941,,,,82.659,-0.363


In [119]:
master_data=master_data.sort_values(by=['LapNumber','Position'])
master_data['CumLapTime(s)'] = master_data.groupby('Driver')['LapTime(s)'].cumsum()
master_data

Unnamed: 0,Time,Driver,DriverNumber,LapNumber,LapTime,Sector1Time,Sector2Time,Sector3Time,IsAccurate,Position,...,Sector3Time(s),Cum.SectorTime(s),LapTime(s),LapStartTime(s),PitInTime(s),PitOutTime(s),PitStopTime(s),PrevLapTime(s),LapΔ(s),CumLapTime(s)
0,0 days 01:03:36.820000,VER,1,1.0,0 days 00:01:31.585000,0 days 00:00:36.330000,0 days 00:00:28.929000,0 days 00:00:30.478000,False,1.0,...,30.478,95.737,91.585,3724.960,,,,,,91.585
839,0 days 01:03:37.462000,NOR,4,1.0,0 days 00:01:32.227000,0 days 00:00:37.353000,0 days 00:00:28.594000,0 days 00:00:30.567000,False,2.0,...,30.567,96.514,92.227,3724.960,,,,83.205,9.022,92.227
216,0 days 01:03:38.001000,ALO,14,1.0,0 days 00:01:32.766000,0 days 00:00:36.326000,0 days 00:00:28.764000,0 days 00:00:30.590000,False,3.0,...,30.590,95.680,92.766,3724.960,,,,81.699,11.067,92.766
1127,0 days 01:03:38.476000,RUS,63,1.0,0 days 00:01:33.241000,0 days 00:00:37.093000,0 days 00:00:28.820000,0 days 00:00:30.679000,False,4.0,...,30.679,96.592,93.241,3724.960,,,,82.652,10.589,93.241
560,0 days 01:03:39.511000,ALB,23,1.0,0 days 00:01:34.276000,0 days 00:00:36.857000,0 days 00:00:29.096000,0 days 00:00:31.301000,False,5.0,...,31.301,97.254,94.276,3724.960,,,,83.883,10.393,94.276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,0 days 03:26:34.603000,TSU,22,72.0,0 days 00:01:23.883000,0 days 00:00:28.923000,0 days 00:00:28.789000,0 days 00:00:26.171000,True,13.0,...,26.171,83.883,83.883,12310.720,,,,83.101,0.782,6247.044
982,0 days 03:26:35.827000,LAW,40,72.0,0 days 00:01:23.592000,0 days 00:00:28.792000,0 days 00:00:28.745000,0 days 00:00:26.055000,True,14.0,...,26.055,83.592,83.592,12312.235,,,,84.122,-0.530,6261.330
487,0 days 03:26:36.084000,MAG,20,72.0,0 days 00:01:23.550000,0 days 00:00:29.009000,0 days 00:00:28.794000,0 days 00:00:25.747000,True,15.0,...,25.747,83.550,83.550,12312.534,,,,83.912,-0.362,6315.824
1270,0 days 03:26:37.061000,BOT,77,72.0,0 days 00:01:23.030000,0 days 00:00:28.882000,0 days 00:00:28.443000,0 days 00:00:25.705000,True,16.0,...,25.705,83.030,83.030,12314.031,,,,82.873,0.157,6255.933


#### DEFINING FUNCTION TO CALCULATE GAP TO LEADER AND INTERVAL AT THE END OF EACH LAP

In [120]:
def gap_and_interval(df):
    gap_to_leader=[]
    interval=[]
    for i in df['LapNumber'].unique():
        lap=df[df['LapNumber']==i]
        leader_time=lap.iloc[0]['CumLapTime(s)']
        lap.loc[:,'GapToLeader']=lap['CumLapTime(s)']-leader_time
        lap.loc[:,'Interval']=lap['CumLapTime(s)'].diff().fillna(0)
        gap_to_leader.extend(lap['GapToLeader'].tolist())
        interval.extend(lap['Interval'].tolist())
    return gap_to_leader,interval
master_data['GapToLeader'],master_data['Interval']=gap_and_interval(master_data)
master_data.loc[master_data['Position']==1,'GapToLeader']=0
master_data.loc[master_data['Position']==1,'Interval']=0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lap.loc[:,'GapToLeader']=lap['CumLapTime(s)']-leader_time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lap.loc[:,'Interval']=lap['CumLapTime(s)'].diff().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lap.loc[:,'GapToLeader']=lap['CumLapTime(s)']-leader_time
A value is trying to be set o

#### SCALING AND NORMALIZING THE DATA

In [121]:
from sklearn.preprocessing import RobustScaler
scaler=RobustScaler()
toscale=['LapTime(s)', 'Sector1Time(s)', 'Sector2Time(s)', 'Sector3Time(s)', 
                    'CumLapTime(s)', 'GapToLeader', 'Interval', 'LapStartTime(s)', 
                    'PitInTime(s)', 'PitOutTime(s)', 'PitStopTime(s)', 'PrevLapTime(s)', 'LapΔ(s)']
master_data[toscale] = scaler.fit_transform(master_data[toscale])
telemetry_columns_to_scale = ['Speed', 'Throttle', 'Brake', 'nGear', 'RPM', 'DRS']
telemetry_data[telemetry_columns_to_scale] = scaler.fit_transform(telemetry_data[telemetry_columns_to_scale])
weather_columns_to_scale = ['AirTemp', 'TrackTemp', 'Humidity', 'Pressure', 'WindSpeed', 'WindDirection','Rainfall']
weather_data[weather_columns_to_scale] = scaler.fit_transform(weather_data[weather_columns_to_scale])

#### FINDING MISSING DATA

In [129]:
master_missing=master_data.isnull().sum()
telemetry_missing=telemetry_data.isnull().sum()
weather_missing=weather_data.isnull().sum()
print(master_missing,'\n',telemetry_missing,'\n',weather_missing)

Time                 0
Driver               0
DriverNumber         0
LapNumber            0
LapTime              0
Sector1Time          0
Sector2Time          0
Sector3Time          0
IsAccurate           0
Position             2
LapStartTime         0
Compound             0
FreshTyre            0
TyreLife             0
NumberOfPitStops     0
PitOutTime           0
PitInTime            0
Sector1Time(s)       0
Sector2Time(s)       0
Sector3Time(s)       0
Cum.SectorTime(s)    0
LapTime(s)           0
LapStartTime(s)      0
PitInTime(s)         0
PitOutTime(s)        0
PitStopTime(s)       0
PrevLapTime(s)       0
LapΔ(s)              0
CumLapTime(s)        0
GapToLeader          0
Interval             0
dtype: int64 
 Date           0
RPM            0
Speed          0
nGear          0
Throttle       0
Brake          0
DRS            0
Source         0
Time           0
SessionTime    0
Driver         0
dtype: int64 
 Time             0
AirTemp          0
Humidity         0
Pressure     

#### FILLING MISSING PITSTOP DATA 

In [123]:
colums_to_fill_zero=['NumberOfPitStops','PitInTime','PitOutTime','PitInTime(s)','PitOutTime(s)','PitStopTime(s)','PrevLapTime(s)','LapΔ(s)']
master_data[colums_to_fill_zero]=master_data[colums_to_fill_zero].fillna(0)
master_data.isnull().sum()

Time                 0
Driver               0
DriverNumber         0
LapNumber            0
LapTime              0
Sector1Time          0
Sector2Time          0
Sector3Time          0
IsAccurate           0
Position             2
LapStartTime         0
Compound             0
FreshTyre            0
TyreLife             0
NumberOfPitStops     0
PitOutTime           0
PitInTime            0
Sector1Time(s)       0
Sector2Time(s)       0
Sector3Time(s)       0
Cum.SectorTime(s)    0
LapTime(s)           0
LapStartTime(s)      0
PitInTime(s)         0
PitOutTime(s)        0
PitStopTime(s)       0
PrevLapTime(s)       0
LapΔ(s)              0
CumLapTime(s)        0
GapToLeader          0
Interval             0
dtype: int64