**Data Preprocessing, Cleaning and Merging**
---

---
**Import necessary libraries and datasets**

In [4]:
import numpy as np
import pandas as pd 
circuits = pd.read_csv('circuits_pre.csv')
con_specs = pd.read_csv('constructors_specs_pre.csv')
con_stats = pd.read_csv('constructors_stats_pre.csv')
drivers = pd.read_csv('drivers_pre.csv')
quali = pd.read_csv('quali_results_pre.csv')
weather = pd.read_csv('weather_pre.csv')
races2 = pd.read_csv('race_results_pre.csv')
salaries = pd.read_csv('salaries.csv')

---
**Constructors dataframes preprocessing**


In [303]:
'''
In this part of the code, column names and some instances names are adjusted  
to be consistent in all data frames. 
'''
circuits.rename(columns={'country': 'circuit_country'}, inplace=True)
circuits['circuit_country'] = circuits['circuit_country'].replace('UK', 'United Kingdom')


con_specs['Valves'] = con_specs.groupby('season')['Valves'].transform(lambda x: x.fillna(x.median()))
con_specs['Valves'] = con_specs['Valves'].astype(int)
nationality_to_country = {'Spanish': 'Spain', 'Brazilian': 'Brazil', 'Swiss': 'Switzerland', 'British': 'United Kingdom', 
                          'Indian': 'India', 'German': 'Germany', 'Austrian': 'Austria', 'Japanese': 'Japan', 
                          'Finnish': 'Finland', 'Polish': 'Poland', 'Italian': 'Italy', 'Russian': 'Russia',
                          'Australian': 'Australia', 'Belgian': 'Belgium', 'Venezuelan': 'Venezuela', 
                          'Mexican': 'Mexico', 'French': 'France','Dutch': 'Netherlands', 'Swedish': 'Sweeden', 
                          'Danish': 'Denmark', 'American': 'USA', 'Indonesian': 'Indonesia','New Zealander': 'New Zealand',
                          'Canadian': 'Canada', 'Monegasque': 'Monaco', 'Thai': 'Thailand', 'Chinese': 'China',
                          'Malaysian': 'Malaysia'}
con_specs['Country of Origin'] = con_specs['nationality'].map(nationality_to_country)
con_specs.drop('nationality', axis=1, inplace=True)
con_specs.rename(columns={'Country of Origin': 'constructor_country_of_origin',
                          'pole_positions': 'consturctor_pole_positions', 'Points': 'constructor_points'}, inplace = True)
con_stats[['wins', 'pole_positions', 'Constructor_Championships', 'Driver_Championships']] = con_stats[['wins', 'pole_positions', 'Constructor_Championships', 'Driver_Championships']].astype(int)
nationality_to_country = {'Spanish': 'Spain', 'Brazilian': 'Brazil', 'Swiss': 'Switzerland', 'British': 'United Kingdom', 
                          'Indian': 'India', 'German': 'Germany', 'Austrian': 'Austria', 'Japanese': 'Japan', 
                          'Finnish': 'Finland', 'Polish': 'Poland', 'Italian': 'Italy', 'Russian': 'Russia',
                          'Australian': 'Australia', 'Belgian': 'Belgium', 'Venezuelan': 'Venezuela', 
                          'Mexican': 'Mexico', 'French': 'France','Dutch': 'Netherlands', 'Swedish': 'Sweeden', 
                          'Danish': 'Denmark', 'American': 'USA', 'Indonesian': 'Indonesia','New Zealander': 'New Zealand',
                          'Canadian': 'Canada', 'Monegasque': 'Monaco', 'Thai': 'Thailand', 'Chinese': 'China',
                          'Malaysian': 'Malaysia'}

con_stats['Country of Origin'] = con_stats['nationality'].map(nationality_to_country)

con_stats.rename(columns={'wins': 'constructor_wins', 
                          'nationality': 'constructor_nationality', 'pole_positions': 'consturctor_pole_positions', 
                          'Points': 'constructor_points', 
                          'Country of Origin': 'constructor_country_of_origin'}, inplace = True)
con_stats.drop('constructor_nationality', axis=1, inplace=True)

---
**Drivers dataframe preprocessing**

In [305]:
'''
In this part of the code, missing values for weight and height are imputed using their median values. Moreover,
column names and some instances names are adjusted to be consistent in all data frames, as above.
'''


mean_height = drivers['height (cm)'].mean()
mean_weight = drivers['weight (kg)'].mean()

drivers['height (cm)'].fillna(mean_height, inplace=True)
drivers['weight (kg)'].fillna(mean_weight, inplace=True)

drivers['height (cm)'] = drivers['height (cm)'].astype(int)
drivers['weight (kg)'] = drivers['weight (kg)'].astype(int)

nationality_to_country = {'Spanish': 'Spain', 'Brazilian': 'Brazil', 'Swiss': 'Switzerland', 'British': 'United Kingdom', 
                          'Indian': 'India', 'German': 'Germany', 'Austrian': 'Austria', 'Japanese': 'Japan', 
                          'Finnish': 'Finland', 'Polish': 'Poland', 'Italian': 'Italy', 'Russian': 'Russia',
                          'Australian': 'Australia', 'Belgian': 'Belgium', 'Venezuelan': 'Venezuela', 
                          'Mexican': 'Mexico', 'French': 'France','Dutch': 'Netherlands', 'Swedish': 'Sweeden', 
                          'Danish': 'Denmark', 'American': 'USA', 'Indonesian': 'Indonesia','New Zealander': 'New Zealand',
                          'Canadian': 'Canada', 'Monegasque': 'Monaco', 'Thai': 'Thailand', 'Chinese': 'China'}

drivers['Country of Origin'] = drivers['nationality'].map(nationality_to_country)

drivers.drop('nationality', axis = 1, inplace = True)

drivers.rename(columns={'wins': 'driver_wins', 'poles': 'driver_pole_positions', 'career_points': 'driver_career_points',
                        'Country of Origin': 'driver_country_of_origin', 'championships': 'driver_championships'}, inplace=True)



---
**Qualifying results dataframe preprocessing**

In [306]:
'''
In this part of the code, qualifying lap times format is changed to be interpreted in a common format across
all instances. Instances that had missing lap times were imputed.
'''
quali.rename(columns={'Season': 'season'}, inplace=True)
def convert_time_to_millis(time_str):
    minutes, seconds = time_str.split(":")
    seconds, milliseconds = seconds.split(".")
    total_millis = int(minutes)*60000 + int(seconds)*1000 + int(milliseconds)
    return total_millis

def convert_millis_to_time(millis):
    minutes = millis // 60000
    seconds = (millis % 60000) // 1000
    milliseconds = millis % 1000
    return f"{minutes}:{seconds}.{milliseconds}"


nan_indices = quali[quali['time'].isna()].index

for idx in nan_indices:
    if idx > 0:
        prev_time = quali.loc[idx - 1, 'time']
        prev_time_millis = convert_time_to_millis(prev_time)
        new_time_millis = prev_time_millis + 1
        new_time = convert_millis_to_time(new_time_millis)
        quali.loc[idx, 'time'] = new_time
        
        


---
**Race results dataframe preprocessing**

In [307]:
'''
In this part of the code, round of each grand prix is added as a column to the dataframe, 
with some manual modifications to address the issues with races that happened in the same circuit in one season,
seen in 2020 and 2021 due to Covid-19 pandemic implications. Furthermore, 'Points before Grand Prix' column is 
generated, which is a variable capturing the accumulated points of a driver throughout each single 
season. Drivers which were not classified due to not finishing the race, were addressed by incrementing the rank,
in the order that the data is acquired. A new variable named 'Finished' is also created to split the 
Time/Retired column, to have consistent data types within each variable. Some further names fixes for consistency 
are also implemented.
'''

df = races2.copy()
df.drop(['constructor_id', 'driver_id', 'Laps', 'Race time', 'Points', 'Rank'], axis = 1, inplace = True)
df.drop_duplicates(inplace =True)
df.reset_index(drop=True, inplace = True)
df

df['round'] = df.groupby('Season').cumcount() + 1
df = df[['Season', 'circuit_id', 'round'] + [col for col in df.columns if col not in ['Season', 'circuit_id', 'round']]]

rounds = df
races3 = races2.merge(df, on=['Season', 'circuit_id'])

races3.loc[4309:4328, 'round'] = 2
races3.loc[4329:4348, 'round'] = 3
races3.loc[4349:4368, 'round'] = 4
races3.loc[4369:4388, 'round'] = 5
races3.loc[4389:4408, 'round'] = 6
races3.loc[4409:4428, 'round'] = 7
races3.loc[4429:4448, 'round'] = 8
races3.loc[4449:4468, 'round'] = 9
races3.loc[4469:4488, 'round'] = 10
races3.loc[4489:4508, 'round'] = 11
races3.loc[4509:4528, 'round'] = 12
races3.loc[4509:4528, 'round'] = 12
races3.loc[4529:4548, 'round'] = 13
races3.loc[4509:4528, 'round'] = 12
races3.loc[4549:4568, 'round'] = 14
races3.loc[4509:4528, 'round'] = 12
races3.loc[4569:4588, 'round'] = 15
races3.loc[4509:4528, 'round'] = 12
races3.loc[4589:4608, 'round'] = 16
races3.loc[4609:4628, 'round'] = 17
races3.loc[4789:4808, 'round'] = 9
races3.loc[4809:4828, 'round'] = 10
races3.loc[4829:4848, 'round'] = 11
races3.loc[4849:4868, 'round'] = 12
races3.loc[4869:4888, 'round'] = 13
races3.loc[4889:4908, 'round'] = 14
races3.loc[4909:4928, 'round'] = 15
races3.loc[4929:4948, 'round'] = 16
races3.loc[4949:4968, 'round'] = 17
races3.loc[4969:4988, 'round'] = 18
races3.loc[4989:5008, 'round'] = 19
races3.loc[5009:5028, 'round'] = 20
races3.loc[5029:5048, 'round'] = 21
races3.loc[5049:5068, 'round'] = 22

races3.sort_values(by=['Season', 'round', 'circuit_id'], inplace=True)

races3['Points'] = races3['Points'].astype(float)
races3.sort_values(by=['Season', 'driver_id', 'round'], inplace=True)
races3['Points Before GP'] = races3.groupby(['Season', 'driver_id'])['Points'].cumsum() - races3['Points']

races3.rename(columns={'Season': 'season'}, inplace=True)
races3['Rank'] = pd.to_numeric(races3['Rank'], errors='coerce')
races3 = races3.sort_values(by=['season', 'round', 'Rank'])
races3 = races3.reset_index(drop=True)

races3 = races3.sort_values(['season', 'round', 'circuit_id'])

def fill_ranks(group):
    rank_counter = 1
    for i in group.index:
        if pd.isna(group.loc[i, 'Rank']):
            group.loc[i, 'Rank'] = group.loc[i-1, 'Rank'] + 1 if i-1 in group.index else rank_counter
        else:
            rank_counter = group.loc[i, 'Rank'] + 1
    return group

races3 = races3.groupby(['season', 'round', 'circuit_id']).apply(fill_ranks)



races3['Rank'] = races3['Rank'].astype(int)
races3['Laps'] = races3['Laps'].fillna(0)
races3['Laps'] = races3['Laps'].astype(int)

races3['Finished'] = races3['Race time'].apply(lambda x: 0 if x in ['DNF', 'DNS'] else 1)
races3.drop('Race time', axis = 1, inplace = True)

conditions = (races3['season'] == 2010) & ((races3['driver_id'] == 'vettel') | (races3['driver_id'] == 'webber'))

races3.loc[conditions, 'constructor_id'] = 'red_bull'

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  races3 = races3.groupby(['season', 'round', 'circuit_id']).apply(fill_ranks)


In [308]:
races3

Unnamed: 0,season,circuit_id,constructor_id,driver_id,Laps,Points,Rank,round,Points Before GP,Finished
0,2010,bahrain,ferrari,alonso,49,25.0,1,1,0.0,1
1,2010,bahrain,ferrari,massa,49,18.0,2,1,0.0,1
2,2010,bahrain,mclaren,hamilton,49,15.0,3,1,0.0,1
3,2010,bahrain,red_bull,vettel,49,12.0,4,1,0.0,1
4,2010,bahrain,mercedes,rosberg,49,10.0,5,1,0.0,1
...,...,...,...,...,...,...,...,...,...,...
5504,2022,yas_marina,haas,mick_schumacher,57,0.0,16,22,12.0,1
5505,2022,yas_marina,haas,kevin_magnussen,57,0.0,17,22,21.0,1
5506,2022,yas_marina,mercedes,hamilton,55,0.0,18,22,233.0,0
5507,2022,yas_marina,williams,latifi,55,0.0,19,22,2.0,0


---
**Further Modifications, that were identified after the initial cleaning**

In [309]:
'''
In this part of the code, some further modifications to columns not needed for analysis regarding drivers, constructors and qualifying
dataframes. Some further names modifications are implemented as well.
'''

drivers.drop(['driver_championships', 'driver_career_points'], axis = 1, inplace = True)
con_stats.drop(['constructor_points', 'Constructor_Championships', 'Driver_Championships'],
               axis = 1, inplace = True)

conditions = (quali['season'] == 2010) & ((quali['driver_id'] == 'vettel') | (quali['driver_id'] == 'webber'))

quali.loc[conditions, 'constructor_id'] = 'red_bull'

quali.loc[quali['driver_id'] == 'chandhock', 'driver_id'] = 'chandhok'




In [311]:
quali2 = quali[['season', 'circuit_id']]

quali2.drop_duplicates(inplace = True)


quali2.loc[:, 'round'] = quali2.groupby('season').cumcount() + 1
quali = pd.merge(quali, quali2, on=['season', 'circuit_id'], how='left')



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quali2.drop_duplicates(inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quali2.loc[:, 'round'] = quali2.groupby('season').cumcount() + 1


In [313]:
quali.loc[4270:4288, 'round'] = 2
quali.loc[4289:4308, 'round'] = 3
quali.loc[4309:4328, 'round'] = 4
quali.loc[4329:4348, 'round'] = 5
quali.loc[4349:4368, 'round'] = 6
quali.loc[4369:4388, 'round'] = 7
quali.loc[4389:4408, 'round'] = 8
quali.loc[4409:4428, 'round'] = 9
quali.loc[4429:4448, 'round'] = 10
quali.loc[4449:4468, 'round'] = 11
quali.loc[4469:4488, 'round'] = 12
quali.loc[4489:4508, 'round'] = 13
quali.loc[4509:4528, 'round'] = 14
quali.loc[4529:4548, 'round'] = 15
quali.loc[4549:4568, 'round'] = 16
quali.loc[4569:4588, 'round'] = 17

quali.loc[4748:4767, 'round'] = 9
quali.loc[4768:4787, 'round'] = 10
quali.loc[4788:4807, 'round'] = 11
quali.loc[4808:4827, 'round'] = 12
quali.loc[4828:4845, 'round'] = 13
quali.loc[4846:4864, 'round'] = 14
quali.loc[4865:4884, 'round'] = 15
quali.loc[4885:4904, 'round'] = 16
quali.loc[4905:4924, 'round'] = 17
quali.loc[4925:4944, 'round'] = 18
quali.loc[4945:4964, 'round'] = 19
quali.loc[4965:4984, 'round'] = 20
quali.loc[4985:5004, 'round'] = 21
quali.loc[5005:5023, 'round'] = 22

con_specs.loc[(con_specs['season'] == 2011) & (con_specs['constructor_id'] == 'lotus_racing'), 'constructor_id'] = 'lotus_f1'
con_specs.loc[(con_specs['season'] == 2015) & (con_specs['constructor_id'] == 'manor'), 'constructor_id'] = 'marussia'
con_specs.loc[con_specs['constructor_id'] == 'lotus_f1', 'constructor_country_of_origin'] = 'United Kingdom'
con_specs.loc[(con_specs['constructor_id'] == 'marussia') & (con_specs['season'] == 2015), 'constructor_country_of_origin'] = 'Russia'
con_specs.loc[con_specs['constructor_id'] == 'manor', 'constructor_country_of_origin'] = 'United Kingdom'
con_stats.loc[con_specs['constructor_id'] == 'ferrari', 'constructor_wins'] = 210
con_stats.loc[con_specs['constructor_id'] == 'ferrari', 'consturctor_pole_positions'] = 203

quali

Unnamed: 0,season,circuit_id,constructor_id,driver_id,time,grid,round
0,2010,bahrain,red_bull,vettel,1:54.101,1,1
1,2010,bahrain,ferrari,massa,1:54.242,2,1
2,2010,bahrain,ferrari,alonso,1:54.608,3,1
3,2010,bahrain,mclaren,hamilton,1:55.217,4,1
4,2010,bahrain,mercedes,rosberg,1:55.241,5,1
...,...,...,...,...,...,...,...
5455,2022,yas_marina,haas,kevin_magnussen,1:25.834,16,22
5456,2022,yas_marina,alphatauri,gasly,1:25.859,17,22
5457,2022,yas_marina,alfa,bottas,1:25.892,18,22
5458,2022,yas_marina,williams,albon,1:26.028,19,22


---
**Dataframes merging**

In [314]:
'''
In this section of the code all dataframes are merged on common variables, to construct a complete
dataset, for our analysis purposes. Whenever needed, further modificartions were implemented.
'''
mdf1 = pd.merge(races3, quali, on=['season',  'constructor_id', 'circuit_id', 'driver_id', 'round'], how = 'left')

mdf2 = pd.merge(mdf1, con_stats, on='constructor_id', how='left')

constructor_wins_dict = con_stats.set_index('constructor_id')['constructor_wins'].to_dict()
constructor_poles_dict = con_stats.set_index('constructor_id')['consturctor_pole_positions'].to_dict()

mdf2['updated_wins'] = 0
mdf2['updated_poles'] = 0

for idx, row in mdf2.iterrows():
    constructor_id = row['constructor_id']
    
    if row['Rank'] == 1:
        constructor_wins_dict[constructor_id] += 1
    if row['grid'] == 1:
        constructor_poles_dict[constructor_id] += 1
        
    mdf2.at[idx, 'updated_wins'] = constructor_wins_dict[constructor_id]
    mdf2.at[idx, 'updated_poles'] = constructor_poles_dict[constructor_id]

mdf3 = pd.merge(mdf2, drivers, on='driver_id', how='left')

mdf3.drop(['constructor_wins', 'consturctor_pole_positions'], axis = 1, inplace = True)
mdf3.loc[(weather['season'] == 2011) & (mdf3['round'] == 10), 'circuit_id'] = 'nurburgring'
mdf3.loc[(weather['season'] == 2013) & (mdf3['round'] == 9), 'circuit_id'] = 'nurburgring'
drivers_wins_dict = drivers.set_index('driver_id')['driver_wins'].to_dict()
drivers_poles_dict = drivers.set_index('driver_id')['driver_pole_positions'].to_dict()

mdf3['updated_driver_wins'] = 0
mdf3['updated_driver_poles'] = 0

for idx, row in mdf3.iterrows():
    driver_id = row['driver_id']
    
    if row['Rank'] == 1:
        drivers_wins_dict[driver_id] += 1
    if row['grid'] == 1:
        drivers_poles_dict[driver_id] += 1
    
    mdf3.at[idx, 'updated_driver_wins'] = drivers_wins_dict[driver_id]
    mdf3.at[idx, 'updated_driver_poles'] = drivers_poles_dict[driver_id]

mdf3.drop(['driver_wins', 'driver_pole_positions'], axis = 1, inplace = True)
mdf3
mdf4 = pd.merge(mdf3, con_specs, on=['constructor_id', 'season', 'constructor_country_of_origin'], how='left')

mdf4['driver_age'] = mdf4['season'] - mdf4['year_of_birth']

mdf4['circuit_id'] = mdf4['circuit_id'].replace('rodriquez', 'rodriguez')
mdf4.loc[((mdf4['season'] == 2011) | (mdf4['season'] == 2013)) & (mdf5['circuit_id'] == 'hockenheimring'), 'circuit_id'] = 'nurburgring'
mdf4.loc[((mdf4['season'] == 2016) & (mdf4['circuit_id'] == 'valencia')), 'circuit_id'] = 'baku'
mdf5 = pd.merge(mdf4, circuits, on='circuit_id', how='left')

mdf5['driver_home_race'] = mdf5.apply(lambda row: 1 if row['driver_country_of_origin'] == row['circuit_country'] else 0, axis=1)
mdf5['constructor_home_race'] = mdf5.apply(lambda row: 1 if row['constructor_country_of_origin'] == row['circuit_country'] else 0, axis=1)

weather.loc[:, 'round'] = weather.groupby('season').cumcount() + 1
weather = weather[['season', 'circuit_id', 'round', 'weather_category_Clear', 'weather_category_Cloudy','weather_category_Rainy','weather_category_Warm', 'weather_category_Windy' ] + [col for col in weather2.columns if col not in ['season', 'circuit_id', 'round']]]
weather.loc[(weather['season'] == 2011) & (weather['round'] == 10), 'circuit_id'] = 'nurburgring'
weather.loc[(weather['season'] == 2013) & (weather['round'] == 9), 'circuit_id'] = 'nurburgring'


mdf6 = pd.merge(mdf5, weather, on=['season','circuit_id', 'round'], how='left')

mdf6.drop('time', axis = 1, inplace = True)

def fill_grids(group):
    grid_counter = 1
    for i in group.index:
        if pd.isna(group.loc[i, 'grid']):
            group.loc[i, 'grid'] = group.loc[i-1, 'grid'] + 1 if i-1 in group.index else grid_counter
        else:
            grid_counter = group.loc[i, 'grid'] + 1
    return group

mdf6 = mdf6.groupby(['season', 'round', 'circuit_id']).apply(fill_grids)

mdf6['grid'] = mdf6['grid'].astype(int)

salaries = salaries.append({'season': 2014, 'driver_id': 'hamilton', 'salary': 26000000}, ignore_index=True)
df1copy = mdf6.copy()
df1copy.columns
df1copy.drop(['circuit_id', 'constructor_id',  'Laps', 'Points',
       'Rank', 'round', 'Points Before GP', 'Finished', 'grid',
       'constructor_country_of_origin', 'updated_wins', 'updated_poles',
       'year_of_birth', 'height (cm)', 'weight (kg)',
       'driver_country_of_origin', 'updated_driver_wins',
       'updated_driver_poles', 'Engine Manufacturer', 'Engine', 'Displacement',
       'Cylinders', 'Valves', 'driver_age', 'circuit_country', 'configuration',
       'length (km)', 'number_of_corners', 'drs_zones', 'driver_home_race',
       'constructor_home_race', 'weather_category_Clear',
       'weather_category_Cloudy', 'weather_category_Rainy',
       'weather_category_Warm', 'weather_category_Windy'], axis = 1, inplace = True)
df1copy.drop_duplicates(inplace = True)
df2copy = salaries.copy()
df2copy.drop('salary', axis = 1, inplace = True)

merged_df = df1copy.merge(df2copy, on=['season', 'driver_id'], how='outer', indicator=True)

differences = merged_df.loc[merged_df['_merge'].isin(['left_only', 'right_only'])]

salaries = salaries.drop(salaries[(salaries['season'] == 2010) & (salaries['driver_id'] == 'raikkonen')].index)
salaries['driver_id'] = salaries['driver_id'].replace('Rossi', 'rossi')
differences = differences.drop(differences[(differences['season'] == 2010)&(differences['driver_id'] == 'raikkonen')].index)
differences = differences.drop(differences[(differences['season'] == 2015)&(differences['driver_id'] == 'Rossi')].index)
differences['salary'] = 'None'
differences.drop('_merge', axis = 1, inplace = True)
differences.reset_index(drop = True, inplace = True)
df3 = pd.concat([salaries, differences], ignore_index = True)
df3.sort_values(['season', 'driver_id'], inplace = True)

mdf7 = mdf6.merge(df3, on =['season', 'driver_id'], how = 'left')

mdf7['salary'] = mdf7['salary'].replace('None', np.nan)

mdf7 = mdf7.dropna(subset=['salary'])

mdf7.reset_index(drop=True, inplace=True)
mdf7 = mdf7.drop_duplicates()


mdf7.to_csv('mdf7.csv', index = False)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  mdf6 = mdf6.groupby(['season', 'round', 'circuit_id']).apply(fill_grids)
  salaries = salaries.append({'season': 2014, 'driver_id': 'hamilton', 'salary': 26000000}, ignore_index=True)


In [315]:
mdf7

Unnamed: 0,season,circuit_id,constructor_id,driver_id,Laps,Points,Rank,round,Points Before GP,Finished,grid,constructor_country_of_origin,updated_wins,updated_poles,year_of_birth,height (cm),weight (kg),driver_country_of_origin,updated_driver_wins,updated_driver_poles,Engine Manufacturer,Engine,Displacement,Cylinders,Valves,driver_age,circuit_country,configuration,length (km),number_of_corners,drs_zones,driver_home_race,constructor_home_race,weather_category_Clear,weather_category_Cloudy,weather_category_Rainy,weather_category_Warm,weather_category_Windy,salary
0,2010,bahrain,ferrari,alonso,49,25.0,1,1,0.0,1,3,Italy,211,203,1981,171,68,Spain,22,18,Ferrari,Ferrari Tipo 056,2398,8,32,29,Bahrain,Race,5.412,15,3,0,0,1,0,0,0,0,40000000.0
1,2010,bahrain,ferrari,massa,49,18.0,2,1,0.0,1,2,Italy,211,203,1981,166,59,Brazil,11,15,Ferrari,Ferrari Tipo 056,2398,8,32,29,Bahrain,Race,5.412,15,3,0,0,1,0,0,0,0,18620000.0
2,2010,bahrain,mclaren,hamilton,49,15.0,3,1,0.0,1,4,United Kingdom,164,145,1985,175,66,United Kingdom,11,17,Mercedes,Mercedes FO 108X,2398,8,32,25,Bahrain,Race,5.412,15,3,0,0,1,0,0,0,0,21280000.0
3,2010,bahrain,red_bull,vettel,49,12.0,4,1,0.0,1,1,Austria,6,6,1987,176,58,Germany,5,6,Renault,Renault RS27-2010,2400,8,32,23,Bahrain,Race,5.412,15,3,0,0,1,0,0,0,0,4700000.0
4,2010,bahrain,mercedes,rosberg,49,10.0,5,1,0.0,1,5,Germany,9,8,1985,178,71,Germany,0,0,Mercedes,Mercedes FO 108X,2398,8,32,25,Bahrain,Race,5.412,15,3,0,0,1,0,0,0,0,10640000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5444,2022,yas_marina,haas,mick_schumacher,57,0.0,16,22,12.0,1,12,USA,0,0,1999,170,60,Germany,0,0,Ferrari,Ferrari Tipo 066/7,1600,6,24,23,UAE,Race,5.554,16,2,0,0,1,0,0,0,0,1000000.0
5445,2022,yas_marina,haas,kevin_magnussen,57,0.0,17,22,21.0,1,16,USA,0,0,1992,174,68,Denmark,0,0,Ferrari,Ferrari Tipo 066/7,1600,6,24,30,UAE,Race,5.554,16,2,0,0,1,0,0,0,0,6000000.0
5446,2022,yas_marina,mercedes,hamilton,55,0.0,18,22,233.0,0,5,Germany,125,137,1985,175,66,United Kingdom,103,103,Mercedes,Mercedes M13 E Performance,1600,6,24,37,UAE,Race,5.554,16,2,0,0,1,0,0,0,0,40000000.0
5447,2022,yas_marina,williams,latifi,55,0.0,19,22,2.0,0,20,United Kingdom,114,128,1995,185,74,Canada,0,0,Mercedes,Mercedes M13 E Performance,1600,6,24,27,UAE,Race,5.554,16,2,0,0,1,0,0,0,0,1000000.0


In [323]:
mdf7.to_csv('mdf7.csv', index = False)