In [1]:
import pandas as pd
import ydata_profiling

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
results = pd.read_csv(r"data/results.csv")
races = pd.read_csv(r"data/races.csv")
drivers = pd.read_csv(r"data/drivers.csv")
driver_standings = pd.read_csv(r"data/driver_standings.csv")
constructors = pd.read_csv(r"data/constructors.csv")
constructor_standings = pd.read_csv(r"data/constructor_standings.csv")
constructor_results = pd.read_csv(r"data/constructor_results.csv")
pit_stops = pd.read_csv(r"data/pit_stops.csv")
status = pd.read_csv(r"data/status.csv")
seasons = pd.read_csv(r"data/seasons.csv")

circuits = pd.read_csv(r"data/circuits.csv")
lap_times = pd.read_csv(r"data/lap_times.csv")
qualifying = pd.read_csv(r"data/qualifying.csv")
sprint_results = pd.read_csv(r"data/sprint_results.csv")

In [4]:
print('\nDrivers:')
print((drivers == "\\N").sum())
print('\nRaces:')
print((races == "\\N").sum())
print('\nResults:')
print((results == "\\N").sum())
print('\nConstructor_results:')
print((constructor_results == "\\N").sum())
print('\nQualifying:')
print((qualifying == "\\N").sum())
print('\nSprint_results:')
print((sprint_results == "\\N").sum())

# W pozostałych zbiorach nie ma pustych wartości:
# print((circuits == "\\N").sum())
# print((constructor_standings == "\\N").sum())
# print((constructors == "\\N").sum())
# print((driver_standings == "\\N").sum())
# print((lap_times == "\\N").sum())
# print((pit_stops == "\\N").sum())
# print((seasons == "\\N").sum())
# print((status == "\\N").sum())


Drivers:
driverId         0
driverRef        0
number         802
code           757
forename         0
surname          0
dob              0
nationality      0
url              0
dtype: int64

Races:
raceId            0
year              0
round             0
circuitId         0
name              0
date              0
time            731
url               0
fp1_date       1035
fp1_time       1057
fp2_date       1035
fp2_time       1057
fp3_date       1053
fp3_time       1072
quali_date     1035
quali_time     1057
sprint_date    1107
sprint_time    1110
dtype: int64

Results:
resultId               0
raceId                 0
driverId               0
constructorId          0
number                 6
grid                   0
position           10916
positionText           0
positionOrder          0
points                 0
laps                   0
time               18931
milliseconds       18931
fastestLap         18478
rank               18249
fastestLapTime     18478
fastestLapSpeed

In [5]:
# Stworzenie zmiennej z punktami/rankingiem teamu przed danym wyścigiem

constructor_standings = constructor_standings.sort_values(by='raceId')
constructor_standings['points_before'] = constructor_standings.groupby('constructorId')['points'].shift(1)
constructor_standings['ranking_before'] = constructor_standings.groupby('constructorId')['position'].shift(1)
constructor_standings.dropna(inplace=True)
constructor_standings['points_before']=constructor_standings['points_before'].astype(int)
constructor_standings['ranking_before']=constructor_standings['ranking_before'].astype(int)
constructor_standings = constructor_standings[['raceId','constructorId','points_before','ranking_before']]

In [6]:
constructor_standings

Unnamed: 0,raceId,constructorId,points_before,ranking_before
3676,2,9,0,8
3684,2,23,18,1
3682,2,7,11,2
3675,2,6,0,9
3680,2,3,3,4
...,...,...,...,...
13205,1126,117,40,5
13206,1126,15,0,10
13207,1126,210,5,7
13208,1126,215,7,6


In [7]:
# Stworzenie zmiennej z punktami/rankingiem kierowcy przed danym wyścigiem

driver_standings = driver_standings.sort_values(by='raceId')
driver_standings['points_before'] = driver_standings.groupby('driverId')['points'].shift(1)
driver_standings['ranking_before'] = driver_standings.groupby('driverId')['position'].shift(1)
driver_standings.dropna(inplace=True)
driver_standings['points_before']=driver_standings['points_before'].astype(int)
driver_standings['ranking_before']=driver_standings['ranking_before'].astype(int)

In [8]:
# Stworzenie zmiennej ze średnim czasem trwania pit-stopu w sezonie dla poszczególnych teamów

pit_stops_merged = pd.merge(pit_stops, results, on = ['driverId','raceId'], how = 'left')
pit_stops_merged = pd.merge(pit_stops_merged, races, on = ['raceId'], how = 'left')
pit_stops_merged = pit_stops_merged[['raceId','constructorId','duration','year']]
pit_stops_merged['duration'] = pd.to_numeric(pit_stops_merged['duration'], errors='coerce')
pit_stops_merged['avg_season_duration'] = pit_stops_merged.groupby(['constructorId', 'year'])['duration'].transform('mean')
pit_stops_merged = pit_stops_merged[['year','constructorId','avg_season_duration']]
pit_stops_merged = pit_stops_merged.drop_duplicates()

In [9]:
pit_stops_merged.loc[pit_stops_merged['year']==2024].sort_values(by='constructorId')

Unnamed: 0,year,constructorId,avg_season_duration
10578,2024,1,22.4348
10550,2024,3,23.691211
10552,2024,6,21.888706
10554,2024,9,22.328842
10547,2024,15,26.523905
10548,2024,117,22.919609
10551,2024,131,22.246316
10546,2024,210,23.899478
10549,2024,214,24.166955
10560,2024,215,22.021375


In [10]:
# Łączenie zbiorów i usuwanie niepotrzebnych/zduplikowanych kolumn

merged_df = pd.merge(results, status, on = 'statusId', how = 'left')
merged_df = pd.merge(merged_df, drivers, on = 'driverId', how = 'left')
merged_df = pd.merge(merged_df, races, on = 'raceId', how = 'left')
merged_df = pd.merge(merged_df, constructors, on = 'constructorId', how = 'left')
merged_df = pd.merge(merged_df, driver_standings, on = ['driverId','raceId'], how = 'left')
merged_df = pd.merge(merged_df, pit_stops_merged, on = ['year','constructorId'], how = 'left')
merged_df = pd.merge(merged_df, constructor_standings, on = ['raceId','constructorId'], how = 'left')
merged_df = merged_df.drop(['driverRef','number_y','number_x','code','dob','nationality_x','url_x','round'
                            ,'time_y','url_y','constructorRef','nationality_y','url','fp1_date'
                           ,'fp1_time','fp2_date','fp2_time','fp3_date','fp3_time','quali_date'
                           ,'quali_time','sprint_date','sprint_time','position_x','fastestLap'
                           ,'fastestLapTime','rank','fastestLapSpeed','driverStandingsId'
                             ,'points_y','position_y','positionText_y','wins'], axis = 1)

In [11]:
# Uporządkowanie kolumn i zmiana nazw
order = ['resultId','raceId','name_x','year','date','driverId','forename','surname','constructorId','name_y','avg_season_duration','grid'
        ,'positionText_x','positionOrder','points_x','points_before_x','ranking_before_x','points_before_y','ranking_before_y','laps','time_x','milliseconds'
        ,'statusId','status','circuitId']
merged_df = merged_df[order]
merged_df = merged_df.rename(columns={'name_x':'race_name','date':'race_date','name_y':'constructor_name','grid':'start_position'
                                     ,'time_x':'time','rank':'fastest_lap_rank','positionOrder':'end_position','points_before_x':'driver_ranking_points_before_race'
                                     ,'ranking_before_x':'driver_ranking_before_race','positionText_x':'end_positionText','points_x':'points','avg_season_duration':'avg_season_pit_stop_duration'
                                     ,'ranking_before_y':'constructor_ranking_before_race','points_before_y':'constructor_ranking_points_before_race'})

In [12]:
(merged_df == "\\N").sum()

resultId                                      0
raceId                                        0
race_name                                     0
year                                          0
race_date                                     0
driverId                                      0
forename                                      0
surname                                       0
constructorId                                 0
constructor_name                              0
avg_season_pit_stop_duration                  0
start_position                                0
end_positionText                              0
end_position                                  0
points                                        0
driver_ranking_points_before_race             0
driver_ranking_before_race                    0
constructor_ranking_points_before_race        0
constructor_ranking_before_race               0
laps                                          0
time                                    

In [13]:
merged_df['status'].value_counts()
# Można pomyśleć czy bierzemy tylko Finished

status
Finished          7463
+1 Lap            3944
Engine            2022
+2 Laps           1600
Accident          1057
                  ... 
Launch control       1
Tyre puncture        1
+26 Laps             1
CV joint             1
Crankshaft           1
Name: count, Length: 137, dtype: int64

In [14]:
merged_df = merged_df.loc[merged_df['status']=='Finished']
merged_df = merged_df.loc[merged_df['year']>=2014]
merged_df = merged_df.loc[merged_df['start_position']!=0] #
merged_df.dropna(inplace=True)
merged_df['driver_ranking_points_before_race'] = merged_df['driver_ranking_points_before_race'].astype(int)
merged_df['driver_ranking_before_race'] = merged_df['driver_ranking_before_race'].astype(int)
merged_df['constructor_ranking_points_before_race'] = merged_df['constructor_ranking_points_before_race'].astype(int)
merged_df['constructor_ranking_before_race'] = merged_df['constructor_ranking_before_race'].astype(int)
(merged_df == "\\N").sum()

resultId                                  0
raceId                                    0
race_name                                 0
year                                      0
race_date                                 0
driverId                                  0
forename                                  0
surname                                   0
constructorId                             0
constructor_name                          0
avg_season_pit_stop_duration              0
start_position                            0
end_positionText                          0
end_position                              0
points                                    0
driver_ranking_points_before_race         0
driver_ranking_before_race                0
constructor_ranking_points_before_race    0
constructor_ranking_before_race           0
laps                                      0
time                                      0
milliseconds                              0
statusId                        

In [15]:
# for col in merged_df.columns:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2157 entries, 22127 to 26397
Data columns (total 25 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   resultId                                2157 non-null   int64  
 1   raceId                                  2157 non-null   int64  
 2   race_name                               2157 non-null   object 
 3   year                                    2157 non-null   int64  
 4   race_date                               2157 non-null   object 
 5   driverId                                2157 non-null   int64  
 6   forename                                2157 non-null   object 
 7   surname                                 2157 non-null   object 
 8   constructorId                           2157 non-null   int64  
 9   constructor_name                        2157 non-null   object 
 10  avg_season_pit_stop_duration            2157 non-null   floa

In [16]:
merged_df

Unnamed: 0,resultId,raceId,race_name,year,race_date,driverId,forename,surname,constructorId,constructor_name,avg_season_pit_stop_duration,start_position,end_positionText,end_position,points,driver_ranking_points_before_race,driver_ranking_before_race,constructor_ranking_points_before_race,constructor_ranking_before_race,laps,time,milliseconds,statusId,status,circuitId
22127,22130,900,Australian Grand Prix,2014,2014-03-16,3,Nico,Rosberg,131,Mercedes,24.296973,3,1,1,25.0,171,6,360,2,57,1:32:58.710,5578710,1,Finished,1
22129,22132,900,Australian Grand Prix,2014,2014-03-16,18,Jenson,Button,1,McLaren,24.454975,10,3,3,15.0,73,9,122,5,57,+30.027,5608737,1,Finished,1
22130,22133,900,Australian Grand Prix,2014,2014-03-16,4,Fernando,Alonso,6,Ferrari,24.355450,5,4,4,12.0,242,2,354,3,57,+35.284,5613994,1,Finished,1
22131,22134,900,Australian Grand Prix,2014,2014-03-16,822,Valtteri,Bottas,3,Williams,24.824068,15,5,5,10.0,4,17,5,9,57,+47.639,5626349,1,Finished,1
22132,22135,900,Australian Grand Prix,2014,2014-03-16,807,Nico,Hülkenberg,10,Force India,24.795582,7,6,6,8.0,51,10,77,6,57,+50.718,5629428,1,Finished,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26393,26399,1126,Miami Grand Prix,2024,2024-05-05,817,Daniel,Ricciardo,215,RB F1 Team,22.021375,20,15,15,0.0,0,18,7,6,57,+50.956,5500832,1,Finished,79
26394,26400,1126,Miami Grand Prix,2024,2024-05-05,822,Valtteri,Bottas,15,Sauber,26.523905,16,16,16,0.0,0,20,0,10,57,+52.356,5502232,1,Finished,79
26395,26401,1126,Miami Grand Prix,2024,2024-05-05,840,Lance,Stroll,117,Aston Martin,22.919609,11,17,17,0.0,9,10,40,5,57,+55.173,5505049,1,Finished,79
26396,26402,1126,Miami Grand Prix,2024,2024-05-05,848,Alexander,Albon,3,Williams,23.691211,14,18,18,0.0,0,15,0,8,57,+1:16.091,5525967,1,Finished,79


In [17]:
# merged_df.isna().sum()
# (merged_df == "\\N").sum()

In [18]:
# profile = ydata_profiling.ProfileReport(merged_df, title="Profiling Report")
# profile.to_file("output_new.html")

In [19]:
columns_of_interest = ['avg_season_pit_stop_duration', 'start_position', 
                       'driver_ranking_before_race', 'constructor_ranking_before_race', 'end_position']
selected_df = merged_df[columns_of_interest]

# Konwersja wybranych kolumn na one-hot encoding
start_position_dummies = pd.get_dummies(selected_df['start_position'], prefix='start_position', dtype=int)
driver_ranking_dummies = pd.get_dummies(selected_df['driver_ranking_before_race'], prefix='driver_ranking', dtype=int)
constructor_ranking_dummies = pd.get_dummies(selected_df['constructor_ranking_before_race'], prefix='constructor_ranking', dtype=int)

# Scalanie wszystkiego w nowym DataFrame
new_df = pd.concat([selected_df['avg_season_pit_stop_duration'], 
                    start_position_dummies, 
                    driver_ranking_dummies, 
                    constructor_ranking_dummies, 
                    selected_df['end_position']], axis=1)

In [20]:
new_df['in_top_5'] = (new_df['end_position'] <= 5).astype(int)
new_df.drop('end_position',axis=1, inplace=True)

In [21]:
new_df

Unnamed: 0,avg_season_pit_stop_duration,start_position_1,start_position_2,start_position_3,start_position_4,start_position_5,start_position_6,start_position_7,start_position_8,start_position_9,start_position_10,start_position_11,start_position_12,start_position_13,start_position_14,start_position_15,start_position_16,start_position_17,start_position_18,start_position_19,start_position_20,start_position_21,start_position_22,driver_ranking_1,driver_ranking_2,driver_ranking_3,driver_ranking_4,driver_ranking_5,driver_ranking_6,driver_ranking_7,driver_ranking_8,driver_ranking_9,driver_ranking_10,driver_ranking_11,driver_ranking_12,driver_ranking_13,driver_ranking_14,driver_ranking_15,driver_ranking_16,driver_ranking_17,driver_ranking_18,driver_ranking_19,driver_ranking_20,driver_ranking_21,driver_ranking_22,driver_ranking_23,driver_ranking_24,constructor_ranking_1,constructor_ranking_2,constructor_ranking_3,constructor_ranking_4,constructor_ranking_5,constructor_ranking_6,constructor_ranking_7,constructor_ranking_8,constructor_ranking_9,constructor_ranking_10,constructor_ranking_11,in_top_5
22127,24.296973,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
22129,24.454975,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
22130,24.355450,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
22131,24.824068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
22132,24.795582,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26393,22.021375,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
26394,26.523905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
26395,22.919609,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
26396,23.691211,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [22]:
from sklearn.model_selection import train_test_split

X = new_df.drop(columns=['in_top_5'])
y = new_df['in_top_5']

# Podział danych na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression()

# Trenowanie modelu
model.fit(X_train, y_train)

# Przewidywanie klas na zbiorze testowym
y_pred = model.predict(X_test)

# Obliczenie dokładności modelu
accuracy = accuracy_score(y_test, y_pred)
print(f'Dokładność modelu: {accuracy:.2f}')

Dokładność modelu: 0.81


In [24]:
merged_df

Unnamed: 0,resultId,raceId,race_name,year,race_date,driverId,forename,surname,constructorId,constructor_name,avg_season_pit_stop_duration,start_position,end_positionText,end_position,points,driver_ranking_points_before_race,driver_ranking_before_race,constructor_ranking_points_before_race,constructor_ranking_before_race,laps,time,milliseconds,statusId,status,circuitId
22127,22130,900,Australian Grand Prix,2014,2014-03-16,3,Nico,Rosberg,131,Mercedes,24.296973,3,1,1,25.0,171,6,360,2,57,1:32:58.710,5578710,1,Finished,1
22129,22132,900,Australian Grand Prix,2014,2014-03-16,18,Jenson,Button,1,McLaren,24.454975,10,3,3,15.0,73,9,122,5,57,+30.027,5608737,1,Finished,1
22130,22133,900,Australian Grand Prix,2014,2014-03-16,4,Fernando,Alonso,6,Ferrari,24.355450,5,4,4,12.0,242,2,354,3,57,+35.284,5613994,1,Finished,1
22131,22134,900,Australian Grand Prix,2014,2014-03-16,822,Valtteri,Bottas,3,Williams,24.824068,15,5,5,10.0,4,17,5,9,57,+47.639,5626349,1,Finished,1
22132,22135,900,Australian Grand Prix,2014,2014-03-16,807,Nico,Hülkenberg,10,Force India,24.795582,7,6,6,8.0,51,10,77,6,57,+50.718,5629428,1,Finished,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26393,26399,1126,Miami Grand Prix,2024,2024-05-05,817,Daniel,Ricciardo,215,RB F1 Team,22.021375,20,15,15,0.0,0,18,7,6,57,+50.956,5500832,1,Finished,79
26394,26400,1126,Miami Grand Prix,2024,2024-05-05,822,Valtteri,Bottas,15,Sauber,26.523905,16,16,16,0.0,0,20,0,10,57,+52.356,5502232,1,Finished,79
26395,26401,1126,Miami Grand Prix,2024,2024-05-05,840,Lance,Stroll,117,Aston Martin,22.919609,11,17,17,0.0,9,10,40,5,57,+55.173,5505049,1,Finished,79
26396,26402,1126,Miami Grand Prix,2024,2024-05-05,848,Alexander,Albon,3,Williams,23.691211,14,18,18,0.0,0,15,0,8,57,+1:16.091,5525967,1,Finished,79


In [25]:
merged_df_2024 = merged_df[merged_df['year'] == 2024]

# Wyliczenie pogrupowanych danych - avg pit stop
grouped_data = merged_df_2024.groupby(['constructor_name', 'driverId', 'surname'])['avg_season_pit_stop_duration'].mean().reset_index()
grouped_data

Unnamed: 0,constructor_name,driverId,surname,avg_season_pit_stop_duration
0,Alpine F1 Team,839,Ocon,24.166955
1,Alpine F1 Team,842,Gasly,24.166955
2,Aston Martin,4,Alonso,22.919609
3,Aston Martin,840,Stroll,22.919609
4,Ferrari,832,Sainz,21.888706
5,Ferrari,844,Leclerc,21.888706
6,Haas F1 Team,807,Hülkenberg,23.899478
7,Haas F1 Team,825,Magnussen,23.899478
8,McLaren,846,Norris,22.4348
9,McLaren,857,Piastri,22.4348


In [26]:
new_df.head()

Unnamed: 0,avg_season_pit_stop_duration,start_position_1,start_position_2,start_position_3,start_position_4,start_position_5,start_position_6,start_position_7,start_position_8,start_position_9,start_position_10,start_position_11,start_position_12,start_position_13,start_position_14,start_position_15,start_position_16,start_position_17,start_position_18,start_position_19,start_position_20,start_position_21,start_position_22,driver_ranking_1,driver_ranking_2,driver_ranking_3,driver_ranking_4,driver_ranking_5,driver_ranking_6,driver_ranking_7,driver_ranking_8,driver_ranking_9,driver_ranking_10,driver_ranking_11,driver_ranking_12,driver_ranking_13,driver_ranking_14,driver_ranking_15,driver_ranking_16,driver_ranking_17,driver_ranking_18,driver_ranking_19,driver_ranking_20,driver_ranking_21,driver_ranking_22,driver_ranking_23,driver_ranking_24,constructor_ranking_1,constructor_ranking_2,constructor_ranking_3,constructor_ranking_4,constructor_ranking_5,constructor_ranking_6,constructor_ranking_7,constructor_ranking_8,constructor_ranking_9,constructor_ranking_10,constructor_ranking_11,in_top_5
22127,24.296973,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
22129,24.454975,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
22130,24.35545,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
22131,24.824068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
22132,24.795582,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [104]:
# Stworzenie zbioru danych dla każdego kierowcy ze zmiennymi avg_season_pit_stop_duration, start_position, driver_ranking, constructor_ranking

drivers_data = {
    'Verstappen': [22.328842, #avg_season_pit_stop_duration
                   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #start_position
                   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #driver_ranking
                   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], #constructor_ranking
    'Perez': [22.328842,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Leclerc': [21.888706,
                 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Norris': [22.434800,
                 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
    'Sainz': [21.888706,
                 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Piastri': [22.434800,
                 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
    'Russell': [22.246316,
                 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
    'Alonso': [22.919609,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
    'Hamilton': [22.246316,
                 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
    'Tsunoda': [22.021375,
                 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
    'Stroll': [22.919609,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
    'Magnussen': [23.899478,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
#     'Bearman': [21.888706,
#                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                  0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'Hulkenberg': [23.899478,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
    'Ricciardo': [22.021375,
                 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
    'Ocon': [24.166955,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
    'Albon': [23.691211,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
    'Zhou': [26.523905,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
    'Gasly': [24.166955,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
    'Bottas': [26.523905,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
    'Sargeant': [23.691211,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
    
}
drivers_df = pd.DataFrame.from_dict(drivers_data, orient='index')

# Obliczenie prawdopodobieństwa znalezienia się kierowcy w pierwszej 5
probabilities = model.predict_proba(drivers_df)[:, 1]

# Dodanie kolumny z nazwiskami kierowców
drivers_df['probability_in_top_5'] = probabilities
drivers_df['driver'] = drivers_df.index

sorted_drivers_df = drivers_df.sort_values(by='probability_in_top_5', ascending=False).reset_index(drop=True)

# Przypisywanie pozycji końcowej na podstawie posortowanego prawdopodobieństwa
sorted_drivers_df['predicted_end_position'] = sorted_drivers_df.index + 1
print(sorted_drivers_df[['driver', 'probability_in_top_5', 'predicted_end_position']])

        driver  probability_in_top_5  predicted_end_position
0   Verstappen              0.970447                       1
1      Leclerc              0.933965                       2
2        Sainz              0.893351                       3
3       Norris              0.879511                       4
4      Piastri              0.853623                       5
5        Perez              0.734418                       6
6      Russell              0.446432                       7
7    Ricciardo              0.250661                       8
8      Tsunoda              0.202931                       9
9     Hamilton              0.197414                      10
10      Alonso              0.184511                      11
11    Sargeant              0.106966                      12
12       Gasly              0.090615                      13
13       Albon              0.046042                      14
14      Stroll              0.044119                      15
15        Zhou          

