## Data Import and Dataset Creation

In [1]:
import kagglehub
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Fetch the latest published version of the dataset and report where it landed.
dataset_handle = "rohanrao/formula-1-world-championship-1950-2020"
path = kagglehub.dataset_download(dataset_handle)
print(f"Path to dataset files: {path}")

Path to dataset files: /kaggle/input/formula-1-world-championship-1950-2020


In [2]:
import os
import pandas as pd

# Display options: never truncate rows or columns when a frame is rendered.
# NOTE: with max_rows=None, displaying a large frame without .head() prints every row.
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns

# Read from the folder kagglehub actually downloaded to (`path`, assigned in the
# download cell) rather than a hard-coded Kaggle-only location, so the notebook
# also runs where the dataset is cached somewhere else.
data_path = path

# Sorted so the load order (and the dict order below) is deterministic.
csv_files = sorted(file for file in os.listdir(data_path) if file.endswith('.csv'))

# Map table name (file name without its extension) -> DataFrame.
# splitext strips only the trailing extension, unlike str.replace('.csv', '').
dataframes = {
    os.path.splitext(file)[0]: pd.read_csv(os.path.join(data_path, file))
    for file in csv_files
}


In [3]:
# Short aliases for the tables used in this notebook (unpacked in order).
df1, df2, df3, df4, df5, df6, df7, df8, df9 = (
    dataframes[table_name]
    for table_name in (
        'results',                # df1
        'races',                  # df2
        'drivers',                # df3 - only PII data
        'circuits',               # df4
        'status',                 # df5
        'constructor_standings',  # df6
        'pit_stops',              # df7 - v2
        'lap_times',              # df8 - v2
        'driver_standings',       # df9 - redundant data
    )
)

## Base Dataset Join

In [4]:
# Attach race metadata (year, round, circuitId, ...) to every result row.
# validate='many_to_one' makes pandas raise MergeError if raceId is ever
# duplicated in the races table, instead of silently multiplying result rows.
df12 = pd.merge(df1, df2, on='raceId', how='left', validate='many_to_one')
# NOTE(review): `df` is not referenced by any cell visible here; kept in case
# a later cell relies on it - candidate for removal.
df = pd.DataFrame()

print(df12.shape)
# Sanity check: number of result rows per season for two known drivers.
print(df12[df12['driverId']==832]['year'].value_counts().sort_index()) #Carlos Sainz
print(df12[df12['driverId']==848]['year'].value_counts().sort_index()) #Alex Albon
# df12.head()

(26759, 35)
year
2015    19
2016    21
2017    20
2018    21
2019    21
2020    17
2021    22
2022    22
2023    22
2024    23
Name: count, dtype: int64
year
2019    21
2020    17
2022    21
2023    22
2024    24
Name: count, dtype: int64


## Filtering for 2015 and later Races

In [5]:
# Keep only result rows whose race year is 2015 or later.
from_2015 = df12['year'].ge(2015)
df12_real = df12.loc[from_2015]
print(df12_real.shape)
df12_real.head()

(4219, 35)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name,date,time_y,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
22534,22538,926,1,131,44,1,1,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
22535,22539,926,3,131,6,2,2,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
22536,22540,926,20,6,5,4,3,3,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
22537,22541,926,13,3,19,3,4,4,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
22538,22542,926,831,15,12,10,5,5,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N


## Merging Other Datasets

In [6]:
# Add circuit details (name, location, country, coordinates) to each result row.
# validate='many_to_one' raises MergeError if circuitId is not unique in the
# circuits table, so the join can never silently duplicate result rows.
df124 = pd.merge(df12_real, df4, on='circuitId', how='left', validate='many_to_one')
print(df124.shape)
df124.head()

(4219, 43)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,url_x,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time,circuitRef,name_y,location,country,lat,lng,alt,url_y
0,22538,926,1,131,44,1,1,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,22539,926,3,131,6,2,2,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
2,22540,926,20,6,5,4,3,3,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
3,22541,926,13,3,19,3,4,4,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
4,22542,926,831,15,12,10,5,5,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...


In [7]:
# List the merged columns; names present in more than one source table carry
# pandas' default _x/_y suffixes (e.g. name_x/name_y, url_x/url_y).
df124.columns

Index(['resultId', 'raceId', 'driverId', 'constructorId', 'number', 'grid',
       'position', 'positionText', 'positionOrder', 'points', 'laps', 'time_x',
       'milliseconds', 'fastestLap', 'rank', 'fastestLapTime',
       'fastestLapSpeed', 'statusId', 'year', 'round', 'circuitId', 'name_x',
       'date', 'time_y', 'url_x', 'fp1_date', 'fp1_time', 'fp2_date',
       'fp2_time', 'fp3_date', 'fp3_time', 'quali_date', 'quali_time',
       'sprint_date', 'sprint_time', 'circuitRef', 'name_y', 'location',
       'country', 'lat', 'lng', 'alt', 'url_y'],
      dtype='object')

In [8]:
# Resolve statusId to its text label (adds the `status` column).
# validate='many_to_one' raises MergeError if statusId is not unique in the
# status table, so the join can never silently duplicate result rows.
df1245 = pd.merge(df124, df5, on='statusId', how='left', validate='many_to_one')
print(df1245.shape)
df1245.head()

(4219, 44)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,url_x,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time,circuitRef,name_y,location,country,lat,lng,alt,url_y,status
0,22538,926,1,131,44,1,1,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished
1,22539,926,3,131,6,2,2,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished
2,22540,926,20,6,5,4,3,3,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished
3,22541,926,13,3,19,3,4,4,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished
4,22542,926,831,15,12,10,5,5,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished


In [9]:
# Attach the constructor-standings row for the same race and constructor.
# points/position/positionText exist on both sides, so they come out with
# _x (results) and _y (constructor standings) suffixes.
df12456 = pd.merge(df1245, df6, on=['raceId', 'constructorId'], how='left')
# A left join on a lookup table must not change the number of result rows;
# fail loudly if duplicate (raceId, constructorId) keys ever multiply them.
assert len(df12456) == len(df1245), (
    "constructor_standings join changed the row count: "
    f"{len(df1245)} -> {len(df12456)}"
)
print(df12456.shape)
df12456.head()

(4219, 49)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position_x,positionText_x,positionOrder,points_x,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,url_x,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time,circuitRef,name_y,location,country,lat,lng,alt,url_y,status,constructorStandingsId,points_y,position_y,positionText_y,wins
0,22538,926,1,131,44,1,1,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26140,43.0,1,1,1
1,22539,926,3,131,6,2,2,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26140,43.0,1,1,1
2,22540,926,20,6,5,4,3,3,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26141,15.0,2,2,0
3,22541,926,13,3,19,3,4,4,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26142,12.0,4,4,0
4,22542,926,831,15,12,10,5,5,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26143,14.0,3,3,0


## Verifying Carlos Sainz (driverId = 832) at raceId = 926 in the Driver Standings Dataset to Reduce Redundant Data

In [10]:
# Driver-standings rows for driverId 832 (Carlos Sainz), for comparison with
# the joined results table.
sainz_rows = df9['driverId'].eq(832)
df_inter = df9.loc[sainz_rows]
df_inter.head()

Unnamed: 0,driverStandingsId,raceId,driverId,points,position,positionText,wins
30395,67192,926,832,2.0,9,9,0
30412,67066,927,832,6.0,11,11,0
30433,67087,928,832,6.0,12,12,0
30454,67108,929,832,6.0,12,12,0
30475,67129,930,832,8.0,10,10,0


In [11]:
# First ten joined rows for raceId 926.
df12456.loc[df12456['raceId'].eq(926)].head(10)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position_x,positionText_x,positionOrder,points_x,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,url_x,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time,circuitRef,name_y,location,country,lat,lng,alt,url_y,status,constructorStandingsId,points_y,position_y,positionText_y,wins
0,22538,926,1,131,44,1,1,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26140,43.0,1,1,1
1,22539,926,3,131,6,2,2,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26140,43.0,1,1,1
2,22540,926,20,6,5,4,3,3,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26141,15.0,2,2,0
3,22541,926,13,3,19,3,4,4,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26142,12.0,4,4,0
4,22542,926,831,15,12,10,5,5,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Finished,26143,14.0,3,3,0
5,22543,926,817,9,3,6,6,6,6,8.0,57,\N,\N,46,10,1:32.797,205.726,11,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,+1 Lap,26144,8.0,5,5,0
6,22544,926,807,10,27,13,7,7,7,6.0,57,\N,\N,48,8,1:31.970,207.576,11,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,+1 Lap,26145,7.0,6,6,0
7,22545,926,828,15,9,15,8,8,8,4.0,57,\N,\N,51,5,1:31.560,208.505,11,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,+1 Lap,26143,14.0,3,3,0
8,22546,926,832,5,55,7,9,9,9,2.0,57,\N,\N,49,11,1:32.872,205.56,11,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,+1 Lap,26146,2.0,7,7,0
9,22547,926,815,10,11,14,10,10,10,1.0,57,\N,\N,46,7,1:31.959,207.601,11,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,http://en.wikipedia.org/wiki/2015_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,+1 Lap,26145,7.0,6,6,0


## Dropping Unnecessary Columns

In [12]:
# Review every column name before choosing which ones to drop.
df12456.columns

Index(['resultId', 'raceId', 'driverId', 'constructorId', 'number', 'grid',
       'position_x', 'positionText_x', 'positionOrder', 'points_x', 'laps',
       'time_x', 'milliseconds', 'fastestLap', 'rank', 'fastestLapTime',
       'fastestLapSpeed', 'statusId', 'year', 'round', 'circuitId', 'name_x',
       'date', 'time_y', 'url_x', 'fp1_date', 'fp1_time', 'fp2_date',
       'fp2_time', 'fp3_date', 'fp3_time', 'quali_date', 'quali_time',
       'sprint_date', 'sprint_time', 'circuitRef', 'name_y', 'location',
       'country', 'lat', 'lng', 'alt', 'url_y', 'status',
       'constructorStandingsId', 'points_y', 'position_y', 'positionText_y',
       'wins'],
      dtype='object')

In [13]:
# Columns removed from the joined table: result position text fields, both URL
# columns, the practice/qualifying/sprint schedule columns, circuitRef, and the
# points/position fields that came from the constructor-standings join (_y).
columns_to_drop = [
    'number',
    'position_x', 'positionText_x',
    'url_y', 'url_x',
    'fp1_date', 'fp1_time',
    'fp2_date', 'fp2_time',
    'fp3_date', 'fp3_time',
    'quali_date', 'quali_time',
    'sprint_date', 'sprint_time',
    'circuitRef',
    'points_y', 'position_y', 'positionText_y',
]
# Mutates df12456 in place; re-running this cell raises KeyError because the
# columns are already gone.
df12456.drop(columns=columns_to_drop, inplace=True)

In [14]:
# Confirm the column count after the drop and preview the remaining fields.
print(df12456.shape)
df12456.head()

(4219, 30)


Unnamed: 0,resultId,raceId,driverId,constructorId,grid,positionOrder,points_x,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,name_y,location,country,lat,lng,alt,status,constructorStandingsId,wins
0,22538,926,1,131,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1
1,22539,926,3,131,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1
2,22540,926,20,6,4,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26141,0
3,22541,926,13,3,3,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26142,0
4,22542,926,831,15,10,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26143,0


## Saving Dataset as File

In [15]:
# Write the v1 table to dataset.csv in the current working directory, without the index column.
df12456.to_csv('dataset.csv', index=False)

In [16]:
import os
import subprocess
from IPython.display import FileLink, display

def download_file(path, download_file_name):
    """Zip ``path`` into ``/kaggle/working/<download_file_name>.zip`` and show a link to it.

    Args:
        path: File or directory to archive. Relative paths are resolved against
            ``/kaggle/working/`` because the function changes into that
            directory before running ``zip``.
        download_file_name: Archive name without the ``.zip`` extension.

    Returns:
        None. On success an IPython ``FileLink`` to the archive is displayed.
        If ``zip`` cannot be started or exits non-zero, the error is printed
        and no link is shown.
    """
    # Changes the process-wide working directory; the relative FileLink below
    # depends on it.
    os.chdir('/kaggle/working/')
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Pass an argument list and skip the shell: names containing spaces or
    # shell metacharacters are handed to zip verbatim instead of being
    # re-parsed (and possibly executed) by /bin/sh.
    command = ["zip", "-r", zip_name, path]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # Without a shell, a missing `zip` binary raises instead of returning 127.
        print("Unable to run zip command!")
        print(exc)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))

# Bundle dataset.csv into out.zip and display a download link.
download_file('dataset.csv', 'out')

## V2 of the Dataset: Adding Lap Times & Pit Stops

## Lap Timing Dataset Cleaning and Joining

In [17]:
# Size and first rows of the long-format lap table (one row per race, driver and lap).
print(df8.shape)
df8.head() #Lap Time dataset

(589081, 6)


Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
0,841,20,1,1,1:38.109,98109
1,841,20,2,1,1:33.006,93006
2,841,20,3,1,1:32.713,92713
3,841,20,4,1,1:32.803,92803
4,841,20,5,1,1:32.342,92342


In [18]:
# Reshape lap times from long to wide: one row per (raceId, driverId) and one
# `lap_<n>_ms` column per lap number, holding that lap's time in milliseconds.
lap_keys = ['raceId', 'driverId']
lap_ms = df8.pivot(index=lap_keys, columns='lap', values='milliseconds')
lap_ms.columns = [f'lap_{lap}_ms' for lap in lap_ms.columns]

# Turn the (raceId, driverId) index back into ordinary columns so the frame
# can be merged on them later; lap_ms itself keeps its MultiIndex.
driver_laps_wide = lap_ms.reset_index()

print(driver_laps_wide.shape)


(11041, 89)


In [19]:
# Preview the wide lap table; laps a driver did not complete show up as NaN.
driver_laps_wide.head()

Unnamed: 0,raceId,driverId,lap_1_ms,lap_2_ms,lap_3_ms,lap_4_ms,lap_5_ms,lap_6_ms,lap_7_ms,lap_8_ms,lap_9_ms,lap_10_ms,lap_11_ms,lap_12_ms,lap_13_ms,lap_14_ms,lap_15_ms,lap_16_ms,lap_17_ms,lap_18_ms,lap_19_ms,lap_20_ms,lap_21_ms,lap_22_ms,lap_23_ms,lap_24_ms,lap_25_ms,lap_26_ms,lap_27_ms,lap_28_ms,lap_29_ms,lap_30_ms,lap_31_ms,lap_32_ms,lap_33_ms,lap_34_ms,lap_35_ms,lap_36_ms,lap_37_ms,lap_38_ms,lap_39_ms,lap_40_ms,lap_41_ms,lap_42_ms,lap_43_ms,lap_44_ms,lap_45_ms,lap_46_ms,lap_47_ms,lap_48_ms,lap_49_ms,lap_50_ms,lap_51_ms,lap_52_ms,lap_53_ms,lap_54_ms,lap_55_ms,lap_56_ms,lap_57_ms,lap_58_ms,lap_59_ms,lap_60_ms,lap_61_ms,lap_62_ms,lap_63_ms,lap_64_ms,lap_65_ms,lap_66_ms,lap_67_ms,lap_68_ms,lap_69_ms,lap_70_ms,lap_71_ms,lap_72_ms,lap_73_ms,lap_74_ms,lap_75_ms,lap_76_ms,lap_77_ms,lap_78_ms,lap_79_ms,lap_80_ms,lap_81_ms,lap_82_ms,lap_83_ms,lap_84_ms,lap_85_ms,lap_86_ms,lap_87_ms
0,1,1,109088.0,93740.0,91600.0,91067.0,92129.0,90469.0,89488.0,90302.0,90889.0,92418.0,115549.0,98029.0,92372.0,91749.0,90771.0,90751.0,90984.0,95580.0,101892.0,105445.0,98683.0,153309.0,152352.0,136183.0,96863.0,92198.0,91291.0,90377.0,90077.0,89912.0,89743.0,90027.0,89560.0,89723.0,89474.0,89894.0,89422.0,89104.0,89020.0,90261.0,89824.0,89484.0,107494.0,95135.0,89444.0,90224.0,90528.0,90147.0,91175.0,90082.0,89611.0,89722.0,89599.0,91129.0,90149.0,107829.0,134442.0,150895.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,2,157754.0,97640.0,91697.0,91867.0,92234.0,92036.0,91659.0,91801.0,93491.0,93063.0,92439.0,91452.0,91523.0,91408.0,91232.0,91112.0,91209.0,125300.0,121930.0,108094.0,108456.0,95396.0,121351.0,135301.0,97154.0,93808.0,93104.0,92280.0,92356.0,91390.0,91188.0,92289.0,91734.0,90967.0,90959.0,90756.0,90685.0,90269.0,89941.0,90031.0,89982.0,89950.0,89748.0,107500.0,94324.0,89421.0,88968.0,88283.0,88373.0,88697.0,89476.0,91178.0,91204.0,91444.0,91730.0,112621.0,128962.0,132652.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1,3,102659.0,92091.0,91148.0,89660.0,89468.0,89643.0,90181.0,91501.0,92315.0,90219.0,93984.0,91984.0,88519.0,88428.0,88553.0,120177.0,97629.0,93148.0,103569.0,102455.0,105705.0,155699.0,151994.0,141534.0,98511.0,91336.0,91444.0,90439.0,90328.0,90032.0,89822.0,89350.0,89152.0,89059.0,89294.0,89878.0,89801.0,89737.0,89748.0,90112.0,90626.0,88957.0,88728.0,106312.0,95294.0,89168.0,88506.0,87706.0,88803.0,90277.0,91532.0,91400.0,94441.0,96161.0,93169.0,123117.0,116092.0,150911.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1,4,109367.0,94983.0,92175.0,93114.0,91831.0,91556.0,91683.0,91290.0,91083.0,91569.0,90944.0,90522.0,90503.0,90914.0,90718.0,90863.0,90746.0,92498.0,131126.0,111011.0,95702.0,153045.0,152290.0,135567.0,97840.0,91912.0,91215.0,90771.0,90595.0,90132.0,89646.0,89993.0,89684.0,89466.0,89714.0,89539.0,89778.0,89744.0,90436.0,91106.0,90373.0,90751.0,90230.0,90181.0,90160.0,90003.0,90115.0,89933.0,89695.0,89451.0,107303.0,96483.0,88712.0,88724.0,89171.0,124117.0,118145.0,150445.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1,6,104899.0,92734.0,92684.0,90952.0,90152.0,89923.0,90057.0,90173.0,90965.0,92313.0,90804.0,91343.0,90955.0,91152.0,90596.0,90558.0,90718.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [20]:
# Inspect column names and per-column null counts to decide which lap columns
# to drop. This cell only prints; nothing is removed here.
print(driver_laps_wide.columns)
print(driver_laps_wide.isnull().sum())

Index(['raceId', 'driverId', 'lap_1_ms', 'lap_2_ms', 'lap_3_ms', 'lap_4_ms',
       'lap_5_ms', 'lap_6_ms', 'lap_7_ms', 'lap_8_ms', 'lap_9_ms', 'lap_10_ms',
       'lap_11_ms', 'lap_12_ms', 'lap_13_ms', 'lap_14_ms', 'lap_15_ms',
       'lap_16_ms', 'lap_17_ms', 'lap_18_ms', 'lap_19_ms', 'lap_20_ms',
       'lap_21_ms', 'lap_22_ms', 'lap_23_ms', 'lap_24_ms', 'lap_25_ms',
       'lap_26_ms', 'lap_27_ms', 'lap_28_ms', 'lap_29_ms', 'lap_30_ms',
       'lap_31_ms', 'lap_32_ms', 'lap_33_ms', 'lap_34_ms', 'lap_35_ms',
       'lap_36_ms', 'lap_37_ms', 'lap_38_ms', 'lap_39_ms', 'lap_40_ms',
       'lap_41_ms', 'lap_42_ms', 'lap_43_ms', 'lap_44_ms', 'lap_45_ms',
       'lap_46_ms', 'lap_47_ms', 'lap_48_ms', 'lap_49_ms', 'lap_50_ms',
       'lap_51_ms', 'lap_52_ms', 'lap_53_ms', 'lap_54_ms', 'lap_55_ms',
       'lap_56_ms', 'lap_57_ms', 'lap_58_ms', 'lap_59_ms', 'lap_60_ms',
       'lap_61_ms', 'lap_62_ms', 'lap_63_ms', 'lap_64_ms', 'lap_65_ms',
       'lap_66_ms', 'lap_67_ms', 'lap_68_ms', 'lap_

In [21]:
# Drop the lap_79_ms .. lap_87_ms columns.
# NOTE(review): the pivot only creates a column for lap numbers that occur in
# the data, so at least one (raceId, driverId) has values in these columns;
# those lap times are discarded here.
trailing_lap_columns = [f'lap_{lap}_ms' for lap in range(79, 88)]
# Mutates driver_laps_wide in place; re-running this cell raises KeyError.
driver_laps_wide.drop(columns=trailing_lap_columns, inplace=True)

In [22]:
# Confirm which lap columns remain after the drop.
driver_laps_wide.columns

Index(['raceId', 'driverId', 'lap_1_ms', 'lap_2_ms', 'lap_3_ms', 'lap_4_ms',
       'lap_5_ms', 'lap_6_ms', 'lap_7_ms', 'lap_8_ms', 'lap_9_ms', 'lap_10_ms',
       'lap_11_ms', 'lap_12_ms', 'lap_13_ms', 'lap_14_ms', 'lap_15_ms',
       'lap_16_ms', 'lap_17_ms', 'lap_18_ms', 'lap_19_ms', 'lap_20_ms',
       'lap_21_ms', 'lap_22_ms', 'lap_23_ms', 'lap_24_ms', 'lap_25_ms',
       'lap_26_ms', 'lap_27_ms', 'lap_28_ms', 'lap_29_ms', 'lap_30_ms',
       'lap_31_ms', 'lap_32_ms', 'lap_33_ms', 'lap_34_ms', 'lap_35_ms',
       'lap_36_ms', 'lap_37_ms', 'lap_38_ms', 'lap_39_ms', 'lap_40_ms',
       'lap_41_ms', 'lap_42_ms', 'lap_43_ms', 'lap_44_ms', 'lap_45_ms',
       'lap_46_ms', 'lap_47_ms', 'lap_48_ms', 'lap_49_ms', 'lap_50_ms',
       'lap_51_ms', 'lap_52_ms', 'lap_53_ms', 'lap_54_ms', 'lap_55_ms',
       'lap_56_ms', 'lap_57_ms', 'lap_58_ms', 'lap_59_ms', 'lap_60_ms',
       'lap_61_ms', 'lap_62_ms', 'lap_63_ms', 'lap_64_ms', 'lap_65_ms',
       'lap_66_ms', 'lap_67_ms', 'lap_68_ms', 'lap_

### Pit Stop Dataset Cleaning and Joining

In [23]:
# Size and first rows of the long-format pit-stop table (one row per race, driver and stop).
print(df7.shape) # Pit stops dataset
df7.head()

(11371, 7)


Unnamed: 0,raceId,driverId,stop,lap,time,duration,milliseconds
0,841,153,1,1,17:05:23,26.898,26898
1,841,30,1,1,17:05:52,25.021,25021
2,841,17,1,11,17:20:48,23.426,23426
3,841,4,1,12,17:22:34,23.251,23251
4,841,13,1,13,17:24:10,23.842,23842


In [24]:
# Reshape pit stops from long to wide: one row per (raceId, driverId), with a
# `stop_<n>_ms` (stop duration in ms) and a `stop_<n>_lap` (lap of the stop)
# column for every stop number that occurs in the data.
pit_keys = ['raceId', 'driverId']
wide_parts = []
for value_column, suffix in (('milliseconds', 'ms'), ('lap', 'lap')):
    part = df7.pivot(index=pit_keys, columns='stop', values=value_column)
    part.columns = [f'stop_{int(col)}_{suffix}' for col in part.columns]
    wide_parts.append(part)
stop_ms, stop_laps = wide_parts

# Side-by-side: all *_ms columns first, then all *_lap columns; the key pair
# becomes ordinary columns again.
df_pitstops_wide = pd.concat(wide_parts, axis=1).reset_index()

print(df_pitstops_wide.shape)


(5575, 30)


In [25]:
# Show the generated stop columns and the first rows. The stop numbers are not
# contiguous (1-7, then 15, 42, 48, ...) because a column exists only for stop
# numbers that actually appear in the source data.
print(df_pitstops_wide.columns)
df_pitstops_wide.head()

Index(['raceId', 'driverId', 'stop_1_ms', 'stop_2_ms', 'stop_3_ms',
       'stop_4_ms', 'stop_5_ms', 'stop_6_ms', 'stop_7_ms', 'stop_15_ms',
       'stop_42_ms', 'stop_48_ms', 'stop_51_ms', 'stop_52_ms', 'stop_57_ms',
       'stop_70_ms', 'stop_1_lap', 'stop_2_lap', 'stop_3_lap', 'stop_4_lap',
       'stop_5_lap', 'stop_6_lap', 'stop_7_lap', 'stop_15_lap', 'stop_42_lap',
       'stop_48_lap', 'stop_51_lap', 'stop_52_lap', 'stop_57_lap',
       'stop_70_lap'],
      dtype='object')


Unnamed: 0,raceId,driverId,stop_1_ms,stop_2_ms,stop_3_ms,stop_4_ms,stop_5_ms,stop_6_ms,stop_7_ms,stop_15_ms,stop_42_ms,stop_48_ms,stop_51_ms,stop_52_ms,stop_57_ms,stop_70_ms,stop_1_lap,stop_2_lap,stop_3_lap,stop_4_lap,stop_5_lap,stop_6_lap,stop_7_lap,stop_15_lap,stop_42_lap,stop_48_lap,stop_51_lap,stop_52_lap,stop_57_lap,stop_70_lap
0,841,1,23227.0,23199.0,,,,,,,,,,,,,16.0,36.0,,,,,,,,,,,,
1,841,2,22994.0,25098.0,,,,,,,,,,,,,15.0,30.0,,,,,,,,,,,,
2,841,3,23716.0,,,,,,,,,,,,,,16.0,,,,,,,,,,,,,
3,841,4,23251.0,24733.0,24181.0,,,,,,,,,,,,12.0,27.0,42.0,,,,,,,,,,,
4,841,5,24865.0,,,,,,,,,,,,,,17.0,,,,,,,,,,,,,


In [26]:
# Null count per stop column, used to decide how many stops are worth keeping.
df_pitstops_wide.isnull().sum()

raceId            0
driverId          0
stop_1_ms         2
stop_2_ms      1859
stop_3_ms      4079
stop_4_ms      5148
stop_5_ms      5455
stop_6_ms      5546
stop_7_ms      5572
stop_15_ms     5574
stop_42_ms     5574
stop_48_ms     5574
stop_51_ms     5574
stop_52_ms     5574
stop_57_ms     5574
stop_70_ms     5574
stop_1_lap        2
stop_2_lap     1859
stop_3_lap     4079
stop_4_lap     5148
stop_5_lap     5455
stop_6_lap     5546
stop_7_lap     5572
stop_15_lap    5574
stop_42_lap    5574
stop_48_lap    5574
stop_51_lap    5574
stop_52_lap    5574
stop_57_lap    5574
stop_70_lap    5574
dtype: int64

In [27]:
# Keep the join keys plus the duration and lap of the first three stops only.
pit_stop_columns = ['raceId', 'driverId'] + [
    f'stop_{stop_number}_{suffix}'
    for suffix in ('ms', 'lap')
    for stop_number in (1, 2, 3)
]
df_pit_stops = df_pitstops_wide.loc[:, pit_stop_columns]
df_pit_stops.head()

Unnamed: 0,raceId,driverId,stop_1_ms,stop_2_ms,stop_3_ms,stop_1_lap,stop_2_lap,stop_3_lap
0,841,1,23227.0,23199.0,,16.0,36.0,
1,841,2,22994.0,25098.0,,15.0,30.0,
2,841,3,23716.0,,,16.0,,
3,841,4,23251.0,24733.0,24181.0,12.0,27.0,42.0
4,841,5,24865.0,,,17.0,,


## Joining Existing Dataset with lap_time & pit_stops

In [28]:
# Add the first three pit stops (duration and lap) to each result row; rows
# without pit-stop data keep NaN in the stop_* columns.
# validate='many_to_one' raises MergeError if a (raceId, driverId) pair is
# ever duplicated on the pit-stop side, instead of multiplying result rows.
df124567 = pd.merge(
    df12456,
    df_pit_stops,
    on=['raceId', 'driverId'],
    how='left',
    validate='many_to_one',
)
print(df124567.shape)
df124567.head()

(4219, 36)


Unnamed: 0,resultId,raceId,driverId,constructorId,grid,positionOrder,points_x,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,name_y,location,country,lat,lng,alt,status,constructorStandingsId,wins,stop_1_ms,stop_2_ms,stop_3_ms,stop_1_lap,stop_2_lap,stop_3_lap
0,22538,926,1,131,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1,22295.0,,,25.0,,
1,22539,926,3,131,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1,22105.0,,,26.0,,
2,22540,926,20,6,4,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26141,0,22694.0,,,24.0,,
3,22541,926,13,3,3,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26142,0,22062.0,,,21.0,,
4,22542,926,831,15,10,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26143,0,22933.0,,,25.0,,


In [29]:
# Add the per-lap times (lap_<n>_ms columns) to each result row; rows without
# lap data keep NaN in those columns.
# validate='many_to_one' raises MergeError if a (raceId, driverId) pair is
# ever duplicated on the lap-time side, instead of multiplying result rows.
df1245678 = pd.merge(
    df124567,
    driver_laps_wide,
    on=['raceId', 'driverId'],
    how='left',
    validate='many_to_one',
)
print(df1245678.shape)
df1245678.head()

(4219, 114)


Unnamed: 0,resultId,raceId,driverId,constructorId,grid,positionOrder,points_x,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name_x,date,time_y,name_y,location,country,lat,lng,alt,status,constructorStandingsId,wins,stop_1_ms,stop_2_ms,stop_3_ms,stop_1_lap,stop_2_lap,stop_3_lap,lap_1_ms,lap_2_ms,lap_3_ms,lap_4_ms,lap_5_ms,lap_6_ms,lap_7_ms,lap_8_ms,lap_9_ms,lap_10_ms,lap_11_ms,lap_12_ms,lap_13_ms,lap_14_ms,lap_15_ms,lap_16_ms,lap_17_ms,lap_18_ms,lap_19_ms,lap_20_ms,lap_21_ms,lap_22_ms,lap_23_ms,lap_24_ms,lap_25_ms,lap_26_ms,lap_27_ms,lap_28_ms,lap_29_ms,lap_30_ms,lap_31_ms,lap_32_ms,lap_33_ms,lap_34_ms,lap_35_ms,lap_36_ms,lap_37_ms,lap_38_ms,lap_39_ms,lap_40_ms,lap_41_ms,lap_42_ms,lap_43_ms,lap_44_ms,lap_45_ms,lap_46_ms,lap_47_ms,lap_48_ms,lap_49_ms,lap_50_ms,lap_51_ms,lap_52_ms,lap_53_ms,lap_54_ms,lap_55_ms,lap_56_ms,lap_57_ms,lap_58_ms,lap_59_ms,lap_60_ms,lap_61_ms,lap_62_ms,lap_63_ms,lap_64_ms,lap_65_ms,lap_66_ms,lap_67_ms,lap_68_ms,lap_69_ms,lap_70_ms,lap_71_ms,lap_72_ms,lap_73_ms,lap_74_ms,lap_75_ms,lap_76_ms,lap_77_ms,lap_78_ms
0,22538,926,1,131,1,1,25.0,58,1:31:54.067,5514067,50,1,1:30.945,209.915,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1,22295.0,,,25.0,,,142988.0,146005.0,133785.0,93493.0,92765.0,92685.0,92593.0,92899.0,92933.0,92911.0,92742.0,92432.0,92491.0,93033.0,93177.0,92458.0,92148.0,92462.0,92347.0,92306.0,91940.0,92405.0,92791.0,92925.0,111204.0,99012.0,93021.0,93241.0,92824.0,92005.0,91676.0,91953.0,91875.0,93071.0,91995.0,91562.0,91514.0,91134.0,91208.0,91825.0,91347.0,91982.0,91541.0,91693.0,91229.0,91177.0,91649.0,91224.0,91023.0,90945.0,91804.0,92242.0,91708.0,91273.0,91358.0,90993.0,91400.0,91645.0,,,,,,,,,,,,,,,,,,,,
1,22539,926,3,131,2,2,18.0,58,+1.360,5515427,47,2,1:31.092,209.577,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26140,1,22105.0,,,26.0,,,144350.0,145242.0,134420.0,94638.0,93068.0,92371.0,92827.0,92460.0,92380.0,92572.0,93184.0,92650.0,92752.0,92930.0,93576.0,92324.0,92300.0,92365.0,92243.0,92426.0,92454.0,93011.0,92948.0,93048.0,93964.0,111568.0,98552.0,92092.0,92108.0,91754.0,91609.0,91737.0,91660.0,92981.0,91775.0,91355.0,91610.0,91573.0,91565.0,91798.0,91400.0,91493.0,91259.0,92243.0,91231.0,91319.0,91092.0,91332.0,91141.0,91384.0,91443.0,91678.0,91688.0,91411.0,91372.0,91366.0,91189.0,91146.0,,,,,,,,,,,,,,,,,,,,
2,22540,926,20,6,4,3,15.0,58,+34.523,5548590,52,4,1:31.457,208.74,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26141,0,22694.0,,,24.0,,,147522.0,144069.0,133846.0,95361.0,94027.0,93438.0,93197.0,93754.0,92991.0,93074.0,93490.0,93597.0,93513.0,93374.0,93853.0,93173.0,93229.0,93146.0,93168.0,93145.0,92859.0,92307.0,92664.0,111263.0,98543.0,92115.0,92429.0,92670.0,92432.0,92518.0,92698.0,92443.0,92300.0,92856.0,92633.0,92567.0,92456.0,92518.0,93729.0,92644.0,92673.0,92653.0,92129.0,92334.0,92288.0,92054.0,92259.0,92037.0,92127.0,91754.0,91578.0,91457.0,91492.0,91847.0,91719.0,91478.0,91717.0,93383.0,,,,,,,,,,,,,,,,,,,,
3,22541,926,13,3,3,4,12.0,58,+38.196,5552263,50,6,1:31.719,208.144,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26142,0,22062.0,,,21.0,,,146113.0,144418.0,134436.0,95150.0,93928.0,93470.0,93205.0,93576.0,93212.0,93054.0,93171.0,93484.0,93355.0,93354.0,93517.0,93247.0,93159.0,93245.0,93195.0,93141.0,111561.0,98947.0,94555.0,92072.0,93042.0,92580.0,93059.0,93220.0,93118.0,92910.0,92755.0,92813.0,92776.0,92857.0,92612.0,92607.0,92293.0,92444.0,92910.0,92776.0,92747.0,92358.0,92141.0,92091.0,91947.0,92273.0,92073.0,91802.0,91931.0,91719.0,91859.0,91857.0,92289.0,91784.0,91944.0,91753.0,92000.0,92358.0,,,,,,,,,,,,,,,,,,,,
4,22542,926,831,15,10,5,10.0,58,+1:35.149,5609216,46,9,1:32.612,206.137,1,2015,1,1,Australian Grand Prix,2015-03-15,05:00:00,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,Finished,26143,0,22933.0,,,25.0,,,149928.0,143809.0,132364.0,96284.0,95148.0,94761.0,94263.0,94505.0,94330.0,94164.0,93945.0,94047.0,94211.0,94180.0,94385.0,93596.0,93549.0,93850.0,93745.0,94118.0,94248.0,93722.0,94264.0,94091.0,114815.0,100836.0,92766.0,92931.0,93376.0,93459.0,93466.0,93585.0,93385.0,93234.0,93887.0,93435.0,93221.0,93019.0,93429.0,93127.0,92739.0,92855.0,92905.0,93058.0,93440.0,92612.0,92632.0,93013.0,93188.0,93766.0,93919.0,93797.0,94053.0,93015.0,92788.0,93371.0,93957.0,96630.0,,,,,,,,,,,,,,,,,,,,


In [30]:
# Write the v2 table (results + pit stops + lap times) to dataset_v2.csv, without the index column.
df1245678.to_csv('dataset_v2.csv', index=False)

In [31]:
import os
import subprocess
from IPython.display import FileLink, display

def download_file(path, download_file_name):
    """Zip ``path`` into ``/kaggle/working/<download_file_name>.zip`` and show a link to it.

    Args:
        path: File or directory to archive. Relative paths are resolved against
            ``/kaggle/working/`` because the function changes into that
            directory before running ``zip``.
        download_file_name: Archive name without the ``.zip`` extension.

    Returns:
        None. On success an IPython ``FileLink`` to the archive is displayed.
        If ``zip`` cannot be started or exits non-zero, the error is printed
        and no link is shown.
    """
    # Changes the process-wide working directory; the relative FileLink below
    # depends on it.
    os.chdir('/kaggle/working/')
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Pass an argument list and skip the shell: names containing spaces or
    # shell metacharacters are handed to zip verbatim instead of being
    # re-parsed (and possibly executed) by /bin/sh.
    command = ["zip", "-r", zip_name, path]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # Without a shell, a missing `zip` binary raises instead of returning 127.
        print("Unable to run zip command!")
        print(exc)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))

# Bundle dataset_v2.csv into final.zip and display a download link.
download_file('dataset_v2.csv', 'final')