In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


import logging
# INFO level so the per-step log records emitted under the `formula1.utils`
# logger (step name, resulting shape, elapsed time) appear in the cell output.
logging.basicConfig(level=logging.INFO)

In [6]:
# Widen the text rendering and raise the column cap so the wide merged
# frames are shown without pandas eliding columns.
pd.options.display.width = 500
pd.options.display.max_columns = 100

In [7]:
from formula1 import preprocessing_newdata

In [8]:
# Build the working frame: load the raw results, then run each preprocessing
# step in order (merges first, then column removal/renaming, then sorting).
# Every step takes the frame produced by the previous one.
prep = preprocessing_newdata  # short alias, purely for readability of the chain

work_df = (
    prep.read_data()
    .pipe(prep.merge_constructors)
    .pipe(prep.merge_drivers)
    .pipe(prep.merge_races)
    .pipe(prep.merge_status)
    .pipe(prep.merge_driverstandings)
    .pipe(prep.remove_columns)
    .pipe(prep.rename_columns)
    .pipe(prep.sort_races)
)

INFO:formula1.utils:[read_data         ] shape=(24297, 18),  time=0:00:00.046703
INFO:formula1.utils:[merge_constructors] shape=(24297, 19),  time=0:00:00.019193
INFO:formula1.utils:[merge_drivers     ] shape=(24297, 27),  time=0:00:00.015113
INFO:formula1.utils:[merge_races       ] shape=(24297, 34),  time=0:00:00.016334
INFO:formula1.utils:[merge_status      ] shape=(24297, 35),  time=0:00:00.014176
INFO:formula1.utils:[merge_driverstandi] shape=(24297, 40),  time=0:00:00.039344
INFO:formula1.utils:[remove_columns    ] shape=(24297, 30),  time=0:00:00.017494
INFO:formula1.utils:[rename_columns    ] shape=(24297, 30),  time=0:00:00.006965
INFO:formula1.utils:[sort_races        ] shape=(24297, 30),  time=0:00:00.006699


In [12]:
# Load the results table on its own (the first step of the pipeline, with no
# merges applied) so it can be inspected separately from `work_df`.
results = preprocessing_newdata.read_data()

INFO:formula1.utils:[read_data         ] shape=(24297, 18),  time=0:00:00.054194


In [16]:
# Preview the first rows only: read_data reports 24,297 rows, so rendering the
# whole frame as cell output adds bulk without adding information.
results.head(10)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.300,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1
5,6,18,6,3,8,13,6,6,6,3.0,57,\N,\N,50,14,1:29.639,212.974,11
6,7,18,7,5,14,17,7,7,7,2.0,55,\N,\N,22,12,1:29.534,213.224,5
7,8,18,8,6,1,15,8,8,8,1.0,53,\N,\N,20,4,1:27.903,217.180,5
8,9,18,9,2,4,2,\N,R,9,0.0,47,\N,\N,15,9,1:28.753,215.100,4
9,10,18,10,7,12,18,\N,R,10,0.0,43,\N,\N,23,13,1:29.558,213.166,3


In [14]:
# Column labels for the driver standings file. They are passed explicitly via
# `names=`, so pandas reads the first line of the CSV as data, not as a header.
names = [
    'driverStandingsId',
    'raceId',
    'driverId',
    'points',
    'position',
    'positionText',
    'wins',
]

driverstandings = pd.read_csv(
    '../new_data/driver_standings.csv',
    names=names,
)

In [15]:
# Preview the first rows rather than rendering the entire standings table.
driverstandings.head(10)

Unnamed: 0,driverStandingsId,raceId,driverId,points,position,positionText,wins
0,1,18,1,10.0,1,1,1
1,2,18,2,8.0,2,2,0
2,3,18,3,6.0,3,3,0
3,4,18,4,5.0,4,4,0
4,5,18,5,4.0,5,5,0
5,6,18,6,3.0,6,6,0
6,7,18,7,2.0,7,7,0
7,8,18,8,1.0,8,8,0
8,9,19,1,14.0,1,1,1
9,10,19,2,11.0,3,3,0


In [10]:
# Last race
last_race = work_df[(work_df['year'] == 2019) & (work_df['raceId'] == 1014)]
last_race

Unnamed: 0,raceId,driverId,constructorId,number_x,grid,position,positionText_1,positionOrder,points_1,laps,time_x,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,constructorRef,driverRef,dob,nationality,year,round,circuitId,name,date,status,WC_points_thisyear,championship_standing,wins_this_year
2418,1014,1,131,44,2,1,1,1,26.0,66,1:35:50.443,5750443,54,1,1:18.492,213.499,1,mercedes,hamilton,1985-01-07,British,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,112.0,1.0,3.0
2419,1014,8,51,7,14,14,14,14,0.0,66,+41.803,5792246,66,14,1:21.382,205.917,1,alfa,raikkonen,1979-10-17,Finnish,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,13.0,9.0,0.0
2420,1014,815,211,11,15,15,15,15,0.0,66,+46.877,5797320,65,16,1:21.859,204.717,1,racing_point,perez,1990-01-26,Mexican,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,13.0,8.0,0.0
2421,1014,825,210,20,8,7,7,7,6.0,66,+28.159,5778602,66,9,1:20.770,207.478,1,haas,kevin_magnussen,1992-10-05,Danish,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,14.0,7.0,0.0
2422,1014,832,1,55,12,8,8,8,4.0,66,+32.342,5782785,59,10,1:20.859,207.249,1,mclaren,sainz,1994-09-01,Spanish,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,10.0,11.0,0.0
2423,1014,20,6,5,3,4,4,4,12.0,66,+9.167,5759610,64,4,1:19.820,209.947,1,ferrari,vettel,1987-07-03,German,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,64.0,4.0,0.0
2424,1014,807,4,27,0,13,13,13,0.0,66,+39.241,5789684,65,13,1:21.282,206.171,1,renault,hulkenberg,1987-08-19,German,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,6.0,13.0,0.0
2425,1014,822,131,77,1,2,2,2,18.0,66,+4.074,5754517,55,2,1:18.737,212.835,1,mercedes,bottas,1989-08-28,Finnish,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,105.0,2.0,2.0
2426,1014,154,210,8,7,10,10,10,1.0,66,+34.641,5785084,64,12,1:21.057,206.743,1,haas,grosjean,1986-04-17,French,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,1.0,17.0,0.0
2427,1014,817,4,3,13,12,12,12,0.0,66,+36.758,5787201,56,7,1:20.615,207.876,1,renault,ricciardo,1989-07-01,Australian,2019,5,4,Spanish Grand Prix,2019-05-12,Finished,6.0,12.0,0.0


In [11]:
# Sanity check on the filtered frame: (number of rows, number of columns).
last_race.shape

(20, 30)