In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
import pandasql
from pandasql import sqldf

def pysqldf(q):
    """Run the SQL query string ``q`` through ``sqldf``.

    The query is resolved against this module's global namespace, so any
    DataFrame bound to a top-level name can be referenced as a table.
    """
    return sqldf(q, globals())

In [81]:
# Every table is read from its own CSV file under ../data.

# Circuit, season and race tables.
circuits = pd.read_csv("../data/circuits.csv")
seasons = pd.read_csv("../data/seasons.csv")
races = pd.read_csv("../data/races.csv")
status = pd.read_csv("../data/status.csv")

# Constructor tables.
constructor = pd.read_csv("../data/constructors.csv")
constructor_results = pd.read_csv("../data/constructor_results.csv")
constructor_standings = pd.read_csv("../data/constructor_standings.csv")

# Driver tables.
drivers = pd.read_csv("../data/drivers.csv")
driver_standings = pd.read_csv("../data/driver_standings.csv")

# Result, qualifying, lap-time and pit-stop tables.
results = pd.read_csv("../data/results.csv")
qualif = pd.read_csv("../data/qualifying.csv")
lap_times = pd.read_csv("../data/lap_times.csv")
pit_stops = pd.read_csv("../data/pit_stops.csv")

# Data Cleaning

We drop the `url` column from the datasets that have one, and join some of the datasets so the results are easier to read.

In [82]:
# Drop the `url` column from every table that carries one.
# errors="ignore" keeps this cell re-runnable: each name is rebound to the
# already-cleaned frame, so a second execution would otherwise raise KeyError
# because `url` is no longer present.
circuits = circuits.drop(columns="url", errors="ignore")
constructor = constructor.drop(columns="url", errors="ignore")
drivers = drivers.drop(columns="url", errors="ignore")
seasons = seasons.drop(columns="url", errors="ignore")
races = races.drop(columns="url", errors="ignore")

# Data Exploration

- constructor wins by circuit

In [147]:
# Select the constructor_standings rows whose position is 1, 2 or 3 and attach
# to each one the constructor's name (from `constructor`) plus the circuitId
# and name of the matching race (from `races`).
# NOTE: `circuitName` is an alias for races.name, so it holds the race
# (Grand Prix) name rather than a name taken from the `circuits` table.
# NOTE(review): `position` and `wins` are read from constructor_standings;
# they look like standings after each race rather than single-race results --
# confirm before interpreting these rows as race wins.
q = """select 
C.raceId, 
C.constructorId,
CName.name,
C.points,
C.position, 
C.wins,
R.circuitID,
R.name as circuitName
from constructor_standings C
inner join races R
on C.raceID = R.raceID
inner join constructor CName
on CName.constructorId = C.constructorId
where C.position = 1
or C.position = 2
or C.position = 3
;"""
# pysqldf runs the query against the DataFrames bound in the notebook's
# global namespace and returns the result as a DataFrame.
constructor_standings_cleaned = pysqldf(q)
# Bare last expression: shown as the cell's output.
constructor_standings_cleaned

Unnamed: 0,raceId,constructorId,name,points,position,wins,circuitId,circuitName
0,18,1,McLaren,14.0,1,1,1,Australian Grand Prix
1,18,2,BMW Sauber,8.0,3,0,1,Australian Grand Prix
2,18,3,Williams,9.0,2,0,1,Australian Grand Prix
3,19,1,McLaren,24.0,1,1,2,Malaysian Grand Prix
4,19,2,BMW Sauber,19.0,2,0,2,Malaysian Grand Prix
...,...,...,...,...,...,...,...,...
2923,1054,9,Red Bull,83.0,2,1,75,Portuguese Grand Prix
2924,1054,1,McLaren,53.0,3,0,75,Portuguese Grand Prix
2925,1055,131,Mercedes,141.0,1,3,4,Spanish Grand Prix
2926,1055,9,Red Bull,112.0,2,1,4,Spanish Grand Prix


In [294]:
def get_win_location_count(constructor_standings_cleaned):
    """Count how often each constructor appears for each circuit name.

    Every row of the input contributes one count to the pair formed by its
    ``name`` and ``circuitName`` values; no other column is read, so any
    filtering (e.g. by position) must be done by the caller beforehand.

    Parameters
    ----------
    constructor_standings_cleaned : pandas.DataFrame
        Frame with at least the columns ``name`` and ``circuitName``.

    Returns
    -------
    win_locations : dict
        Nested dict ``{name: {circuitName: count}}`` in order of first
        appearance.
    location_df : pandas.DataFrame
        ``pd.DataFrame(win_locations)``: one column per ``name``, one row per
        ``circuitName``, NaN where a pair never occurs.
    """
    win_locations = {}
    # Walk the two needed columns in lockstep instead of using iterrows(),
    # which builds a full Series per row (slow, and may upcast values).
    name_circuit_pairs = zip(
        constructor_standings_cleaned['name'],
        constructor_standings_cleaned['circuitName'],
    )
    for constructor_name, circuit_name in name_circuit_pairs:
        per_circuit = win_locations.setdefault(constructor_name, {})
        per_circuit[circuit_name] = per_circuit.get(circuit_name, 0) + 1
    location_df = pd.DataFrame(win_locations)
    return win_locations, location_df

# Build the nested counts and the matching table, then move the circuit-name
# index into an ordinary column named "index" (leaving a 0..n-1 row index).
location_counts = get_win_location_count(constructor_standings_cleaned)
win_locations = location_counts[0]
location_df = location_counts[1].reset_index()

Questions to answer here:
- Is there a particular constructor that dominates a circuit and has the most wins there?
- Is there a circuit that a constructor is relatively better at?

In [295]:
# Thresholds for discarding sparse data: a row (circuit) is removed when more
# than MAX_NA_PER_CIRCUIT of its cells are NaN, and a column (constructor) is
# removed when more than MAX_NA_PER_CONSTRUCTOR of its cells are NaN.
MAX_NA_PER_CIRCUIT = 38
MAX_NA_PER_CONSTRUCTOR = 20

# Number of missing cells in each row.
location_na = location_df.isna().sum(axis=1)
# Boolean masks select by alignment with the index, so this does not depend on
# row labels matching row positions (dropping by enumerate() position breaks
# as soon as the index is no longer a pristine 0..n-1 range, e.g. on re-run).
location_df = location_df.loc[location_na <= MAX_NA_PER_CIRCUIT]

# Number of missing cells in each column, counted after the row filter above.
constructor_na = location_df.isna().sum()
# .copy() so later cells can modify the result without chained-assignment
# warnings.
location_df = location_df.loc[:, constructor_na <= MAX_NA_PER_CONSTRUCTOR].copy()
location_df


Unnamed: 0,index,McLaren,BMW Sauber,Williams,Ferrari,Renault,Red Bull,Toyota,BAR,Jordan,...,Brabham-Repco,Cooper-Climax,Cooper-Maserati,Lotus-Climax,Porsche,Vanwall,Cooper,Brabham-Ford,Mercedes,Lotus F1
0,Australian Grand Prix,20.0,2.0,18.0,27.0,6.0,7.0,1.0,1.0,1.0,...,,,,,,,,,7.0,1.0
1,Malaysian Grand Prix,11.0,3.0,4.0,17.0,5.0,7.0,2.0,,2.0,...,,,,,,,,,4.0,1.0
2,Bahrain Grand Prix,9.0,2.0,2.0,11.0,4.0,8.0,2.0,,,...,,,,,,,,,9.0,2.0
3,Spanish Grand Prix,26.0,2.0,17.0,35.0,5.0,12.0,2.0,1.0,1.0,...,,,,,,,,,8.0,2.0
5,Monaco Grand Prix,30.0,2.0,16.0,48.0,7.0,10.0,2.0,1.0,1.0,...,1.0,5.0,3.0,3.0,1.0,,1.0,,6.0,1.0
6,Canadian Grand Prix,26.0,2.0,19.0,36.0,7.0,9.0,,1.0,1.0,...,1.0,,1.0,,,,,1.0,7.0,1.0
7,French Grand Prix,26.0,2.0,15.0,38.0,7.0,2.0,,1.0,1.0,...,2.0,4.0,1.0,5.0,1.0,1.0,1.0,,2.0,
8,British Grand Prix,29.0,2.0,19.0,48.0,5.0,11.0,1.0,1.0,1.0,...,2.0,4.0,1.0,5.0,1.0,1.0,1.0,,8.0,1.0
9,German Grand Prix,27.0,1.0,18.0,46.0,5.0,9.0,1.0,1.0,1.0,...,2.0,2.0,1.0,5.0,1.0,1.0,1.0,1.0,5.0,
10,Hungarian Grand Prix,21.0,2.0,16.0,29.0,4.0,11.0,,1.0,1.0,...,,,,,,,,,8.0,1.0
