# Imports

In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'vscode'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']

# Load the data

In [4]:
circuits = pd.read_csv(r'./datasets/circuits.csv')
constructor_results = pd.read_csv(r'./datasets/constructor_results.csv')
constructor_standings = pd.read_csv(r'./datasets/constructor_standings.csv')
constructors = pd.read_csv(r'./datasets/constructors.csv')
driver_standings = pd.read_csv(r'./datasets/driver_standings.csv')
drivers = pd.read_csv(r'./datasets/drivers.csv')
lap_times = pd.read_csv(r'./datasets/lap_times.csv')
pit_stops = pd.read_csv(r'./datasets/pit_stops.csv')
qualifying = pd.read_csv(r'./datasets/qualifying.csv')
races = pd.read_csv(r'./datasets/races.csv')
results = pd.read_csv(r'./datasets/results.csv')
seasons = pd.read_csv(r'./datasets/seasons.csv')
sprint_results = pd.read_csv(r'./datasets/sprint_results.csv')
status = pd.read_csv(r'./datasets/status.csv')

# Data

In [5]:
circuits.head()
# constructor_results.head()
# constructor_standings.head()
# constructors.head()
# driver_standings.head()
# drivers.head()
# lap_times.head()
# pit_stops.head()
# qualifying.head()
# races.head()
# results.head()
# seasons.head()
# sprint_results.head()
# status.head()

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


A dataset **qualifying** tem missing values mas fazem sentido (q2 e q3, pois nem todos os drivers vão a estas fases).   
Nenhuma das outras datasets contém missing values.

In [6]:
circuits['country'].replace('United States', 'USA', inplace=True)

# Visualizations

## Position of the driver in each lap of a race

Exemplo de uma corrida ao calhas:

In [7]:
races.loc[(races['year']==2009) & (races['circuitId']==1)]

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url
0,1,2009,1,1,Australian Grand Prix,3/29/2009,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...


In [8]:
# choosing the race
race = races.loc[(races['year']==2009) & (races['circuitId']==1)]['raceId'].values[0]


#dataframe of lap_times for that race
pos_per_lap = lap_times[lap_times['raceId']==race]  


driver_ids = np.unique(pos_per_lap['driverId'].values).tolist() #list of the ids of the drivers in that race
driver_names = [drivers.loc[drivers['driverId']==name]['surname'].values[0] for name in driver_ids] #names of the drivers
driver_dict = {driver_ids[i]: driver_names[i] for i in range(len(driver_ids))} #dictionary with ids and names


data_ppl = [dict(type='scatter',
             x=pos_per_lap[pos_per_lap['driverId']==driver]['lap'],
             y=pos_per_lap[pos_per_lap['driverId']==driver]['position'],
             name=name)
                            for driver, name in driver_dict.items()]

layout_ppl = dict(title=dict(
                        text='Position of the drivers in each lap'
                  ),
                  xaxis=dict(title='Laps'),
                  yaxis=dict(title='Positions'))

In [9]:
fig_ppl = go.Figure(data=data_ppl, layout=layout_ppl)

fig_ppl.show()

### PROBLEMA: Range das cores!!
E porque raio há um com linha e pontos????

## Sankey 

### Data preprocessing for Sankey

In [None]:
year = 2016
pstn = 1
drivID = 830

In [None]:
results['position'].replace(to_replace='\\N',value=21,inplace=True)

In [None]:
results_driver = results.merge(drivers[['driverId','driverRef','forename','surname','code','dob','nationality']],left_on='driverId',right_on='driverId')

In [None]:
results_driver.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,forename,surname,code,dob,nationality
0,1,18,1,1,22,1,1,1,1,10.0,...,2,1:27.452,218.3,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
1,27,19,1,1,22,9,5,5,5,4.0,...,3,1:35.462,209.033,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
2,57,20,1,1,22,3,13,13,13,0.0,...,19,1:35.520,203.969,11,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
3,69,21,1,1,22,5,3,3,3,6.0,...,3,1:22.017,204.323,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
4,90,22,1,1,22,3,2,2,2,8.0,...,2,1:26.529,222.085,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British


In [None]:
results_driver.insert(len(results_driver.columns),'driverName',results_driver['forename']+' '+results_driver['surname'])
results_driver.drop(columns=['forename','surname'],axis=1,inplace=True)

In [None]:
results_driver['position']=results_driver['position'].astype(int)

In [None]:
#races_year = races[races['year']==year]

#### Year

In [64]:
results_year = results_driver[results_driver['raceId']\
    .apply(lambda x: np.intersect1d(x,\
         races[races['year']==year]['raceId']).size > 0)]

In [65]:
results_year.sample(5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,dob,nationality,driverName
12217,23367,968,154,210,8,14,11,11,11,0.0,...,45,4,1:44.970,190.477,1,grosjean,GRO,1986-04-17,French,Romain Grosjean
23892,23266,963,825,4,20,14,21,R,20,0.0,...,3,21,1:43.379,193.025,75,kevin_magnussen,MAG,1992-10-05,Danish,Kevin Magnussen
24239,23224,961,831,15,12,18,21,R,22,0.0,...,\N,0,\N,\N,130,nasr,NAS,1992-08-21,Brazilian,Felipe Nasr
671,23203,961,3,131,6,2,1,1,1,25.0,...,26,9,1:26.599,240.820,1,rosberg,ROS,1985-06-27,German,Nico Rosberg
12197,22922,948,154,210,8,19,6,6,6,8.0,...,48,17,1:32.862,205.582,1,grosjean,GRO,1986-04-17,French,Romain Grosjean


In [66]:
results_y = results_year[['driverId',\
    'driverName','raceId','grid','position','code','dob','nationality']]

In [67]:
results_y_abr = results_y[['position','grid','driverName']].sort_values(['position','grid','driverName'])

A grid position value of '0' indicates the driver started from the pit lane.

In [68]:
results_y_abr.head()

Unnamed: 0,position,grid,driverName
173,1,1,Lewis Hamilton
175,1,1,Lewis Hamilton
176,1,1,Lewis Hamilton
184,1,1,Lewis Hamilton
185,1,1,Lewis Hamilton


In [69]:
results_y_abr['driverGrid'] = \
results_y_abr['driverName'] + ' - ' + results_y_abr['grid'].astype(str)

In [70]:
results_y_abr.drop(columns=['grid','driverName'],inplace=True)

In [71]:
d_g_list = []

curr_d_g = list(results_y_abr.iloc[0,:]) + [1]

for i in range(1,len(results_y_abr['position'])):
    if results_y_abr['position'].iloc[i] == results_y_abr['position'].iloc[i-1]:
        if results_y_abr['driverGrid'].iloc[i] == results_y_abr['driverGrid'].iloc[i-1]:
            curr_d_g[2] += 1
        else:
            d_g_list.append(curr_d_g)
            curr_d_g = list(results_y_abr.iloc[i,:]) + [1]
    else:
        d_g_list.append(curr_d_g)
        curr_d_g = list(results_y_abr.iloc[i,:]) + [1]

d_g_list.append(curr_d_g)

In [72]:
d_g_list[:20]

[[1, 'Lewis Hamilton - 1', 7],
 [1, 'Nico Rosberg - 1', 6],
 [1, 'Lewis Hamilton - 2', 2],
 [1, 'Nico Rosberg - 2', 3],
 [1, 'Lewis Hamilton - 3', 1],
 [1, 'Daniel Ricciardo - 4', 1],
 [1, 'Max Verstappen - 4', 1],
 [2, 'Daniel Ricciardo - 1', 1],
 [2, 'Lewis Hamilton - 1', 2],
 [2, 'Nico Rosberg - 1', 1],
 [2, 'Daniel Ricciardo - 2', 1],
 [2, 'Nico Rosberg - 2', 4],
 [2, 'Daniel Ricciardo - 3', 1],
 [2, 'Max Verstappen - 3', 3],
 [2, 'Sebastian Vettel - 3', 2],
 [2, 'Kimi Räikkönen - 4', 1],
 [2, 'Sebastian Vettel - 4', 1],
 [2, 'Daniel Ricciardo - 5', 1],
 [2, 'Kimi Räikkönen - 5', 1],
 [2, 'Max Verstappen - 8', 1]]

In [73]:
d_g_frame = pd.DataFrame(d_g_list,columns=['position','name','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [74]:
labels_line = list(d_g_frame['name'])+list(d_g_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(d_g_frame['weight'])

In [75]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Placings in "+str(year), font_size=10)
fig.show()

#### Year and position

In [76]:
results_year_pstn = results_year[results_year['grid']==pstn]

In [77]:
results_yp = results_year_pstn[['driverId',\
    'driverName','raceId','grid','position','code','dob','nationality']]

In [78]:
results_yp_abr = results_yp[['position','driverName']].sort_values(['position','driverName'])

In [79]:
driver_list = []

curr_driver = list(results_yp_abr.iloc[0,:]) + [1]

for i in range(1,len(results_yp_abr['position'])):
    if results_yp_abr['position'].iloc[i] == results_yp_abr['position'].iloc[i-1]:
        if results_yp_abr['driverName'].iloc[i] == results_yp_abr['driverName'].iloc[i-1]:
            curr_driver[2] += 1
        else:
            driver_list.append(curr_driver)
            curr_driver = list(results_yp_abr.iloc[i,:]) + [1]
    else:
        driver_list.append(curr_driver)
        curr_driver = list(results_yp_abr.iloc[i,:]) + [1]

driver_list.append(curr_driver)

In [80]:
driver_list

[[1, 'Lewis Hamilton', 7],
 [1, 'Nico Rosberg', 6],
 [2, 'Daniel Ricciardo', 1],
 [2, 'Lewis Hamilton', 2],
 [2, 'Nico Rosberg', 1],
 [3, 'Lewis Hamilton', 1],
 [4, 'Nico Rosberg', 1],
 [21, 'Lewis Hamilton', 2]]

In [81]:
driver_frame = pd.DataFrame(driver_list,columns=['position','name','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [82]:
labels_line = list(driver_frame['name'])+list(driver_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(driver_frame['weight'])

In [83]:
print(labels_final[:len(labels_final)//2]) # indices correspond to labels, eg A1, A2, A1, B1, .
print(labels_final[len(labels_final)//2:])
print(weights_final)

[0, 1, 0, 2, 1, 0, 1, 0]
[3, 3, 4, 4, 4, 5, 6, 7]
[7, 6, 2, 1, 1, 1, 1, 2]


In [84]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Placings in "+str(year)+\
  " starting at position "+str(pstn), font_size=10)
fig.show()

#### Year, start and driver

In [85]:
results_year_pstn_driver = \
    results_year_pstn[results_year_pstn['driverId']==drivID]

In [86]:
results_ypd = results_year_pstn_driver[['driverId',\
    'driverName','raceId','grid','position','code','dob','nationality']]

In [87]:
results_ypd.head()

Unnamed: 0,driverId,driverName,raceId,grid,position,code,dob,nationality


In [88]:
results_ypd_abr = results_ypd[['position','driverName']].sort_values('position')

In [89]:
results_ypd_abr#['position']#.iloc[11]

Unnamed: 0,position,driverName


In [90]:
list(results_ypd_abr.iloc[0,:])+[1]

IndexError: single positional indexer is out-of-bounds

In [None]:
posit_list = []

curr_posit = list(results_ypd_abr.iloc[0,:]) + [1]

for i in range(1,len(results_ypd_abr['position'])):
    if results_ypd_abr['position'].iloc[i] == results_ypd_abr['position'].iloc[i-1]:
        curr_posit[2] += 1
    else:
        posit_list.append(curr_posit)
        curr_posit = list(results_ypd_abr.iloc[i,:]) + [1]

posit_list.append(curr_posit)


In [None]:
(results['grid']==0).value_counts()

False    23822
True      1598
Name: grid, dtype: int64

In [None]:
posit_frame = pd.DataFrame(posit_list,columns=['position','name','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [None]:
labels_line = list(posit_frame['name'])+list(posit_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(posit_frame['weight'])

In [91]:
labels_final

[0, 1, 0, 2, 1, 0, 1, 0, 3, 3, 4, 4, 4, 5, 6, 7]

In [92]:
labels_uni_line

['Lewis Hamilton', 'Nico Rosberg', 'Daniel Ricciardo', 1, 2, 3, 4, 21]

In [93]:
labels_final[:len(labels_final)//2]

[0, 1, 0, 2, 1, 0, 1, 0]

In [94]:
print(labels_final[:len(labels_final)//2]) # indices correspond to labels, eg A1, A2, A1, B1, .
print(labels_final[len(labels_final)//2:])
print(weights_final)

[0, 1, 0, 2, 1, 0, 1, 0]
[3, 3, 4, 4, 4, 5, 6, 7]
[7, 6, 2, 1, 1, 1, 1, 2]


In [95]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Placings of driver " + " in "+str(year)+\
  " starting at position "+str(pstn), font_size=10)
fig.show()

#### Sankey in Dash

In [53]:
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go
import json, urllib

app = Dash(__name__)

app.layout = html.Div([
    html.H4('Supply chain of the energy production'),
    dcc.Graph(id="graph"),
    html.P("Opacity"),
    dcc.Slider(id='slider', min=0, max=1, 
               value=0.5, step=0.1)
])

@app.callback(
    Output("graph", "figure"), 
    Input("slider", "value"))
def display_sankey(opacity):
    url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
    response = urllib.request.urlopen(url)
    data = json.loads(response.read()) # replace with your own data source

    node = data['data'][0]['node']
    node['color'] = [
        f'rgba(255,0,255,{opacity})' 
        if c == "magenta" else c.replace('0.8', str(opacity)) 
        for c in node['color']]

    link = data['data'][0]['link']
    link['color'] = [
        node['color'][src] for src in link['source']]

    fig = go.Figure(go.Sankey(link=link, node=node))
    fig.update_layout(font_size=10)
    return fig

app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.



### Tab code

In [None]:
from dash import Dash, dcc, html

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    dcc.Tabs([
        dcc.Tab(label='Tab one', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [4, 1, 2],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [2, 4, 5],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
        dcc.Tab(label='Tab two', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [1, 4, 1],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [1, 2, 3],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
        dcc.Tab(label='Tab three', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [2, 4, 3],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [5, 4, 3],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
    ])
])

if __name__ == '__main__':
    app.run_server(debug=True)