# Imports

In [94]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import random
from matplotlib.colors import to_rgba
pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'vscode'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']

# Load the data

In [95]:
circuits = pd.read_csv(r'./datasets/circuits.csv')
constructor_results = pd.read_csv(r'./datasets/constructor_results.csv')
constructor_standings = pd.read_csv(r'./datasets/constructor_standings.csv')
constructors = pd.read_csv(r'./datasets/constructors.csv')
driver_standings = pd.read_csv(r'./datasets/driver_standings.csv')
drivers = pd.read_csv(r'./datasets/drivers.csv')
lap_times = pd.read_csv(r'./datasets/lap_times.csv')
pit_stops = pd.read_csv(r'./datasets/pit_stops.csv')
qualifying = pd.read_csv(r'./datasets/qualifying.csv')
races = pd.read_csv(r'./datasets/races.csv')
results = pd.read_csv(r'./datasets/results.csv')
seasons = pd.read_csv(r'./datasets/seasons.csv')
sprint_results = pd.read_csv(r'./datasets/sprint_results.csv')
status = pd.read_csv(r'./datasets/status.csv')

# Data

In [96]:
circuits.head()
# constructor_results.head()
# constructor_standings.head()
# constructors.head()
# driver_standings.head()
# drivers.head()
# lap_times.head()
# pit_stops.head()
# qualifying.head()
# races.head()
# results.head()
# seasons.head()
# sprint_results.head()
# status.head()

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


A dataset **qualifying** tem missing values mas fazem sentido (q2 e q3, pois nem todos os drivers vão a estas fases).   
Nenhuma das outras datasets contém missing values.

In [97]:
circuits['country'].replace('United States', 'USA', inplace=True)

# Visualizations

## Position of the driver in each lap of a race

Exemplo de uma corrida ao calhas:

In [98]:
races.loc[(races['year']==2009) & (races['circuitId']==1)]

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url
0,1,2009,1,1,Australian Grand Prix,3/29/2009,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...


In [99]:
# choosing the race
race = races.loc[(races['year']==2009) & (races['circuitId']==1)]['raceId'].values[0]


#dataframe of lap_times for that race
pos_per_lap = lap_times[lap_times['raceId']==race]  


driver_ids = np.unique(pos_per_lap['driverId'].values).tolist() #list of the ids of the drivers in that race
driver_names = [drivers.loc[drivers['driverId']==name]['surname'].values[0] for name in driver_ids] #names of the drivers
driver_dict = {driver_ids[i]: driver_names[i] for i in range(len(driver_ids))} #dictionary with ids and names


data_ppl = [dict(type='scatter',
             x=pos_per_lap[pos_per_lap['driverId']==driver]['lap'],
             y=pos_per_lap[pos_per_lap['driverId']==driver]['position'],
             name=name)
                            for driver, name in driver_dict.items()]

layout_ppl = dict(title=dict(
                        text='Position of the drivers in each lap'
                  ),
                  xaxis=dict(title='Laps'),
                  yaxis=dict(title='Positions'))

In [100]:
fig_ppl = go.Figure(data=data_ppl, layout=layout_ppl)

fig_ppl.show()

### PROBLEMA: Range das cores!!
E porque raio há um com linha e pontos????

## Sankey 

### Data preprocessing for Sankey

In [101]:
year = 2021

In [102]:
results['position'].replace(to_replace='\\N',value='DNF',inplace=True)
# results['grid'].replace(to_replace=0,value='Pit',inplace=True)

In [103]:
results_driver = results.merge(drivers[['driverId','driverRef','forename','surname','code','dob','nationality']],left_on='driverId',right_on='driverId')

In [104]:
results_driver.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,forename,surname,code,dob,nationality
0,1,18,1,1,22,1,1,1,1,10.0,...,2,1:27.452,218.3,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
1,27,19,1,1,22,9,5,5,5,4.0,...,3,1:35.462,209.033,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
2,57,20,1,1,22,3,13,13,13,0.0,...,19,1:35.520,203.969,11,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
3,69,21,1,1,22,5,3,3,3,6.0,...,3,1:22.017,204.323,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British
4,90,22,1,1,22,3,2,2,2,8.0,...,2,1:26.529,222.085,1,hamilton,Lewis,Hamilton,HAM,1985-01-07,British


In [105]:
results_driver.insert(len(results_driver.columns),'driverName',results_driver['forename']+' '+results_driver['surname'])
results_driver.drop(columns=['forename','surname'],axis=1,inplace=True)

In [106]:
# results_driver['position']=results_driver['position'].astype(int)

In [107]:
#races_year = races[races['year']==year]

#### Year

In [108]:
results_year = results_driver[results_driver['raceId']\
    .apply(lambda x: np.intersect1d(x,\
         races[races['year']==year]['raceId']).size > 0)]

In [109]:
results_year.sample(5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,dob,nationality,driverName
23428,25344,1071,817,1,3,11,DNF,R,19,0.0,...,40,17,1:14.443,208.379,75,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23419,25150,1061,817,1,3,6,5,5,5,10.0,...,51,8,1:31.284,232.325,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
25382,25143,1060,854,210,47,19,18,18,18,0.0,...,56,18,1:09.394,224.007,12,mick_schumacher,MSC,1999-03-22,German,Mick Schumacher
1533,25175,1062,8,51,7,13,10,10,10,1.0,...,58,8,1:21.518,193.473,11,raikkonen,RAI,1979-10-17,Finnish,Kimi Räikkönen
23220,25400,1073,815,9,11,4,15,15,15,0.0,...,51,2,1:26.419,219.993,5,perez,PER,1990-01-26,Mexican,Sergio Pérez


In [110]:
results_y = results_year[['driverId',\
    'driverName','raceId','grid','position','code','dob','nationality']]

In [111]:
results_y_abr = results_y[['position','grid','driverName']].sort_values(['position','grid','driverName'])

A grid position value of '0' indicates the driver started from the pit lane.

In [112]:
results_y_abr.head()

Unnamed: 0,position,grid,driverName
269,1,1,Lewis Hamilton
285,1,1,Lewis Hamilton
286,1,1,Lewis Hamilton
24514,1,1,Max Verstappen
24515,1,1,Max Verstappen


In [113]:
results_y_abr['driverGrid'] = \
results_y_abr['driverName'] + ' - ' + results_y_abr['grid'].astype(str)

In [114]:
results_y_abr.drop(columns=['grid','driverName'],inplace=True)

In [115]:
d_g_list = []

curr_d_g = list(results_y_abr.iloc[0,:]) + [1]

for i in range(1,len(results_y_abr['position'])):
    if results_y_abr['position'].iloc[i] == results_y_abr['position'].iloc[i-1]:
        if results_y_abr['driverGrid'].iloc[i] == results_y_abr['driverGrid'].iloc[i-1]:
            curr_d_g[2] += 1
        else:
            d_g_list.append(curr_d_g)
            curr_d_g = list(results_y_abr.iloc[i,:]) + [1]
    else:
        d_g_list.append(curr_d_g)
        curr_d_g = list(results_y_abr.iloc[i,:]) + [1]

d_g_list.append(curr_d_g)

In [116]:
d_g_list[:20]

[['1', 'Lewis Hamilton - 1', 3],
 ['1', 'Max Verstappen - 1', 7],
 ['1', 'Valtteri Bottas - 1', 1],
 ['1', 'Daniel Ricciardo - 2', 1],
 ['1', 'Lewis Hamilton - 2', 3],
 ['1', 'Max Verstappen - 2', 1],
 ['1', 'Max Verstappen - 3', 2],
 ['1', 'Lewis Hamilton - 4', 1],
 ['1', 'Sergio Pérez - 6', 1],
 ['1', 'Esteban Ocon - 8', 1],
 ['1', 'Lewis Hamilton - 10', 1],
 ['10', 'George Russell - 3', 1],
 ['10', 'Lando Norris - 5', 1],
 ['10', 'Charles Leclerc - 7', 1],
 ['10', 'Lando Norris - 7', 1],
 ['10', 'Pierre Gasly - 9', 1],
 ['10', 'Antonio Giovinazzi - 10', 1],
 ['10', 'Lance Stroll - 10', 1],
 ['10', 'Sebastian Vettel - 10', 1],
 ['10', 'Carlos Sainz - 11', 1]]

In [117]:
d_g_frame = pd.DataFrame(d_g_list,columns=['position','name','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [118]:
labels_line = list(d_g_frame['name'])+list(d_g_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(d_g_frame['weight'])

In [119]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Placings in "+str(year), font_size=10)
fig.show()

In [120]:
races.head()

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url
0,1,2009,1,1,Australian Grand Prix,3/29/2009,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...
1,2,2009,2,2,Malaysian Grand Prix,4/5/2009,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2,3,2009,3,17,Chinese Grand Prix,4/19/2009,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3,4,2009,4,3,Bahrain Grand Prix,4/26/2009,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4,5,2009,5,4,Spanish Grand Prix,5/10/2009,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...


### Year and Driver


In [121]:
year_drivers = races[['raceId', 'year']].merge(driver_standings[['raceId', 'driverId' ]], on = 'raceId')
year_drivers = year_drivers.merge(drivers[['driverId', 'forename', 'surname']], on='driverId')
year_drivers = year_drivers.merge(results[['raceId', 'driverId','grid']], on=['raceId','driverId'])

years_drivers_list = list(set(np.array(year_drivers['year'])))


In [122]:
races

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url
0,1,2009,1,1,Australian Grand Prix,3/29/2009,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...
1,2,2009,2,2,Malaysian Grand Prix,4/5/2009,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2,3,2009,3,17,Chinese Grand Prix,4/19/2009,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3,4,2009,4,3,Bahrain Grand Prix,4/26/2009,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4,5,2009,5,4,Spanish Grand Prix,5/10/2009,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...
...,...,...,...,...,...,...,...,...
1074,1092,2022,18,22,Japanese Grand Prix,10/9/2022,05:00:00,http://en.wikipedia.org/wiki/2022_Japanese_Gra...
1075,1093,2022,19,69,United States Grand Prix,10/23/2022,19:00:00,http://en.wikipedia.org/wiki/2022_United_State...
1076,1094,2022,20,32,Mexico City Grand Prix,10/30/2022,20:00:00,http://en.wikipedia.org/wiki/2022_Mexican_Gran...
1077,1095,2022,21,18,Brazilian Grand Prix,11/13/2022,18:00:00,http://en.wikipedia.org/wiki/2022_Brazilian_Gr...


In [123]:
year_drivers

Unnamed: 0,raceId,year,driverId,forename,surname,grid
0,1,2009,8,Kimi,Räikkönen,7
1,2,2009,8,Kimi,Räikkönen,7
2,3,2009,8,Kimi,Räikkönen,8
3,4,2009,8,Kimi,Räikkönen,10
4,5,2009,8,Kimi,Räikkönen,16
...,...,...,...,...,...,...
24946,1070,2021,852,Yuki,Tsunoda,17
24947,1071,2021,852,Yuki,Tsunoda,15
24948,1072,2021,852,Yuki,Tsunoda,8
24949,1073,2021,852,Yuki,Tsunoda,8


In [124]:
drivID = year_drivers[year_drivers['year']==year]['driverId'].sample().iloc[0]

In [125]:
year_drivers[year_drivers['driverId']==drivID]

Unnamed: 0,raceId,year,driverId,forename,surname,grid
22753,849,2011,817,Daniel,Ricciardo,24
22754,850,2011,817,Daniel,Ricciardo,22
22755,851,2011,817,Daniel,Ricciardo,22
22756,852,2011,817,Daniel,Ricciardo,23
22757,853,2011,817,Daniel,Ricciardo,23
...,...,...,...,...,...,...
22959,1069,2021,817,Daniel,Ricciardo,6
22960,1070,2021,817,Daniel,Ricciardo,7
22961,1071,2021,817,Daniel,Ricciardo,11
22962,1072,2021,817,Daniel,Ricciardo,11


In [126]:
results_year_driver = results_year[results_year['driverId']==drivID]

In [127]:
drivName = results_year_driver['driverName'].iloc[0]

In [128]:
drivName

'Daniel Ricciardo'

In [129]:
results_year_driver

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,dob,nationality,driverName
23410,24972,1052,817,1,3,6,7,7,7,6.0,...,36,10,1:34.932,205.233,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23411,24991,1053,817,1,3,6,6,6,6,8.0,...,54,12,1:19.341,222.739,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23412,25014,1054,817,1,3,16,9,9,9,2.0,...,64,10,1:21.987,204.310,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23413,25031,1055,817,1,3,7,6,6,6,8.0,...,60,10,1:21.853,205.612,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23414,25057,1056,817,1,3,12,12,12,12,0.0,...,43,4,1:14.578,161.082,11,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23415,25074,1057,817,1,3,13,9,9,9,2.0,...,43,14,1:45.713,204.428,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23416,25091,1059,817,1,3,10,6,6,6,8.0,...,45,14,1:38.324,213.896,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23417,25118,1058,817,1,3,13,13,13,13,0.0,...,45,9,1:09.305,224.295,11,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23418,25132,1060,817,1,3,13,7,7,7,6.0,...,56,14,1:08.820,225.876,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23419,25150,1061,817,1,3,6,5,5,5,10.0,...,51,8,1:31.284,232.325,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo


In [130]:
results_yd = results_year_driver[['driverId',\
    'driverName','raceId','grid','position']]

In [131]:
results_yd.head()

Unnamed: 0,driverId,driverName,raceId,grid,position
23410,817,Daniel Ricciardo,1052,6,7
23411,817,Daniel Ricciardo,1053,6,6
23412,817,Daniel Ricciardo,1054,16,9
23413,817,Daniel Ricciardo,1055,7,6
23414,817,Daniel Ricciardo,1056,12,12


In [132]:
results_yd_abr = results_yd[['position','grid']].sort_values(['position','grid'])

In [133]:
results_yd_abr.head()#['position']#.iloc[11]

Unnamed: 0,position,grid
23423,1,2
23422,11,10
23420,11,11
23427,12,7
23431,12,10


In [134]:
list(results_yd_abr.iloc[0,:])+[1]

['1', 2, 1]

In [135]:
posit_list = []

curr_posit = list(results_yd_abr.iloc[0,:]) + [1]

for i in range(1,len(results_yd_abr['position'])):
    if results_yd_abr['position'].iloc[i] == results_yd_abr['position'].iloc[i-1]:
        if results_yd_abr['grid'].iloc[i] == results_yd_abr['grid'].iloc[i-1]:
            curr_posit[2] += 1
        else:
            posit_list.append(curr_posit)
            curr_posit = list(results_yd_abr.iloc[i,:]) + [1]
    else:
        posit_list.append(curr_posit)
        curr_posit = list(results_yd_abr.iloc[i,:]) + [1]

posit_list.append(curr_posit)


In [136]:
curr_posit

['DNF', 11, 1]

In [137]:
posit_frame = pd.DataFrame(posit_list,columns=['position','grid','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [138]:
labels_line = list(posit_frame['grid'])+list(posit_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(posit_frame['weight'])

In [139]:
labels_uni_line

[2,
 10,
 11,
 7,
 12,
 14,
 13,
 20,
 4,
 5,
 6,
 16,
 '1',
 '11',
 '12',
 '13',
 '4',
 '5',
 '6',
 '7',
 '9',
 'DNF']

In [140]:
labels_final[:len(labels_final)//2]

[0, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, 2, 10, 3, 1, 10, 6, 6, 11, 2]

In [141]:
print(labels_final[:len(labels_final)//2]) # indices correspond to labels, eg A1, A2, A1, B1, .
print(labels_final[len(labels_final)//2:])
print(weights_final)

[0, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, 2, 10, 3, 1, 10, 6, 6, 11, 2]
[12, 13, 13, 14, 14, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 19, 19, 20, 20, 21]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [142]:
colorstr = 'aliceblue, antiquewhite, aqua, aquamarine, azure, beige, bisque, black, blanchedalmond, blue, blueviolet, brown, burlywood, cadetblue, chartreuse, chocolate, coral, cornflowerblue, cornsilk, crimson, cyan, darkblue, darkcyan, darkgoldenrod, darkgray, darkgrey, darkgreen, darkkhaki, darkmagenta, darkolivegreen, darkorange, darkorchid, darkred, darksalmon, darkseagreen, darkslateblue, darkslategray, darkslategrey, darkturquoise, darkviolet, deeppink, deepskyblue, dimgray, dimgrey, dodgerblue, firebrick, floralwhite, forestgreen, fuchsia, gainsboro, ghostwhite, gold, goldenrod, gray, grey, green, greenyellow, honeydew, indianred, indigo, ivory, lavender, lavenderblush, lemonchiffon, lightblue, lightcoral, lightcyan, lightgoldenrodyellow, lightgray, lightgrey, lightgreen, lightpink, lightsalmon, lightseagreen, lightskyblue, lightslategray, lightslategrey, lightsteelblue, lightyellow, lime, limegreen, linen, magenta, maroon, mediumaquamarine, mediumblue, mediumorchid, mediumpurple, mediumseagreen, mediumslateblue, mediumspringgreen, mediumturquoise, mediumvioletred, midnightblue, mintcream, mistyrose, moccasin, navy, oldlace, olive, olivedrab, orange, orangered, orchid, palegoldenrod, palegreen, paleturquoise, palevioletred, papayawhip, peachpuff, peru, pink, plum, powderblue, purple, red, rosybrown, royalblue, rebeccapurple, saddlebrown, salmon, sandybrown, seagreen, seashell, sienna, silver, skyblue, slateblue, slategray, slategrey, snow, springgreen, steelblue, tan, teal, thistle, tomato, turquoise, violet, wheat, white, whitesmoke, yellow, yellowgreen'

In [143]:
colorbiglist = colorstr.split(', ')

In [144]:


random.seed(47)

colorlist=random.sample(colorbiglist,k=len(labels_uni_line))

In [145]:
colorlist

['mediumspringgreen',
 'coral',
 'peru',
 'rosybrown',
 'mediumpurple',
 'lightcoral',
 'springgreen',
 'oldlace',
 'olivedrab',
 'blueviolet',
 'palevioletred',
 'bisque',
 'darkgreen',
 'aliceblue',
 'lightgoldenrodyellow',
 'seashell',
 'aqua',
 'royalblue',
 'palegoldenrod',
 'red',
 'deeppink',
 'lightsalmon']

In [146]:
# override gray link colors with 'source' colors
opacity = 0.2

colorlinks = [colorlist[src].replace("0.8", str(opacity))
                                    for src in labels_final[:len(labels_final)//2]]

In [199]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Starting positions and placings of driver "\
   + drivName + " in "+str(year), font_size=10)
fig.show()

#### Year, driver and start

In [148]:
pstn = year_drivers[(year_drivers['year']==year) & \
    (year_drivers['driverId']==drivID)]['grid'].sample().iloc[0]

In [149]:
results_year_driver_pstn = results_year_driver[results_year_driver['grid']==pstn]

In [150]:
results_year_driver_pstn

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,dob,nationality,driverName
23415,25074,1057,817,1,3,13,9,9,9,2.0,...,43,14,1:45.713,204.428,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23417,25118,1058,817,1,3,13,13,13,13,0.0,...,45,9,1:09.305,224.295,11,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo
23418,25132,1060,817,1,3,13,7,7,7,6.0,...,56,14,1:08.820,225.876,1,ricciardo,RIC,1989-07-01,Australian,Daniel Ricciardo


In [190]:
results_ydp = results_year_driver_pstn[['driverId',\
    'driverName','raceId','grid','position']]
results_ydp_abr = results_ydp['position'].sort_values()

In [191]:
results_ydp_abr.iloc[0]

'13'

In [188]:
all_list = []

curr_end_p = [results_ydp_abr.iloc[0], 1]

for i in range(1,len(results_ydp_abr)):
    if results_ydp_abr.iloc[i] == results_ydp_abr.iloc[i-1]:
        curr_end_p[1] += 1
    else:
        all_list.append(curr_end_p)
        curr_end_p = [results_ydp_abr.iloc[i], 1]


all_list.append(curr_end_p)


In [193]:
all_list

[['13', 1], ['7', 1], ['9', 1]]

In [194]:
all_frame = pd.DataFrame(all_list,columns=['position','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [None]:
labels_line = list(all_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(all_frame['weight'])

In [197]:
labels_final

[0,
 1,
 2,
 3,
 1,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 2,
 10,
 3,
 1,
 10,
 6,
 6,
 11,
 2,
 12,
 13,
 13,
 14,
 14,
 14,
 14,
 15,
 15,
 16,
 16,
 17,
 17,
 18,
 18,
 18,
 19,
 19,
 20,
 20,
 21]

In [198]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = len(labels_final)*[drivID], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final,
      value = weights_final
  ))])

fig.update_layout(title_text="Placings in "+str(year)+\
  "of driver " + str(data)
  " starting at position "+str(pstn), font_size=10)
fig.show()

#### Year and position

In [None]:
results_year_pstn = results_year[results_year['grid']==pstn]

In [None]:
results_yp = results_year_pstn[['driverId',\
    'driverName','raceId','grid','position','code','dob','nationality']]

In [None]:
results_yp_abr = results_yp[['position','driverName']].sort_values(['position','driverName'])

In [None]:
driver_list = []

curr_driver = list(results_yp_abr.iloc[0,:]) + [1]

for i in range(1,len(results_yp_abr['position'])):
    if results_yp_abr['position'].iloc[i] == results_yp_abr['position'].iloc[i-1]:
        if results_yp_abr['driverName'].iloc[i] == results_yp_abr['driverName'].iloc[i-1]:
            curr_driver[2] += 1
        else:
            driver_list.append(curr_driver)
            curr_driver = list(results_yp_abr.iloc[i,:]) + [1]
    else:
        driver_list.append(curr_driver)
        curr_driver = list(results_yp_abr.iloc[i,:]) + [1]

driver_list.append(curr_driver)

In [None]:
driver_list

[['12', 'Kimi Räikkönen', 1],
 ['13', 'Antonio Giovinazzi', 1],
 ['13', 'Yuki Tsunoda', 1],
 ['15', 'Sebastian Vettel', 1],
 ['16', 'Nicholas Latifi', 1],
 ['16', 'Sergio Pérez', 1],
 ['18', 'Kimi Räikkönen', 1],
 ['19', 'Sergio Pérez', 1],
 ['5', 'Sergio Pérez', 1],
 ['8', 'Sergio Pérez', 1],
 ['DNF', 'Pierre Gasly', 1],
 ['DNF', 'Yuki Tsunoda', 1]]

In [None]:
driver_frame = pd.DataFrame(driver_list,columns=['position','name','weight'])\
    .sort_values(['position','weight'],ascending=[True,False])

In [None]:
labels_line = list(driver_frame['name'])+list(driver_frame['position'])
labels_uni_line = list(dict.fromkeys(labels_line))
labels_final = [labels_uni_line.index(x) for x in labels_line]
weights_final = list(driver_frame['weight'])

In [None]:
print(labels_final[:len(labels_final)//2]) # indices correspond to labels, eg A1, A2, A1, B1, .
print(labels_final[len(labels_final)//2:])
print(weights_final)

[0, 1, 2, 3, 4, 5, 0, 5, 5, 5, 6, 2]
[7, 8, 8, 9, 10, 10, 11, 12, 13, 14, 15, 15]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [None]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels_uni_line
    ),
    link = dict(
      source = labels_final[:len(labels_final)//2], # indices correspond to labels, eg A1, A2, A1, B1, ...
      target = labels_final[len(labels_final)//2:],
      value = weights_final
  ))])

fig.update_layout(title_text="Placings in "+str(year)+\
  " starting at position "+str(pstn), font_size=10)
fig.show()

#### Year, start and driver

In [None]:
results['grid'].replace(to_replace='Pit',value=0,inplace=True)

In [None]:
np.sort(results['grid'].unique())[1:]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
      dtype=int64)

#### Sankey in Dash

In [None]:
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go
import json, urllib

app = Dash(__name__)

app.layout = html.Div([
    html.H4('Supply chain of the energy production'),
    dcc.Graph(id="graph"),
    html.P("Opacity"),
    dcc.Slider(id='slider', min=0, max=1, 
               value=0.5, step=0.1)
])

@app.callback(
    Output("graph", "figure"), 
    Input("slider", "value"))
def display_sankey(opacity):
    url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
    response = urllib.request.urlopen(url)
    data = json.loads(response.read()) # replace with your own data source

    node = data['data'][0]['node']
    node['color'] = [
        f'rgba(255,0,255,{opacity})' 
        if c == "magenta" else c.replace('0.8', str(opacity)) 
        for c in node['color']]

    link = data['data'][0]['link']
    link['color'] = [
        node['color'][src] for src in link['source']]

    fig = go.Figure(go.Sankey(link=link, node=node))
    fig.update_layout(font_size=10)
    return fig

app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.



### Tab code

In [None]:
from dash import Dash, dcc, html

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    dcc.Tabs([
        dcc.Tab(label='Tab one', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [4, 1, 2],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [2, 4, 5],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
        dcc.Tab(label='Tab two', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [1, 4, 1],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [1, 2, 3],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
        dcc.Tab(label='Tab three', children=[
            dcc.Graph(
                figure={
                    'data': [
                        {'x': [1, 2, 3], 'y': [2, 4, 3],
                            'type': 'bar', 'name': 'SF'},
                        {'x': [1, 2, 3], 'y': [5, 4, 3],
                         'type': 'bar', 'name': u'Montréal'},
                    ]
                }
            )
        ]),
    ])
])

if __name__ == '__main__':
    app.run_server(debug=True)