In [3]:
# Installing Dash

pip install dash

SyntaxError: invalid syntax (1207338341.py, line 3)

In [4]:
# Importing all the necessary libraries for this project 

from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px
import pandas as pd
import numpy as np
from dash import jupyter_dash
jupyter_dash.default_mode="external"

In [5]:
# Creating a new dataframe from the the csv file stored in the GitHub repository

df = pd.read_csv('https://raw.githubusercontent.com/Nikhil-1421/Visualization-Interactive-Dashboard/main/output.csv?token=GHSAT0AAAAAACNIINFAZNULWRKMW4OAHZXEZNSXDDQ')

In [6]:
# Getting a glimpse at the dataframe 

df.head()

Unnamed: 0,position,q1,q2,q3,year,Grand Prix
0,1,1:26.572,1:25.187,1:26.714,2008,Australian Grand Prix
1,4,1:35.392,1:34.627,1:36.709,2008,Malaysian Grand Prix
2,3,1:32.750,1:31.922,1:33.292,2008,Bahrain Grand Prix
3,5,1:21.366,1:20.825,1:22.096,2008,Spanish Grand Prix
4,3,1:26.192,1:26.477,1:27.923,2008,Turkish Grand Prix


In [7]:
# Checking what kinds of data types are within each column 

df.dtypes

position       int64
q1            object
q2            object
q3            object
year           int64
Grand Prix    object
dtype: object

In [8]:
# Replacing any missing values (stored as '\N') with 0:00.000 format

NTC = ['q1','q2','q3']
df[NTC] = df[NTC].replace(r'\N','0:00.000')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
df[NTC]

Unnamed: 0,q1,q2,q3
0,1:26.572,1:25.187,1:26.714
1,1:35.392,1:34.627,1:36.709
2,1:32.750,1:31.922,1:33.292
3,1:21.366,1:20.825,1:22.096
4,1:26.192,1:26.477,1:27.923
5,1:15.582,1:15.322,1:15.839
6,1:16.909,1:17.034,1:17.886
7,1:15.634,1:15.293,1:16.693
8,1:20.288,1:19.537,1:21.835
9,1:15.218,1:14.603,1:15.666


In [9]:
# Converting the 'year' column to numeric type and then sorting the dataframe to display by year

df['year'] = pd.to_numeric(df['year'])
df = df.sort_values(by='year')

In [10]:
# Writing a function that will take our object type lap time values and store them as float values with 3 decimal places, this will help to plot the lap times per qualifying session

def convert_time_columns_to_float(df, time_columns):
    for col in time_columns:
        df[col] = df[col].apply(lambda x: float(x.split(':')[0])*60 + float(x.split(':')[1]) + float('0.' + x.split('.')[1]) if pd.notnull(x) else x)
    return df

In [11]:
# Running the function and viewing the output  

convert_time_columns_to_float(df,NTC)

Unnamed: 0,position,q1,q2,q3,year,Grand Prix
21,4,81.24,81.426,82.57,2007,Spanish Grand Prix
20,2,93.16,92.504,93.87,2007,Bahrain Grand Prix
22,2,76.37,75.958,76.81,2007,Monaco Grand Prix
23,1,77.152,75.972,76.414,2007,Canadian Grand Prix
24,1,73.126,72.13,72.662,2007,United States Grand Prix
25,2,75.61,75.59,75.208,2007,French Grand Prix
26,1,80.77,79.8,80.994,2007,British Grand Prix
19,4,95.056,95.3,96.09,2007,Malaysian Grand Prix
27,10,92.174,91.37,94.666,2007,European Grand Prix
29,2,88.026,87.872,87.746,2007,Turkish Grand Prix


In [12]:
# Creatinga function that will output the laptimes in the standard minutes:seconds.milliseconds format

def time2(time_in_seconds):
        time_in_seconds = float(time_in_seconds)
        minutes = int(time_in_seconds // 60)
        seconds = int(time_in_seconds % 60)
        milliseconds = int((time_in_seconds - int(time_in_seconds)) * 1000)
        formatted_time = f'{minutes:02d}:{seconds:02d}.{milliseconds:03d}'     
        return formatted_time

In [13]:
# Creating the dashboard 
app = Dash(__name__)

#Using a function to serve_layout so that the dashboard will continuously update 
def serve_layout():
    return html.Div([
        html.H1(children='Lewis Hamilton Qualifying Performances',
               style={
                   'textAlign':'center'
               }),
        html.H3('Grand Prix',
               style={
                   'textAlign':'center'
               }),
        dcc.Dropdown(options=[
            {'label':gp, 'value': gp} for gp in df['Grand Prix'].unique()
        ],
                    value=df['Grand Prix'].unique()[0],
                    id='gpd'
                    ),
        html.H3('Year',
               style={
                   'textAlign':'center'
               }),
        dcc.Slider(
        min=df['year'].min(),
        max=df['year'].max(),
        step=1,
        value=df['year'].min(),
        id='year-slider',
        marks={int(year): {'label': str(year)} for year in df['year'].unique()}
        ),
        html.Div(id='scatter-plot'),
        html.H3(id='position-info',
                style={
                    'textAlign':'center'
                })
    ])

# Creating the callback that will update the scatter plot as different dropdown items and slider items are selected
@app.callback([
    Output('scatter-plot','children'),
    [Input('gpd','value'),
    Input('year-slider','value')]
])

# Writing a function to update the scatter plot based on the selected inputs 
def usp(selected_gp,selected_yr):
    fdf=df[(df['Grand Prix'] == selected_gp) & (df['year'] == selected_yr)]
    mfdf=pd.melt(fdf, id_vars=['Grand Prix','year'], value_vars=['q1','q2','q3'],
                var_name='Lap Type', value_name='Lap Time')

    fig=px.scatter(mfdf,
                  x='Lap Type',
                  y='Lap Time',
                  labels={
                      'Lap Type':'Qualifying Session',
                      'Lap Time':'Lap Time in Seconds'
                  },
                  hover_data={
                      'Lap Type': False,
                      'Lap Time': False
                  })
    
    fig.update_xaxes(tickvals=['q1', 'q2', 'q3'], ticktext=['Q1', 'Q2', 'Q3'])

    mfdf['Formatted Time'] = mfdf['Lap Time'].apply(time2)
    
    fig.update_traces(text=mfdf['Formatted Time'], mode='markers+text', textposition='top center')
    
    if mfdf.empty:
        return [html.H1('Looks like Lewis wasnt here!', style={'textAlign':'center'})]
    else:
        return [dcc.Graph(figure=fig)]
    
# Creating the callback that will output the result of the displayed scatter plot 
@app.callback([
    Output('position-info','children'),
    [Input('gpd','value'),
    Input('year-slider','value')]
])

# Writing a function that will update based on the finishing position, and will update based on whether or not a qualifying session was achieved by Lewis
def uppos(selected_gp,selected_yr):
    if df[(df['Grand Prix'] == selected_gp) & (df['year'] == selected_yr)].empty:
        return [f'Either the {selected_gp} did not take place in {selected_yr} or Lewis did not participate']
    
    else:
        position = df[(df['Grand Prix'] == selected_gp) & (df['year'] == selected_yr)]['position'].values[0]
        q1p = [16,17,18,19,20]
        q2p = [11,12,13,14,15]
        q3p = [1,2,3,4,5,6,7,8,9,10]
        if position in q1p:
            return [f'Lewis failed to reach Q2, Qualifying P{position} at the {selected_yr} {selected_gp}']
        else:
            if position in q2p:
                return [f'Lewis failed to reach Q3, Qualifying P{position} at the {selected_yr} {selected_gp}']
            else:
                if position in q3p:
                    return [f'Qualified P{position} at the {selected_yr} {selected_gp}']
                
# Using the serve_layout from before to initialize the app         
app.layout = serve_layout

if __name__ == '__main__':
    app.run(debug=True, port=8060)

Dash app running on http://127.0.0.1:8060/
