# UK Road Safety Data Set

## Part 2 - Dashboard

## Loading the data after cleaning and feature engineering

In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the cleaned accident data set from CSV into the working DataFrame.
CLEANED_DATA_CSV = 'UK Road Safety cleaned data.csv'
df = pd.read_csv(CLEANED_DATA_CSV)

In [3]:
# Show the column labels of the loaded frame.
df.columns

Index(['1st_Road_Class', '1st_Road_Number', 'Accident_Severity',
       'Carriageway_Hazards', 'Date', 'Day_of_Week',
       'Did_Police_Officer_Attend_Scene_of_Accident', 'Junction_Control',
       'Junction_Detail', 'Latitude', 'Light_Conditions', 'Longitude',
       'LSOA_of_Accident_Location', 'Number_of_Casualties',
       'Number_of_Vehicles', 'Pedestrian_Crossing-Human_Control',
       'Pedestrian_Crossing-Physical_Facilities', 'Road_Surface_Conditions',
       'Road_Type', 'Special_Conditions_at_Site', 'Speed_limit', 'Time',
       'Urban_or_Rural_Area', 'Weather_Conditions', 'Year', 'InScotland'],
      dtype='object')

In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,1st_Road_Number,Did_Police_Officer_Attend_Scene_of_Accident,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles,Pedestrian_Crossing-Human_Control,Pedestrian_Crossing-Physical_Facilities,Speed_limit,Year
count,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0,1902303.0
mean,1005.277,1.205081,52.31047,-1.23811,1.348534,1.843022,0.008487081,0.7517288,38.63659,2010.553
std,1800.42,0.4103535,1.119097,1.283559,0.8195977,0.7135347,0.1207529,1.838287,14.05026,3.77135
min,0.0,1.0,49.91294,-6.317477,1.0,1.0,0.0,0.0,0.0,2005.0
25%,0.0,1.0,51.46863,-2.130733,1.0,1.0,0.0,0.0,30.0,2007.0
50%,129.0,1.0,52.00587,-1.222729,1.0,2.0,0.0,0.0,30.0,2010.0
75%,676.0,1.0,53.30845,-0.1748175,1.0,2.0,0.0,0.0,50.0,2014.0
max,9999.0,3.0,60.58659,1.76201,93.0,67.0,2.0,8.0,70.0,2017.0


### Road accidents in the UK over a specific time period

In [5]:
import plotly.express as px
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [6]:
# Total accidents by year

In [7]:
# Convert 'Date' and 'Time' columns to datetime format.
# NOTE(review): no explicit format is given for 'Date', so pandas infers it;
# confirm the cleaned CSV stores dates unambiguously (e.g. ISO yyyy-mm-dd),
# otherwise day and month could be swapped for some rows.
df["Date"] = pd.to_datetime(df["Date"])
df["Time"] = pd.to_datetime(df["Time"], format="%H:%M:%S")

# Extract year from 'Date' column and (re)create the 'Year' column
df["Year"] = df["Date"].dt.year

# Calendar order for month labels; a plain groupby would sort them
# alphabetically (April, August, December, ...).
MONTH_ORDER = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# --- Line chart: total accidents per year --------------------------------
accidents_per_year = df.groupby("Year").size().reset_index(name="Total Accidents")
accidents_by_year_linegraph = px.line(
    accidents_per_year, x="Year", y="Total Accidents", title="Total Accidents by Year"
)

# --- Pie chart: share of each severity level -----------------------------
# Counts are aggregated in pandas first so the figure carries one row per
# severity level instead of one row per accident.
severity_counts = df.groupby("Accident_Severity").size().reset_index(name="Count")
severity_counts_pie = px.pie(
    severity_counts, names="Accident_Severity", values="Count", title="Accident Severity Counts"
)

# --- Histogram: urban vs rural, coloured by severity ---------------------
# sort=False keeps groups in order of first appearance, which is the order
# plotly express would see in the raw rows, so trace/colour order is unchanged.
area_severity_counts = (
    df.groupby(["Urban_or_Rural_Area", "Accident_Severity"], sort=False)
    .size()
    .reset_index(name="Count")
)
urban_vs_rural_histo = px.histogram(
    area_severity_counts,
    x="Urban_or_Rural_Area",
    y="Count",
    color="Accident_Severity",
    histfunc="sum",
    title="Accidents by Urban vs Rural Area",
)
# Keep the y-axis title that a row-level histogram would show.
urban_vs_rural_histo.update_yaxes(title_text="count")

# --- Bar chart: accidents per month, in calendar order -------------------
month_counts = df.groupby(df["Date"].dt.month_name()).size()
month_counts = month_counts.reindex(
    [month for month in MONTH_ORDER if month in month_counts.index]
).rename_axis("Date")
month_of_year_bargraph = px.bar(
    month_counts.reset_index(name="Count"), x="Date", y="Count", title="Accidents by Month"
)

# Create pivot table with accidents by month and day of week
df_pivot = pd.pivot_table(
    df,
    values='Accident_Severity',
    index=df['Date'].dt.month_name(),
    columns=df['Day_of_Week'],
    aggfunc='count',
)
# Put the month rows in calendar order (only months present in the data).
df_pivot = df_pivot.reindex([month for month in MONTH_ORDER if month in df_pivot.index])

# Create pivot table with accidents by hour of day and day of week.
# The hour must come from 'Time': 'Date' is parsed from a date-only column
# here, so its hour component would put every accident in a single bucket.
df_pivot_hour = pd.pivot_table(
    df,
    values='Accident_Severity',
    index=df['Time'].dt.hour.rename("Hour"),
    columns=df['Day_of_Week'],
    aggfunc='count',
)

accidents_by_hour_bargraph = px.bar(
    df_pivot_hour, barmode="group", title="Accidents by Hour of Day and Day of Week"
)




In [8]:
# Build the Dash application and describe its page.
app = dash.Dash(__name__)

# Year range selector. One mark per year present in the data; with step=None
# the handles can only rest on those marks.
year_slider = dcc.RangeSlider(
    id='year-slider',
    min=df['Year'].min(),
    max=df['Year'].max(),
    value=[2005, 2015],
    marks={str(year): str(year) for year in df['Year'].unique()},
    step=None,
)

# Heatmap drawn from the month x day-of-week pivot table.
month_day_heatmap = px.imshow(
    df_pivot,
    x=df_pivot.columns,
    y=df_pivot.index,
    title="Accidents by Month and Day of Week",
)

# (component id, initial figure) pairs in the order they appear on the page.
initial_graphs = [
    ("line-chart", accidents_by_year_linegraph),
    ("pie-chart", severity_counts_pie),
    ("histogram", urban_vs_rural_histo),
    ("month-of-year-bargraph", month_of_year_bargraph),
    ("pivot-table", month_day_heatmap),
    ("bar-chart-hour", accidents_by_hour_bargraph),
]

app.layout = html.Div(children=[
    year_slider,
    html.Br(),
    html.H1(children="UK Road Accidents Dashboard"),
    *[dcc.Graph(id=graph_id, figure=figure) for graph_id, figure in initial_graphs],
])

In [9]:
# Define callback to update graphs
@app.callback(
    [Output("line-chart", "figure"), Output("pie-chart", "figure"), Output("histogram", "figure"), Output("month-of-year-bargraph", "figure"), Output("pivot-table", "figure"), Output("bar-chart-hour", "figure")],
    [Input("year-slider", "value")]
)
def update_graphs(year_range):
    """Rebuild all six dashboard figures for the selected range of years.

    Parameters
    ----------
    year_range : sequence of two ints
        ``[first_year, last_year]`` from the year slider; both ends inclusive.

    Returns
    -------
    tuple
        Six plotly figures in the order of the callback outputs: line chart,
        pie chart, urban/rural histogram, month bar chart, month x weekday
        heatmap, hour x weekday grouped bar chart.
    """
    # Calendar order for month labels; a plain groupby sorts them alphabetically.
    month_order = [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    ]

    filtered_df = df[(df["Year"] >= year_range[0]) & (df["Year"] <= year_range[1])]

    yearly_counts = filtered_df.groupby("Year").size().reset_index(name="Total Accidents")
    updated_accidents_by_year = px.line(yearly_counts, x="Year", y="Total Accidents", title="Total Accidents by Year")

    # Aggregate before plotting: sending one row per accident to the browser
    # makes the figure payload enormous and the slider very slow to respond.
    severity_counts = filtered_df.groupby("Accident_Severity").size().reset_index(name="Count")
    updated_severity_counts = px.pie(severity_counts, names="Accident_Severity", values="Count", title="Accident Severity Counts")

    # sort=False keeps groups in order of first appearance, i.e. the order
    # plotly express would meet them in the raw rows, so colours do not change.
    area_severity_counts = (
        filtered_df.groupby(["Urban_or_Rural_Area", "Accident_Severity"], sort=False)
        .size()
        .reset_index(name="Count")
    )
    updated_urban_vs_rural = px.histogram(
        area_severity_counts,
        x="Urban_or_Rural_Area",
        y="Count",
        color="Accident_Severity",
        histfunc="sum",
        title="Accidents by Urban vs Rural Area",
    )
    # Keep the y-axis title that a row-level histogram would show.
    updated_urban_vs_rural.update_yaxes(title_text="count")

    # Month totals, rows reordered into calendar order (present months only).
    month_counts = filtered_df.groupby(filtered_df["Date"].dt.month_name()).size()
    month_counts = month_counts.reindex(
        [month for month in month_order if month in month_counts.index]
    ).rename_axis("Date")
    updated_month_of_year = px.bar(month_counts.reset_index(name="Count"), x="Date", y="Count", title="Accidents by Month")

    df_pivot = pd.pivot_table(filtered_df, values='Accident_Severity', index=filtered_df['Date'].dt.month_name(),
                              columns=filtered_df['Day_of_Week'], aggfunc='count')
    df_pivot = df_pivot.reindex([month for month in month_order if month in df_pivot.index])

    # Hour of day comes from 'Time'; 'Date' is built from a date-only column
    # in this notebook, so its hour would collapse everything into one bucket.
    df_pivot_hour = pd.pivot_table(filtered_df, values='Accident_Severity',
                                   index=filtered_df['Time'].dt.hour.rename("Hour"),
                                   columns=filtered_df['Day_of_Week'], aggfunc='count')

    updated_accidents_by_month_day = px.imshow(df_pivot, x=df_pivot.columns, y=df_pivot.index,
                                                title="Accidents by Month and Day of Week")
    updated_accidents_by_hour = px.bar(df_pivot_hour, barmode="group", title="Accidents by Hour of Day and Day of Week")

    return updated_accidents_by_year, updated_severity_counts, updated_urban_vs_rural, updated_month_of_year, updated_accidents_by_month_day, updated_accidents_by_hour


In [10]:
# Start the Dash server; this call blocks until the server is stopped.
# NOTE(review): newer Dash releases prefer `app.run()` over `app.run_server()`;
# confirm against the installed Dash version before changing.
if __name__ == "__main__":
    app.run_server()

# NOTE: after moving the slider the charts take a while to refresh
# (the author observed a minimum of about 2 minutes).

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [25/Apr/2023 05:47:03] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:04] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:07] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:07] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:07] "GET /_dash-component-suites/dash/dcc/async-slider.js HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:07] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:45] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:47:48] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:48:13] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2023 05:49:17] "POST /_dash-update-component HTTP/1.1" 200 -


This dashboard summarizes information about road accidents in the UK. I have created six graphs to help visualize the data. The dashboard includes six charts that display different aspects of the data:
- A line chart showing the total number of accidents by year.
- A pie chart showing the distribution of accidents by severity level.
- A histogram showing the distribution of accidents by urban or rural area, with different severity levels represented by different colors (blue – Slight, red – Serious, green – Fatal).
- A bar chart showing the number of accidents by month of the year.
- A heatmap of accidents by month and day of the week.
- A grouped bar chart of accidents by hour of the day and day of the week. 