## Import the libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from os import listdir
from os.path import isfile, join
import glob
import re
import csv
pd.set_option('display.float_format', lambda x: '%.5f' % x)
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 75

## Load data

In [2]:
df = pd.read_csv('./dft-road-casualty-statistics-accident-1979-2020 (1).csv')
df.head()

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,accident_index,accident_year,accident_reference,location_easting_osgr,location_northing_osgr,longitude,latitude,police_force,accident_severity,number_of_vehicles,...,pedestrian_crossing_physical_facilities,light_conditions,weather_conditions,road_surface_conditions,special_conditions_at_site,carriageway_hazards,urban_or_rural_area,did_police_officer_attend_scene_of_accident,trunk_road_flag,lsoa_of_accident_location
0,197901A11AD14,1979,01A11AD14,,,,,1,3,2,...,-1,1,8,1,-1,0,-1,-1,-1,-1
1,197901A1BAW34,1979,01A1BAW34,198460.0,894000.0,,,1,3,1,...,-1,4,8,3,-1,0,-1,-1,-1,-1
2,197901A1BFD77,1979,01A1BFD77,406380.0,307000.0,,,1,3,2,...,-1,4,8,3,-1,0,-1,-1,-1,-1
3,197901A1BGC20,1979,01A1BGC20,281680.0,440000.0,,,1,3,2,...,-1,4,8,3,-1,0,-1,-1,-1,-1
4,197901A1BGF95,1979,01A1BGF95,153960.0,795000.0,,,1,2,2,...,-1,4,3,3,-1,0,-1,-1,-1,-1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8602824 entries, 0 to 8602823
Data columns (total 36 columns):
 #   Column                                       Dtype  
---  ------                                       -----  
 0   accident_index                               object 
 1   accident_year                                int64  
 2   accident_reference                           object 
 3   location_easting_osgr                        float64
 4   location_northing_osgr                       float64
 5   longitude                                    float64
 6   latitude                                     float64
 7   police_force                                 int64  
 8   accident_severity                            int64  
 9   number_of_vehicles                           int64  
 10  number_of_casualties                         int64  
 11  date                                         object 
 12  day_of_week                                  int64  
 13  time        

In [4]:
# Missing values by column
missing = df.isnull().sum().sort_values(ascending=False)
missing = missing[missing!=0]
missing

longitude                 4887402
latitude                  4887402
location_easting_osgr       11621
location_northing_osgr      11621
speed_limit                    37
time                            8
date                            8
dtype: int64

In [5]:
accident_df = pd.DataFrame(df['accident_year'].value_counts()).sort_values(by=['accident_year'], ascending=False).reset_index().rename(columns={'index':'Year','accident_year':'Accidents'})

In [6]:
accident_df = accident_df.sort_values('Year')
    

In [104]:

fig = px.line(accident_df, x='Year', y='Accidents', markers=True, 
              title='<b> Annual Number of Accidents in the UK</b>', 
              labels={'Year':'<b>Year</b>', 'Accidents':'<b>Accidents</b>'})
fig.update_layout(yaxis_range=[50000,290000], plot_bgcolor="#edf0f5",width=1200,height=600)
fig.add_shape(
      type= 'line',
      x0= 2020, x1= 2020, y0= 0, y1= 300000,
      line=dict(
      color='lightseagreen',
      dash='dot',
      width=2,
      ) 
    )
fig.add_shape(
      type= 'line',
      x0= 2008, x1= 2008, y0= 0, y1= 300000,
      line=dict(
      color='darkorange',
      dash='dot',
      width=2,
      ) 
)
fig.add_shape(
      type= 'line',
      x0= 1981, x1= 1981, y0= 0, y1= 300000,
      line=dict(
      color='darkred',
      dash='dot',
      width=2,
      ) 
)
fig.add_annotation(x=2020.5, y=100000,
            text="Coronavirus Outbreak",
            showarrow=False,
            arrowhead=0, textangle=-90) 
    
fig.add_annotation(x=2008.5, y=100000,
            text="Financial Crisis",
            showarrow=False,
            arrowhead=0, textangle=-90) 

fig.add_annotation(x=1981.5, y=100000,
            text="Deep Recession",
            showarrow=False,
            arrowhead=0, textangle=-90) 
fig.show()

In [8]:
df[['day_of_week', 'time']].isna().sum()

day_of_week    0
time           8
dtype: int64

In [9]:
df['time'] = pd.to_datetime(df['time'])

In [10]:
df.groupby(df['day_of_week']).describe()['time']

KeyError: 'time'

In [None]:
df.groupby(df['time'].dt.hour)['day_of_week'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,151364.0,4.04481,2.39906,1.0,1.0,4.0,7.0,7.0
1.0,101737.0,4.05177,2.47149,1.0,1.0,4.0,7.0,7.0
2.0,81610.0,4.0683,2.52624,1.0,1.0,4.0,7.0,7.0
3.0,50246.0,3.95707,2.47282,1.0,1.0,4.0,7.0,7.0
4.0,36388.0,3.96969,2.34022,1.0,1.0,4.0,6.0,7.0
5.0,56518.0,4.05082,2.02077,1.0,2.0,4.0,6.0,7.0
6.0,119846.0,4.07227,1.79357,1.0,3.0,4.0,6.0,7.0
7.0,334648.0,4.068,1.62778,1.0,3.0,4.0,5.0,7.0
8.0,591424.0,4.07809,1.58131,1.0,3.0,4.0,5.0,7.0
9.0,393902.0,4.13816,1.78493,1.0,3.0,4.0,6.0,7.0
