# Visualization of data 2005 - 2020

This document contains some visualizations of the unfiltered data from 2005 to 2020. The visualized data is mainly bicycle data, or bicycle data compared with other vehicles.

We used the Plotly library to produce more interactive graphics.

### Import libraries & packages

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_theme()

In [3]:
import plotly.express as px
import plotly.graph_objects as go
# execute "!pip install --upgrade plotly"  if you encounter a problem with legend display

### Import & prepare Data

In [4]:
# Note: the newer 'donnes_19_20_fusionnes.csv' export has one more column than the older ones: 'victim_id'.
# The 'an', 'mois', 'jour' and 'hrmn' columns are combined into a single parsed 'date' column,
# and keep_date_col=True leaves the four source columns in the frame as well.
uvcl = pd.read_csv(
    'data/merged_data_2005_2020.csv',
    sep=';',
    index_col=1,
    keep_date_col=True,
    parse_dates={'date': ['an', 'mois', 'jour', 'hrmn']},
)

  exec(code_obj, self.user_global_ns, self.user_ns)
  mask |= (ar1 == a)


In [35]:
# Display the column labels of the loaded table.
uvcl.columns


Index(['date', 'Num_Acc', 'place', 'catu', 'grav', 'sexe', 'trajet', 'secu1',
       'locp', 'actp', 'etatp', 'an_nais', 'num_veh', 'an', 'mois', 'jour',
       'hrmn', 'lum', 'agg', 'int', 'atm', 'col', 'com', 'adr', 'lat', 'long',
       'dep', 'catr', 'voie', 'v1', 'v2', 'circ', 'nbv', 'pr', 'pr1', 'vosp',
       'prof', 'plan', 'lartpc', 'larrout', 'surf', 'infra', 'situ', 'senc',
       'catv', 'occutc', 'obs', 'obsm', 'choc', 'manv', 'motor'],
      dtype='object')

In [5]:
# Every subset below keeps only drivers and passengers (catu 1 or 2).
occupant_mask = uvcl['catu'].isin([1, 2])

# Bicycle crash victims (catv 1).
bicyle_crash = uvcl[(uvcl['catv'] == 1) & occupant_mask]

# Motorcycle crash victims (catv 2 and 30 to 34).
moto_crash = uvcl[uvcl['catv'].isin([2, 30, 31, 32, 33, 34]) & occupant_mask]

# Light vehicle crash victims (catv 7).
car_crash = uvcl[(uvcl['catv'] == 7) & occupant_mask]

# Stack the three subsets row-wise into the working DataFrame.
df = pd.concat([bicyle_crash, moto_crash, car_crash], axis=0)

In [6]:
  # Recode the numeric category codes of `df` into readable labels.
  # Each column is re-assigned explicitly. The previous
  # `df.col.replace(..., inplace=True)` form is a chained in-place call on a
  # temporary Series, which stops updating `df` once pandas Copy-on-Write
  # is active (the default from pandas 3.0).

# Vehicle category
df['catv'] = df['catv'].astype('object').replace({
    1: 'bicycle',
    7: 'light car',
    2: 'motocycle',
    30: 'motocycle',
    31: 'motocycle',
    32: 'motocycle',
    33: 'motocycle',
    34: 'motocycle',
})

# Crash severity
df['grav'] = df['grav'].astype('object').replace({
    1: 'unharmed',
    2: 'killed',
    3: 'hospitalized',
    4: 'slight injuries',
})

# Mobile obstacles (codes not listed here are left untouched)
df['obsm'] = df['obsm'].astype('object').replace({
    0: 'none',
    1: 'pedestrian',
    2: 'vehicle',
    4: 'rail vehicle',
    5: 'domestic animal',
    6: 'wild animal',
    9: 'other',
})

# Obstacle category hit by the victim's vehicle:
#   obs == 0 and obsm == 'none' -> 'no obstacle'
#   obs == 0 and any other obsm -> 'mobile obstacle'
#   any other obs value         -> 'fixed obstacle'
# NOTE(review): the fallbacks are broad. Any obs value other than 0 (including
# missing values, if present) counts as a fixed obstacle, and any unrecoded
# obsm code counts as a mobile obstacle. Confirm this is intended.
df['obs'] = np.where(
    df['obs'] == 0,
    np.where(df['obsm'] == 'none', 'no obstacle', 'mobile obstacle'),
    'fixed obstacle',
)

# Sex
df['sexe'] = df['sexe'].astype('object').replace({1: 'Male', 2: 'Female'})

# Replace the birth year with the victim's age in the crash year.
# NOTE(review): the formula is `crash year - birth year + 1`. Confirm the
# `+ 1` is intended, since it yields one more than the plain year difference.
df['an_nais'] = pd.to_datetime(df['date']).dt.year - df['an_nais'] + 1
df.rename(columns={'an_nais': 'age'}, inplace=True)


In [7]:
# Derive hour, weekday and month columns from the crash timestamp.
# The timestamp is parsed once and reused for all three columns.
crash_time = pd.to_datetime(df['date'])

df['hour'] = crash_time.dt.hour

# dt.weekday numbers the days 0..6 starting on Monday.
# The column is assigned directly rather than through
# `df.weekday.replace(..., inplace=True)`, a chained in-place call that stops
# updating `df` once pandas Copy-on-Write is active.
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['weekday'] = crash_time.dt.weekday.replace(dict(enumerate(weekday_names))).astype('str')

# dt.month numbers the months 1..12.
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df['month'] = crash_time.dt.month.replace(dict(enumerate(month_names, start=1))).astype('str')

### Build Graphics

In [33]:
# Bicycle crash victims per severity level, labelled with each level's share.
# size() counts rows per group. The previous count() on 'sexe' counted only
# the non-null 'sexe' values, so the totals depended on an unrelated column.
dp = df[df.catv == 'bicycle'].groupby('grav').size().reset_index(name='Count')

# The total is computed once instead of once per row.
total_victims = dp['Count'].sum()
dp['Percentage'] = dp['Count'].apply(lambda n: str(round(n / total_victims * 100, 2)) + " %")

fig = px.bar(
    dp, x="grav", y="Count", text="Percentage", template="plotly_dark", color='grav',
    color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'grav': 'Severity'},
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
)

fig.update_xaxes(type='category')

fig.update_layout(width=1000, height=500, xaxis_title_text='Severity',
    title='Bicycle crashing severity 2005-2020', title_x=.5)

fig.update_traces(textposition='inside', opacity=0.8)

fig.show()
fig.write_html("streamlit_app/plots/bicycle_acidents_severity.html")

In [57]:
# Victims per severity level, grouped by vehicle type.
# size() counts rows per group. The previous count() on 'sexe' counted only
# the non-null 'sexe' values.
dx = df.groupby(['catv', 'grav']).size().reset_index(name='Count')

# The placeholder px title was dropped because update_layout sets the real one below.
fig = px.bar(
    dx, x='grav', y='Count', color='catv', barmode='group',
    opacity=.8, color_discrete_sequence=px.colors.qualitative.T10,
    labels={'catv': 'Vehicle type', 'grav': 'Severity'},
    template="plotly_dark",
)
fig.update_xaxes(type='category', categoryorder='total ascending')
fig.update_layout(width=1100, height=500, xaxis_title_text='Casualties severity', yaxis_title_text='Count',
    title='Accidents injuries 2005-2020', title_x=.5,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.show()
fig.write_html("streamlit_app/plots/acidents_injuries.html")

In [14]:
# Bicycle crash victims per mobile obstacle, split by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['grav', 'obsm']).size().reset_index(name='Count')
# Drop the rows whose obsm is still the raw code -1.
dx = dx[dx.obsm != -1]

fig = px.bar(
    dx, x='obsm', y='Count', color='grav', barmode='group',
    opacity=.8, color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'obsm': 'Mobile obstacle', 'grav': 'Severity'},
    template="plotly_dark",
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
)
fig.update_xaxes(type='category', categoryorder='total ascending')
fig.update_layout(width=1100, height=500, xaxis_title_text='Mobile obstacle', yaxis_title_text='Count',
    title='Categories of mobile obstacle causing bicycle accidents in 2005-2020', title_x=.5,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_mobile_obstacles.html")

In [43]:
# Count bicycle crash victims per raw mobile-obstacle code with a histogram.
# The edges skip 4, 8 and 9, so `bins[1:] - 1` gives 0, 1, 2, 4, 5, 6, 9,
# the same values used as tick positions below.
bin_edges = [0, 1, 2, 3, 5, 6, 7, 10]
counts, bins = np.histogram(bicyle_crash["obsm"], bins=bin_edges)
dp = pd.DataFrame({"Bins": bins[1:] - 1, "Counts": counts})

# Share of each code, formatted as text for the bar labels.
grand_total = dp['Counts'].sum()
dp['Percentage'] = dp.Counts.apply(lambda c: str(round(c / grand_total * 100, 2)) + " %")

fig = px.bar(dp, x="Bins", y="Counts", text="Percentage", template="plotly_dark")

fig.update_xaxes(type='category', categoryorder='total ascending')

# Replace the numeric codes on the axis with readable obstacle names.
obstacle_ticks = dict(
    tickmode='array',
    tickvals=[0, 1, 2, 4, 5, 6, 9],
    ticktext=['none', 'pedestrian', 'vehicule', 'rail vehicule', 'domestic animal', 'wild animal', 'other'],
)
fig.update_layout(
    width=1000,
    height=500,
    xaxis_title_text='Mobile obstacles',
    title='Bicycle crash victims 2005-2020 by mobile obstacle hitted',
    title_x=.5,
    xaxis=obstacle_ticks,
)

fig.update_traces(textposition='outside', marker_color='crimson', opacity=0.7)

fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_mobile_obstacles2.html")

In [44]:
# Show how many rows carry each value of the recoded 'obsm' column.
df.obsm.value_counts()

vehicle            1279742
none                443857
pedestrian          156386
other                24773
wild animal           4175
domestic animal       1904
rail vehicle          1849
-1                     837
Name: obsm, dtype: int64

In [45]:
# Bicycle crash victims per obstacle category, split by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['grav', 'obs']).size().reset_index(name='Count')

fig = px.bar(
    dx, x='obs', y='Count', color='grav', barmode='group',
    opacity=.8, color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'obs': 'Obstacle category', 'grav': 'Severity'},
    template="plotly_dark",
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
)
fig.update_xaxes(type='category', categoryorder='total ascending')
fig.update_layout(width=1100, height=500, xaxis_title_text='Obstacle category', yaxis_title_text='Count',
    title='Obstacles hitted during bicycle accidents 2005-2020', title_x=.5, title_y=.85,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_obstacles.html")

In [52]:
# Hourly distribution of bicycle crash victims as a polar bar chart, coloured by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['hour', 'grav']).size().reset_index(name='Count')

# Cast to int before formatting so a float-typed hour column cannot produce
# labels such as '8.0:00'. The group keys hold no missing values after groupby.
dx['hour'] = dx['hour'].astype(int).astype('str') + ':00'

fig = px.bar_polar(
    dx, r="Count", theta="hour", template="plotly_dark", color='grav',
    color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'grav': 'Severity', 'hour': 'Hour'},
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
)

fig.update_layout(
    width=1000, height=500,
    title='Bicycle accidents hourly distribution 2005-2020', title_x=0.5, title_y=0.01,
    font_size=14,
    legend_font_size=13,
    polar_radialaxis=dict(angle=0, tickangle=-45, tickfont_size=10),
    polar_angularaxis_rotation=90,
    legend=dict(yanchor="top", y=0.9, xanchor="left", x=0.01),
)
fig.update_traces(opacity=0.8)
fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_by_hour.html")

In [53]:
# Bicycle crash victims per weekday, stacked by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['weekday', 'grav']).size().reset_index(name='Count')

fig = px.bar(
    dx, x="weekday", y="Count", text="Count", color="grav",
    template="plotly_dark", color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'weekday': 'Week day', 'grav': 'Severity'},
    category_orders={
        "weekday": ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        "grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"],
    },
)

fig.update_layout(width=1000, height=500, xaxis_title_text='Week days',
    title='Bicycle accidents victims by week days 2005-2020',
    title_x=.5)
fig.update_traces(textposition='inside', opacity=0.8)
fig.show()

fig.write_html("streamlit_app/plots/bike_acidents_by_weekday.html")

In [48]:
# Bicycle crash victims per month, stacked by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['grav', 'month']).size().reset_index(name='Count')

fig = px.bar(
    dx, x="month", y="Count", text="Count", color="grav",
    template="plotly_dark", color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'month': 'Months', 'grav': 'Severity'},
    category_orders={
        "month": ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        "grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"],
    },
)

fig.update_layout(width=1000, height=500, xaxis_title_text='Months',
    title='Monthly bicycle accidents victims 2005-2020',
    title_x=.5)
fig.update_traces(textposition='inside', opacity=0.8)
fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_by_month.html")

In [49]:
# Bicycle crash victims per gender, grouped by severity.
# size() counts rows per group. The previous count() on 'infra' counted only
# the non-null 'infra' values, so rows lacking that unrelated field were lost.
dx = df[df.catv == 'bicycle'].groupby(['grav', 'sexe']).size().reset_index(name='Count')

fig = px.bar(
    dx, x="sexe", y="Count", text="Count", color="grav",
    template="plotly_dark", color_discrete_sequence=px.colors.sequential.Sunsetdark,
    labels={'grav': 'Severity', 'sexe': 'Gender'}, barmode='group',
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
)

fig.update_layout(width=900, height=500, xaxis_title_text='Gender',
    title='Bicycle accidents by gender 2005-2020',
    title_x=.5)
fig.update_traces(textposition='inside', opacity=0.8)
fig.show()
fig.write_html("streamlit_app/plots/bike_acidents_by_gender.html")

In [54]:
# Bicycle crash victims per age and severity: absolute counts plus, for each
# age, the share of every severity level.
# The bicycle rows are selected directly, with no copy of the whole `df` first.
dx = df[df.catv == 'bicycle'].groupby(['grav', 'age']).agg({'sexe': 'count'}).reset_index()

# transform('sum') returns the per-age total aligned on dx's own index, so the
# share can be assigned as a column. The previous groupby(...).apply(...)
# returned a result indexed by the group key on pandas 2.x, which cannot be
# assigned back to dx.
age_totals = dx.groupby('age')['sexe'].transform('sum')
dx['percentage'] = (100 * dx['sexe'] / age_totals).round(2)

# Keep ages up to 100 only.
dx = dx[dx.age <= 100]

fig1 = px.bar(
    dx, x="age", y="sexe", text="sexe", color="grav",
    template="plotly_dark",
    labels={'sexe': 'Count', 'grav': 'Severity', 'age': 'Age'},
    category_orders={"grav": ['unharmed', 'slight injuries', 'hospitalized', "killed"]},
    barmode='relative',
    color_discrete_sequence=px.colors.sequential.Sunsetdark,
)

fig1.update_layout(width=1100, height=500, xaxis_title_text='Age', yaxis_title_text='Victims count',
    title='Bicycle accidents by age 2005-2020',
    title_x=.5)
fig1.update_traces(textposition='inside', opacity=0.8, visible=True)
fig1.show()
fig1.write_html("streamlit_app/plots/bike_acidents_by_age.html")

In [55]:
# Bar chart of the 'percentage' column of `dx` per age, coloured by severity.
severity_levels = ['unharmed', 'slight injuries', 'hospitalized', "killed"]

fig2 = px.bar(
    dx,
    x="age",
    y="percentage",
    text="percentage",
    color="grav",
    template="plotly_dark",
    color_discrete_sequence=px.colors.sequential.Sunsetdark,
    category_orders={"grav": severity_levels},
    labels={'sexe': 'Count', 'grav': 'Severity', 'age': 'Age'},
)

# Keyword order is kept as written: the title is set before title_x.
fig2.update_layout(
    width=1100,
    height=500,
    xaxis_title_text='Age',
    yaxis_title_text='Victims by severity (%)',
    title='Bicycle accidents Mortality by age 2005-2020',
    title_x=.5,
)
fig2.update_traces(textposition='inside', opacity=0.8, visible=True)

fig2.show()
fig2.write_html("streamlit_app/plots/bike_acidents_mortality_by_age.html")