In [2]:
import pandas as pd
import numpy as np
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from google.colab import drive


# Remarques générales 

Il faudrait peut-être ajouter le trafic routier moyen à Chicago selon les heures, les semaines et les mois pour le comparer au nombre d'accidents durant l'année : https://www.chicago.gov/city/en/dataset/traffic_counts.html

Il faudrait comparer le nombre d'accidents par temps de pluie à la pluviométrie sur une année pour voir si la pluie augmente considérablement le nombre d'accidents : https://fr.weatherspark.com/y/14091/M%C3%A9t%C3%A9o-habituelle-%C3%A0-Chicago-Illinois-%C3%89tats-Unis 

In [3]:
# Mount Google Drive under /content/drive; the CSV files loaded later are read
# from that location. force_remount=True is passed so the mount is redone even
# when the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
# Load the two crash CSV files: `data` from the "cleaned" file and `data2` from
# the full "Traffic_Crashes_-_Crashes Chicago" file.
# on_bad_lines='skip' drops malformed rows (requires pandas >= 1.3). The former
# error_bad_lines='ignore' handed a truthy string to a boolean flag, which means
# "raise on bad lines", and that argument no longer exists in pandas >= 2.0.
data = pd.read_csv('/content/drive/My Drive/DaSci/chicago_crashes_cleaned_32.csv', delimiter=',', encoding='iso-8859-1', on_bad_lines='skip')
data2 = pd.read_csv('/content/drive/My Drive/DaSci/Traffic_Crashes_-_Crashes Chicago.csv', delimiter=',', encoding='iso-8859-1', on_bad_lines='skip')


In [6]:
# Preview the first rows of `data` to check the available columns.
data.head()

Unnamed: 0.1,Unnamed: 0,CRASH_DATE,POSTED_SPEED_LIMIT,TRAFFIC_CONTROL_DEVICE,DEVICE_CONDITION,WEATHER_CONDITION,LIGHTING_CONDITION,FIRST_CRASH_TYPE,TRAFFICWAY_TYPE,ROADWAY_SURFACE_COND,CRASH_TYPE,INTERSECTION_RELATED_I,NOT_RIGHT_OF_WAY_I,DAMAGE,PRIM_CONTRIBUTORY_CAUSE,SEC_CONTRIBUTORY_CAUSE,STREET_NO,STREET_DIRECTION,STREET_NAME,LATITUDE,LONGITUDE,CRASH_WEEKDAY,CRASH_HOUR,CRASH_Month,grid
0,3,2017-06-30 16:00:00,35,STOP SIGN/FLASHER,FUNCTIONING PROPERLY,CLEAR,DAYLIGHT,TURNING,NOT DIVIDED,DRY,INJURY AND / OR TOW DUE TO CRASH,Y,,"OVER $1,500",FAILING TO YIELD RIGHT-OF-WAY,NOT APPLICABLE,8301,S,CICERO AVE,41.741804,-87.740954,4,16,6,"(10.0, 8.0)"
1,4,2019-03-21 22:50:00,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,"DARKNESS, LIGHTED ROAD",TURNING,NOT DIVIDED,DRY,NO INJURY / DRIVE AWAY,Y,,"OVER $1,500",UNABLE TO DETERMINE,UNABLE TO DETERMINE,8301,S,CICERO AVE,41.741804,-87.740954,3,22,3,"(10.0, 8.0)"
2,5,2018-03-26 14:23:00,35,NO CONTROLS,NO CONTROLS,CLEAR,DAYLIGHT,PARKED MOTOR VEHICLE,NOT DIVIDED,DRY,NO INJURY / DRIVE AWAY,,,"$501 - $1,500",UNABLE TO DETERMINE,UNABLE TO DETERMINE,3999,N,AVONDALE AVE,41.953647,-87.732082,0,14,3,"(11.0, 26.0)"
3,6,2018-08-30 17:45:00,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,DAYLIGHT,TURNING,NOT DIVIDED,DRY,NO INJURY / DRIVE AWAY,Y,,"OVER $1,500",IMPROPER OVERTAKING/PASSING,IMPROPER LANE USAGE,600,W,DIVISION ST,41.903825,-87.643286,3,17,8,"(19.0, 22.0)"
4,7,2019-06-11 08:40:00,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,DAYLIGHT,REAR END,DIVIDED - W/MEDIAN BARRIER,DRY,NO INJURY / DRIVE AWAY,Y,,"$501 - $1,500",UNABLE TO DETERMINE,NOT APPLICABLE,50,E,GARFIELD BLVD,41.794779,-87.623828,1,8,6,"(21.0, 12.0)"


In [None]:
# Helper for drawing bar charts
def figfunc(x_, y_, title_, x_title, y_title, height_, width_, colorscale='Viridis'):
    """Build and display a Plotly bar chart whose bars are coloured by their value.

    Parameters
    ----------
    x_ : sequence
        Values placed on the x axis (one per bar).
    y_ : sequence
        Bar heights; the same values are passed as the marker colour.
    title_ : str
        Figure title (centred with ``title_x=0.5``).
    x_title, y_title : str
        Axis titles.
    height_, width_ : int
        Figure height and width passed to ``update_layout``.
    colorscale : str, optional
        Colorscale applied to the bars. Defaults to ``'Viridis'``, the value
        that was previously hard-coded.

    Returns
    -------
    None
        The figure is shown with ``fig.show()`` and is not returned.
    """
    bars = go.Bar(x=x_, y=y_, marker={'color': y_, 'colorscale': colorscale})
    fig = go.Figure(bars)
    fig.update_layout(
        title_text=title_,
        title_x=0.5,
        xaxis_title=x_title,
        yaxis_title=y_title,
        height=height_,
        width=width_,
    )
    fig.show()


In [None]:
# Preview the first rows of `data2` to check the available columns.
data2.head()

NameError: ignored

## 1) Impact of the Weather conditions

In [None]:
# Count accidents per lighting condition: the grouping column is LIGHTING_CONDITION
# even though the variable name says "weather" (the name is kept because the
# plotting cell reads AccByWeatherConditions).
# CRASH_RECORD_ID is selected before counting so only that column is aggregated.
AccByWeatherConditions = (
    data2.groupby('LIGHTING_CONDITION')['CRASH_RECORD_ID']
    .count()
    .sort_values(ascending=False)
    .head(30)
    .to_frame(name='totalAccidents')
    .reset_index()
)


In [None]:
# Bar chart of the accident count per LIGHTING_CONDITION value. The title and
# x-axis label describe lighting so that they match the plotted column.
figfunc(
    x_=AccByWeatherConditions['LIGHTING_CONDITION'],
    y_=AccByWeatherConditions['totalAccidents'],
    title_="Nombre d'accidents en fonction des conditions d'éclairage",
    x_title="Conditions d'éclairage",
    y_title="Nombre total d'accidents",
    height_=600,
    width_=600,
)

In [None]:
# Share of night-time accidents in `data`.
# value_counts() orders categories by decreasing frequency, so the positions
# used below depend on that ranking.
# NOTE(review): positions 1 and 2 are presumably the two darkness categories and
# position 3 the category left out of the total; confirm with lighting_counts.index.
lighting_counts = data['LIGHTING_CONDITION'].value_counts()
# .iloc makes the positional lookup explicit; a bare integer key on a
# label-indexed Series relies on a deprecated positional fallback.
AccNuit = lighting_counts.iloc[1] + lighting_counts.iloc[2]
AccTot = len(data) - lighting_counts.iloc[3]
print("Les accidents de nuit représente ", AccNuit/AccTot*100, "% des accidents totaux" )

Les accidents de nuit représente  26.95445599897523 % des accidents totaux


Bien que le trafic routier de nuit ne représente que 10 % du trafic routier total, il représente près de 27 % des accidents totaux. Le risque d'accident est donc plus élevé la nuit. 

In [None]:
# Draw one pie chart per column below, showing how the accidents are split
# across the categories of that column.
list_to_change = ['WEATHER_CONDITION', 'ROADWAY_SURFACE_COND']


for column in list_to_change:
  # Number of crashes (non-null CRASH_RECORD_ID) for each category of `column`.
  nbrTotal = data2.groupby(column)['CRASH_RECORD_ID'].count().to_frame(name='total').reset_index()

  # Single brackets give a 1-D column for the labels; double brackets would
  # pass a one-column DataFrame instead.
  fig = go.Figure([go.Pie(labels=nbrTotal[column], values=nbrTotal['total'])])

  fig.update_traces(hoverinfo='label+percent', textfont_size=15)
  fig.update_layout(title="Répartition des accidents selon " + column, title_x=0.5, height=600)
  fig.show()

Il semble que la pluie ne soit pas un si grand danger. Du moins, les jours de pluie ne représentent pas une part beaucoup plus importante que le reste.

### 2) Device conditions

In [None]:
# Count accidents per DEVICE_CONDITION value, most frequent first (top 30).
# CRASH_RECORD_ID is selected before counting so only that column is aggregated.
AccByDeviceConditions = (
    data2.groupby('DEVICE_CONDITION')['CRASH_RECORD_ID']
    .count()
    .sort_values(ascending=False)
    .head(30)
    .to_frame(name='totalAccidents')
    .reset_index()
)

In [None]:
# Bar chart of the accident count per DEVICE_CONDITION value.
figfunc(
    x_=AccByDeviceConditions['DEVICE_CONDITION'],
    y_=AccByDeviceConditions['totalAccidents'],
    title_="Nombre d'accidents en fonction des états des dispositifs",
    x_title="Device conditions",
    y_title="Nombre total d'accidents",
    height_=600,
    width_=600,
)

Seuls 0,005 % des accidents ont été causés en partie par des défauts de la signalisation. 
58 % des accidents ont lieu sur des espaces sans contrôle. Faut-il augmenter le nombre de contrôles ? Qu'est-ce que cela veut dire exactement ? 

## 2) Répartition des accidents sur l'année, mois

In [None]:
# Preview the first rows of `data2` before deriving the date columns.
data2.head()

Unnamed: 0,CRASH_RECORD_ID,RD_NO,CRASH_DATE_EST_I,CRASH_DATE,POSTED_SPEED_LIMIT,TRAFFIC_CONTROL_DEVICE,DEVICE_CONDITION,WEATHER_CONDITION,LIGHTING_CONDITION,FIRST_CRASH_TYPE,TRAFFICWAY_TYPE,LANE_CNT,ALIGNMENT,ROADWAY_SURFACE_COND,ROAD_DEFECT,REPORT_TYPE,CRASH_TYPE,INTERSECTION_RELATED_I,NOT_RIGHT_OF_WAY_I,HIT_AND_RUN_I,DAMAGE,DATE_POLICE_NOTIFIED,PRIM_CONTRIBUTORY_CAUSE,SEC_CONTRIBUTORY_CAUSE,STREET_NO,STREET_DIRECTION,STREET_NAME,BEAT_OF_OCCURRENCE,PHOTOS_TAKEN_I,STATEMENTS_TAKEN_I,DOORING_I,WORK_ZONE_I,WORK_ZONE_TYPE,WORKERS_PRESENT_I,NUM_UNITS,MOST_SEVERE_INJURY,INJURIES_TOTAL,INJURIES_FATAL,INJURIES_INCAPACITATING,INJURIES_NON_INCAPACITATING,INJURIES_REPORTED_NOT_EVIDENT,INJURIES_NO_INDICATION,INJURIES_UNKNOWN,CRASH_HOUR,CRASH_DAY_OF_WEEK,CRASH_MONTH,LATITUDE,LONGITUDE,LOCATION,date
0,073682ef84ff827659552d4254ad1b98bfec24935cc9cc...,JB460108,,10/02/2018 06:30:00 PM,10,NO CONTROLS,NO CONTROLS,CLEAR,DARKNESS,PARKED MOTOR VEHICLE,OTHER,,STRAIGHT AND LEVEL,DRY,NO DEFECTS,ON SCENE,NO INJURY / DRIVE AWAY,,,,"OVER $1,500",10/02/2018 07:35:00 PM,NOT APPLICABLE,NOT APPLICABLE,517,W,OHARE ST,1654.0,,,,,,,2,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,1.0,0.0,18,3,10,,,,2018-10-02 18:30:00
1,1560fb8a1e32b528fef8bfd677d2b3fc5ab37278b157fa...,JC325941,,06/27/2019 04:00:00 PM,45,NO CONTROLS,NO CONTROLS,CLEAR,DAYLIGHT,SIDESWIPE SAME DIRECTION,ONE-WAY,,STRAIGHT AND LEVEL,DRY,NO DEFECTS,ON SCENE,NO INJURY / DRIVE AWAY,,,,"OVER $1,500",06/27/2019 04:00:00 PM,UNABLE TO DETERMINE,UNABLE TO DETERMINE,3,W,TERMINAL ST,1653.0,,,,,,,2,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,2.0,0.0,16,5,6,,,,2019-06-27 16:00:00
2,c7e667807d86ff2e83da48267e1135680cad68c86eb00f...,JD150617,,02/13/2020 04:30:00 PM,30,NO CONTROLS,NO CONTROLS,CLEAR,DAYLIGHT,SIDESWIPE SAME DIRECTION,ONE-WAY,,STRAIGHT AND LEVEL,WET,NO DEFECTS,ON SCENE,INJURY AND / OR TOW DUE TO CRASH,,,,"OVER $1,500",02/13/2020 04:35:00 PM,FAILING TO YIELD RIGHT-OF-WAY,NOT APPLICABLE,3,W,TERMINAL ST,1653.0,,,,,,,2,"REPORTED, NOT EVIDENT",1.0,0.0,0.0,0.0,1.0,2.0,0.0,16,5,2,,,,2020-02-13 16:30:00
3,009e9e67203442370272e1a13d6ee51a4155dac65e583d...,JA329216,,06/30/2017 04:00:00 PM,35,STOP SIGN/FLASHER,FUNCTIONING PROPERLY,CLEAR,DAYLIGHT,TURNING,NOT DIVIDED,4.0,STRAIGHT AND LEVEL,DRY,NO DEFECTS,ON SCENE,INJURY AND / OR TOW DUE TO CRASH,Y,,,"OVER $1,500",06/30/2017 04:01:00 PM,FAILING TO YIELD RIGHT-OF-WAY,NOT APPLICABLE,8301,S,CICERO AVE,834.0,,,,,,,2,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,3.0,0.0,16,6,6,41.741804,-87.740954,POINT (-87.740953581987 41.741803598989),2017-06-30 16:00:00
4,00e47f189660cd8ba1e85fc63061bf1d8465184393f134...,JC194776,,03/21/2019 10:50:00 PM,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,"DARKNESS, LIGHTED ROAD",TURNING,NOT DIVIDED,4.0,STRAIGHT AND LEVEL,DRY,NO DEFECTS,ON SCENE,NO INJURY / DRIVE AWAY,Y,,,"OVER $1,500",03/21/2019 10:52:00 PM,UNABLE TO DETERMINE,UNABLE TO DETERMINE,8301,S,CICERO AVE,834.0,,Y,,,,,2,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,2.0,0.0,22,5,3,41.741804,-87.740954,POINT (-87.740953581987 41.741803598989),2019-03-21 22:50:00


In [None]:
# Parse the crash timestamp once and derive every calendar column from it.
# The explicit format matches the CRASH_DATE values shown by data2.head()
# (e.g. "10/02/2018 06:30:00 PM") and avoids guessing the format while parsing.
data2['date'] = pd.to_datetime(data2['CRASH_DATE'], format='%m/%d/%Y %I:%M:%S %p')
data2['year'] = data2['date'].dt.year
data2['month'] = data2['date'].dt.month
data2['month_year'] = data2['date'].dt.to_period('M')
# CRASH_DATE stays a string column, so the .dt accessor is used on the parsed 'date'.
data2['weekdays'] = data2['date'].dt.strftime('%A')

# Number of crashes per year. data2 has no 'id' column, so the non-null
# CRASH_RECORD_ID values are counted instead.
count_year = data2.groupby('year')['CRASH_RECORD_ID'].count().to_frame(name='count').reset_index()

KeyboardInterrupt: ignored

In [None]:
# Parse the CRASH_DATE strings (e.g. "10/02/2018 06:30:00 PM") into datetimes.
# The explicit format avoids guessing the format while parsing.
data2['date'] = pd.to_datetime(data2['CRASH_DATE'], format='%m/%d/%Y %I:%M:%S %p')
