In [None]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
print('Libraries Imported')

In [None]:
# Load the DfT road-safety tables, each indexed by the accident identifier.
# `on_bad_lines='skip'` silently drops malformed rows; it replaces the
# `error_bad_lines=False, warn_bad_lines=False` pair, which was deprecated in
# pandas 1.3 and removed in pandas 2.0.
Accidents = pd.read_csv('../input/dft-accident-data/Accidents0515.csv', index_col='Accident_Index')
Casualities = pd.read_csv('../input/dft-accident-data/Casualties0515.csv',
                          on_bad_lines='skip', index_col='Accident_Index')
# NOTE(review): `Vehicles` is read from the *Casualties* file, so it is a second
# copy of `Casualities`. This looks like a copy-paste slip (a vehicles CSV was
# presumably intended), but the later column drop on `Vehicles` lists casualty
# columns, so the path has to be changed together with that cell -- confirm.
Vehicles = pd.read_csv('../input/dft-accident-data/Casualties0515.csv',
                       on_bad_lines='skip', index_col='Accident_Index')
print('Datasets Imported')

### Accidents dataset:

In [None]:
# Preview the first five accident records.
Accidents.head(n=5)

### Analyzing the dataset:

In [None]:
# (rows, columns) of the accident table as loaded.
Accidents.shape

In [None]:
# Column names of the accident table; used to pick the columns dropped in the next cell.
Accidents.columns

In [None]:
# Drop columns that the analysis below does not use.
# Rebinding the name (instead of `inplace=True`) together with `errors='ignore'`
# keeps the cell idempotent: re-running it no longer raises KeyError for
# columns that were already removed by a previous run.
Accidents = Accidents.drop(
    columns=['Location_Easting_OSGR', 'Location_Northing_OSGR', 'LSOA_of_Accident_Location',
             'Junction_Control', '2nd_Road_Class', 'Did_Police_Officer_Attend_Scene_of_Accident'],
    errors='ignore',
)

In [None]:
# Per-column dtype and non-null count summary of the accident table.
Accidents.info()

In [None]:
# Number of missing values in each accident column.
Accidents.isna().sum()

In [None]:
# Remove every row that has at least one missing value, then report the new size.
# NOTE(review): the original comment states that dropping these rows does not
# affect the analysis -- compare with the shape shown earlier to confirm.
Accidents = Accidents.dropna(axis=0, how='any')
Accidents.shape

### Casualties dataset:

In [None]:
# Preview the first five casualty records.
Casualities.head(n=5)

In [None]:
# Number of missing values in each casualty column.
Casualities.isna().sum()

In [None]:
# Per-column dtype and non-null count summary of the casualty table.
Casualities.info()

In [None]:
# (rows, columns) of the casualty table.
Casualities.shape

In [None]:
# Column names of the casualty table.
Casualities.columns

In [None]:
# Pairwise correlation between the numeric accident columns, shown as an
# annotated heatmap. Numeric columns are selected explicitly because
# `DataFrame.corr()` in pandas >= 2.0 raises on non-numeric columns instead of
# silently skipping them.
Correlation = Accidents.select_dtypes(include='number').corr()
plt.figure(figsize=(20,10))
sns.heatmap(Correlation, annot=True)

In [None]:
# Distribution of accidents across the days of the week.
fig, ax = plt.subplots(figsize=(10,5))
Accidents['Day_of_Week'].hist(color='purple', ax=ax)
ax.grid(alpha=0.4)

### Accident Severity distribution based on Road type : 

In [None]:
# Number of accidents per road type, split by accident severity.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally
# (the first positional parameter is now `data`).
ax = sns.countplot(x='Road_Type', hue='Accident_Severity', data=Accidents)
# Tick and legend labels are assigned purely by position.
# NOTE(review): this assumes exactly six Road_Type categories and three severity
# levels, drawn in this order -- confirm against the data's code book.
ax.set_xticklabels(['Roundabout','One_way_street ','Dual_carriageway','Single carriageway','Slip road','Unknown'])
plt.xticks(rotation=90)
plt.legend(['Fatal','Serious','Slight'])
plt.grid(alpha=0.4)

In [None]:
# Scatter of accident coordinates (x axis: latitude, y axis: longitude).
fig, ax = plt.subplots(figsize=(10,5))
ax.scatter(x='Latitude', y='Longitude', data=Accidents, c='orange')
ax.set_xlabel('Latitude', fontsize=12)
ax.set_ylabel('Longitude', fontsize=12)
ax.grid(alpha=0.4)

In [None]:
# Joint distribution of weather-condition code and casualties per accident.
# `jointplot` is a figure-level function that builds its own figure, so the
# former `plt.figure(figsize=(10,15))` call is gone: it only produced an extra
# empty figure and never sized this plot.
sns.jointplot(x='Weather_Conditions',y='Number_of_Casualties',data=Accidents)

In [None]:
# Number of accidents per light-condition category.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Light_Conditions', data=Accidents, color='orange')
# Labels are assigned by position.
# NOTE(review): assumes exactly five Light_Conditions categories in this order -- confirm.
ax.set_xticklabels(['Daylight','Darkness - lights lit',
                    'Darkness - lights unlit','Darkness - no lighting','Darkness - lighting unknown'])
# Lower-case `rotation`: the capitalised `Rotation` keyword is not a valid
# text property on current matplotlib releases.
plt.xticks(rotation=90)
plt.title('ACCIDENT RATES BASED ON LIGHT CONDITIONS',fontsize=15)
plt.grid(alpha=0.4)
plt.show()

In [None]:
# Pairwise correlation between the numeric casualty columns.
correlation = Casualities.select_dtypes(include='number').corr()
plt.figure(figsize=(25,8))
# Plot the casualty matrix computed above. The original passed `Correlation`
# (capital C), which is the accident matrix from an earlier cell, so this cell
# redrew the accident heatmap instead of the casualty one.
sns.heatmap(correlation, annot=True)

In [None]:
# One histogram per casualty column.
# `DataFrame.hist` creates its own figure (sized through `figsize`), so the
# former bare `plt.figure()` call is gone: it only produced an extra empty figure.
Casualities.hist(figsize=(15,15));

### The following plot shows that the largest number of casualties are teenagers and young adults, around the ages of 17-21.

In [None]:
# Distribution of casualties by age.
plt.figure(figsize=(20,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
sns.countplot(x='Age_of_Casualty', data=Casualities)
plt.title('CASUALITY DISTRIBUTION BASED ON AGE', fontsize=15)
plt.xticks(rotation=90)
plt.grid(alpha=0.4)
plt.show()

### The plot below indicates that most casualties are male across the age groups:

In [None]:
# Distribution of casualties by age, split by the casualty's sex.
plt.figure(figsize=(40,10))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
sns.countplot(x='Age_of_Casualty', hue='Sex_of_Casualty', data=Casualities)
plt.xticks(fontsize=15,rotation=90)
# Legend labels are assigned by position.
# NOTE(review): assumes three Sex_of_Casualty codes drawn in this order -- confirm.
plt.legend(['Missing data','Male','Female'],prop={'size': 30}, loc=1)
plt.grid(alpha=0.4)
plt.xlabel('AGE_OF_CASUALITIES', fontsize=25)
plt.ylabel('COUNT', fontsize=25)
plt.show()

In [None]:
# Number of casualties per casualty class.
plt.figure()
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Casualty_Class', data=Casualities)
# Labels are assigned by position.
# NOTE(review): assumes exactly three Casualty_Class categories in this order -- confirm.
ax.set_xticklabels(['Driver or rider','Passenger','Pedestrian'])
plt.grid(alpha=0.4)
plt.show()

In [None]:
# Preview the first five rows of the `Vehicles` frame.
Vehicles.head(n=5)

In [None]:
# Remove the listed casualty-level columns from `Vehicles`.
# Rebinding the name with `errors='ignore'` (instead of `inplace=True`) keeps
# the cell idempotent: a second run no longer raises KeyError.
# NOTE(review): `Vehicles` is loaded from the Casualties CSV at the top of the
# notebook, so this drop may leave the frame with no columns at all -- confirm
# which source file was intended.
Vehicles = Vehicles.drop(
    columns=['Vehicle_Reference',
             'Casualty_Reference', 'Casualty_Class', 'Sex_of_Casualty',
             'Age_of_Casualty', 'Age_Band_of_Casualty', 'Casualty_Severity',
             'Pedestrian_Location', 'Pedestrian_Movement', 'Car_Passenger',
             'Bus_or_Coach_Passenger', 'Pedestrian_Road_Maintenance_Worker',
             'Casualty_Type', 'Casualty_Home_Area_Type'],
    errors='ignore',
)

In [None]:
# Combine the three tables by joining on their indexes (merge's default inner join).
# NOTE(review): neither merge de-duplicates the index, so an accident with several
# casualty / vehicle rows appears once per matching pair. Counts drawn from these
# frames are therefore per merged row, not per accident -- confirm that is intended.
Dataframe1 = pd.merge(Accidents, Casualities, left_index=True, right_index=True)
Dataframe2 = pd.merge(Dataframe1, Vehicles, left_index=True, right_index=True)
Dataframe2.head(n=5)

In [None]:
# Column names available after merging the three tables.
Dataframe2.columns

### Accident severity distribution based on sex:

In [None]:
# Accident severity counts, split by the casualty's sex.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Accident_Severity', hue='Sex_of_Casualty', data=Dataframe1)
# Tick and legend labels are assigned by position.
# NOTE(review): assumes three severity levels and three sex codes in this order -- confirm.
ax.set_xticklabels(['Fatal','Serious','Slight'])
plt.legend(['Unlabelled','Male','Female'],fontsize=12)
plt.title('ACCIDENT SEVERITY DISTRIBUTION BASED ON SEX', fontsize=15)
plt.grid(alpha=0.4)

### Accident severity distribution based on casualty class:

In [None]:
# Accident severity counts, split by casualty class.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Accident_Severity', hue='Casualty_Class', data=Dataframe1)
# Tick and legend labels are assigned by position.
# NOTE(review): assumes three casualty classes and three severity levels in this order -- confirm.
plt.legend(['Driver or rider','Passenger','Pedestrian'],fontsize=12)
ax.set_xticklabels(['Fatal','Serious','Slight'])
plt.title('ACCIDENT SEVERITY DISTRIBUTION BASED ON CASUALITY CLASS', fontsize=15)
plt.grid(alpha=0.4)

### Accident Severity distribution based on Light conditions:

In [None]:
# Accident severity counts, split by light conditions.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Accident_Severity', hue='Light_Conditions', data=Dataframe2)
# Tick and legend labels are assigned by position.
# NOTE(review): assumes five light-condition codes and three severity levels in this order -- confirm.
plt.legend(['Daylight','Darkness - lights lit','Darkness - lights unlit','Darkness - no lighting','Darkness - lighting unknown'],fontsize=12)
ax.set_xticklabels(['Fatal','Serious','Slight'])
plt.title('ACCIDENT SEVERITY DISTRIBUTION BASED ON LIGHT CONDITIONS', fontsize=15)
plt.grid(alpha=0.4)

### Accident severity distribution based on road type

In [None]:
# Accident severity counts, split by road type.
plt.figure(figsize=(10,5))
# Bind the returned Axes. The original did not, so `ax` below still referred to
# the previous cell's plot and the tick labels were applied to the wrong axes
# (or raised NameError on a fresh kernel if that cell had not run).
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Accident_Severity', hue='Road_Type', data=Dataframe2)
# Tick and legend labels are assigned by position.
# NOTE(review): assumes three severity levels and six road-type codes in this order -- confirm.
ax.set_xticklabels(['Fatal','Serious','Slight'])
plt.legend(['Roundabout','One way street','Dual carriageway','Single carriageway','Slip road','Unknown'], fontsize=12)
plt.title('ACCIDENT SEVERITY DISTRIBUTION BASED ON ROAD TYPE', fontsize=15)
plt.grid(alpha=0.4)

In [None]:
# Joint scatter plot of the merged rows' coordinates, with marginal distributions.
# `jointplot` is figure-level and builds its own figure, so the former
# `plt.figure(figsize=(10,5))` call is gone (it only produced an extra empty
# figure). Labels and grid are applied to the joint axes explicitly instead of
# to whichever axes happens to be "current" after the call.
joint_grid = sns.jointplot(x='Latitude', y='Longitude', kind='scatter', data=Dataframe2)
joint_grid.set_axis_labels('Latitude', 'Longitude', fontsize=12)
joint_grid.ax_joint.grid(alpha=0.4)

In [None]:
# Accident severity counts, split by speed limit.
plt.figure(figsize=(10,5))
# `x` is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
ax = sns.countplot(x='Accident_Severity', hue='Speed_limit', data=Dataframe2)
plt.grid(alpha=0.4)
# Labels are assigned by position.
# NOTE(review): assumes exactly three severity levels in this order -- confirm.
ax.set_xticklabels(['Fatal','Serious','Slight'])
plt.legend(loc=2)

In [None]:
# Coordinates of every merged row that has both a latitude and a longitude.
Dataframe3 = Dataframe2[['Latitude','Longitude']].dropna()
# Plain [latitude, longitude] pairs, consumed by the map cell below.
locationlist = Dataframe3.values.tolist()
# The bare `len(locationlist)` and `locationlist[7]` expressions were removed:
# only a cell's last expression is displayed, so they showed nothing, and the
# index lookup could raise IndexError on fewer than eight rows.
Dataframe3.shape

In [None]:
import folium
from folium.plugins import MarkerCluster

# Upper bound on the number of markers drawn.
# NOTE(review): arbitrary cap so the rendered map stays usable in a browser;
# the original tried to draw one marker per merged row. Tune as needed.
MAX_MARKERS = 5000

# Keep the severity next to the coordinates so each marker reports its own
# row's value. The original read `Dataframe2["Accident_Severity"].values[0]`
# on every iteration, so all popups showed the first row's severity.
map_points = Dataframe2[['Latitude', 'Longitude', 'Accident_Severity']].dropna()
if len(map_points) > MAX_MARKERS:
    # Fixed seed so Restart & Run All reproduces the same map.
    map_points = map_points.sample(n=MAX_MARKERS, random_state=0)

m = folium.Map(location=[51.5085300,-0.1257400], tiles='openstreetmap', zoom_start=15)
marker_cluster = MarkerCluster().add_to(m)
for lat, lon, severity in map_points.itertuples(index=False, name=None):
    folium.CircleMarker(
        [lat, lon],
        # Fixed, visible radius in pixels: the original `severity / 1e7`
        # produced markers far too small to see.
        radius=5,
        popup="Accident Severity : %s" % severity,
        color="red",
        fill_color='pink',
    # Add to the cluster (created above but previously unused), not to the map directly.
    ).add_to(marker_cluster)
m

### The above analysis shows that the age group most often involved in accidents is 17-21, and that most of these casualties are male. It also shows that most accidents occur in daylight on single carriageways. Most of the victims of these accidents are drivers, and more than half of them are male.