# Accident Data
### Analyst: Ryann Kim Sesgundo

#### Import dependencies

In [1]:
import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt
from scipy.stats import f_oneway

In [2]:
data = pd.read_csv("datasets\\accident_data.csv")

In [3]:
data.dtypes

Index                       object
Accident_Severity           object
Accident Date               object
Latitude                   float64
Light_Conditions            object
District Area               object
Longitude                  float64
Number_of_Casualties         int64
Number_of_Vehicles           int64
Road_Surface_Conditions     object
Road_Type                   object
Urban_or_Rural_Area         object
Weather_Conditions          object
Vehicle_Type                object
dtype: object

In [4]:
data.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [5]:
data['Latitude'] = data['Latitude'].fillna(data['Latitude'].mode())
data['Longitude'] = data['Longitude'].fillna(data['Longitude'].mode())
data['Road_Surface_Conditions'] = data['Road_Surface_Conditions'].fillna(data['Road_Surface_Conditions'].mode()[0])
data['Urban_or_Rural_Area'] = data['Urban_or_Rural_Area'].fillna(data['Urban_or_Rural_Area'].mode()[0])
data['Road_Type'] = data['Road_Type'].fillna('Unknown Road Type')
data['Weather_Conditions'] = data['Weather_Conditions'].fillna('Unknown Weather Conditions')

In [6]:
# If ever, convert first to string then convert back to Date Time
data['Accident Date'] = data['Accident Date'].astype("str")
data['Accident Date'] = data['Accident Date'].str.strip()
data['Accident Date'] = data['Accident Date'].str.replace('/', '-')

In [7]:
data['Accident Date'] = pd.to_datetime(data['Accident Date'], dayfirst=True, errors='coerce')

In [8]:
data['Accident_Severity'] = data['Accident_Severity'].astype('category')
data['Light_Conditions'] = data['Light_Conditions'].astype('category')
data['Latitude'] = data['Light_Conditions'].astype('category')
data['Longitude'] = data['Light_Conditions'].astype('category')
data['District Area'] = data['District Area'].astype('category')
data['Road_Surface_Conditions'] = data['Road_Surface_Conditions'].astype('category')
data['Road_Type'] = data['Road_Type'].astype('category')
data['Urban_or_Rural_Area'] = data['Urban_or_Rural_Area'].astype('category')
data['Weather_Conditions'] = data['Weather_Conditions'].astype('category')
data['Vehicle_Type'] = data['Vehicle_Type'].astype('category')

In [9]:
data.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                         category
Light_Conditions                 category
District Area                    category
Longitude                        category
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

In [10]:
data.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

### Adding more fields

In [11]:
data['Year'] = data['Accident Date'].dt.year
data['Month'] = data['Accident Date'].dt.month
data['DayOfWeek'] = data['Accident Date'].dt.dayofweek
data

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type,Year,Month,DayOfWeek
0,200701BS64157,Serious,2019-06-05,Darkness - lights lit,Darkness - lights lit,Kensington and Chelsea,Darkness - lights lit,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car,2019,6,2
1,200701BS65737,Serious,2019-07-02,Daylight,Daylight,Kensington and Chelsea,Daylight,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car,2019,7,1
2,200701BS66127,Serious,2019-08-26,Darkness - lighting unknown,Darkness - lighting unknown,Kensington and Chelsea,Darkness - lighting unknown,1,3,Dry,Unknown Road Type,Urban,Unknown Weather Conditions,Taxi/Private hire car,2019,8,0
3,200701BS66128,Serious,2019-08-16,Daylight,Daylight,Kensington and Chelsea,Daylight,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats),2019,8,4
4,200701BS66837,Slight,2019-09-03,Darkness - lights lit,Darkness - lights lit,Kensington and Chelsea,Darkness - lights lit,1,2,Dry,Unknown Road Type,Urban,Unknown Weather Conditions,Other vehicle,2019,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,2022-02-18,Daylight,Daylight,Highland,Daylight,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car,2022,2,4
660675,201091NM01881,Slight,2022-02-21,Darkness - no lighting,Darkness - no lighting,Highland,Darkness - no lighting,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,2022,2,0
660676,201091NM01935,Slight,2022-02-23,Daylight,Daylight,Highland,Daylight,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,2022,2,2
660677,201091NM01964,Serious,2022-02-23,Darkness - no lighting,Darkness - no lighting,Highland,Darkness - no lighting,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc,2022,2,2


### 1. Common Weather Conditions

In [12]:
data['Weather_Conditions'].mode()[0]

'Fine no high winds'

### 2. Common Vehicle

In [13]:
data['Vehicle_Type'].mode()[0]

'Car'

### 3. Common Location
#### Rural or Urban

In [14]:
data['Urban_or_Rural_Area'].mode()[0]

'Urban'

### 4. Most accident date

In [15]:
accident_date = data['Accident Date'].mode()[0]
accident_date

Timestamp('2019-11-30 00:00:00')

### 5. Number of accidents result from #4

In [16]:
### Using the earlier data, where it gives the data how many are the accidents recorded with this date
data[data['Accident Date'] == accident_date].count()['Index']

np.int64(704)

### 6. Common District Area Involve

In [17]:
district_data = data['District Area'].mode()[0]
district_data

'Birmingham'

### 7. Common weather where the accidents in Common District Area Involve

In [18]:
weather_x_district = data[data['District Area'] == district_data]['Weather_Conditions'].mode()[0]
weather_x_district

'Fine no high winds'

### 8.

In [19]:
vehicles_x_weather_x_district = data[(data['District Area'] == district_data) & (data['Weather_Conditions'] == weather_x_district)]
vehicles_x_weather_x_district['Vehicle_Type'].mode()[0]

'Car'

### 7. Average Accident rate in Urban and Rural Area

In [20]:
data[data['Urban_or_Rural_Area'] == 'Rural'].count()


Index                      238990
Accident_Severity          238990
Accident Date              238990
Latitude                   238990
Light_Conditions           238990
District Area              238990
Longitude                  238990
Number_of_Casualties       238990
Number_of_Vehicles         238990
Road_Surface_Conditions    238990
Road_Type                  238990
Urban_or_Rural_Area        238990
Weather_Conditions         238990
Vehicle_Type               238990
Year                       238990
Month                      238990
DayOfWeek                  238990
dtype: int64