In [1]:
import folium
from folium.plugins import HeatMap
import numpy as np
import matplotlib as plt
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway

In [2]:
accident = pd.read_csv('dataset\\uk_road_accident.csv')

In [3]:
accident.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [4]:
accident['Latitude'] = accident['Latitude'].fillna(accident['Latitude'].mean())
accident['Longitude'] = accident['Longitude'].fillna(accident['Longitude'].mean())
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna(accident['Road_Surface_Conditions'].mode()[0])
accident['Road_Type'] = accident['Road_Type'].fillna(accident['Road_Type'].mode()[0])
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(accident['Urban_or_Rural_Area'].mode()[0])
accident['Weather_Conditions'] = accident['Weather_Conditions'].fillna(accident['Weather_Conditions'].mode()[0])

In [5]:
accident['Accident Date'] = accident['Accident Date'].str.strip()
accident['Accident Date'] = accident['Accident Date'].astype('str')
accident['Accident Date'] = accident['Accident Date'].str.replace('/','-')
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'],dayfirst = True, errors = 'coerce')

In [6]:
accident['Year'] = accident['Accident Date'].dt.year
accident['Month'] = accident['Accident Date'].dt.month
accident['Day'] = accident['Accident Date'].dt.day
accident['DayOfWeek'] = accident['Accident Date'].dt.dayofweek

In [22]:
accident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 18 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  object        
 1   Accident_Severity        660679 non-null  object        
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  object        
 5   District Area            660679 non-null  object        
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  object        
 10  Road_Type                660679 non-null  object        
 11  Urban_or_Rural_Area      660679 non-null  object        
 12  Weather_Conditio

<h1>STEP 1) Extract Latitude and Longitude from the DataFrame </h1>

In [7]:
london_df = accident[accident['District Area'] == 'City of London']

In [8]:
locations = list(zip(london_df['Latitude'], london_df['Longitude']))

<h1>STEP 2) Create a base app centered in a specific location</h1>

In [9]:
m = folium.Map(location=[london_df['Latitude'].mean(), london_df['Longitude'].mean()], zoom_start=10)

<h1>STEP 3) Add the heatmap layer</h1>

In [10]:
HeatMap(locations).add_to(m)

<folium.plugins.heat_map.HeatMap at 0x28dd5e8d6d0>

<h1>STEP 4) Save or Displat the map</h1>

In [36]:
m.save('accident_heatmap.htm')
m

In [14]:
accident['District Area'].unique()

array(['Kensington and Chelsea', 'Westminster', 'Richmond upon Thames',
       'Hammersmith and Fulham', 'Hounslow', 'Tower Hamlets',
       'City of London', 'Southwark', 'Camden', 'Hackney', 'Islington',
       'Barnet', 'Brent', 'Haringey', 'Merton', 'Ealing', 'Enfield',
       'Greenwich', 'Newham', 'London Airport (Heathrow)', 'Hillingdon',
       'Waltham Forest', 'Redbridge', 'Barking and Dagenham', 'Bromley',
       'Havering', 'Croydon', 'Lambeth', 'Wandsworth', 'Sutton', 'Bexley',
       'Lewisham', 'Harrow', 'Kingston upon Thames', 'Barrow-in-Furness',
       'South Lakeland', 'Carlisle', 'Eden', 'Allerdale', 'Copeland',
       'Fylde', 'Blackpool', 'Wyre', 'Lancaster', 'Chorley',
       'West Lancashire', 'South Ribble', 'Preston',
       'Blackburn with Darwen', 'Hyndburn', 'Ribble Valley', 'Burnley',
       'Pendle', 'Rossendale', 'Wirral', 'Liverpool', 'Sefton',
       'Knowsley', 'St. Helens', 'Manchester', 'Salford', 'Tameside',
       'Stockport', 'Bolton', 'Wigan', '

<h1> 1) What is the number of accidents on the District of Bolton </h1>

In [46]:
from folium.plugins import HeatMap
import pandas as pd
import numpy as np

Bolton_df = accident[accident['District Area'] == 'Bolton']
Bolton_df_clean = Bolton_df.dropna(subset=['Latitude', 'Longitude'])
Bolton_locations = list(zip(Bolton_df_clean['Latitude'], Bolton_df_clean['Longitude']))
center_lat = Bolton_df_clean['Latitude'].mean()
center_lon = Bolton_df_clean['Longitude'].mean()

Bolton_m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=12,
    tiles='OpenStreetMap'
)

HeatMap(
    locations,
    min_opacity=0.2,
    radius=15,
    blur=10,
    max_zoom=1
).add_to(Bolton_m) 
Bolton_m.save('london_accident_heatmap.html')
print("Heatmap created successfully!")
print(f"Map center: ({center_lat:.6f}, {center_lon:.6f})")
print(f"The number of accidents that happened on Bolton is: {len(locations):,}")
Bolton_m

Heatmap created successfully!
Map center: (53.572276, -2.446266)
The number of accidents that happened on Bolton is: 1,317


<h1>2) What is the number of Severe accidents that happend on Camdem </h1>

In [47]:

Camden_deadly = accident[(accident['Accident_Severity'] != 'Slight') & 
                        (accident['District Area'] == 'Camden') & (accident['Year'] == 2020)]

# Step 2: Clean the data - remove any missing coordinates
Camden_df_clean = Camden_deadly.dropna(subset=['Latitude', 'Longitude'])
# Step 3: Create locations list (lat, lon pairs)
locations = list(zip(Camden_df_clean['Latitude'], Camden_df_clean['Longitude']))
# Step 4: Create the map
center_lat = Camden_df_clean['Latitude'].mean()
center_lon = Camden_df_clean['Longitude'].mean()

a = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=12,
    tiles='OpenStreetMap'
)
# Step 5: Add heatmap
HeatMap(
    locations,
    min_opacity=0.2,
    radius=15,
    blur=10,
    max_zoom=1
).add_to(a) 
print("Heatmap created successfully!")
print(f"Map center: ({center_lat:.6f}, {center_lon:.6f})")
print(f"Total Severe accidents ( Fatal & Serious ) : {len(locations):,}")
a

Heatmap created successfully!
Map center: (51.535689, -0.143146)
Total Severe accidents ( Fatal & Serious ) : 116


<h1> 3) What's the number of accidents during Raining and High winds in the city of Lancaster  </h1>

In [48]:
from folium.plugins import HeatMap
import folium

lancaster = accident[(accident['District Area'] == 'Lancaster')&(accident['Weather_Conditions'] == 'Raining + high winds')]
lancaster_df_clean = lancaster.dropna(subset=['Latitude', 'Longitude'])
locations = list(zip(lancaster_df_clean['Latitude'], lancaster_df_clean['Longitude']))
center_lat = lancaster_df_clean['Latitude'].mean()
center_lon = lancaster_df_clean['Longitude'].mean()

c = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=12,
    tiles='OpenStreetMap'
)

HeatMap(
    locations,
    min_opacity=0.2,
    radius=15,
    blur=10,
    max_zoom=1
).add_to(c) 
c.save('lancaster_accident_heatmap.html')

print("Heatmap created successfully!")
print(f"Map center: ({center_lat:.6f}, {center_lon:.6f})")
print(f"Total accidents on Lancaster involving rain and high winds: {len(lancaster):,}")
c

Heatmap created successfully!
Map center: (54.067294, -2.790350)
Total accidents on Lancaster involving rain and high winds: 33


<h1> 4) The number of accidents that occured during on weekends vs weekdays on Burnley  </h1>

In [59]:
burnley = accident[
    (accident['District Area'] == 'Burnley') &
    (~accident['DayOfWeek'].isin(['Saturday', 'Sunday']))
]
burnley_df_clean = burnley.dropna(subset=['Latitude', 'Longitude'])
burnley_locations = list(zip(burnley_df_clean['Latitude'], burnley_df_clean['Longitude']))
burnley_center_lat = burnley_df_clean['Latitude'].mean()
burnley_locations = list(zip(burnley_df_clean['Latitude'], burnley_df_clean['Longitude']))
burnley_center_lat = burnley_df_clean['Latitude'].mean()
burnley_center_lon = burnley_df_clean['Longitude'].mean()

burnley_m = folium.Map(
    location=[burnley_center_lat, burnley_center_lon],
    zoom_start=12,
    tiles='OpenStreetMap'
)

HeatMap(
    burnley_locations,
    min_opacity=0.2,
    radius=15,
    blur=10,
    max_zoom=1
).add_to(burnley_m) 

print("Heatmap created successfully!")
print(f"Map center: ({center_lat:.6f}, {center_lon:.6f})")
burnley_m

Heatmap created successfully!
Map center: (54.067294, -2.790350)


<h1> 5) Which districts experience more severe accidents on Richmond upon tames, using cars </h1>

In [69]:
R_df = accident[(accident['District Area'] == 'Richmond upon Thames') & (accident['Vehicle_Type'] =='Car') & (accident['Accident_Severity'] !='Slight')]
R_df_clean = R_df.dropna(subset=['Latitude', 'Longitude'])
R_locations = list(zip(R_df_clean['Latitude'], R_df_clean['Longitude']))
R_center_lat = R_df_clean['Latitude'].mean()
R_center_lon = R_df_clean['Longitude'].mean()

R_m = folium.Map(
    location=[R_center_lat, R_center_lon],
    zoom_start=12,
    tiles='OpenStreetMap'
)

HeatMap(
    R_locations,
    min_opacity=0.2,
    radius=15,
    blur=10,
    max_zoom=1
).add_to(R_m) 
R_m.save('london_accident_heatmap.html')
print("Heatmap created successfully!")
print(f"Map center: ({center_lat:.6f}, {center_lon:.6f})")
print(f"The number of accidents that happened on Bolton is: {len(locations):,}")
R_m

Heatmap created successfully!
Map center: (54.067294, -2.790350)
The number of accidents that happened on Bolton is: 33
