In [2]:
import pandas as pd
import osmnx as ox
from pyproj import Transformer

file_path = '../data/london_bike_network.graphml'
G = ox.load_graphml(file_path)

In [4]:
df_acc = pd.read_csv("../data/road_accident.csv")
df_acc

Unnamed: 0,_Collision Id,Borough Name,Easting,Northing,_Casualty Count,_Casualty Severity,Collision Location
0,1240486807,Camden,527188,184782,1.0,Slight,"On Belsize Park Gardens, Near The Junction Wit..."
1,1240486821,Enfield,528936,194721,2.0,Slight,"On Chase Side, Near The Junction With Chase Side."
2,1240486824,Havering,552699,185940,1.0,Slight,"On Elm Park Avenue, 25 Metres East Of The Junc..."
3,1240486825,Greenwich,545623,177185,1.0,Slight,"On Highmead, Near The Junction With Combeside."
4,1240486828,Lewisham,536554,178468,1.0,Slight,"On Grove Street, Near The Junction With Oxesta..."
...,...,...,...,...,...,...,...
15584,48241510857,City Of London,531403,181565,1.0,Slight,Holborn Circus (A4) At Junction With Holborn V...
15585,48241510863,City Of London,533736,181276,1.0,Slight,Middlesex Street (A11) Near Junction With Aldg...
15586,48241510869,City Of London,532700,181132,1.0,Slight,"Prince'S Street Near Junction With Cornhill, L..."
15587,48241510874,City Of London,533018,181360,1.0,Slight,Old Broad Street


In [6]:
df_acc.isnull().sum()

_Collision Id         0
Borough Name          0
Easting               0
Northing              0
_Casualty Count       1
_Casualty Severity    0
Collision Location    0
dtype: int64

In [10]:
missing_row = df_acc[df_acc['_Casualty Count'].isnull()]
missing_row

Unnamed: 0,_Collision Id,Borough Name,Easting,Northing,_Casualty Count,_Casualty Severity,Collision Location
9449,1240517188,Hounslow,516707,178548,,Serious,"On Boston Manor Road, 50 Metres West Of The Ju..."


In [14]:
df_acc['_Casualty Count'].fillna(1.0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_acc['_Casualty Count'].fillna(1.0, inplace=True)


In [16]:
df_acc.isnull().sum()

_Collision Id         0
Borough Name          0
Easting               0
Northing              0
_Casualty Count       0
_Casualty Severity    0
Collision Location    0
dtype: int64

In [None]:
# Create a coordinate system converter: from EPSG:27700 (British) → EPSG:4326 (WGS84)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def BNGToLatLongConverter(easting, northing):

    long, lat = transformer.transform(easting, northing)
    return lat, long

In [22]:
lat_list = []
lon_list = []

for row in df_acc.itertuples():
    lat, lon = BNGToLatLongConverter(row.Easting, row.Northing)
    lat_list.append(lat)
    lon_list.append(lon)

df_acc['Latitude'] = lat_list
df_acc['Longitude'] = lon_list


In [26]:
df_acc.drop(columns=['Easting', 'Northing'], inplace=True)

In [28]:
df_acc

Unnamed: 0,_Collision Id,Borough Name,_Casualty Count,_Casualty Severity,Collision Location,Latitude,Longitude
0,1240486807,Camden,1.0,Slight,"On Belsize Park Gardens, Near The Junction Wit...",51.547605,-0.167122
1,1240486821,Enfield,2.0,Slight,"On Chase Side, Near The Junction With Chase Side.",51.636526,-0.138278
2,1240486824,Havering,1.0,Slight,"On Elm Park Avenue, 25 Metres East Of The Junc...",51.551689,0.201026
3,1240486825,Greenwich,1.0,Slight,"On Highmead, Near The Junction With Combeside.",51.474891,0.095429
4,1240486828,Lewisham,1.0,Slight,"On Grove Street, Near The Junction With Oxesta...",51.488680,-0.034575
...,...,...,...,...,...,...,...
15584,48241510857,City Of London,1.0,Slight,Holborn Circus (A4) At Junction With Holborn V...,51.517731,-0.107569
15585,48241510863,City Of London,1.0,Slight,Middlesex Street (A11) Near Junction With Aldg...,51.514587,-0.074074
15586,48241510869,City Of London,1.0,Slight,"Prince'S Street Near Junction With Cornhill, L...",51.513537,-0.089050
15587,48241510874,City Of London,1.0,Slight,Old Broad Street,51.515511,-0.084384


In [30]:
df_acc.to_csv("../data/road_accident_final.csv")