## Geo-parsing and Geo-Visualization of Road Traffic Crash Incident Locations from Print Media for Emergency Response and Planning

### Code for RTC-NER Model

#### Setup the Environment

In [4]:
!python3 -m pip install tensorflow
!pip install spacy-transformers seqeval folium

Collecting spacy-transformers
  Downloading spacy_transformers-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (197 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m197.8/197.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<4.37.0,>=3.4.0 (from spacy-transformers)
  Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m60.9 MB/s[0m eta [36m0:00:00[0m
Collecting spacy-alignments<1.0.0,>=0.7.2 (from spacy-transformers)
  Downloading spacy_alignments-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (313 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.0/314.0 k

In [5]:
!python -m spacy info

[1m

spaCy version    3.7.4                         
Location         /usr/local/lib/python3.10/dist-packages/spacy
Platform         Linux-6.1.58+-x86_64-with-glibc2.35
Python version   3.10.12                       
Pipelines        en_core_web_sm (3.7.1)        



In [6]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm
import json

from seqeval.metrics import classification_report #to evaluate model
from spacy.tokens import DocBin #to load model

nlp = spacy.blank("en") # load a new spacy model
db = DocBin() # create a DocBin object

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


### Load the annotated Training and Test datasets

In [None]:
rtc_train = open('rtc_train_annotations.json')
TRAIN_DATA = json.load(rtc_train)

rtc_test = open('rtc_test_annotations.json')
TEST_DATA = json.load(rtc_test)

#### Convert the .json files into .spacy format

In [None]:
# Create DocBin Objects
for text, annot in tqdm(TRAIN_DATA['annotations']):
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annot["entities"]:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    doc.ents = ents
    db.add(doc)

db.to_disk("./training_data.spacy") # save the docbin object

for text, annot in tqdm(TEST_DATA['annotations']):
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annot["entities"]:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    doc.ents = ents
    db.add(doc)

db.to_disk("./test_data.spacy") # save the docbin object

100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
100%|██████████| 1/1 [00:00<00:00,  3.96it/s]
100%|██████████| 1/1 [00:00<00:00, 52.08it/s]


### Train the RTC-NER Model

In [None]:
! python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency

2023-12-07 21:31:18.361856: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-07 21:31:18.361916: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-07 21:31:18.362000: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[38;5;4mℹ Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy

In [None]:
! python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./test_data.spacy --gpu-id 0

2023-12-07 21:31:28.731760: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-07 21:31:28.731826: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-07 21:31:28.731882: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[38;5;4mℹ Saving to output directory: .[0m
[38;5;4mℹ Using GPU: 0[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0

## **Evaluation of the RTC-NER model**

In [7]:
!python -m spacy evaluate model-best/ "./test_data.spacy"

[38;5;4mℹ Using CPU[0m
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
[1m

TOK     100.00
NER P   93.63 
NER R   93.61 
NER F   93.62 
SPEED   12488 

[1m

               P       R       F
CASUALTY   90.28   89.44   89.86
TOWN       91.15   92.57   91.85
LGA        95.32   96.40   95.86
STATE      94.32   94.16   94.24
ROAD       97.54   97.88   97.71
INJURED    90.41   90.59   90.50
LANDMARK   92.33   85.56   88.82
HOSPITAL   95.02   95.69   95.35
SUBURB     86.84   91.67   89.19



### Toponym Recognition Using the RTC-NER Model

In [12]:
#Load the newly created model
rtc_ner = spacy.load("model-best")

In [13]:
doc1 = rtc_ner(""" No fewer than five people were confirmed dead on Sunday while 12 others sustained varying degrees of injuries in an accident near the Foursquare Camp in Ajebo on the Lagos-Ibadan Expressway.

Mrs Florence Okpe, the Public Education Officer of the Federal Road Safety Corps (FRSC) in Ogun, confirmed the development in an interview with newsmen in Abeokuta.

She stated that the injured people were taken to Victory Hospital, Ogbere for medical attention while the dead were deposited at a morgue in Ipara community, near Abeokuta.

""")

In [14]:
spacy.displacy.render(doc1, style="ent", jupyter=True) # display in Jupyter

In [15]:
for ent in doc1.ents:
	print(ent.text, " ->>>> ", ent.label_)

five people  ->>>>  CASUALTY
12 others  ->>>>  INJURED
near the Foursquare Camp  ->>>>  LANDMARK
Ajebo  ->>>>  TOWN
Lagos-Ibadan Expressway.  ->>>>  ROAD
Victory Hospital, Ogbere  ->>>>  HOSPITAL


### Toponym Resolution (Geocoding)

In [18]:
#Displaying selected RTC location information
locations = []
location_tag = []

tags = ['ROAD', 'LANDMARK', 'TOWN', 'LGA', 'STATE', 'HOSPITAL']

for tag in tags:
  for ent in doc1.ents:
      if ent.label_ == tag:
        locations.append(ent.text)
        #location_tag.append(tag)

#print(locations)
#print(location_tag)

#function to concatenate all elements of locations list (except the last element) as rtc_site
def conexclast(strlst):
    output = ""
    for elem in strlst:
       strng = str(elem)
       output = output+strng
    return ', '.join(strlst[0:-1])


rtc_site = conexclast(locations[1:])
rtc_road = locations[0]
hospital = locations[-1]


rtc_places = []
rtc_places.append(rtc_road)
rtc_places.append(rtc_site)
rtc_places.append(hospital)

print("Road Traffic Crash Location details:", rtc_places)

Road Traffic Crash Location details: ['Lagos-Ibadan Expressway.', 'near the Foursquare Camp, Ajebo', 'Victory Hospital, Ogbere']


In [19]:
#Using Google maps geocoding api
import requests
import pandas as pd

API_KEY = 'xxxxxxxx' #insert your Google Geocoding API key inplace of xxxxxxxx

df = pd.DataFrame()

latitude_list = []
longitude_list = []

for loc in rtc_places:
  params = {
    'key': API_KEY,
    'address': loc
  }

  base_url = 'https://maps.googleapis.com/maps/api/geocode/json'
  response = requests.get(base_url, params=params).json()

  geometry = response['results'][0]['geometry']
  lat = geometry['location']['lat']
  lon = geometry['location']['lng']
  latitude_list.append(lat)
  longitude_list.append(lon)

df = pd.DataFrame({'Location': rtc_places})

df['Latitude'] = latitude_list
df['Longitude'] = longitude_list

df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)

df



Unnamed: 0,Location,Latitude,Longitude
0,Lagos-Ibadan Expressway.,6.923588,3.636422
1,"near the Foursquare Camp, Ajebo",7.10912,3.723304
2,"Victory Hospital, Ogbere",6.739754,4.164174


### Geo-Visualization

In [20]:
import folium

In [21]:
df

Unnamed: 0,Location,Latitude,Longitude
0,Lagos-Ibadan Expressway.,6.923588,3.636422
1,"near the Foursquare Camp, Ajebo",7.10912,3.723304
2,"Victory Hospital, Ogbere",6.739754,4.164174


In [23]:
#coordinate of the road
road_coord = [df['Latitude'].iloc[0], df['Longitude'].iloc[0]]
print(road_coord)

[6.9235877, 3.6364221]


In [24]:
map = folium.Map(location=road_coord, zoom_start=13)

folium.Marker(road_coord, popup='RTC Road').add_to(map)
map

In [25]:
# Coordinates of the points
points = []
for i in range(df.shape[0]):
    rtc_coords = [df['Latitude'].iloc[i], df['Longitude'].iloc[i]]
    points.append(rtc_coords)

points

[[6.9235877, 3.6364221],
 [7.1091204, 3.7233044],
 [6.7397541, 4.164174099999999]]

In [26]:
for point in points:
    folium.Marker(point, popup='Point').add_to(map)
map