In [8]:
from geodesk import *
import geopandas
import pandas as pd
import geojson
import json
import reverse_geocoder as rg
from datetime import date

In [None]:
# ---------------------------------------------------
# Project settings: edit these before running the notebook
# ---------------------------------------------------

# feature library that OpenStreetMap features are retrieved from
filterarea = Features('data/london')

# project bounding box (west/east are longitudes, south/north are latitudes)
west = -0.64081
east = -0.60184
south = 51.65435
north = 51.67426

# project url; reused for the "@context" entry and the "@id" of every feature
url = "https://oldamersham.uk"

In [4]:
# Values shared by later cells: citation URLs, the current year, the two
# corner points of the project area as (lat, lon) pairs, and the bounding box.
source = "https://www.openstreetmap.org/"
gazurl = "https://www.visionofbritain.org.uk/data/"
thisyear = date.today().year
coords = ((south, west), (north, east))
area_bounds = Box(west=west, south=south, east=east, north=north)

# restrict the feature library to the bounding box and show how many features were found
features_in_area = filterarea(area_bounds)
features_in_area.count

2353

In [5]:
# Reverse-geocode the corner points; the first result supplies the country code,
# place name, admin2 area and admin1 area used in the road descriptions
codes = rg.search(coords)
cc, area, widerarea, country = (codes[0][key] for key in ('cc', 'name', 'admin2', 'admin1'))
print(cc, codes)

Loading formatted geocoded file...
GB [{'lat': '51.66667', 'lon': '-0.61667', 'name': 'Amersham', 'admin1': 'England', 'admin2': 'Buckinghamshire', 'cc': 'GB'}, {'lat': '51.67468', 'lon': '-0.60742', 'name': 'Amersham on the Hill', 'admin1': 'England', 'admin2': 'Buckinghamshire', 'cc': 'GB'}]


In [6]:
# Select the roads: features in the area matching the query below
# (it asks for both a highway tag and a name tag), then show how many matched
road_query = "w[highway][name]"
arearoads = features_in_area(road_query)
arearoads.count

141

In [None]:
# roads -> GeoJSON -> GeoDataFrame; dissolving on 'name' then merges the
# separate sections of each road into a single row indexed by road name
arearoadsgj = arearoads.geojson
arearoadsgdf = geopandas.read_file(arearoadsgj)
arearoadsdf = arearoadsgdf.dissolve(by='name')

{"type":"FeatureCollection","generator":"geodesk-py/0.2.0","features":[{"type":"Feature","id":"W599925911","geometry":{"type":"LineString","coordinates":[[-0.6785971,51.6612469],[-0.6779026,51.6613897],[-0.6764289,51.6617124],[-0.676004,51.6617811],[-0.6755781,51.6618058],[-0.6746161,51.6617864],[-0.673346,51.6617607],[-0.6723188,51.6617315],[-0.6708506,51.6616889],[-0.6708031,51.6616875],[-0.6707585,51.6616857],[-0.6694852,51.6616339],[-0.6683919,51.6615512],[-0.6674498,51.6614461],[-0.666635,51.6612881],[-0.6662196,51.6611945],[-0.6658587,51.6611132],[-0.6654191,51.6610008],[-0.6649599,51.6608834],[-0.6641489,51.6606565],[-0.6631182,51.6603144],[-0.6624869,51.6600965],[-0.6618244,51.6598474],[-0.6611726,51.6595294],[-0.6601856,51.6590103],[-0.6595543,51.6587181],[-0.657675,51.6578309],[-0.6566968,51.6573957],[-0.6552083,51.6567522],[-0.6524313,51.6556356],[-0.6504128,51.6548297],[-0.6482418,51.6539716],[-0.6475525,51.6537437],[-0.6470161,51.6535689],[-0.6463427,51.6534028],[-0.645814

In [10]:
# The dissolved frame is indexed by road name: copy the index into a 'title'
# column and stamp every row with the country code and the feature class "R"
arearoadsdf['title'] = arearoadsdf.index
arearoadsdf['ccodes'] = cc
arearoadsdf['fclasses'] = "R"

# Append ' road' to these highway types; any other highway value is left unchanged
for road_class in ('residential', 'primary', 'tertiary'):
    arearoadsdf.loc[arearoadsdf['highway'] == road_class, 'highway'] = road_class + ' road'

arearoadsdf.head(2)

Unnamed: 0_level_0,geometry,id,highway,source,surface,lit,maxspeed,ref,oneway,foot,...,parking:left:orientation,parking:right:orientation,bicycle_road,flood_prone,parking:both:zone,width,source:width,title,ccodes,fclasses
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baden Road,"LINESTRING (-0.1257 51.58847, -0.1255 51.58908)",W8037977,residential road,,asphalt,yes,20 mph,,,,...,,,,,,,,Baden Road,GB,R
Beechwood Road,"LINESTRING (-0.12662 51.58922, -0.12566 51.589...",W8037990,residential road,,,,20 mph,,,,...,,,,,,,,Beechwood Road,GB,R


In [11]:
# Describe each road as "A <highway type> in <area>, <wider area>, <country>"
location_suffix = " in " + area + ", " + widerarea + ", " + country
arearoadsdf['description'] = "A " + arearoadsdf['highway'] + location_suffix

# keep only the columns carried through to the exported file
linked_places_columns = ['id', 'title', 'description', 'geometry', 'ccodes', 'fclasses']
arearoadslpdf = arearoadsdf[linked_places_columns]
arearoadslpdf.head(2)

Unnamed: 0_level_0,id,title,description,geometry,ccodes,fclasses
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Baden Road,W8037977,Baden Road,"A residential road in Crouch End, Greater Lond...","LINESTRING (-0.1257 51.58847, -0.1255 51.58908)",GB,R
Beechwood Road,W8037990,Beechwood Road,"A residential road in Crouch End, Greater Lond...","LINESTRING (-0.12662 51.58922, -0.12566 51.589...",GB,R


In [12]:
# Serialise the frame to a JSON string, parse it back into a dict,
# and attach the project url as the "@context" entry
jroads = arearoadslpdf.to_json()
roadsj = {**json.loads(jroads), "@context": url}

In [None]:
# Build `streets1900`: the names recorded in the GB1900 gazetteer that fall
# inside the project bounding box, converted to title case so they can be
# compared with the road titles.
#   1. load the gazetteer CSV and turn its longitude/latitude columns into points
#   2. keep only the points inside the bounding box
#   3. title-case each name, restoring the lower-case possessive ("'S" -> "'s")

gaz = pd.read_csv('data/gb1900_abridged.csv', encoding='utf-16')
gdf = geopandas.GeoDataFrame(
    gaz,
    geometry=geopandas.points_from_xy(gaz.longitude, gaz.latitude),
    crs="EPSG:4326",
)

# .cx slices as [xmin:xmax, ymin:ymax], so the western (smaller) longitude comes first
area1900 = gdf.cx[west:east, south:north]
names1900 = area1900['final_text'].tolist()

# skip missing / non-text entries (e.g. NaN), which have no .title() method
streets1900 = [
    name.title().replace("'S", "'s")
    for name in names1900
    if isinstance(name, str)
]

  gaz = pd.read_csv('GB1900_gazetteer_abridged_july_2018/gb1900_abridged.csv', encoding='utf-16')


In [None]:
# Enrich every road feature with Linked Places fields:
#   "@id"          - project url + "/places/" + the feature's id property
#   "when"         - approximate start date: latest 1900 if the road name appears
#                    in the GB1900 gazetteer names, otherwise earliest 1901
#   "names"        - the toponym with citations for the GB1900 Gazetteer and OpenStreetMap
#   "descriptions" - the generated description, sourced to OpenStreetMap
# The feature-level 'id' and the 'id' / 'description' properties are removed
# once they have been copied into the fields above.
thisyear = date.today().year

# Normalise the join so the "@id" is valid whether or not `url` ends with a slash
# (plain concatenation gave e.g. "https://example.ukplaces/W1" for a url without one)
places_base = url.rstrip("/") + "/places/"

# set gives constant-time membership tests inside the loop
names_in_1900 = set(streets1900)

for feature in roadsj['features']:
    prop = feature["properties"]
    rn = prop['title']
    feature["@id"] = places_base + prop["id"]
    if rn in names_in_1900:
        feature['when'] = {"timespans": [{"start": {'latest': '1900'}}]}
    else:
        feature['when'] = {"timespans": [{"start": {"earliest": "1901"}}]}
    feature['names'] = [{
        "toponym": rn,
        "lang": "en",
        "citations": [
            {"label": "GB1900 Gazetteer (Abridged)", "year": 2018, "@id": gazurl},
            {"label": "OpenStreetMap", "year": thisyear, "@id": source},
        ],
    }]
    feature['descriptions'] = [{"value": prop['description'], "lang": "en", "source": source}]
    feature.pop('id')
    prop.pop('id')
    prop.pop('description')


In [15]:
# write the data out in linked places format (UTF-8, non-ASCII characters kept as-is)
with open('roads.json', 'w', encoding='utf-8') as out_file:
    json.dump(roadsj, out_file, ensure_ascii=False, indent=4)


In [None]:
# Spot-check the exported structure: show every top-level entry but only the
# first two features, so the notebook output stays readable (the complete data
# is in roads.json)
{**roadsj, 'features': roadsj['features'][:2]}

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'title': 'Baden Road', 'ccodes': 'GB', 'fclasses': 'R'},
   'geometry': {'type': 'LineString',
    'coordinates': [[-0.1257004, 51.5884661], [-0.125505, 51.5890785]]},
   '@id': 'https://campsbourne.uk/memorymapper/places/W8037977',
   'when': {'timespans': [{'start': {'earliest': '1901'}}]},
   'names': [{'toponym': 'Baden Road',
     'lang': 'en',
     'citations': [{'label': 'OpenStreetMap',
       'year': 2025,
       '@id': 'https://www.openstreetmap.org/'}]}],
   'descriptions': [{'value': 'A residential road in Crouch End, Greater London, England',
     'lang': 'en',
     'source': 'https://www.openstreetmap.org/'}]},
  {'type': 'Feature',
   'properties': {'title': 'Beechwood Road', 'ccodes': 'GB', 'fclasses': 'R'},
   'geometry': {'type': 'LineString',
    'coordinates': [[-0.126618, 51.5892173],
     [-0.1256649, 51.5893584],
     [-0.1254461, 51.5893656],
     [-0.1250202, 51.5893694],
     [-0