## DigiMap 

http://digimap.edina.ac.uk

import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import sys

import project functions

In [2]:
# Add the parent directory to the import path so the modules below can be found.
sys.path.append("..")
# NOTE(review): later cells use `h3`, `digi_category_not` and `digi_class_not`
# without importing them by name -- presumably they arrive through these star
# imports; confirm, and consider importing the required names explicitly.
from src.geoIndexFunctions import *
from src.plotFunctions import *
from config import *

Points of Interest [CSV geospatial data], Scale 1:1250, Items: 708329, Updated: 5 March 2019, Ordnance Survey (GB), Using: EDINA Digimap Ordnance Survey Service, <https://digimap.edina.ac.uk>, Downloaded: 2019-05-12 09:25:26.149

We downloaded all POIs from Digimap as CSV, imported them into QGIS, selected the points within the London GADM boundary, and saved the selection as a shapefile reprojected to WGS84 (EPSG:4326), because the original data is projected in British National Grid (BNG).

In [46]:
digi_path = '../../data/DigiMap/poi_2988870/poi_wgs4326_london_gadm.shp'
# `crs` is not a `read_file` parameter: it was only forwarded to the I/O
# backend as an extra keyword, so it is dropped here. The CRS is expected to
# come from the shapefile itself (it was exported from QGIS in EPSG:4326).
digi_shp = gpd.read_file(digi_path)
# keep only the attributes needed for the classification join
digi_shp_new = digi_shp[['Name', 'PointX Cla', 'Brand', 'geometry']]
digi_shp_new.head(1)

Unnamed: 0,Name,PointX Cla,Brand,geometry
0,Chessington,3190259,,POINT (-0.3036005237590843 51.35673720534218)


We downloaded points-of-interest-classification-scheme.pdf from https://www.ordnancesurvey.co.uk/business-and-government/help-and-support/products/points-of-interest.html and rearranged its values into a spreadsheet so that they can be joined to the POI data on the `PointX Cla` code.

In [48]:
# Load the rearranged classification scheme; the id columns are read as
# strings rather than numbers.
digi_class_path = '../data input/Digimap/Edina/points-of-interest-classification-scheme.xlsx'
id_columns = ['group id', 'category id', 'class id', 'PointX Cla']
digi_class_poi = pd.read_excel(
    digi_class_path,
    index_col=None,
    dtype={column: str for column in id_columns},
)
digi_class_poi.head(1)

Unnamed: 0,group id,group name,category id,category name,class id,class name,PointX Cla
0,1,"Accommodation, eating and drinking",1,"Accommodation, eating and drinking",3,Bed and breakfast and backpacker Accommodation,1010003


Now merge the geodataframe with the classification dataframe to add the classification info into the geodataframe.

In [6]:
# Left join: every POI row is kept and gains the group/category/class columns
# matching its 'PointX Cla' code.
digi_poi = pd.merge(
    left=digi_shp_new,
    right=digi_class_poi,
    how='left',
    on='PointX Cla',
)
digi_poi.head(1)

Unnamed: 0,Name,PointX Cla,Brand,geometry,group id,group name,category id,category name,class id,class name
0,Chessington,3190259,,POINT (-0.3036005237590843 51.35673720534218),3,Attractions,19,Landscape features,259,Trigonometric points


In [7]:
digi_poi.count()

Name             357081
PointX Cla       357081
Brand             43864
geometry         357081
group id         357081
group name       357081
category id      357081
category name    357081
class id         357081
class name       357081
dtype: int64

Explore the groups and categories within our dataset

In [8]:
digi_poi['group name'].nunique()

9

In [9]:
digi_poi['group name'].unique()

array(['Attractions', 'Education and health', 'Commercial services',
       'Manufacturing and production', 'Public infrastructure',
       'Sport and entertainment', 'Transport',
       'Accommodation, eating and drinking', 'Retail'], dtype=object)

We need to keep only the groups, categories and classes that help us address the project question.

In [10]:
# drop every POI that belongs to the 'Commercial services' group
is_commercial = digi_poi['group name'].isin(['Commercial services'])
digi_poi = digi_poi[~is_commercial]
len(digi_poi)

228945

In [11]:
digi_poi['category name'].nunique()

38

In [12]:
# drop the POIs whose category is listed in `digi_category_not`
is_excluded_category = digi_poi['category name'].isin(digi_category_not)
digi_poi = digi_poi[~is_excluded_category]
len(digi_poi)

150276

In [13]:
digi_poi['class name'].nunique()

263

In [14]:
# digi_poi['class name'].unique()

In [15]:
# drop the POIs whose class is listed in `digi_class_not`
is_excluded_class = digi_poi['class name'].isin(digi_class_not)
digi_poi = digi_poi[~is_excluded_class]
len(digi_poi)

121978

In [16]:
# renumber the remaining rows 0..n-1 after the filters above, discarding the old index
digi_poi = digi_poi.reset_index(drop=True)

In [17]:
# Vectorised coordinate extraction (replaces one Python lambda call per row).
# `assign` returns a new frame, so no column is written onto a filtered slice
# of the merged data.
poi_points = gpd.GeoSeries(digi_poi['geometry'])
digi_poi = digi_poi.assign(lat=poi_points.y, lon=poi_points.x)

## Visualisation

### Folium

In [18]:
import folium
from folium.plugins import MarkerCluster

# base map centred on London, plus a cluster layer that will hold the POI markers
london_centre = [51.509091, -0.124038]
digi_map = folium.Map(location=london_centre, zoom_start=11)
marker_cluster = MarkerCluster()
marker_cluster.add_to(digi_map)

In [19]:
# [lat, lon] pairs as plain Python lists, in row order
locations = digi_poi.loc[:, ["lat", "lon"]]
locationlist = locations.values.tolist()

In [20]:
# point cluster
# Only the first MAX_MARKERS points are drawn, to keep the map light.
MAX_MARKERS = 1000
# Pair each location with its class name by position. The previous label-based
# lookup `digi_poi['class name'][point]` only matched the positional
# `locationlist` when the index had been reset to 0..n-1 beforehand.
class_names = digi_poi['class name'].tolist()
for location, class_name in zip(locationlist[:MAX_MARKERS], class_names):
    folium.Marker(location, popup=class_name).add_to(marker_cluster)

In [21]:
digi_map#.save('digi_leaflet.html')

In [21]:
# digi_poi.to_csv('/home/lefteris/Desktop/trajectories/data/DigiMap/poi_2988870/digimap_poi_london.csv',index=False)

### Get the hex list for London boundary

In [49]:
csv_path= '../data output/0 london_boundary_hex9_list.csv'

In [51]:
all_hexagons_df = pd.read_csv(csv_path)

In [52]:
all_hexagons_df.head()

Unnamed: 0,hex9
0,89195dadc07ffff
1,89194e6d0a7ffff
2,89194ac2e7bffff
3,89194ad5c93ffff
4,89194ad2473ffff


In [28]:
all_hexagons_df.count()

hex9    17047
dtype: int64

### Convert POI to Hex 

In [29]:
hex_digi = digi_poi.copy()

In [30]:
hex_digi.head(1)

Unnamed: 0,Name,PointX Cla,Brand,geometry,group id,group name,category id,category name,class id,class name,lat,lon
0,Physiotherapy Musculoskeletal Services,5280365,,POINT (0.006898795817570942 51.54251803721387),5,Education and health,28,Health practitioners and establishments,365,Clinics and health centres,51.542518,0.006899


In [31]:
# value passed to h3 as the resolution argument; also used to name the hex column
APERTURE_SIZE = 9
hex_col = 'hex'+str(APERTURE_SIZE)

# find the hexagon containing each point (h3 expects lat first, then lon)
hex_digi[hex_col] = [
    h3.geo_to_h3(lat, lon, APERTURE_SIZE)
    for lat, lon in zip(hex_digi['lat'], hex_digi['lon'])
]
# select through `hex_col` rather than a hard-coded 'hex9', so the cell keeps
# working when APERTURE_SIZE is changed
hex_digi_only = hex_digi[['class name', hex_col]]

In [33]:
hex_digi_only.head()

Unnamed: 0,class name,hex9
0,Clinics and health centres,89194e69993ffff
1,Clinics and health centres,89195da66cbffff
2,Chemists and pharmacies,89194ac3063ffff
3,Chemists and pharmacies,89194adaddbffff
4,Chemists and pharmacies,89194ad894bffff


### Groupby Hex and count amenities

In [34]:
# total number of POIs per hexagon
hex_groups = hex_digi_only.groupby(hex_col)
hex_digi_total_cnt = hex_groups.size().reset_index(name='total_cnt')

# POIs per hexagon and class, pivoted to one column per class (0 where absent)
class_counts = hex_digi_only.groupby([hex_col, 'class name']).size()
hex_digi_poi_type_cnt = class_counts.unstack(fill_value=0)

In [35]:
hex_digi_total_cnt.sort_values(by='total_cnt',ascending=False).head()

Unnamed: 0,hex9,total_cnt
8977,89195da49cfffff,397
2895,89194ad32d7ffff,346
4745,89194ada44bffff,344
8961,89195da4987ffff,339
8936,89195da4913ffff,299


In [36]:
hex_digi_poi_type_cnt.head()

class name,Adult shops,Adult venues,Airports and landing strips,Alcoholic drinks,Alcoholic drinks including off licences and wholesalers,Amusement parks and arcades,Angling and sports fishing,"Animal feeds, pet foods, hay and straw",Aquaria and sea life centres,Archaeological sites,...,Training providers and centres,"Tram, metro and light railway stations and stops",Travel agencies,Underground network stations,Unspecified and other attractions,Unspecified and other schools,Walk-in centre,Watersports,Youth accommodation,Zoos and animal collections
hex9,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89194ac0007ffff,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
89194ac000bffff,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
89194ac001bffff,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
89194ac0023ffff,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
89194ac002bffff,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
# five most frequent POI classes inside hexagon 89195da4987ffff
hex_digi_poi_type_cnt.loc['89195da4987ffff'].sort_values(ascending = False).head()

class name
Restaurants                          102
Pubs, bars and inns                   45
Unspecified and other attractions     26
Fast food and takeaway outlets        21
Cafes, snack bars and tea rooms       19
Name: 89195da4987ffff, dtype: int64

### Add the remaining London hexagons (those without any POI) to hex_digi_total_cnt and hex_digi_poi_type_cnt

In [38]:
# Right-join onto the full London hexagon list so that hexagons without any POI
# are kept as well; their missing counts are filled with 0.
all_hex_digi_total_cnt = (
    pd.merge(hex_digi_total_cnt, all_hexagons_df, how='right', on=hex_col)
    .fillna(0)
)

all_hex_digi_poi_type_cnt = (
    pd.merge(hex_digi_poi_type_cnt, all_hexagons_df, how='right', on=hex_col)
    .fillna(0)
)

In [39]:
# The merge/fillna step left the count columns as float64: pick those columns ...
cols1 = all_hex_digi_total_cnt.select_dtypes(include='float64').columns
cols2 = all_hex_digi_poi_type_cnt.select_dtypes(include='float64').columns
# ... and downcast each of them to the smallest integer type that fits
total_as_int = all_hex_digi_total_cnt[cols1].apply(pd.to_numeric, downcast='integer')
all_hex_digi_total_cnt[cols1] = total_as_int
type_as_int = all_hex_digi_poi_type_cnt[cols2].apply(pd.to_numeric, downcast='integer')
all_hex_digi_poi_type_cnt[cols2] = type_as_int

In [40]:
all_hex_digi_total_cnt.head()

Unnamed: 0,hex9,total_cnt
0,89194ac0007ffff,7
1,89194ac000bffff,3
2,89194ac001bffff,2
3,89194ac0023ffff,5
4,89194ac002bffff,3


In [41]:
all_hex_digi_poi_type_cnt.head()

Unnamed: 0,hex9,Adult shops,Adult venues,Airports and landing strips,Alcoholic drinks,Alcoholic drinks including off licences and wholesalers,Amusement parks and arcades,Angling and sports fishing,"Animal feeds, pet foods, hay and straw",Aquaria and sea life centres,...,Training providers and centres,"Tram, metro and light railway stations and stops",Travel agencies,Underground network stations,Unspecified and other attractions,Unspecified and other schools,Walk-in centre,Watersports,Youth accommodation,Zoos and animal collections
0,89194ac0007ffff,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,89194ac000bffff,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,89194ac001bffff,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,89194ac0023ffff,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,89194ac002bffff,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
# all_hex_digi_total_cnt.to_csv('./1 london_boundary_hex9_total_digi_cnt.csv', index=False)

In [43]:
# all_hex_digi_poi_type_cnt.to_csv('./2 london_boundary_hex9_digi_type_cnt.csv', index=False)

In [44]:
all_hex_digi_poi_type_cnt_total_cnt = all_hex_digi_poi_type_cnt.copy()
# Sum only the numeric per-class count columns. The frame also carries the
# string hexagon-id column, which a plain row-wise sum cannot add to integers
# (pandas >= 2.0 no longer drops non-numeric columns silently).
all_hex_digi_poi_type_cnt_total_cnt['total_cnt'] = all_hex_digi_poi_type_cnt_total_cnt.sum(axis=1, numeric_only=True)

In [45]:
# all_hex_digi_poi_type_cnt_total_cnt.to_csv('./3 london_boundary_hex9_digi_type_cnt_total_cnt.csv', index=False)

### Optional: Save it as shapefile to check in GIS 

In [62]:
gdf_all_hex_with_geom = all_hex_digi_total_cnt.copy()

In [63]:
# that was for all_hex_digi_poi_type_cnt_total_cnt
# gdf_all_hex_with_geom.rename(columns={"Cookers and stoves – non electrical": "Cookers and stoves"}, inplace=True)

In [64]:
from shapely.geometry import Polygon

# one boundary polygon per hexagon id; geo_json=True requests the GeoJSON form of the boundary from h3
gdf_all_hex_with_geom['geometry'] = [
    Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))
    for hex_id in gdf_all_hex_with_geom[hex_col]
]

In [66]:
# Convert into a GeoDataFrame in EPSG:4326. The authority string replaces the
# deprecated {'init': 'epsg:4326'} dict form.
gdf_all_hex_with_geom = gpd.GeoDataFrame(gdf_all_hex_with_geom, crs='EPSG:4326', geometry='geometry')

In [67]:
# gdf_all_hex_with_geom.to_file(filename= "all_hex_digi_total_cnt_diversity_with_geom.shp" , driver = 'ESRI Shapefile')

### Appendix

In [16]:
# # write the unique classification values to a csv
# digi_list = digi_shp['PointX Cla'].unique().tolist()
# digi_list

# import csv

# res = digi_list
# csvfile = "digi_list.csv"

# #Assuming res is a flat list
# with open(csvfile, "w") as output:
#     writer = csv.writer(output, lineterminator='\n')
#     for val in res:
#         writer.writerow([val]) 