Reads a GeoPackage and writes a CSV of per-layer metadata (bounds, bounding-box geometry, geometry types and bounding-box labels).

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import warnings
from shapely import box

In [2]:
# relative paths to the mra geopackage files (one per nation, all rooted at 'mra/')
egpkg = 'mra/England/Public Sector England/England_GPKG/England_GPKG.gpkg'
sgpkg = 'mra/Scotland/Public Sector Scotland/Scotland_GPKG/Scotland_GPKG.gpkg'
# NOTE(review): the Wales path now carries the same 'mra/' prefix as the other two — confirm against the data layout
wgpkg = 'mra/Wales/Public Sector Wales/Wales_GPKG/Wales_GPKG.gpkg'

In [3]:
# Silence only the "Measured (M) geometry types are not supported" warning to keep
# cell output readable; warnings with any other message are still shown.
# (Presumably raised while the layers are read — confirm the source if it resurfaces.)
warnings.filterwarnings("ignore", message=".*Measured \\(M\\) geometry types are not supported.*")

In [4]:
# list the layers in the England geopackage and keep just their names in layer_list
pkg_layers = gpd.list_layers(egpkg)
layer_names = pkg_layers['name']
layer_list = layer_names.tolist()
layer_list

['Coalfield_Consultation_Area',
 'Mine_Entry',
 'Probable_Working',
 'Geological_Disturbance',
 'Outcrops',
 'Legal_Notice',
 'Licence_Area_of_Responsibility',
 'Licence_Area',
 'Parent_Phase',
 'In_Seam_Level_Contour',
 'In_Seam_Level',
 'Underground_Working',
 'Unlicensed_Opencast',
 'Spine_Roadways',
 'Working_Dates',
 'Licensed_Roadways',
 'Court_Order',
 'Court_Order_Amendment',
 'Licence_Table',
 'Seam_Table',
 'Shallow_Working',
 'England_GPKG']

In [5]:
# get the bounding box directional values
def getbbe(bvalue,dir):
    """Format one bounding-box coordinate as a direction-prefixed label.

    Parameters
    ----------
    bvalue : float or None
        The coordinate value. ``None`` or NaN means "no bound available".
    dir : str
        ``'WE'`` labels the value on the west/east axis; any other value
        labels it on the south/north axis.

    Returns
    -------
    str or None
        ``'E'``/``'N'`` followed by the truncated integer for values >= 0,
        ``'W'``/``'S'`` followed by the truncated magnitude for negative
        values, or ``None`` when ``bvalue`` is missing.
    """
    # `dir` shadows the builtin, but the parameter name is kept so keyword callers still work.
    if bvalue is None or np.isnan(bvalue):
        return None

    # int() truncates toward zero, so e.g. -0.5 becomes 0 and is labelled E/N.
    bvint = int(bvalue)
    if dir == 'WE':
        prefix = 'E' if bvint >= 0 else 'W'
    else:
        prefix = 'N' if bvint >= 0 else 'S'

    # The prefix already carries the sign, so emit the magnitude only
    # (avoids labels such as 'W-5').
    return prefix + str(abs(bvint))

In [6]:
# create a dataframe of values for each layer in the geopackage with bounds, geometry, geometry type and bounding box values
lbounds, lgeom, gtlist, bbw, bbs, bbe, bbn = [], [], [], [], [], [], []

for layer_name in layer_list:
    lay = gpd.read_file(egpkg, layer=layer_name)

    # total_bounds is indexed as (west, south, east, north) below; compute it once per layer
    layer_bounds = lay.geometry.total_bounds
    west, south, east, north = layer_bounds

    # direction-prefixed labels for each side of the bounding box
    # (named so that the builtin `eval` is no longer shadowed at notebook scope)
    bbw.append(getbbe(west, 'WE'))
    bbs.append(getbbe(south, 'NS'))
    bbe.append(getbbe(east, 'WE'))
    bbn.append(getbbe(north, 'NS'))

    lbounds.append(layer_bounds)
    # rectangular polygon covering the layer's extent
    lgeom.append(box(*layer_bounds))
    # distinct geometry types present in the layer
    gtlist.append(lay.geom_type.unique())

tlpd = pd.DataFrame({'name':layer_list,'bounds':lbounds,'geometry':lgeom,'geomtype':gtlist, 'Bounding_Box_W':bbw, 'Bounding_Box_E':bbe, 'Bounding_Box_N':bbn, 'Bounding_Box_S':bbs})
tlpd.head()

Unnamed: 0,name,bounds,geometry,geomtype,Bounding_Box_W,Bounding_Box_E,Bounding_Box_N,Bounding_Box_S
0,Coalfield_Consultation_Area,"[240000.0, 122000.0, 650000.0, 658500.0]","POLYGON ((650000 122000, 650000 658500, 240000...","[Polygon, None]",E240000,E650000,N658500,N122000
1,Mine_Entry,"[245082.8700000001, 125147.99000000022, 633740...","POLYGON ((633740 125147.99000000022, 633740 65...",[Point],E245082,E633740,N658335,N125147
2,Probable_Working,"[244983.41999999993, 125333.75999999978, 48961...","POLYGON ((489617 125333.75999999978, 489617 65...","[Polygon, None]",E244983,E489617,N651236,N125333
3,Geological_Disturbance,"[296168.0, 147000.0, 473000.0, 652998.0]","POLYGON ((473000 147000, 473000 652998, 296168...",[LineString],E296168,E473000,N652998,N147000
4,Outcrops,"[245408.7000000002, 124597.38000000082, 453883...","POLYGON ((453883 124597.38000000082, 453883 65...",[LineString],E245408,E453883,N652643,N124597


In [8]:
# Wrap the dataframe as a GeoDataFrame in EPSG:27700, then add a column holding
# each bounding polygon reprojected to EPSG:4326 (which can be plotted in folium)
lgpd = gpd.GeoDataFrame(tlpd, crs="EPSG:27700")
bbox_4326 = lgpd['geometry'].to_crs("EPSG:4326")
lgpd['epsg4326'] = bbox_4326
lgpd.head()

Unnamed: 0,name,bounds,geometry,geomtype,Bounding_Box_W,Bounding_Box_E,Bounding_Box_N,Bounding_Box_S,epsg4326
0,Coalfield_Consultation_Area,"[240000.0, 122000.0, 650000.0, 658500.0]","POLYGON ((650000 122000, 650000 658500, 240000...","[Polygon, None]",E240000,E650000,N658500,N122000,"POLYGON ((1.55745 50.9432, 1.98283 55.75521, -..."
1,Mine_Entry,"[245082.8700000001, 125147.99000000022, 633740...","POLYGON ((633740 125147.99, 633740 658335.54, ...",[Point],E245082,E633740,N658335,N125147,"POLYGON ((1.32843 50.97827, 1.72418 55.76185, ..."
2,Probable_Working,"[244983.41999999993, 125333.75999999978, 48961...","POLYGON ((489617 125333.76, 489617 651236, 244...","[Polygon, None]",E244983,E489617,N651236,N125333,"POLYGON ((-0.72368 51.0204, -0.57399 55.74622,..."
3,Geological_Disturbance,"[296168.0, 147000.0, 473000.0, 652998.0]","POLYGON ((473000 147000, 473000 652998, 296168...",[LineString],E296168,E473000,N652998,N147000,"POLYGON ((-0.95617 51.21754, -0.83815 55.76483..."
4,Outcrops,"[245408.7000000002, 124597.38000000082, 453883...","POLYGON ((453883 124597.38, 453883 652643, 245...",[LineString],E245408,E453883,N652643,N124597,"POLYGON ((-1.23319 51.01824, -1.14285 55.76414..."


In [9]:
# display the coordinate reference system attached to the GeoDataFrame
# (set to EPSG:27700 when lgpd was constructed)
lgpd.crs

<Projected CRS: EPSG:27700>
Name: OSGB36 / British National Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: United Kingdom (UK) - offshore to boundary of UKCS within 49°45'N to 61°N and 9°W to 2°E; onshore Great Britain (England, Wales and Scotland). Isle of Man onshore.
- bounds: (-9.01, 49.75, 2.01, 61.01)
Coordinate Operation:
- name: British National Grid
- method: Transverse Mercator
Datum: Ordnance Survey of Great Britain 1936
- Ellipsoid: Airy 1830
- Prime Meridian: Greenwich

In [None]:
# removes any layers with invalid geometry
# `tlpd` is a plain pandas DataFrame, so its 'geometry' column is an ordinary Series
# with no `.is_valid`; wrap it in a GeoSeries (index preserved) to get the validity mask.
valid_mask = gpd.GeoSeries(tlpd['geometry']).is_valid
vtlpd = tlpd.loc[valid_mask, :]

In [51]:
# outputs to csv
# NOTE(review): this writes the unfiltered `tlpd`, not the validity-filtered `vtlpd`
# built in the previous cell — confirm which frame is intended.
# With default to_csv settings the row index is also written as an unnamed first column.
tlpd.to_csv('gpkg_meta.csv')