In [1]:
!pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ 'pyloci==0.1.1.26'
!pip install numpy pandas

Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple/
Collecting pyloci==0.1.1.26
  Downloading https://test-files.pythonhosted.org/packages/f4/3a/badbbe1ed3be821c8c3f28ea5886d5af3890eea724266157ab04eab48fdd/pyloci-0.1.1.26-py3-none-any.whl (25 kB)
Installing collected packages: pyloci
  Attempting uninstall: pyloci
    Found existing installation: pyloci 0.1.1.25
    Uninstalling pyloci-0.1.1.25:
      Successfully uninstalled pyloci-0.1.1.25
Successfully installed pyloci-0.1.1.26


# Reapportioning example using the pyloci API

This notebook shows how to perform reapportioning with the [pyloci](https://pypi.org/project/pyloci/) library, using a simple CSV file as input. The pyloci library interfaces directly with the SPARQL API for the Loc-I Cache GraphDB.

In [2]:
import numpy as np
import pandas as pd
# Read the sample input CSV and display its contents.
df = pd.read_csv('loci-test-case-A.csv', sep=',')
df

Unnamed: 0,Address,Count
0,http://linked.data.gov.au/dataset/geofabric/co...,100
1,http://linked.data.gov.au/dataset/geofabric/co...,200


In [3]:
# Make sure the SPARQL endpoint configuration is set before calling pyloci.
import os
from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its settings, then show the configured endpoint.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
print(os.environ.get("SPARQL_ENDPOINT"))

http://db.loci.cat/repositories/loci-cache


In [4]:
#import the reapportioning module from pyloci
from pyloci import reapportioning

In [5]:
# Run the pyloci reapportioning against the input CSV and write the results to a CSV file.
# The file names are defined once so the variables and the call cannot drift apart.
input_file = 'loci-test-case-A.csv'
output_file = 'output.csv'
r = reapportioning.entrypoint(input_file, verbose_mode=False, output_to_file=True, outputfile=output_file)

loci-test-case-A.csv


In [6]:
# Load the reapportioning results from the file written by the previous cell
# (output_file is defined there), rather than repeating the file name.
df = pd.read_csv(output_file, delimiter=',')

In [7]:
#show how many matches found
len(df)

2186

In [8]:
# Coerce the reapportioned counts to numbers; values that cannot be parsed become NaN.
df['Count reapportioned'] = pd.to_numeric(df['Count reapportioned'], errors='coerce')

# Show a sample; a fixed seed keeps the displayed rows stable across re-runs.
df.sample(5, random_state=0)

Unnamed: 0,from,Count,to,Count reapportioned
739,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/gnaf-2016-05...,
2143,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/gnaf-2016-05...,
1978,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/gnaf-2016-05...,
1132,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/gnaf-2016-05...,
2141,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/gnaf-2016-05...,


In [9]:
# Keep only the rows whose target feature is a meshblock.
# na=False treats missing 'to' values as non-matches instead of breaking the boolean mask;
# regex=False matches the literal substring.
df_mb_rows = df[df["to"].str.contains("meshblock", na=False, regex=False)]
df_mb_rows


Unnamed: 0,from,Count,to,Count reapportioned
0,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.756541
1,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.756541
2,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.634379
3,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.634379
4,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.000575
5,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.000575
6,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.383239
7,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,0.383239
8,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,1.551541
9,<http://linked.data.gov.au/dataset/geofabric/c...,100,http://linked.data.gov.au/dataset/asgs2016/mes...,1.551541


In [10]:
# Narrow the meshblock rows to the source feature(s) whose URI contains "122".
# NOTE: this is a plain substring match, so every 'from' URI containing "122" is selected.
# na=False treats missing 'from' values as non-matches; regex=False matches the literal text.
df_mb_rows_onecc = df_mb_rows[df_mb_rows["from"].str.contains("122", na=False, regex=False)]
df_mb_rows_onecc

Unnamed: 0,from,Count,to,Count reapportioned
392,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,5.192532
393,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,5.192532
394,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,14.045446
395,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,14.045446
396,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,2.938967
397,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,2.938967
398,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,0.45558
399,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,0.45558
400,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,49.960128
401,<http://linked.data.gov.au/dataset/geofabric/c...,200,http://linked.data.gov.au/dataset/asgs2016/mes...,49.960128


In [11]:
#unique values in from column
df_mb_rows_onecc['from'].unique()


array(['<http://linked.data.gov.au/dataset/geofabric/contractedcatchment/12202179>'],
      dtype=object)

In [12]:
# Distinct target feature URIs among the selected rows.
df_mb_rows_onecc['to'].unique()


array(['http://linked.data.gov.au/dataset/asgs2016/meshblock/30563893800',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30563500300',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30564036900',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30563521800',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30562930700',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30563517100',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30562930200',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30562989200',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30563435600',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30478100000',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30562424600',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30409140000',
       'http://linked.data.gov.au/dataset/asgs2016/meshblock/30563478400',
       'http://linked.dat

In [13]:
# Target feature URI for every selected row.
df_mb_rows_onecc['to']


392    http://linked.data.gov.au/dataset/asgs2016/mes...
393    http://linked.data.gov.au/dataset/asgs2016/mes...
394    http://linked.data.gov.au/dataset/asgs2016/mes...
395    http://linked.data.gov.au/dataset/asgs2016/mes...
396    http://linked.data.gov.au/dataset/asgs2016/mes...
397    http://linked.data.gov.au/dataset/asgs2016/mes...
398    http://linked.data.gov.au/dataset/asgs2016/mes...
399    http://linked.data.gov.au/dataset/asgs2016/mes...
400    http://linked.data.gov.au/dataset/asgs2016/mes...
401    http://linked.data.gov.au/dataset/asgs2016/mes...
402    http://linked.data.gov.au/dataset/asgs2016/mes...
403    http://linked.data.gov.au/dataset/asgs2016/mes...
404    http://linked.data.gov.au/dataset/asgs2016/mes...
405    http://linked.data.gov.au/dataset/asgs2016/mes...
406    http://linked.data.gov.au/dataset/asgs2016/mes...
407    http://linked.data.gov.au/dataset/asgs2016/mes...
408    http://linked.data.gov.au/dataset/asgs2016/mes...
409    http://linked.data.gov.a

In [14]:
# Largest reapportioned count among the selected rows.
max_count = df_mb_rows_onecc.loc[:, 'Count reapportioned'].max()
max_count

49.960128134504565

In [15]:
# Smallest reapportioned count among the selected rows.
min_count = df_mb_rows_onecc.loc[:, 'Count reapportioned'].min()
min_count

0.006123375036075975

In [16]:
# Spread between the largest and smallest reapportioned counts.
count_delta = max_count - min_count

In [17]:
import re

# The 'from' values are wrapped in angle brackets (e.g. '<http://...>'); unwrap to get a bare URI.
from_value = df_mb_rows_onecc['from'].unique()[0]
m = re.search(r'<(.*)>', from_value)
# Fall back to the raw value when it is not bracket-wrapped, rather than failing on m.group().
uri = m.group(1) if m else from_value
uri

'http://linked.data.gov.au/dataset/geofabric/contractedcatchment/12202179'

In [18]:
# You could use rdflib to dig further into the semantic description and find the geometry...
import rdflib

g = rdflib.Graph()
# Graph.load() was removed in rdflib 6; parse() with format="xml" is what load() did by default.
g.parse(uri, format="xml")

#for s,p,o in g:
#    print(s, p, o)

# Look up the geometry linked from the feature; if several rows come back, the last one wins.
geom_uri = None
for row in g.query(
            '''
            PREFIX geo: <http://www.opengis.net/ont/geosparql#>
            select ?g where { ?feature geo:hasGeometry ?g .}
            '''):
    geom_uri = row.g
if geom_uri is not None:
    print(geom_uri)

HTTPError: HTTP Error 500: INTERNAL SERVER ERROR

In [None]:
import requests

# Fetch the GeoJSON representation of the geometry.
# A timeout stops the cell hanging indefinitely on an unresponsive server.
header = {'Accept': 'application/json'}
r = requests.get(geom_uri, headers=header, timeout=30)
r.raise_for_status()  # fail here with the HTTP status rather than on a confusing JSON decode error
geojson_data = r.json()

# Request the same resource again with the '_view=centroid' query parameter.
params = {'_view': 'centroid'}
r = requests.get(geom_uri, headers=header, params=params, timeout=30)
r.raise_for_status()
centroid_geojson_data = r.json()
centroid_geojson_data

## Create a map showing the distribution of reapportioned data for meshblocks

The target features that received reapportioned counts are drawn in red; darker shading indicates a higher count.
The source feature geometry is overlaid in blue.

In [None]:
#map geometries
#draw the geometry for the feature on a map
import ipyleaflet as ipy 
import ipywidgets as ipyw
from ipyleaflet import GeoJSON, Map, Marker

#x_coord = centroid_geojson_data['coordinates'][1]
#y_coord = centroid_geojson_data['coordinates'][0]
#map = ipy.Map(center=[x_coord, y_coord], zoom=7)
#label = ipyw.Label(layout=ipyw.Layout(width='100%'))
#geo_json1 = GeoJSON(data=geojson_data, style = {'color': 'blue', 'opacity':0.1, 'weight':1.9, 'fillOpacity':0.1})
#map.add_layer(geo_json1)
#map

In [None]:
import rdflib, requests
from ipyleaflet import LayerGroup, basemaps

def get_geom_geojson_from_feature_uri(feature_uri):
    """Fetch the GeoJSON geometry for a linked-data feature.

    Loads the RDF description at ``feature_uri``, looks up its
    ``geo:hasGeometry`` value(s) and requests each geometry URI as JSON.

    Args:
        feature_uri: URI of the feature whose geometry is wanted.

    Returns:
        The parsed JSON of the last geometry fetched, or ``None`` when the
        feature description cannot be loaded, no geometry is linked, or a
        geometry request fails.
    """
    from urllib.error import URLError

    g = rdflib.Graph()
    try:
        # Graph.load() was removed in rdflib 6; parse() with format="xml" is
        # what load() did by default.
        g.parse(feature_uri, format="xml")
    except URLError as err:
        # A failing feature (e.g. HTTP 500) is reported and skipped so that a
        # caller looping over many features can carry on with the rest.
        print("Couldn't load feature description from %s: %s" % (feature_uri, err))
        return None

    geom_uri = geojson_data = None
    for row in g.query(
                '''
                PREFIX geo: <http://www.opengis.net/ont/geosparql#>
                select ?g where { ?feature geo:hasGeometry ?g .}
                '''):
        geom_uri = row.g
        if geom_uri is not None:
            header = {'Accept': 'application/json'}
            try:
                # The timeout stops one unresponsive server stalling the whole run.
                r = requests.get(geom_uri, headers=header, timeout=30)
            except requests.RequestException as err:
                print("Couldn't fetch geom from %s while processing %s: %s" % (geom_uri, feature_uri, err))
                return None
            if r.status_code != 200:
                print("HTTP status code: %s" % r.status_code)
                print("Couldn't fetch geom from %s while processing %s" % (geom_uri, feature_uri))
                return None
            geojson_data = r.json()
    return geojson_data

def calc_opacity(val, maxv, minv):
    """Scale ``val`` linearly into an opacity between 0 and 1.

    Args:
        val: The value to scale.
        maxv: The value that maps to full opacity (1).
        minv: The value that maps to zero opacity (0).

    Returns:
        ``(val - minv) / (maxv - minv)`` clamped to the range [0, 1].
        When ``maxv`` equals ``minv`` there is no range to scale over, so
        1.0 is returned instead of dividing by zero.
    """
    delta = maxv - minv
    if delta == 0:
        # Every value is identical; draw at full intensity.
        return 1.0
    proportion = (val - minv) / delta
    # Values outside [minv, maxv] would give an invalid opacity; clamp them.
    return min(max(proportion, 0.0), 1.0)

# Centre the map on the centroid fetched for the source feature.
# NOTE(review): presumably a GeoJSON Point, i.e. [longitude, latitude] — confirm.
# The values are passed to `center` in [index 1, index 0] order.
centroid_coords = centroid_geojson_data['coordinates']
x_coord = centroid_coords[1]
y_coord = centroid_coords[0]
map = ipy.Map(basemap=basemaps.CartoDB.Positron, center=[x_coord, y_coord], zoom=7)
label = ipyw.Label(layout=ipyw.Layout(width='100%'))

# All geometries are added to one layer group attached to the map.
layer_group = LayerGroup()
map.add_layer(layer_group)

# One red shape per target row; its fill opacity is scaled by the reapportioned count.
for _, target_row in df_mb_rows_onecc.iterrows():
    target_uri = target_row['to']
    target_count = target_row['Count reapportioned']
    print("%s %s" % (target_uri, target_count))
    geom = get_geom_geojson_from_feature_uri(target_uri)
    if geom is None:
        # Geometry could not be fetched; skip this target.
        continue
    target_style = {
        'color': 'black',
        'fillColor': 'red',
        'opacity': 0.5,
        'weight': 0.9,
        'fillOpacity': calc_opacity(target_count, max_count, min_count),
    }
    geo_json = GeoJSON(data=geom, style=target_style)
    layer_group.add_layer(geo_json)

# Overlay the source feature geometry in blue on top of the targets.
source_style = {'color': 'blue', 'opacity': 0.5, 'weight': 2, 'fillOpacity': 0.1}
geo_json = GeoJSON(data=geojson_data, style=source_style)
layer_group.add_layer(geo_json)

map