In [1]:
!pip install --quiet --ignore-installed --no-cache-dir --force-reinstall git+https://github.com/CSIRO-enviro-informatics/loci-scripts.git@master
!pip install --quiet numpy pandas

Collecting git+https://github.com/CSIRO-enviro-informatics/loci-scripts.git@master
  Cloning https://github.com/CSIRO-enviro-informatics/loci-scripts.git (to revision master) to /tmp/pip-req-build-0wqumv68
  Running command git clone -q https://github.com/CSIRO-enviro-informatics/loci-scripts.git /tmp/pip-req-build-0wqumv68
  Running command git submodule update --init --recursive -q
Collecting XlsxWriter
  Downloading XlsxWriter-1.2.8-py2.py3-none-any.whl (141 kB)
[K     |████████████████████████████████| 141 kB 1.5 MB/s eta 0:00:01
[?25hCollecting SPARQLWrapper<=1.8.5
  Downloading SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Collecting rdflib<=4.2.2
  Downloading rdflib-4.2.2-py3-none-any.whl (344 kB)
[K     |████████████████████████████████| 344 kB 6.1 MB/s eta 0:00:01
[?25hCollecting python-dotenv<=0.11.0
  Downloading python_dotenv-0.11.0-py2.py3-none-any.whl (17 kB)
Collecting requests<=2.23.0
  Downloading requests-2.23.0-py2.py3-none-any.whl (58 kB)
[K     |██████████████

# Reapportioning ASGS16 LGA to Other feature examples using the pyloci API

This notebook shows how to perform reapportioning with the [pyloci](https://pypi.org/project/pyloci/) library, using a simple CSV file as input. The pyloci library interfaces directly with the SPARQL API for the Loc-I Cache GraphDB.

In [2]:
import numpy as np
import pandas as pd
# Read the comma-separated LGA test-case file into a DataFrame.
df = pd.read_csv('loci-lga-sa1-test-case1.csv', sep=',')
# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,LGA,Estimated Agricultural Area,Estimated value 2016
0,http://linked.data.gov.au/dataset/asgs2016/loc...,608800,118073000
1,http://linked.data.gov.au/dataset/asgs2016/loc...,144111000,39100
2,http://linked.data.gov.au/dataset/asgs2016/loc...,254500,142090000
3,http://linked.data.gov.au/dataset/asgs2016/loc...,18900,5195000
4,http://linked.data.gov.au/dataset/asgs2016/loc...,74100,55850000


In [3]:
# Make sure the configuration for the SPARQL endpoint to hit is set.
import os
from dotenv import load_dotenv, find_dotenv
# Locate the .env file, load it, then print the configured endpoint
# (prints None when SPARQL_ENDPOINT is not defined).
dotenv_file = find_dotenv()
load_dotenv(dotenv_file)
print(os.environ.get("SPARQL_ENDPOINT"))

http://db.loci.cat/repositories/loci-cache


In [4]:
#import the reapportioning module from pyloci
from pyloci import reapportioning


from pyloci.api.util import Util as API_Util
from pyloci.sparql import util as sparql_util
import time
import json
import ipywidgets as widgets

# Base URL of the Loc-I integration API that the overlap queries are sent to.
LOCI_INTEGRATION_API = "https://api.loci.cat/api/v1"
# Helper object whose query_api_location_overlaps method is called in the loop below.
api_util = API_Util()
# No credentials are configured in this notebook.
auth = None

In [5]:
# Feature types that have an Albers area - see
# https://github.com/CSIRO-enviro-informatics/asgs-dataset/issues/15 for more info.
# Each entry is a (label, feature-type URI) pair: the label is what the dropdown
# shows and what the result column is named; the URI is sent to the overlaps API.
datatypes = [
    ('mb', 'http://linked.data.gov.au/def/asgs#MeshBlock'),
    ('sa1', 'http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1'),
    ('sa2', 'http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2'),
    # Labelled 'sa3' so the label matches the StatisticalAreaLevel3 URI
    # (it was previously shown as 'gccsa').
    ('sa3', 'http://linked.data.gov.au/def/asgs#StatisticalAreaLevel3'),
    ('sa4', 'http://linked.data.gov.au/def/asgs#StatisticalAreaLevel4'),
    ('ste', 'http://linked.data.gov.au/def/asgs#StateOrTerritory'),
    ('ra', 'http://linked.data.gov.au/def/asgs#RemotenessArea'),
    ('cc', 'http://linked.data.gov.au/def/geofabric#ContractedCatchment'),
    ('rr', 'http://linked.data.gov.au/def/geofabric#RiverRegion'),
    ('dd', 'http://linked.data.gov.au/def/geofabric#DrainageDivision'),
]

In [6]:
# Dropdown for picking the target feature type; its options are the
# (label, URI) pairs listed in `datatypes`.
dropdown_settings = dict(options=datatypes, description='Feature Type:')
ft_selector_widget = widgets.Dropdown(**dropdown_settings)


In [7]:
# Render the feature-type dropdown so a target type can be chosen.
display(ft_selector_widget)

Dropdown(description='Feature Type:', options=(('mb', 'http://linked.data.gov.au/def/asgs#MeshBlock'), ('sa1',…

In [8]:
# Value of the currently selected option (the feature-type URI from `datatypes`).
ft_selector_widget.value

'http://linked.data.gov.au/def/asgs#MeshBlock'

In [9]:
# Label of the currently selected option (the short code from `datatypes`).
ft_selector_widget.label

'mb'

In [10]:
# Capture the dropdown's current selection: the short label names the result
# column, and the URI is the feature type passed to the overlaps query.
# NOTE(review): the results below depend on this interactive selection.
toFeatureTypeLabel = ft_selector_widget.label
toFeatureType = ft_selector_widget.value

In [11]:
%%time
target_col = "Estimated value 2016"
res_df_cols = {
    "LGA" : [],
    toFeatureTypeLabel : [],
    "percent_overlap": [],
    target_col: [],
    "reapportioned_data": [],
}
for index, row in df.iterrows():
    fromFeature = row['LGA']
    print("{}, {}".format(row['LGA'], str(row[target_col])))
    print("Querying overlaps of {} to {}...".format(fromFeature, toFeatureType))
    tic = time.perf_counter()
    list_locations = api_util.query_api_location_overlaps(fromFeature, toFeatureType, LOCI_INTEGRATION_API, crosswalk='true')
    toc = time.perf_counter()
    print(f"query_api_location_overlaps took {toc - tic:0.4f} seconds")
    #print(list_locations)
    for o in list_locations['overlaps']:
        res_df_cols["LGA"].append(fromFeature)
        res_df_cols[toFeatureTypeLabel].append(o['uri'])
        res_df_cols["percent_overlap"].append(o['forwardPercentage'])
        res_df_cols[target_col].append(row[target_col])
        reapportioned = (float(o['forwardPercentage'])/100.0)*float(row[target_col])
        res_df_cols["reapportioned_data"].append(reapportioned)

        

http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880, 118073000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880 to http://linked.data.gov.au/def/asgs#MeshBlock...
query_api_location_overlaps took 0.7305 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070, 39100
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070 to http://linked.data.gov.au/def/asgs#MeshBlock...
query_api_location_overlaps took 2.4002 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080, 142090000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080 to http://linked.data.gov.au/def/asgs#MeshBlock...
query_api_location_overlaps took 3.0872 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/10050, 5195000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/10050 to http://linked.data.gov.au/

In [12]:
# Assemble the accumulated columns into a DataFrame with a fixed column order.
result_columns = ['LGA', toFeatureTypeLabel, "percent_overlap", target_col, "reapportioned_data"]
res_df = pd.DataFrame(res_df_cols, columns=result_columns)
# Display settings (global for the session): two decimal places for floats,
# and a max column width of 0, carried over unchanged from the original cell.
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_colwidth', 0)
res_df

Unnamed: 0,LGA,mb,percent_overlap,Estimated value 2016,reapportioned_data
0,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/meshblock/50218930000,0.00,118073000,534.45
1,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/meshblock/50218752000,0.00,118073000,2660.66
2,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/meshblock/50383200000,0.01,118073000,7551.87
3,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/meshblock/50219110000,5.17,118073000,6109849.68
4,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/meshblock/50383220000,0.01,118073000,11699.95
...,...,...,...,...,...
2625,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/20110,http://linked.data.gov.au/dataset/asgs2016/meshblock/20002010000,0.00,55850000,335.93
2626,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/20110,http://linked.data.gov.au/dataset/asgs2016/meshblock/20688050000,0.10,55850000,58093.53
2627,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/20110,http://linked.data.gov.au/dataset/asgs2016/meshblock/20000431000,0.37,55850000,206086.81
2628,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/20110,http://linked.data.gov.au/dataset/asgs2016/meshblock/20001240000,0.00,55850000,557.55
