In [1]:
!pip install git+https://github.com/CSIRO-enviro-informatics/loci-scripts.git@master
!pip install numpy pandas

Collecting git+https://github.com/CSIRO-enviro-informatics/loci-scripts.git@master
  Cloning https://github.com/CSIRO-enviro-informatics/loci-scripts.git (to revision master) to /tmp/pip-req-build-supzq48c
  Running command git clone -q https://github.com/CSIRO-enviro-informatics/loci-scripts.git /tmp/pip-req-build-supzq48c
  Running command git submodule update --init --recursive -q
Collecting rdflib<=4.2.2
  Downloading rdflib-4.2.2-py3-none-any.whl (344 kB)
[K     |████████████████████████████████| 344 kB 15.6 MB/s eta 0:00:01
[?25hCollecting python-dotenv<=0.11.0
  Downloading python_dotenv-0.11.0-py2.py3-none-any.whl (17 kB)
Collecting requests<=2.23.0
  Downloading requests-2.23.0-py2.py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 17.1 MB/s eta 0:00:01
Building wheels for collected packages: pyloci
  Building wheel for pyloci (setup.py) ... [?25ldone
[?25h  Created wheel for pyloci: filename=pyloci-0.1.1.25-py3-none-any.whl size=34955 sha256=fb37c5e

# Reapportioning ASGS16 LGA to SA2: an example using the pyloci API

This notebook shows how to perform reapportioning with the [pyloci](https://pypi.org/project/pyloci/) library, using a simple CSV file as input. The pyloci library interfaces directly with the SPARQL API for the Loc-I Cache GraphDB.

In [2]:
import numpy as np
import pandas as pd
# Read the comma-separated test-case file into a DataFrame.
df = pd.read_csv('loci-lga-sa1-test-case1.csv', sep=',')
# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,LGA,Estimated Agricultural Area,Estimated value 2016
0,http://linked.data.gov.au/dataset/asgs2016/loc...,608800,118073000
1,http://linked.data.gov.au/dataset/asgs2016/loc...,144111000,39100
2,http://linked.data.gov.au/dataset/asgs2016/loc...,254500,142090000
3,http://linked.data.gov.au/dataset/asgs2016/loc...,18900,5195000
4,http://linked.data.gov.au/dataset/asgs2016/loc...,74100,55850000


In [3]:
# Make sure the configuration for the SPARQL endpoint is set: load variables from
# the nearest .env file and echo the endpoint that will be used.
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())
print(os.environ.get("SPARQL_ENDPOINT"))

http://db.loci.cat/repositories/loci-cache


In [4]:
# Standard library: used further down to time each overlap query.
import time

# pyloci: the reapportioning module together with the API and SPARQL utilities.
from pyloci import reapportioning
from pyloci.api.util import Util as API_Util
from pyloci.sparql import util as sparql_util

# NOTE(review): `auth` is not referenced by the cells shown here; presumably a
# placeholder for endpoint credentials -- confirm before removing.
auth = None
# Helper object whose query_api_location_overlaps method is called per LGA below.
api_util = API_Util()


In [5]:
# URI of the feature type each LGA is reapportioned to (ASGS Statistical Area Level 2);
# passed as the target type to the overlap query.
toFeatureType = "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2"
# Base URL of the Loc-I integration API, passed to the overlap query.
LOCI_INTEGRATION_API = "https://api.loci.cat/api/v1"

In [6]:
%%time
target_col = "Estimated value 2016"
res_df_cols = {
    "LGA" : [],
    "SA2" : [],
    "percent_overlap": [],
    target_col: [],
    "reapportioned_data": [],
}
for index, row in df.iterrows():
    fromFeature = row['LGA']
    print("{}, {}".format(row['LGA'], str(row[target_col])))
    print("Querying overlaps of {} to {}...".format(fromFeature, toFeatureType))
    tic = time.perf_counter()
    list_locations = api_util.query_api_location_overlaps(fromFeature, toFeatureType, LOCI_INTEGRATION_API, crosswalk='true')
    toc = time.perf_counter()
    print(f"query_api_location_overlaps took {toc - tic:0.4f} seconds")
    #print(list_locations)
    for o in list_locations['overlaps']:
        res_df_cols["LGA"].append(fromFeature)
        res_df_cols["SA2"].append(o['uri'])
        res_df_cols["percent_overlap"].append(o['forwardPercentage'])
        res_df_cols[target_col].append(row[target_col])
        reapportioned = (float(o['forwardPercentage'])/100.0)*float(row[target_col])
        res_df_cols["reapportioned_data"].append(reapportioned)

        

http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880, 118073000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2...
query_api_location_overlaps took 2.0697 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070, 39100
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2...
query_api_location_overlaps took 8.7663 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080, 142090000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2...
query_api_location_overlaps took 10.9773 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/10050, 5195000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentare

In [7]:
# Assemble the accumulated columns into one table, fixing the column order explicitly.
res_df = pd.DataFrame(
    res_df_cols,
    columns=['LGA', "SA2", "percent_overlap", target_col, "reapportioned_data"],
)
# Display settings (global for the session): two decimals for floats, and a column
# width setting of 0 so the long URIs are shown in full in the rendered table.
pd.set_option('display.float_format', '{:.2f}'.format)
pd.options.display.max_colwidth = 0
res_df

Unnamed: 0,LGA,SA2,percent_overlap,Estimated value 2016,reapportioned_data
0,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509021242,100.0,118073000,118073001.58
1,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/401011001,67.31,39100,26318.24
2,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/401011002,32.69,39100,12782.36
3,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011225,0.71,142090000,1005309.74
4,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011232,3.31,142090000,4702543.39
5,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011227,0.58,142090000,819770.3
6,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011233,2.35,142090000,3335629.02
7,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011226,93.0,142090000,132141451.0
8,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/509011229,0.06,142090000,85364.94
9,http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/10050,http://linked.data.gov.au/dataset/asgs2016/statisticalarealevel2/109011172,33.18,5195000,1723723.89
