In [1]:
!pip install git+https://github.com/jyucsiro/loci-scripts.git@feat-mar2020-cache
!pip install numpy pandas

Collecting git+https://github.com/jyucsiro/loci-scripts.git@feat-mar2020-cache
  Cloning https://github.com/jyucsiro/loci-scripts.git (to revision feat-mar2020-cache) to /tmp/pip-req-build-0fqmq_ls
  Running command git clone -q https://github.com/jyucsiro/loci-scripts.git /tmp/pip-req-build-0fqmq_ls
  Running command git checkout -b feat-mar2020-cache --track origin/feat-mar2020-cache
  Switched to a new branch 'feat-mar2020-cache'
  Branch 'feat-mar2020-cache' set up to track remote branch 'feat-mar2020-cache' from 'origin'.
  Running command git submodule update --init --recursive -q
Collecting XlsxWriter
  Downloading XlsxWriter-1.2.8-py2.py3-none-any.whl (141 kB)
[K     |████████████████████████████████| 141 kB 796 kB/s eta 0:00:01
[?25hCollecting SPARQLWrapper<=1.8.5
  Downloading SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Collecting rdflib<=4.2.2
  Downloading rdflib-4.2.2-py3-none-any.whl (344 kB)
[K     |████████████████████████████████| 344 kB 2.0 MB/s eta 0:00:01
[?25h

# Reapportioning ASGS16 LGA to SA1 example using the pyloci API

This notebook shows how you would perform reapportioning with the [pyloci](https://pypi.org/project/pyloci/) library, using a simple CSV file as input. The pyloci library interfaces directly with the SPARQL API for the Loc-I Cache GraphDB.

In [81]:
import numpy as np
import pandas as pd
# Read the comma-separated LGA test-case table into a DataFrame.
df = pd.read_csv('loci-lga-sa1-test-case1.csv', sep=',')
# Leave the frame as the last expression so the notebook renders what was read.
df

Unnamed: 0,LGA,Estimated Agricultural Area,Estimated value 2016
0,http://linked.data.gov.au/dataset/asgs2016/loc...,608800,118073000
1,http://linked.data.gov.au/dataset/asgs2016/loc...,144111000,39100
2,http://linked.data.gov.au/dataset/asgs2016/loc...,254500,142090000
3,http://linked.data.gov.au/dataset/asgs2016/loc...,18900,5195000
4,http://linked.data.gov.au/dataset/asgs2016/loc...,74100,55850000


In [82]:
# Make sure the SPARQL endpoint configuration is set: load variables from the
# .env file located by find_dotenv(), then print the value that was picked up.
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
print(os.environ.get("SPARQL_ENDPOINT"))

http://db.loci.cat/repositories/loci-cache


In [83]:
# Import the reapportioning module from pyloci.
# NOTE(review): `reapportioning` is not referenced in the cells visible here — confirm it is still needed.
from pyloci import reapportioning


# API_Util provides query_api_location_overlaps, which the query loop calls once per row.
from pyloci.api.util import Util as API_Util
# NOTE(review): `sparql_util` is not referenced in the cells visible here — confirm it is still needed.
from pyloci.sparql import util as sparql_util
# time.perf_counter() is used to time each overlaps query.
import time

# No credentials are configured.
# NOTE(review): `auth` is not passed to any call in the visible cells — confirm whether it is required.
auth = None
# Helper instance, constructed with no arguments, used for all overlaps queries.
api_util = API_Util()


In [84]:
# Feature type passed to query_api_location_overlaps as the target of each overlaps query (ASGS StatisticalAreaLevel1).
toFeatureType = "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1"
# Base URL passed to query_api_location_overlaps; note that this is the test-api host.
LOCI_INTEGRATION_API = "https://test-api.loci.cat/api/v1"

In [87]:
# Column-oriented accumulator: one independent list per output column. Every
# overlap reported by the API appends exactly one entry to each list, so the
# lists stay aligned row-for-row. Rebuilt from scratch on every run of the cell.
res_df_cols = {
    column: []
    for column in ("LGA", "SA1", "percent_overlap", "source_data", "reapportioned_data")
}
# Name of the CSV column whose values are split across the overlapping features.
target_col = "Estimated value 2016"

for index, row in df.iterrows():
    fromFeature = row['LGA']
    source_value = row[target_col]
    print("{}, {}".format(fromFeature, str(source_value)))
    print("Querying overlaps of {} to {}...".format(fromFeature, toFeatureType))

    # Time the remote call so slow lookups are visible in the cell output.
    started = time.perf_counter()
    list_locations = api_util.query_api_location_overlaps(
        fromFeature,
        toFeatureType,
        LOCI_INTEGRATION_API,
        crosswalk='true',
    )
    elapsed = time.perf_counter() - started
    print(f"query_api_location_overlaps took {elapsed:0.4f} seconds")

    for overlap in list_locations['overlaps']:
        percent = overlap['forwardPercentage']
        # forwardPercentage is treated as a percentage: scale it to a fraction
        # and apply that fraction to the source row's value.
        fraction = float(percent) / 100.0
        reapportioned = fraction * float(source_value)

        res_df_cols["LGA"].append(fromFeature)
        res_df_cols["SA1"].append(overlap['uri'])
        res_df_cols["percent_overlap"].append(percent)
        res_df_cols["source_data"].append(source_value)
        res_df_cols["reapportioned_data"].append(reapportioned)

        

http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880, 118073000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 2.0958 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070, 39100
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 9.0677 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080, 142090000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/50080 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 10.9571 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/10050, 5195000
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentare

In [88]:
# Turn the accumulated columns into a single table, with the column order
# spelled out explicitly, and display it as the cell result.
res_df = pd.DataFrame(
    data=res_df_cols,
    columns=['LGA', "SA1", "percent_overlap", "source_data", "reapportioned_data"],
)
res_df

Unnamed: 0,LGA,SA1,percent_overlap,source_data,reapportioned_data
0,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,20.173726,118073000,2.381972e+07
1,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,79.826275,118073000,9.425328e+07
2,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.538743,39100,2.106484e+02
3,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.942639,39100,3.685719e+02
4,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.486730,39100,1.903116e+02
...,...,...,...,...,...
328,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.009745,55850000,5.442514e+03
329,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.009403,55850000,5.251614e+03
330,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.046452,55850000,2.594350e+04
331,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.012826,55850000,7.163146e+03
