In [1]:
!pip install git+https://github.com/jyucsiro/loci-scripts.git@feat-mar2020-cache
!pip install numpy pandas

Collecting git+https://github.com/jyucsiro/loci-scripts.git@feat-mar2020-cache
  Cloning https://github.com/jyucsiro/loci-scripts.git (to revision feat-mar2020-cache) to /tmp/pip-req-build-0fqmq_ls
  Running command git clone -q https://github.com/jyucsiro/loci-scripts.git /tmp/pip-req-build-0fqmq_ls
  Running command git checkout -b feat-mar2020-cache --track origin/feat-mar2020-cache
  Switched to a new branch 'feat-mar2020-cache'
  Branch 'feat-mar2020-cache' set up to track remote branch 'feat-mar2020-cache' from 'origin'.
  Running command git submodule update --init --recursive -q
Collecting XlsxWriter
  Downloading XlsxWriter-1.2.8-py2.py3-none-any.whl (141 kB)
[K     |████████████████████████████████| 141 kB 796 kB/s eta 0:00:01
[?25hCollecting SPARQLWrapper<=1.8.5
  Downloading SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Collecting rdflib<=4.2.2
  Downloading rdflib-4.2.2-py3-none-any.whl (344 kB)
[K     |████████████████████████████████| 344 kB 2.0 MB/s eta 0:00:01
[?25h

# Reapportioning ASGS16 LGA to SA1: example using the pyloci API

This notebook shows how you would perform reapportioning with the [pyloci](https://pypi.org/project/pyloci/) library, using data from a simple CSV file. The pyloci library interfaces directly with the SPARQL API for the Loc-I Cache GraphDB.

In [41]:
import numpy as np
import pandas as pd
# Read the comma-separated test-case file (one row per LGA) into a DataFrame.
df = pd.read_csv('loci-lga-sa1-test-case1.csv', sep=',')
# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,LGA,Population,Building count
0,http://linked.data.gov.au/dataset/asgs2016/loc...,51,81
1,http://linked.data.gov.au/dataset/asgs2016/loc...,964,593
2,http://linked.data.gov.au/dataset/asgs2016/loc...,16796,6353
3,http://linked.data.gov.au/dataset/asgs2016/loc...,28691,11214
4,http://linked.data.gov.au/dataset/asgs2016/loc...,38948,16782
5,http://linked.data.gov.au/dataset/asgs2016/loc...,44165,17398
6,http://linked.data.gov.au/dataset/asgs2016/loc...,25735,8138


In [42]:
# Make sure the SPARQL endpoint configuration is loaded (via python-dotenv)
# and echo the value that will be used.
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
print(os.environ.get("SPARQL_ENDPOINT"))

http://db.loci.cat/repositories/loci-cache


In [43]:
#import the reapportioning module from pyloci
from pyloci import reapportioning


from pyloci.api.util import Util as API_Util
from pyloci.sparql import util as sparql_util
import time

# No credentials are set; `auth` is not referenced by any cell visible in this notebook.
# NOTE(review): confirm whether it is still needed.
auth = None
# pyloci API helper; the reapportioning loop below calls its
# query_api_location_overlaps() method.
api_util = API_Util()


In [44]:
# URI of the feature type that overlaps are requested for
# (ASGS Statistical Area Level 1, going by the URI fragment).
toFeatureType = "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1"
# Base URL passed to the overlaps query; the hostname suggests this is a
# test deployment of the Loc-I Integration API -- TODO confirm before relying on results.
LOCI_INTEGRATION_API = "https://test-api.loci.cat/api/v1"

In [46]:
# Reapportion one CSV column from each LGA onto the features it overlaps.
#
# For every row of `df`, the API is asked for the features of type
# `toFeatureType` that overlap the row's LGA. Each overlap produces one output
# record whose reapportioned value is:
#     source value * forwardPercentage / 100
#
# The accumulator is rebuilt from scratch on every run, so re-executing this
# cell does not duplicate records.
res_df_cols = {
    "LGA" : [],
    "SA1" : [],
    "percent_overlap": [],
    "source_data": [],
    "reapportioned_data": [],
}
target_col = "Building count"  # CSV column whose values are reapportioned
for index, row in df.iterrows():
    fromFeature = row['LGA']
    source_value = row[target_col]
    print(fromFeature, source_value)
    print("Querying overlaps of {} to {}...".format(fromFeature, toFeatureType))
    # Time each API round trip so slow lookups are visible in the output.
    tic = time.perf_counter()
    list_locations = api_util.query_api_location_overlaps(fromFeature, toFeatureType, LOCI_INTEGRATION_API, crosswalk='true')
    toc = time.perf_counter()
    print(f"query_api_location_overlaps took {toc - tic:0.4f} seconds")

    # A failed or empty lookup for one LGA should not abort the whole run and
    # discard the results gathered so far: report it and move on.
    try:
        overlaps = list_locations['overlaps']
    except (KeyError, TypeError):
        overlaps = None
    if not overlaps:
        print("No overlaps returned for {}; skipping".format(fromFeature))
        continue

    # Convert once per LGA instead of once per overlap.
    source_as_float = float(source_value)
    for o in overlaps:
        percent = o['forwardPercentage']
        res_df_cols["LGA"].append(fromFeature)
        res_df_cols["SA1"].append(o['uri'])
        res_df_cols["percent_overlap"].append(percent)
        res_df_cols["source_data"].append(source_value)
        # forwardPercentage is on a 0-100 scale, hence the division by 100.
        res_df_cols["reapportioned_data"].append((float(percent) / 100.0) * source_as_float)

        

http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/44000 81
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/44000 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 0.5424 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880 593
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/55880 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 2.2262 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070 6353
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40070 to http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1...
query_api_location_overlaps took 8.9448 seconds
http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40120 11214
Querying overlaps of http://linked.data.gov.au/dataset/asgs2016/localgovernmentarea/40120 to http://lin

In [47]:
# Turn the accumulated column lists into a table with an explicit column order.
res_df = pd.DataFrame(
    data=res_df_cols,
    columns=["LGA", "SA1", "percent_overlap", "source_data", "reapportioned_data"],
)
# Last expression of the cell: rendered by the notebook.
res_df

Unnamed: 0,LGA,SA1,percent_overlap,source_data,reapportioned_data
0,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.006331,81,0.005128
1,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,99.993669,81,80.994872
2,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,20.173726,593,119.630197
3,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,79.826275,593,473.369811
4,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.538743,6353,34.226317
...,...,...,...,...,...
474,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,3.756180,8138,305.677893
475,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,5.691511,8138,463.175169
476,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,10.927236,8138,889.258466
477,http://linked.data.gov.au/dataset/asgs2016/loc...,http://linked.data.gov.au/dataset/asgs2016/sta...,0.069604,8138,5.664335
