In [19]:
import urllib.request
from pyparsing import ParseException
import rdflib
from rdflib import Graph
from rdflib import Namespace
import os.path
from collections import namedtuple


# Graph Helpers ---------------------------------
# Base IRIs of the two ontology namespaces; read_graph() binds them to the
# "sc" and "scp" prefixes used by the SPARQL queries in this notebook.
URL_OWL_V2 = 'https://www.staircase.co/ontology/'
URL_OWL_V2_PRODUCT = 'https://www.staircase.co/ontology/product/'
# Access token appended to the raw.githubusercontent.com download URL in
# read_graph(). It is read from the GH_TOKEN environment variable so that the
# secret is never stored in the notebook; an unset variable yields ''.
# NOTE(review): any token previously committed here should be revoked.
GH_TOKEN = os.environ.get('GH_TOKEN', '')

# Declare reusable Graph
_graph = None

#-- Read Graph from TTL --------
def read_graph():
    """Return the shared ontology graph, loading it on first use.

    On the first call the Turtle file ``staircase-v2.ttl`` is looked up in the
    current working directory; when it is missing it is downloaded from GitHub
    (using ``GH_TOKEN``) and stored locally. The file is then parsed and the
    ``sc`` / ``scp`` prefixes are bound. Later calls return the cached graph.

    Returns:
        The module-level graph object (also stored in ``_graph``).
    """
    global _graph
    # Compare against None rather than truthiness: a graph has a length, so an
    # empty graph would be falsy and trigger a reload on every call.
    if _graph is None:
        staircase_v2 = 'staircase-v2.ttl'
        if not os.path.exists(staircase_v2):
            print('Reading the file from GitHub')
            currURL = 'https://raw.githubusercontent.com/StaircaseAPI/translator/master/json-schemas/staircase-v2.ttl?token=' + GH_TOKEN
            # Download to a side file and rename afterwards, so an interrupted
            # transfer never leaves a truncated file that later runs would
            # mistake for a valid cache.
            partial_path = staircase_v2 + '.part'
            with urllib.request.urlopen(currURL) as response:
                # Write the raw bytes: decoding and re-encoding with the
                # platform default encoding can corrupt non-ASCII content.
                with open(partial_path, 'wb') as file:
                    file.write(response.read())
            os.replace(partial_path, staircase_v2)
        # Build into a local first so a parse failure does not leave a
        # half-initialised graph in the module-level cache.
        graph = Graph()
        graph.parse(staircase_v2, format='turtle')
        graph.bind("sc", Namespace(URL_OWL_V2))
        graph.bind("scp", Namespace(URL_OWL_V2_PRODUCT))
        _graph = graph
    return _graph
#-- Read Graph from TTL ----end-


#-- Execute SPARQL Query -------
def exec_gquery(gquery):
    """Run a SPARQL query against the shared ontology graph.

    Args:
        gquery: SPARQL query text.

    Returns:
        Whatever the graph's ``query`` method returns for ``gquery``.

    Raises:
        ValueError: if the query text cannot be parsed.
    """
    # Use the returned graph instead of relying on the module global.
    graph = read_graph()
    try:
        gquery_result = graph.query(gquery)
    except ParseException as exc:
        # Report the offending query; the only name in scope here is `gquery`.
        raise ValueError(f"Error processing Graph query: {gquery}") from exc
    return gquery_result
#-- Execute SPARQL Query ---end-
# Graph Helpers -----------------------------end-

# Number of triples in the loaded graph. Go through read_graph() so the cell
# works on a fresh kernel; no variable named `g` is defined in this notebook.
print(len(read_graph()))


15634


In [33]:
# Queries against TTL ---------------------------
# Lists every ?name typed as owl:Class that carries an rdfs:label, excluding
# subclasses of sc:referenceable_property. ?cname is the class's
# sc:container_name when present (OPTIONAL, so it may be unbound).
# ?label is matched only to require a label; it is not part of the result.
sparql_classes = """\
    SELECT ?name ?cname
    WHERE {
        ?name rdf:type owl:Class .
        ?name rdfs:label ?label .
        MINUS { ?name rdfs:subClassOf sc:referenceable_property } .
        OPTIONAL { ?name sc:container_name ?cname }
    }
    """

# Lists (?name, ?parentcls, ?parentctn) rows: ?name is a sub-property of
# sc:has_referenceable_property that appears via owl:onProperty on a node
# ?anon which ?parentcls is a subclass of; ?parentctn is the required
# sc:container_name of ?parentcls.
# NOTE(review): ?anon is presumably an anonymous OWL restriction - confirm
# against the ontology.
sparql_properties = """\
        SELECT ?name ?parentcls ?parentctn
        WHERE {
            ?parentcls rdfs:subClassOf ?anon .
            ?anon owl:onProperty ?name .
            ?name rdfs:subPropertyOf sc:has_referenceable_property .
            ?parentcls sc:container_name ?parentctn
        }
    """

# Lists (?name, ?parentcls, ?targetcls) rows: ?name is a sub-property of
# owl:topObjectProperty that appears via owl:onProperty on a node ?anon which
# ?parentcls is a subclass of; ?targetcls is that node's owl:someValuesFrom
# value and must carry an rdfs:label (?anything is not part of the result).
sparql_relationships = """\
        SELECT ?name ?parentcls ?targetcls
        WHERE {
            ?parentcls rdfs:subClassOf ?anon .
            ?anon owl:onProperty ?name .
            ?name rdfs:subPropertyOf owl:topObjectProperty .
            ?anon owl:someValuesFrom ?targetcls .
            ?targetcls rdfs:label ?anything
        }
    """

def query_class_properties(graph, class_name):
    """Find the referenceable properties attached to a class.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes. A
    property qualifies when it is a sub-property of
    ``sc:has_referenceable_property`` and is the ``owl:onProperty`` of a node
    the class is a subclass of.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        class_name: local name of the class, inserted into the query text.

    Returns:
        A generator over the last path segment of each matching property IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?cls rdfs:subClassOf ?anon .
                ?anon owl:onProperty ?name .
                ?name rdfs:subPropertyOf sc:has_referenceable_property 
            }}"""
    sparql = template.format(class_name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {class_name}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_class_relationships(graph, class_name):
    """Find the object-property relationships attached to a class.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes. A
    property qualifies when it is a sub-property of ``owl:topObjectProperty``
    and is the ``owl:onProperty`` of a node the class is a subclass of.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        class_name: local name of the class, inserted into the query text.

    Returns:
        A generator over the last path segment of each matching property IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?cls rdfs:subClassOf ?anon .
                ?anon owl:onProperty ?name .
                ?name rdfs:subPropertyOf owl:topObjectProperty
            }}"""
    sparql = template.format(class_name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {class_name}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_class_subclasses(graph, class_name):
    """Find everything declared ``rdfs:subClassOf`` the given class.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        class_name: local name of the class, inserted into the query text.

    Returns:
        A generator over the last path segment of each subclass IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?name rdfs:subClassOf ?cls
            }}"""
    sparql = template.format(class_name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {class_name}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_class_superclass(graph, class_name):
    """Find the ``owl:Class`` parents of the given class.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes. Only
    ``rdfs:subClassOf`` targets that are themselves typed ``owl:Class`` match.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        class_name: local name of the class, inserted into the query text.

    Returns:
        A generator over the last path segment of each superclass IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?cls rdfs:subClassOf ?name .
                ?name rdf:type owl:Class
            }}"""
    sparql = template.format(class_name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {class_name}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_class_datasources(graph, class_name):
    """Find the members of the ``sc:data_source_of`` union for a class.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes. The
    query follows the node whose ``owl:onProperty`` is ``sc:data_source_of``
    to its ``owl:someValuesFrom`` value, then walks that value's
    ``owl:unionOf`` list (``rdf:rest*/rdf:first``).

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        class_name: local name of the class, inserted into the query text.

    Returns:
        A generator over the last path segment of each list member IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?field
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?cls rdfs:subClassOf ?anon .
                ?anon owl:onProperty sc:data_source_of .
                ?anon owl:someValuesFrom ?anon2 .
                ?anon2 owl:unionOf ?anon3 .
                ?anon3 rdf:rest*/rdf:first ?field .
            }}"""
    sparql = template.format(class_name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {class_name}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_relationship_target(graph, relationship):
    """Find the target of a relationship property.

    The property is looked up under both the ``sc:`` and ``scp:`` prefixes.
    The result is the ``owl:someValuesFrom`` value of every node whose
    ``owl:onProperty`` is that property, minus values that have an
    ``owl:unionOf``.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        relationship: local name of the property, inserted into the query text.

    Returns:
        A generator over the last path segment of each target IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?anon owl:onProperty ?cls .
                ?anon owl:someValuesFrom ?name
                MINUS {{ ?name owl:unionOf ?unionid }} .
            }}"""
    sparql = template.format(relationship)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {relationship}")
    return (os.path.basename(match[0].toPython()) for match in matches)


def query_relationship_origin(graph, relationship):
    """Find the classes a relationship (or property) originates from.

    The property is looked up under both the ``sc:`` and ``scp:`` prefixes.
    The result is every ``?name`` that is ``rdfs:subClassOf`` a node whose
    ``owl:onProperty`` is that property.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        relationship: local name of the property, inserted into the query text.

    Returns:
        A generator over the last path segment of each origin class IRI.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?anon owl:onProperty ?cls .
                ?name rdfs:subClassOf ?anon
            }}"""
    sparql = template.format(relationship)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {relationship}")
    return (os.path.basename(match[0].toPython()) for match in matches)


# The same lookup serves referenceable properties, so expose it under a
# second name.
query_property_origin = query_relationship_origin


def query_class_container(graph, name):
    """Look up the ``sc:container_name`` value(s) of an ``owl:Class``.

    The class is looked up under both the ``sc:`` and ``scp:`` prefixes.

    Args:
        graph: object whose ``query`` method runs SPARQL text.
        name: local name of the class, inserted into the query text.

    Returns:
        A generator over the Python value of each container name. Unlike the
        other lookups, the value is returned whole rather than reduced to its
        last path segment.

    Raises:
        ValueError: if the generated query cannot be parsed.
    """
    template = """\
            SELECT ?name
            WHERE {{
                VALUES ?cls {{ sc:{0} scp:{0} }}
                ?cls rdf:type owl:Class .
                ?cls sc:container_name ?name
            }}"""
    sparql = template.format(name)
    try:
        matches = graph.query(sparql)
    except ParseException:
        raise ValueError(f"Graph query input invalid: {name}")
    return (match[0].toPython() for match in matches)

# Queries against TTL -----------------------end-  

In [34]:
# Extract data from TTL -------------------------

def get_classes():
    """Print one ``<class>, <container>`` line per row of ``sparql_classes``.

    The class is shown as the last path segment of its IRI; the container is
    the Python value of the second column, or ``None`` when that column is
    falsy.
    """
    for row in exec_gquery(sparql_classes):
        class_label = os.path.split(row[0].toPython())[1]
        container = row[1]
        container_label = container.toPython() if container else None
        print(f"{class_label}, {container_label}")

def get_properties():
    """Print one tuple per row of ``sparql_properties``.

    Each tuple is ``(property, parent class, parent container)``: the first
    two are the last path segment of their IRIs, the third is the Python value
    of the container name. The tuple is printed in its ``repr`` form.
    """
    for prop, parent_cls, parent_ctn in exec_gquery(sparql_properties):
        entry = (
            os.path.split(prop.toPython())[1],
            os.path.split(parent_cls.toPython())[1],
            parent_ctn.toPython(),
        )
        print(f"{entry}")

def get_relationships():
    """Print one tuple per row of ``sparql_relationships``.

    Each tuple is ``(relationship, parent class, target class)``, every
    element being the last path segment of the corresponding IRI. The tuple is
    printed in its ``repr`` form.
    """
    for relation, parent_cls, target_cls in exec_gquery(sparql_relationships):
        entry = tuple(
            os.path.split(term.toPython())[1]
            for term in (relation, parent_cls, target_cls)
        )
        print(f"{entry}")
        
        
# Print the relationship listing. get_classes() and get_properties() are the
# alternative listings defined in this cell; call one of them here instead to
# see its output.
get_relationships()

# Extract data from TTL ---------------------end-

('data_source_of', 'document', 'referenceable_property')
('owes_liability', 'borrower', 'liability')
('with_depository_account_inactivity', 'depository_account', 'depository_account_inactivity')
('with_income', 'income_tax', 'income')
('with_data_extraction_metadata', 'referenceable_property', 'data_extraction_metadata')
('with_credit_risk', 'borrower', 'credit_score_information')
('with_servicing_information', 'loan', 'servicing_information')
('with_escrow_disclosure', 'escrow', 'escrow_disclosure')
('with_notes', 'loan_product', 'notes')
('with_fees', 'loan', 'loan_fees')
('serviced_by', 'loan', 'servicer')
('with_payment_information', 'loan', 'loan_payment_information')
('with_product', 'loan', 'loan_product')
('with_adjustment', 'income', 'income_adjustment')
('with_adjustment', 'income_tax', 'income_adjustment')
('with_arm_adjustment', 'loan', 'arm_adjustment')
('with_down_payment', 'loan', 'down_payment')
('owed_by', 'liability', 'person')
('with_closing_information', 'loan', 'cl

In [None]:
# Prints the relationship listing again (this cell has no recorded output).
get_relationships()