In [1]:
import pandas as pd
import datetime
#TODO: update this mapping to map the parent role instead of the subrole

In [2]:
#role_reconciliation.csv is the csv conversion of the reconciliation sheet provided by TH.
#Every column is read as text (dtype=str): with type inference, an ID column that is numeric and has
#blank cells is parsed as float, so an ID such as 1001 would be written to the ttl as "1001.0" and
#leading zeros would be lost. Blank cells are still loaded as missing values, so pd.notna checks
#on the loaded frame keep working.
rolerecon_df = pd.read_csv("role_reconciliation.csv", dtype=str)

# RDF transformation

## Variables for IRI generation

In [3]:
#namespace IRIs used for the PREFIX declarations and IRI generation in the mapping
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
owl = "http://www.w3.org/2002/07/owl#"
famo = "http://ontology.eil.utoronto.ca/FAMO/famo/"
ex = "http://example.com/data/"

#timestamp captured once when this cell runs; used as the suffix of generated identifiers
dt_now = format(datetime.datetime.now(), "%d%b%Y_%H%M%S")

## Object mapping
Consider what key objects are represented in each row of data. Are the IRIs included in the dataset, or do we need to generate them?
For each object in each row, break its mapping down into an assertion of its existence and assertions of any properties identified in the data.

**TODO** test for null values in all columns before mapping

In [4]:
#sentinel answers used in the reconciliation sheet
NOTHING = '#Nothing'
UNABLE_TO_CONFIRM = '#NK-triedButUnableToConfirm'
#answers in "Field Verified Existance of Role" and the famo status class asserted for each of them
EXISTENCE_STATUS_CLASS = {
    '1. Role exists in reality': 'ExistentRoleStatus',
    '2. Role does not exist in reality': 'NonExistentRoleStatus',
}

#these lists store all of the text to be exported as ttl files;
#statements whose object is famo:nothing are collected separately in ttl_nothing_list
#both lists start with the same prefix declarations
prefix_lines = [
    f"PREFIX {prefix}: <{iri}>" + '\n'
    for prefix, iri in (("famo", famo), ("ex", ex), ("rdf", rdf), ("owl", owl))
]
ttl_list = list(prefix_lines)
ttl_nothing_list = list(prefix_lines)
#NOTE(review): genprop:hasIdentifier is emitted below, but no "PREFIX genprop:" line is declared here,
#so the output will not parse as Turtle until that namespace is added - confirm the genprop IRI and declare it.


def _ttl_string(value):
    """Return value as the body of a double-quoted Turtle string literal.

    Backslashes, double quotes and line breaks are escaped so that a value containing them
    cannot terminate the literal early and corrupt the exported file.
    """
    text = str(value)
    #the backslash must be escaped first, otherwise the escapes added afterwards would be doubled
    for raw, escaped in (('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r')):
        text = text.replace(raw, escaped)
    return text


def _add_field_verified_relation(role, project, value, relation, relation_class):
    """Assert the relation node for one field-verified column and report how the column was answered.

    role, project  -- local names (without the ex: prefix) of the role and the project
    value          -- the cell value of the field-verified column for this row
    relation       -- local name of the relation node to generate
    relation_class -- famo class name the relation node is typed with

    Returns None when value is null (nothing is written), "unknown" for #NK-triedButUnableToConfirm,
    "nothing" for #Nothing, and "value" otherwise. For "value" the caller adds the famo:hasObject
    statement(s) describing the recorded value. Exactly one outcome applies per cell, so a sentinel
    answer is never also mapped as if it were a real value.
    """
    #no mapping for null values
    if pd.isna(value):
        return None
    ttl_list.append(f"ex:{role} famo:subjectOf ex:{relation}." + '\n')
    ttl_list.append(f"ex:{relation} a famo:{relation_class}." + '\n')
    if value == UNABLE_TO_CONFIRM:
        ttl_list.append(f"ex:{project} famo:unableToAccess ex:{relation}." + '\n')
        return "unknown"
    ttl_list.append(f"ex:{project} famo:finds ex:{relation}." + '\n')
    if value == NOTHING:
        ttl_nothing_list.append(f"ex:{relation} famo:hasObject famo:nothing." + '\n')
        return "nothing"
    return "value"


#note: in theory - it would make more sense for the data to contain the full DHE IRI (as stored in the datahub), not the abbreviation without the namespace
#for now, we'll add the ex:namespace back in, but in an end to end implementation the export (from the data hub) would likely contain the raw IRI and would not need to be modified
#associated validation activity
#if we break down the activity into more levels of detail across the entries we'll need to include this in the loop, but for now a single statement is sufficient
#guarded so that an empty sheet yields a prefixes-only export instead of an indexing error
if not rolerecon_df.empty:
    ttl_list.append(f"ex:{rolerecon_df['Project DHE ID'].iloc[0]} rdf:type famo:Project." + '\n')

#for each row in the dataset
for row in rolerecon_df.to_dict("records"):
    role = row['Role DHE URI']
    project = row['Project DHE ID']

    #the role is in the project scope
    ttl_list.append(f"ex:{role} famo:inProjectScope ex:{project}." + '\n')

    #parent role record, if applicable
    parent = row['Parent Role DHE URI']
    if pd.notna(parent):
        #the relation node is keyed on the (child) role, like every other relation generated here, and the
        #same IRI is used in all four statements; keying it on the parent would make sibling roles share one node
        role_inclusion = f"roleincl_{role}_{dt_now}"
        ttl_list.append(f"ex:{role} famo:subjectOf ex:{role_inclusion}." + '\n')
        ttl_list.append(f"ex:{role_inclusion} a famo:RoleInclusionRelation." + '\n')
        ttl_list.append(f"ex:{role_inclusion} famo:hasObject ex:{parent}." + '\n')
        ttl_list.append(f"ex:{project} famo:leadsToInformation ex:{role_inclusion}." + '\n')
        ttl_list.append(f"ex:{parent} famo:inProjectScope ex:{project}." + '\n')

    #role canonical ID, if applicable
    canonical_id = row['Role Canonical ID']
    if pd.notna(canonical_id):
        role_enum_relation = f"role_enum_{role}_{dt_now}"
        role_enum = f"enum_{role}_{dt_now}"
        ttl_list.append(f"ex:{role} famo:subjectOf ex:{role_enum_relation}." + '\n')
        ttl_list.append(f"ex:{role_enum_relation} a famo:EntityNumberRelation." + '\n')
        ttl_list.append(f"ex:{role_enum_relation} famo:hasObject ex:{role_enum}." + '\n')
        ttl_list.append(f"ex:{role_enum} genprop:hasIdentifier \"{_ttl_string(canonical_id)}\"." + '\n')
        ttl_list.append(f"ex:{project} famo:leadsToInformation ex:{role_enum}." + '\n')

    #existence of role
    existence = row['Field Verified Existance of Role']
    status_relation = f"role_status_relation_{role}_{dt_now}"
    status_class = EXISTENCE_STATUS_CLASS.get(existence)
    if status_class is not None:
        #"1. Role exists in reality" / "2. Role does not exist in reality": the project finds a status for the role
        role_status = f"role_status_{role}_{dt_now}"
        ttl_list.append(f"ex:{project} famo:finds ex:{status_relation}." + '\n')
        ttl_list.append(f"ex:{status_relation} a famo:StatusRelation." + '\n')
        ttl_list.append(f"ex:{status_relation} famo:hasSubject ex:{role}." + '\n')
        ttl_list.append(f"ex:{status_relation} famo:hasObject ex:{role_status}." + '\n')
        ttl_list.append(f"ex:{role_status} a famo:{status_class}." + '\n')
    elif existence == UNABLE_TO_CONFIRM:
        #NK-triedButUnableToConfirm: the relation is recorded without an object
        ttl_list.append(f"ex:{project} famo:unableToAccess ex:{status_relation}." + '\n')
        ttl_list.append(f"ex:{status_relation} a famo:StatusRelation." + '\n')
        ttl_list.append(f"ex:{status_relation} famo:hasSubject ex:{role}." + '\n')

    #Field Verified Asset Occupant DHE URI
    occupant = row['Field Verifed Asset Occupant DHE URI']
    occupation_relation = f"role_occ_relation_{role}_{dt_now}"
    if _add_field_verified_relation(role, project, occupant, occupation_relation, "RoleOccupationRelation") == "value":
        ttl_list.append(f"ex:{occupation_relation} famo:hasObject ex:{occupant}." + '\n')

    #Field Verified Role ID physically printed on Tag
    tag_id = row['Field Verified Role ID physically printed on Tag']
    tag_relation = f"roleid_on_tag_relation_{role}_{dt_now}"
    if _add_field_verified_relation(role, project, tag_id, tag_relation, "EntityNumberPrintedOnTagRelation") == "value":
        tag_enum = f"enum_ontag_{role}_{dt_now}"
        ttl_list.append(f"ex:{tag_relation} famo:hasObject ex:{tag_enum}." + '\n')
        ttl_list.append(f"ex:{tag_enum} genprop:hasIdentifier \"{_ttl_string(tag_id)}\"." + '\n')

    #Field Verified Role Spatial Location DHE URI
    location = row['Field Verified Role Spatial Location DHE URI']
    location_relation = f"role_loc_relation_{role}_{dt_now}"
    if _add_field_verified_relation(role, project, location, location_relation, "RoleLocationRelation") == "value":
        ttl_list.append(f"ex:{location_relation} famo:hasObject ex:{location}." + '\n')

In [5]:
#append all of the text in ttl_list to the file role_reconciliation_output.ttl
#the with-block closes the file even if the write fails; Turtle documents are UTF-8, so the encoding
#is set explicitly instead of relying on the platform default
#NOTE(review): the files are opened in append mode, so re-running the notebook adds another copy of the
#prefixes and a new set of timestamped statements to the existing files - confirm this is intended
with open("role_reconciliation_output.ttl", "a", encoding="utf-8") as ttl_results:
    ttl_results.writelines(ttl_list)

#append all "nothing results" in ttl_nothing_list to the file role_reconciliation_nothing_output.ttl
with open("role_reconciliation_nothing_output.ttl", "a", encoding="utf-8") as ttl_nothing_results:
    ttl_nothing_results.writelines(ttl_nothing_list)