In [1]:
import pandas as pd
import datetime
#TODO: update the mapping to use the parent role instead of the subrole

In [2]:
#role_capital.csv is the CSV conversion of the reconciliation sheet provided by TH
role_csv_path = "role_capital.csv"
role_df = pd.read_csv(role_csv_path)

# RDF transformation

## Variables for IRI generation

In [3]:
#namespace IRIs used for the PREFIX declarations and the generated statements
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
owl = "http://www.w3.org/2002/07/owl#"
famo = "http://ontology.eil.utoronto.ca/FAMO/famo/"
ex = "http://example.com/data/"

#timestamp captured once per run; used as the suffix of generated identifiers
run_started = datetime.datetime.now()
dt_now = f"{run_started:%d%b%Y_%H%M%S}"

## Object mapping
Consider what key objects are represented in each row of data. Are the IRIs included in the dataset, or do we need to generate them?
For each object in each row, break its mapping down into an assertion of its existence and assertions of any properties identified in the data.

**TODO** test for null values in all columns before mapping

In [4]:
#these lists will store all of the text to be exported as ttl files
#ttl_list holds the main output; ttl_nothing_list holds only the "famo:nothing" occupant statements
ttl_list = []
ttl_nothing_list = []

#spreadsheet label in 'Changes to Roles During the Project' -> FAMO class asserted for the role status
#("Role remains" and "Role is new" both map to RealizedRoleStatus)
ROLE_STATUS_CLASSES = {
    '1. Role remains': 'RealizedRoleStatus',
    '2. Role is removed': 'EliminatedRoleStatus',
    '3. Role is new': 'RealizedRoleStatus',
    '4. Role is planned': 'SpecifiedRoleStatus',
}


def _prefix_lines():
    """Return the four PREFIX declarations written at the top of both outputs."""
    return [
        f"PREFIX famo: <{famo}>\n",
        f"PREFIX ex: <{ex}>\n",
        f"PREFIX rdf: <{rdf}>\n",
        f"PREFIX owl: <{owl}>\n",
    ]


def _status_statements(project_id, role_id, status, suffix):
    """Return the status-relation statements for one row.

    A recognised label yields the relation plus a typed status object.
    A null status yields the relation without an object.
    Any other value yields no statements at all.
    """
    status_class = ROLE_STATUS_CLASSES.get(status)
    if status_class is None and not pd.isna(status):
        #NOTE(review): unrecognised labels are dropped without any message - confirm this is intended
        return []
    relation = f"ex:role_status_relation_{role_id}_{suffix}"
    statements = [
        f"ex:{project_id} famo:hasOutcomeProperty {relation}.\n",
        f"{relation} a famo:StatusRelation.\n",
        f"{relation} famo:hasSubject ex:{role_id}.\n",
    ]
    if status_class is not None:
        status_node = f"ex:role_status_{role_id}_{suffix}"
        statements.append(f"{relation} famo:hasObject {status_node}.\n")
        statements.append(f"{status_node} a famo:{status_class}.\n")
    return statements


def _occupation_statements(project_id, role_id, occupant, suffix):
    """Return (main_statements, nothing_statements) for one row's occupation relation.

    The relation itself is always asserted in the main output. Its object is
    omitted when the occupant is null, goes to the separate "nothing" output
    when the occupant is '#Nothing', and otherwise is the occupant in the ex: namespace.
    """
    relation = f"ex:role_occ_relation_{role_id}_{suffix}"
    main_statements = [
        f"ex:{role_id} famo:subjectOf {relation}.\n",
        f"{relation} a famo:RoleOccupationRelation.\n",
        f"ex:{project_id} famo:hasOutcomeProperty {relation}.\n",
    ]
    nothing_statements = []
    if pd.isna(occupant):
        #NK (null): the relation is asserted but its object is left unstated
        pass
    elif occupant == '#Nothing':
        nothing_statements.append(f"{relation} famo:hasObject famo:nothing.\n")
    else:
        main_statements.append(f"{relation} famo:hasObject ex:{occupant}.\n")
    return main_statements, nothing_statements


#prefixes
ttl_list.extend(_prefix_lines())
ttl_nothing_list.extend(_prefix_lines())

#note: in theory it would make more sense for the data to contain the full DHE IRI (as stored in the datahub), not the abbreviation without the namespace
#for now, we add the ex: namespace back in, but in an end-to-end implementation the export (from the data hub) would likely contain the raw IRI and would not need to be modified
#associated validation activity
#if we break down the activity into more levels of detail across the entries we'll need to include this in the loop, but for now a single statement is sufficient
#guarded so that an empty sheet produces prefix-only output instead of a KeyError on row 0
if len(role_df) > 0:
    ttl_list.append(f"ex:{role_df['Project DHE ID'].iloc[0]} rdf:type famo:Project.\n")

#for each row in the dataset (iterated by position, so the frame's index labels do not matter)
#NOTE(review): null role/project identifiers are not checked here and would render as "ex:nan"
for role_id, project_id, status, occupant in zip(
    role_df['Role DHE URI'],
    role_df['Project DHE ID'],
    role_df['Changes to Roles During the Project'],
    role_df['Post-project Asset Occupant DHE URI'],
):
    #the role is in the project scope
    ttl_list.append(f"ex:{role_id} famo:inProjectScope ex:{project_id}.\n")

    #Changes to Roles During the Project
    ttl_list.extend(_status_statements(project_id, role_id, status, dt_now))

    #Post-project Asset Occupant DHE URI
    occ_main, occ_nothing = _occupation_statements(project_id, role_id, occupant, dt_now)
    ttl_list.extend(occ_main)
    ttl_nothing_list.extend(occ_nothing)

In [5]:
#append all of the text in ttl_list to the file role_capital_output.ttl
#mode "a" keeps whatever the file already contains, so repeated runs accumulate output
#the with-block closes the file even if the write fails; UTF-8 is the encoding Turtle requires
with open("role_capital_output.ttl", "a", encoding="utf-8") as ttl_results:
    ttl_results.writelines(ttl_list)

#append all "nothing results" in ttl_nothing_list to the file role_capital_nothing_output.ttl
with open("role_capital_nothing_output.ttl", "a", encoding="utf-8") as ttl_nothing_results:
    ttl_nothing_results.writelines(ttl_nothing_list)