# Linking IM Models to the SDM Ontology (RDF)

In [1]:
### Necessary imports
import pandas as pd
from rdflib import Graph, RDF, Namespace, Literal, URIRef
import re, hashlib
import math, sympy

In [2]:
import os
print(os.getcwd())

C:\Users\loafo\Documenten\University\Courses, Lectures\Thesis Project\Ontology


## Transforming TSV to RDF (rdflib)

The following cell contains some helper-functions that enable the user to import, analyse, edit, and export an ontology in turtle syntax (RDF triples). <br> Using these functions, new instances can be added to the SDM ontology for any specific model. 

In [3]:
# Loads the data from a certain file given as input in Turtle syntax into the Graph g  
# -------------------------
def load_graph(filename):
    with open(filename, 'r') as f:
        g.parse(f, format='turtle')
        

# Prints a certain graph given as input in Turtle syntax
# -------------------------
def serialize_graph(myGraph):
     print(myGraph.serialize(format='turtle'))
        

# Saves the Graph g in Turtle syntax to a certain file given as input
# -------------------------
def save_graph(myGraph, filename):
    with open(filename, 'w') as f:
        myGraph.serialize(filename, format='turtle')
        
    
# Changes the namespace of a certain URI given as input to a DBpedia URI 
# Example: transformToDBR("http://example.com/kad2020/Amsterdam") returns "http://dbpedia.org/resource/Amsterdam"
# -------------------------
def transformToDBR(uri):
    if isinstance(uri, Literal):
        # changes the literal to uppercase so that the object with the same name refers to an object and not the string
        return uri.upper()
    components = g.namespace_manager.compute_qname(uri)
    return "http://dbpedia.org/resource/%s"%(components[2])

# -------------------------
g = Graph()

load_graph('SDMontology_turtle.ttl')

# serialize_graph(g)

### Loading the TSV data

In [4]:
### Adapt the paths to point to the tsv files on your local machine

# General data about the Insight file (model)
insight = pd.read_csv('Climate Change/modelData.tsv', sep='\t', header=None, quotechar="~")
insight.columns = ['macros', 'solutionAlgorithm', 'timeStart', 'timeUnit', 'timeLength', 'timeStep']
insight = insight.drop([0]) # Drop header from df
insight

Unnamed: 0,macros,solutionAlgorithm,timeStart,timeUnit,timeLength,timeStep
1,# Scaling factors for energy. Scaling by time ...,,,,,
2,scaledWatts <- {1 1/Seconds} / (TimeStep() * {...,,,,,
3,scaledJoules <- TimeStep() * {1 1/Seconds},,,,,
4,# Conversion Factor to convert CO2 Mass in the...,,,,,
5,CO2MassToPPM <- {7.82e9 Tons CO2/PPM CO2},,,,,
6,# The radius of the Earth,,,,,
7,EarthRadius <- {6.371e6 Meters},,,,,
8,# Energy from the Sun per Square Meter,,,,,
9,SolarFlux <- {1367 Watts/Meters^2},,,,,
10,# Stefan-Boltzmann constant,,,,,


In [5]:
# Stocks 
stocks = pd.read_csv('Climate Change/stockData.tsv', sep='\t', header=None, quotechar="~")
stocks.columns = ['id', 'name', 'initialValue', 'unit', 'delay', 'nonNegative', 'note']
stocks = stocks.drop([0]) # Drop header from df
stocks

Unnamed: 0,id,name,initialValue,unit,delay,nonNegative,note
1,55,Earth Energy,[e0] * scaledJoules,Joules,10,False,
2,58,Atmospheric Energy,[e0] * scaledJoules,Joules,10,False,
3,66,Atmospheric Carbon Dioxide,[Historical CO2 PPM] * CO2MassToPPM,Tons CO2,10,False,


In [6]:
# Flows
flows = pd.read_csv('Climate Change/flowData.tsv', sep='\t', header=None, quotechar="~")
flows.columns = ['id', 'name', 'flowRate', 'unit', 'onlyPositive', 'startStock', 'endStock', 'note']
flows = flows.drop([0])
flows

Unnamed: 0,id,name,flowRate,unit,onlyPositive,startStock,endStock,note
1,56,Solar Radiation,SolarFlux * pi * EarthRadius^2,Watts,True,,55.0,"Solar radiation, does not interact with the at..."
2,60,Radiation Down,[Atmosphere Radiation] / 2,Watts,True,58.0,55.0,
3,61,Pass Through,[Atmospheric Energy] * [Emissivity] * scaledWatts,Watts,True,58.0,,
4,59,Reflection,[Solar Radiation] * [Earth Albedo],Watts,True,55.0,,Solar radiation reflect by the earth (or atmos...
5,89,Radiation,[Earth Energy] * scaledWatts,Watts,True,55.0,58.0,
6,93,Radiation Up,[Atmosphere Radiation] / 2,Watts,True,58.0,,
7,67,Pollution,if years <= 2021 then [Historical Fossil Emis...,Tons CO2 / Year,True,,66.0,
8,71,Land Sink,if years <= 2021 then [Historical Land Sink]e...,Tons CO2 / Year,True,66.0,,"Source: <a href=""https://www.wri.org/insights/..."
9,130,Ocean Sink,if years <= 2021 then [Historical Ocean Sink]...,Tons CO2 / Year,True,66.0,,
10,131,Land-Use Change,if years <= 2021 then [Historical Land-Use Ch...,Tons CO2 / Year,True,,66.0,


In [7]:
# Variables
variables = pd.read_csv('Climate Change/variableData.tsv', sep='\t', header=None, quotechar="~")
variables.columns = ['id', 'name', 'equation', 'unit', 'note']
variables = variables.drop([0])
variables

Unnamed: 0,id,name,equation,unit,note
1,80,Global Temperature,KelvinToCelsius([Absolute Temperature]),Degrees Celsius,
2,91,Absolute Temperature,([Radiation] / (Boltzmann * (EarthRadius^2 * p...,Degrees Kelvin,
3,96,CO2 PPM,[Atmospheric Carbon Dioxide] / CO2MassToPPM,PPM CO2,
4,101,Atmosphere Radiation,[Atmospheric Energy] * (1 - [Emissivity]) * sc...,Watts,
5,111,Earth Albedo,.3,Unitless,
6,118,Pollution Emissions,36.7,Gigatons CO2 / Year,Rate of fossil fuel pollution starting in 2022.
7,124,Land Sink Rate,12.64,Gigatons CO2 / Year,"<span style=""letter-spacing: 0.39996px"">Rate o..."
8,140,Ocean Sink Rate,10.55,Gigatons CO2 / Year,"<span style=""letter-spacing: 0.39996px;"">Rate ..."
9,141,Land-Use Change Emissions,3.94,Gigatons CO2 / Year,"<span style=""letter-spacing: 0.39996px"">Rate o..."
10,144,e0,(SolarFlux * pi * EarthRadius^2) * (1 - [Earth...,Joules,Steady state initial energy used to initialize...


In [8]:
# Converters
converters = pd.read_csv('Climate Change/converterData.tsv', sep='\t', header=None, quotechar="~")
converters.columns = ['id', 'name', 'note', 'inputSource', 'data', 'unit', 'interpolation']
converters = converters.drop([0])
# converters = converters.reset_index()
converters

Unnamed: 0,id,name,note,inputSource,data,unit,interpolation
1,95,Emissivity,"Source:&nbsp;<a href=""https://www.gresham.ac.u...",96,"[{""x"":200,""y"":0.194},{""x"":400,""y"":0.14},{""x"":6...",Unitless,Linear
2,106,Historical Fossil Emissions,"<span style=""letter-spacing: 0.39996px;"">Sourc...",Time,"[{""x"":1959,""y"":8.86},{""x"":1960,""y"":9.39},{""x"":...",Gigatons CO2 / Year,Linear
3,110,Historical Global Temperature Anomaly,"Source:&nbsp;<a href=""https://www.statista.com...",Time,"[{""x"":1880,""y"":-0.48},{""x"":1881,""y"":-0.38},{""x...",Degrees Celsius,Linear
4,120,Historical CO2 PPM,"Source:&nbsp;<a href=""https://www.statista.com...",Time,"[{""x"":1959,""y"":315.98},{""x"":1960,""y"":316.91},{...",PPM CO2,Linear
5,133,Historical Land-Use Change Emissions,"<span style=""letter-spacing: 0.39996px;"">Sourc...",Time,"[{""x"":1959,""y"":7.1},{""x"":1960,""y"":6.57},{""x"":1...",Gigatons CO2 / Year,Linear
6,134,Historical Ocean Sink,"<span style=""letter-spacing: 0.39996px;"">Sourc...",Time,"[{""x"":1959,""y"":3.57},{""x"":1960,""y"":3.51},{""x"":...",Gigatons CO2 / Year,Linear
7,135,Historical Land Sink,"Source: <a href=""https://www.icos-cp.eu/scienc...",Time,"[{""x"":1959,""y"":1.47},{""x"":1960,""y"":4.52},{""x"":...",Gigatons CO2 / Year,Linear


In [9]:
# Generate a pandas df containing all primitive data
model = pd.concat([stocks, flows, variables, converters])
model = model.reset_index() 
model = model.drop(columns=['index'])
model

Unnamed: 0,id,name,initialValue,unit,delay,nonNegative,note,flowRate,onlyPositive,startStock,endStock,equation,inputSource,data,interpolation
0,55,Earth Energy,[e0] * scaledJoules,Joules,10.0,False,,,,,,,,,
1,58,Atmospheric Energy,[e0] * scaledJoules,Joules,10.0,False,,,,,,,,,
2,66,Atmospheric Carbon Dioxide,[Historical CO2 PPM] * CO2MassToPPM,Tons CO2,10.0,False,,,,,,,,,
3,56,Solar Radiation,,Watts,,,"Solar radiation, does not interact with the at...",SolarFlux * pi * EarthRadius^2,True,,55.0,,,,
4,60,Radiation Down,,Watts,,,,[Atmosphere Radiation] / 2,True,58.0,55.0,,,,
5,61,Pass Through,,Watts,,,,[Atmospheric Energy] * [Emissivity] * scaledWatts,True,58.0,,,,,
6,59,Reflection,,Watts,,,Solar radiation reflect by the earth (or atmos...,[Solar Radiation] * [Earth Albedo],True,55.0,,,,,
7,89,Radiation,,Watts,,,,[Earth Energy] * scaledWatts,True,55.0,58.0,,,,
8,93,Radiation Up,,Watts,,,,[Atmosphere Radiation] / 2,True,58.0,,,,,
9,67,Pollution,,Tons CO2 / Year,,,,if years <= 2021 then [Historical Fossil Emis...,True,,66.0,,,,


In [10]:
for index, instance in model.iterrows():
    print(index)
    print(instance)
    # print(instance['name'])# access information using instance['COLUMN NAME']
    # print(flow['name'][index])
    break

0
id                                55
name                    Earth Energy
initialValue     [e0] * scaledJoules
unit                          Joules
delay                             10
nonNegative                    false
note                             NaN
flowRate                         NaN
onlyPositive                     NaN
startStock                       NaN
endStock                         NaN
equation                         NaN
inputSource                      NaN
data                             NaN
interpolation                    NaN
Name: 0, dtype: object


### Creating the triples

Before we can add the new information acquired through the use of the simulation package to our knowledge graph (the SDM ontology in this case), some preprocessing of the data in the previously generated .tsv files is required. <br> The following cells contains code need to create instances and link them to their corresponding properties, using the proper predicates to do so. 

In [11]:
# Format names according to RDF syntax (get rid of spaces, capitilisation, etc.)  
def name_processor(name):
    words = name.split() 
    if len(words) == 1:
        formatted_name = name.lower()
        formatted_name = re.sub('[^A-Za-z0-9]+', '', formatted_name)
    else:    
        formatted_name = words[0].lower()+''.join(words[1:])
        formatted_name = re.sub('[^A-Za-z0-9]+', '', formatted_name)
    return formatted_name
    

# Function that allows users to create instances for any external sources used in their converters
def create_datasource(url, graphname, converter):
    source_URL = extract_sourceURL(url) 
    url_name = source_URL.replace('https://www.', '').strip('/')
    url_name = re.sub('[^A-Za-z0-9]+', '', url_name)
    source_URI = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+f'{url_name}')

    # print(source_URL, url_name, source_URI)
    graphname.add((source_URI, RDF.type, SDM.DataSource))
    graphname.add((source_URI, SDM.hasSourceURL, Literal(source_URL)))
    graphname.add((converter, SDM.hasDataSource, source_URI))
    
# Function to extract sources from a string input based on the Insight Maker model syntax ALSO ADD THE URL NAME AS RETURN VALUE
def extract_sourceURL(source_string): 
    # Find all URLs within the source based on tags
    source_url = re.search(r'<a href="([^"]+)"', source_string).group(1)
    return source_url

# Check whether a string contains only a numerical value or not
def is_number(string):
    try:
        float(string)
        return True
    except Exception:
        return False
    
def is_constant(value, known_constants):
    pi = sympy.symbols('pi')
    try:
        expr = sympy.sympify(value, locals = {'pi': pi})
        symbols = expr.free_symbols
        
        if not symbols or all(str(sym) in known_constants for sym in symbols):
            return symbols, True
        else:
            return False 
    except Exception:
        return False 

In [12]:
# Variables that holds any pre-defined constants and the name of the model

known_constants = {'pi', 'e'}
IM_name = 'Climate Change Model'

In [13]:
func_df = pd.read_excel('Built-In Functions IM.xlsx')
func_df

Unnamed: 0,Individual,Class,IMname
0,seconds,Time,Seconds
1,minutes,Time,Minutes
2,hours,Time,Hours
3,days,Time,Days
4,weeks,Time,Weeks
...,...,...,...
124,sample,Vector,Sample
125,sort,Vector,Sort
126,union,Vector,Union
127,unique,Vector,Unique


#### Generating an empty graph to instantiate with the model data

In [14]:
g2 = Graph()

SDM = Namespace('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/')
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
XSD = Namespace('http://www.w3.org/2001/XMLSchema#')

g2.bind('sdm', SDM)
g2.bind('rdf', RDF)
g2.bind('rdf', RDFS)

#### Generating the triples per primitive

In [15]:
model_instance = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(IM_name)) 

for index, instance in insight.iterrows():
    if pd.isna(instance['timeUnit']):
        pass
    else:
        time_unit = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(instance['timeUnit'])) 
        g2.add((time_unit, RDF.type, SDM.Unit))
        g2.add((model_instance, SDM.hasTimeUnit, time_unit))
        g2.add((model_instance, RDF.type, SDM.Model))
        g2.add((model_instance, SDM.usesAlgorithm, Literal(instance['solutionAlgorithm'])))
        g2.add((model_instance, SDM.hasTimeStart, Literal(instance['timeStart'], datatype=XSD.decimal)))
        g2.add((model_instance, SDM.hasTimeLength, Literal(instance['timeLength'], datatype=XSD.decimal)))
        g2.add((model_instance, SDM.hasTimeStep, Literal(instance['timeStep'], datatype=XSD.decimal)))
    
    for name in model['name']:
        primitive =  URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(name))
        g2.add((model_instance, SDM.hasPrimitive, primitive))
        g2.add((primitive, SDM.primitiveOf, model_instance))
    
    
    # print(instance['macros'])
    if pd.isna(instance['macros']):
        pass
    elif instance['macros'][0] not in {'#', '//'}:
        macro = instance['macros'].split('<-')
        name = macro[0].strip()
        value = macro[1].strip()
        
        if name[-1] == ')':
            function = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name.split('(')[0])
            g2.add((function, RDF.type, SDM.CustomFunction))
            #add value
            print('function', name.split('(')[0], value)
            new_row = {'Individual': name.split('(')[0], 'Class': 'Custom', 'IMname': name.split('(')[0]}
            new_row = pd.DataFrame([new_row])
            func_df = pd.concat([func_df, new_row], ignore_index=True)
        else:
            constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name)
            g2.add((constant, RDF.type, SDM.Constant))
            #add value
            print('constant', name)
            known_constants.add(name)


print(known_constants)
print(func_df)

constant scaledWatts
constant scaledJoules
constant CO2MassToPPM
constant EarthRadius
constant SolarFlux
constant Boltzmann
function KelvinToCelsius (k - {273.15 Degrees Kelvin}) * {1 Degrees Celsius/Degrees Kelvin}
{'CO2MassToPPM', 'pi', 'Boltzmann', 'scaledWatts', 'EarthRadius', 'e', 'scaledJoules', 'SolarFlux'}
          Individual   Class           IMname
0            seconds    Time          Seconds
1            minutes    Time          Minutes
2              hours    Time            Hours
3               days    Time             Days
4              weeks    Time            Weeks
..               ...     ...              ...
125             sort  Vector             Sort
126            union  Vector            Union
127           unique  Vector           Unique
128           values  Vector           Values
129  KelvinToCelsius  Custom  KelvinToCelsius

[130 rows x 3 columns]


In [16]:
# Adding Stocks
for index, instance in stocks.iterrows():
    stock = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(instance['name']))
    g2.add((stock, RDF.type, SDM.Stock))
    g2.add((stock, SDM.hasID, Literal(instance['id'], datatype=XSD.int)))
    if pd.isna(instance['note']):
        pass
    else:
        g2.add((stock, RDFS.comment, Literal(instance['note'])))
    
    if instance['unit'] != 'Unitless':
        unit = name_processor(instance['unit'].replace('/', 'Per'))
        unit = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+unit)
        g2.add((unit, RDF.type, SDM.Unit))
        g2.add((stock, SDM.hasUnit, unit))
        
    if instance['nonNegative'] == 'true':
        g2.add((stock, SDM.isNonNegative, Literal(True, datatype=XSD.boolean)))
    elif instance['nonNegative'] == 'false':
        g2.add((stock, SDM.isNonNegative, Literal(False, datatype=XSD.boolean)))
        
    # Create unique identifier for the initial value and create instances of the Expression class using these IDs
    iv_id = hashlib.shake_128(f"{instance['initialValue']}".encode('utf-8'))    
    iv_id = iv_id.hexdigest(5)
    initial_value = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+f'Expression_{iv_id}')
    print(iv_id, instance['initialValue'])
    
    g2.add((initial_value, RDF.type, SDM.Expression))
    g2.add((stock, SDM.hasInitialValue, initial_value))
    g2.add((initial_value, SDM.hasStrExpression, Literal(instance['initialValue'])))
    
    # Check if value of expression is a single-value constant
    if is_constant(instance['initialValue'], known_constants):
        symbols, boolean = is_constant(instance['initialValue'], known_constants)
        if not symbols:
            print(f'constant: {instance["initialValue"]}')
            constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+instance['initialValue'])
            g2.add((constant, RDF.type, SDM.Constant))
            g2.add((initial_value, SDM.hasSymbol, constant))
    
    # In all other cases, treat expression as more complex and obtain its symbolic terms (i.e. primitives, functions, and pre-defined constants)
    else:
        #Check for presence of primitives
        primitives = re.findall(r'\[(.*?)\]', instance['initialValue'])
        for primitive in primitives:
            primitive = name_processor(primitive)
            primitive = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+primitive)
            g2.add((initial_value, SDM.hasSymbol, primitive))
        
        # Check for presence of functions
        for index, func in func_df.iterrows():
            if func['IMname'] in instance['initialValue']:
                    print(instance['initialValue'], func['Individual'])
                    function = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+func['Individual'])
                    g2.add((initial_value, SDM.hasSymbol, function))
        
        # Check for presence of constants
        for constant in known_constants:
            if constant in instance['initialValue']:
                if constant in ['e', 'pi'] and not re.search(fr'\b{{constant}}\b', instance['initialValue']): 
                    pass
                else:
                    print(instance['initialValue'], constant)
                    constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+constant)
                    g2.add((initial_value, SDM.hasSymbol, constant))

c252ae1f3e [e0] * scaledJoules
[e0] * scaledJoules scaledJoules
c252ae1f3e [e0] * scaledJoules
[e0] * scaledJoules scaledJoules
b466971568 [Historical CO2 PPM] * CO2MassToPPM
[Historical CO2 PPM] * CO2MassToPPM CO2MassToPPM


In [17]:
# Adding Flows
for index, instance in flows.iterrows():
    flow = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(instance['name'])) 
    g2.add((flow, RDF.type, SDM.Flow))
    g2.add((flow, SDM.hasID, Literal(instance['id'], datatype=XSD.int)))
    if pd.isna(instance['note']):
        pass
    else:
        g2.add((flow, RDFS.comment, Literal(instance['note'])))
    
    if instance['unit'] != 'Unitless':
        unit = name_processor(instance['unit'].replace('/', 'Per'))
        unit = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+unit)
        g2.add((unit, RDF.type, SDM.Unit))
        g2.add((flow, SDM.hasUnit, unit))
        
    if instance['onlyPositive'] == 'true':
        g2.add((flow, SDM.isOnlyPositive, Literal(True, datatype=XSD.boolean)))
    elif instance['onlyPositive'] == 'false':
        g2.add((flow, SDM.isOnlyPositive, Literal(False, datatype=XSD.boolean)))

    # Create unique identifier for the flowrate and create instances of the Expression class using these IDs
    fr_id = hashlib.shake_128(f"{instance['flowRate']}".encode('utf-8'))    
    fr_id = fr_id.hexdigest(5)
    flow_rate = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+f'Expression_{fr_id}')
    
    g2.add((flow_rate, RDF.type, SDM.Expression))
    g2.add((flow, SDM.hasFlowrate, flow_rate))
    g2.add((flow_rate, SDM.hasStrExpression, Literal(instance['flowRate'])))
    
    # Check if value of expression is a single-value constant
    if is_constant(instance['flowRate'], known_constants):
        symbols, boolean = is_constant(instance['flowRate'], known_constants)
        if not symbols:
            print(f'constant: {instance["flowRate"]}')
            constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+instance['flowRate'])
            g2.add((constant, RDF.type, SDM.Constant))
            g2.add((flow_rate, SDM.hasSymbol, constant))
    
    # In all other cases, treat expression as more complex and obtain its symbolic terms (i.e. primitives, functions, and pre-defined constants)
    else:
        #Check for presence of primitives
        primitives = re.findall(r'\[(.*?)\]', instance['flowRate'])
        for primitive in primitives:
            primitive = name_processor(primitive)
            primitive = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+primitive)
            g2.add((flow_rate, SDM.hasSymbol, primitive))
        
        # Check for presence of functions
        for index, func in func_df.iterrows():
            if func['IMname'] in instance['flowRate']:
                    print(instance['flowRate'], func['Individual'])
                    function = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+func['Individual'])
                    g2.add((flow_rate, SDM.hasSymbol, function))
        
        # Check for presence of constants
        for constant in known_constants:
            if constant in instance['flowRate']:
                if constant in ['e', 'pi'] and not re.search(fr'\b{{constant}}\b', instance['flowRate']): 
                    pass
                else:
                    print(instance['flowRate'], constant)
                    constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+constant)
                    g2.add((flow_rate, SDM.hasSymbol, constant))
    
    if pd.isna(instance['startStock']):
        pass
    else:
        startStock = model[model['id'] == instance['startStock']]['name'].iloc[0]
        startStock = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(startStock))
        g2.add((flow, SDM.hasStart, startStock))
        print(startStock)
    
    if pd.isna(instance['endStock']):
        pass
    else:
        endStock = model[model['id'] == instance['endStock']]['name'].iloc[0]
        endStock = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(endStock))
        g2.add((flow, SDM.hasEnd, endStock))
        print(endStock)

http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/earthEnergy
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/atmosphericEnergy
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/earthEnergy
[Atmospheric Energy] * [Emissivity] * scaledWatts scaledWatts
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/atmosphericEnergy
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/earthEnergy
[Earth Energy] * scaledWatts scaledWatts
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/earthEnergy
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/atmosphericEnergy
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/atmosphericEnergy
if years <= 2021 then  [Historical Fossil Emissions]else  [Pollution Emissions]end if ifElse
http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/atmosphericCarbonDioxide
if years <= 2021 then  [Historical Land Sink]else  [Land Sink Rate]end if ifElse
if years <= 2021 then  [Historical Land Sink]else  [Land Sink Rate]end if sin
http:

In [18]:
# Adding Variables      
for index, instance in variables.iterrows():
    variable = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(instance['name']))
    g2.add((variable, RDF.type, SDM.Variable))
    g2.add((variable, SDM.hasID, Literal(instance['id'], datatype=XSD.int)))
    if pd.isna(instance['note']):
        pass
    else:
        g2.add((variable, RDFS.comment, Literal(instance['note'])))
        
    
    if instance['unit'] != 'Unitless':
        unit = name_processor(instance['unit'].replace('/', 'Per'))
        unit = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+unit)
        g2.add((unit, RDF.type, SDM.Unit))
        g2.add((variable, SDM.hasUnit, unit))
    
    # Create unique identifier for the equations and create instances of the Expression class using these IDs
    eq_id = hashlib.shake_128(f"{instance['equation']}".encode('utf-8'))    
    eq_id = eq_id.hexdigest(5)
    equation = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+f'Expression_{eq_id}')
    
    g2.add((equation, RDF.type, SDM.Expression))
    g2.add((variable, SDM.hasExpression, equation))
    g2.add((equation, SDM.hasStrExpression, Literal(instance['equation'])))
    
    # Check if value of expression is a single-value constant
    if is_constant(instance['equation'], known_constants):
        symbols, boolean = is_constant(instance['equation'], known_constants)
        if not symbols:
            print(f'constant: {instance["equation"]}')
            constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+instance['equation'])
            g2.add((constant, RDF.type, SDM.Constant))
            g2.add((equation, SDM.hasSymbol, constant))
    
    # In all other cases, treat expression as more complex and obtain its symbolic terms (i.e. primitives, functions, and pre-defined constants)
    else:
        #Check for presence of primitives
        primitives = re.findall(r'\[(.*?)\]', instance['equation'])
        for primitive in primitives:
            primitive = name_processor(primitive)
            primitive = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+primitive)
            g2.add((equation, SDM.hasSymbol, primitive))
        
        # Check for presence of functions
        for index, func in func_df.iterrows():
            if func['IMname'] in instance['equation']:
                print(instance['equation'], func['Individual'])
                function = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+func['Individual'])
                g2.add((equation, SDM.hasSymbol, function))
        
        # Check for presence of constants
        for constant in known_constants:
            if constant in instance['equation']:
                if constant in ['e', 'pi'] and not re.search(fr'\b{{constant}}\b', instance['equation']): 
                    pass
                else:
                    print(instance['equation'], constant)
                    constant = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+constant)
                    g2.add((equation, SDM.hasSymbol, constant))

KelvinToCelsius([Absolute Temperature]) abs
KelvinToCelsius([Absolute Temperature]) KelvinToCelsius
([Radiation] / (Boltzmann * (EarthRadius^2 * pi * 4)))^ (1/4) Boltzmann
([Radiation] / (Boltzmann * (EarthRadius^2 * pi * 4)))^ (1/4) EarthRadius
[Atmospheric Carbon Dioxide] / CO2MassToPPM CO2MassToPPM
[Atmospheric Energy] * (1 - [Emissivity]) * scaledWatts scaledWatts
constant: .3
constant: 36.7
constant: 12.64
constant: 10.55
constant: 3.94
(SolarFlux * pi * EarthRadius^2) * (1 - [Earth Albedo]) / ((1+[Emissivity])/2)  * {1 Seconds} seconds
(SolarFlux * pi * EarthRadius^2) * (1 - [Earth Albedo]) / ((1+[Emissivity])/2)  * {1 Seconds} EarthRadius
(SolarFlux * pi * EarthRadius^2) * (1 - [Earth Albedo]) / ((1+[Emissivity])/2)  * {1 Seconds} SolarFlux


In [19]:
# Adding Converters      
for index, instance in converters.iterrows():
    converter = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(instance['name']))
    g2.add((converter, RDF.type, SDM.Converter))
    g2.add((converter, SDM.hasData, Literal(instance['data'].replace('"', '').replace('\\', ''))))
    g2.add((converter, SDM.hasID, Literal(instance['id'], datatype=XSD.int)))
    
    if pd.isna(instance['note']):
        pass
    else:
        try:
            if extract_sourceURL(instance['note']):
                create_datasource(instance['note'], g2, converter)
        except Exception:
            g2.add((variable, RDFS.comment, Literal(instance['note'])))
    
    if instance['unit'] != 'Unitless':
        unit = name_processor(instance['unit'].replace('/', 'Per'))
        unit = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+unit)
        g2.add((unit, RDF.type, SDM.Unit))
        g2.add((converter, SDM.hasUnit, unit))
    
    if instance['inputSource'] == 'Time':
        g2.add((converter, SDM.hasInputSource, SDM.currentTime))
    else:
        input_source = model[model['id'] == instance['inputSource']]['name'].iloc[0]
        print(instance['inputSource'], input_source)
        input_source = URIRef('http://www.semanticweb.org/loafo/ontologies/2024/3/sdm/'+name_processor(input_source))
        g2.add((converter, SDM.hasInputSource, input_source))
        #fix the lack of notes!!!

96 CO2 PPM


In [20]:
print(len(g2))
# serialize_graph(g2)

349


In [21]:
g3 = g+g2
print(f'Nr of triples in the SDM Ontology: {len(g)}\nNr of added triples: {len(g2)}\nTotal nr of triples: {len(g3)}')

save_graph(g3, 'Climate Change/ccSDM.ttl')

Nr of triples in the SDM Ontology: 393
Nr of added triples: 349
Total nr of triples: 742


## Creating a representation in RDF for external tabular data 

This notebook can be used to open the converted rdf file obtained after using CoW on an external dataset. The following functions and code allow for the RDF file to be saved as ttl and merged with the SDM ontology

In [22]:
# Loads the data from a certain file given as input in Turtle syntax into the Graph g  
# -------------------------
def load_graph2(filename, graph):
    with open(filename, 'r') as f:
        graph.parse(f, format='xml')
    

In [None]:
extSource1 = Graph()
load_graph2('Climate Change/Global_Carbon_Budget_2022v1.0.rdf', extSource1)

print(len(extSource1))
# save_graph(extSource1, 'Climate Change/icosSource.ttl')
# serialize_graph(cc)

In [None]:
extSource2 = Graph()
load_graph2('Climate Change/ChrisBuddMathsOfClimateChangeTpdf.rdf', extSource2)

print(len(extSource2))
# save_graph(extSource2, 'Climate Change/greshamSource.ttl')
# serialize_graph(cc)

In [None]:
extSource3 = Graph()
load_graph2('Climate Change/statistic_id1048518_global-land-temperature-anomalies-1880-2023.rdf', extSource3)

print(len(extSource3))
# save_graph(extSource3, 'Climate Change/statistaLandSource.ttl')
# serialize_graph(cc)

In [None]:
extSource4 = Graph()
load_graph2('Climate Change/statistic_id1091926_global-atmospheric-carbon-dioxide-concentration-1959-2023.rdf', extSource4)

print(len(extSource4))
# save_graph(extSource4, 'Climate Change/statistaAtmosSource.ttl')
# serialize_graph(cc)

In [None]:
extData = extSource1+extSource2+extSource3+extSource4
print(f'Nr of triples in the SDM Ontology: {len(g)}\nNr of added triples from external data: {len(extData)}\nCompared to {len(g3)} with only the converter data available in IM')

# save_graph(extData, 'Climate Change/externalDataGraph.ttl')