In [None]:
from maplib import Mapping
import polars as pl
from math import floor
# Show up to 150 characters of each string value when frames are displayed
# (the URI columns built in this notebook are long).
# The documented class-level setter is used instead of assigning to `pl.config`,
# because that assignment replaces the `polars.config` submodule attribute with
# a Config instance.
pl.Config.set_fmt_str_lengths(150)

In [None]:
# Namespace IRI of the RDS vocabulary. It is prepended to each "Code" value further
# down to build type URIs, and it is the same IRI that the `rds:` prefix is bound to
# in the templates and in the SPARQL queries.
rds = "https://github.com/DataTreehouse/maplib_workshop/rds_power#"

This notebook builds a model that combines the functional aspect of three solar PV plants, structured according to the Reference Designation System (RDS) as proposed in a draft by Statkraft, with a publicly available dataset from NIST (https://pvdata.nist.gov/). It is intended for experimental purposes only. In the full version of the demo, this model can be used as context for time-series data from the NIST dataset, so that context and time series can be queried jointly using [chrontext](https://github.com/DataTreehouse/chrontext).

Image from https://pvdata.nist.gov/ 
![Alt text](ground.jpg "Image from https://pvdata.nist.gov/ showing a solar PV installation")

### These are the templates we will use in the mapping:

In [None]:
# Template library (as text) that is handed to Mapping(...) further down.
# - tpl:Types takes no parameters and only emits labels for the level classes and rds:Site.
# - tpl:RDSFunctionalSystemType / RDSTechnicalSystemType / RDSComponentSystemType each
#   delegate to tpl:RDSSystemType with a fixed level class as the last argument.
# - tpl:SolarTimeseries delegates to tpl:Timeseries and adds unit, comment and aggregation.
# The string content is consumed at runtime and must not be reformatted.
mapping_doc = """
@prefix tpl:<https://github.com/DataTreehouse/maplib_workshop/templates#>.
@prefix rds:<https://github.com/DataTreehouse/maplib_workshop/rds_power#>.
@prefix ct:<https://github.com/DataTreehouse/chrontext#>.
@prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#>.

tpl:Types[] :: {
    ottr:Triple(rds:FunctionalSystem, rdfs:label, "Functional System") ,
    ottr:Triple(rds:TechnicalSystem, rdfs:label, "Technical System") ,
    ottr:Triple(rds:ComponentSystem, rdfs:label, "Component System") ,
    ottr:Triple(rds:Site, rdfs:label, "Site") ,
} .

tpl:RDSFunctionalSystemType[?URI, ?Name, ??Description, ?Code] :: {
    tpl:RDSSystemType(?URI, ?Name, ?Description, ?Code, rds:FunctionalSystem)
} .
tpl:RDSTechnicalSystemType[?URI, ?Name, ??Description, ?Code] :: {
    tpl:RDSSystemType(?URI, ?Name, ?Description, ?Code, rds:TechnicalSystem)
} .
tpl:RDSComponentSystemType[?URI, ?Name, ??Description, ?Code] :: {
    tpl:RDSSystemType(?URI, ?Name, ?Description, ?Code, rds:ComponentSystem)
} .

tpl:RDSSystemType[?URI, ?Name, ??Description, ?Code, ?Level] :: {
    ottr:Triple(?URI, rdfs:label, ?Name),
    ottr:Triple(?URI, rdfs:comment, ?Description),
    ottr:Triple(?URI, rds:code, ?Code),
    ottr:Triple(?URI, rdfs:subClassOf, ?Level)
} .

tpl:Site [?SiteURI, ?SiteName, ?Code] :: {
    ottr:Triple(?SiteURI, rdfs:label, ?SiteName),
    ottr:Triple(?SiteURI, a, rds:Site),
    ottr:Triple(?SiteURI, rds:code, ?Code)
    } .

tpl:FunctionalAspect [?SourceURI, xsd:anyURI ?TargetURI,] :: {
    ottr:Triple(?SourceURI, rds:functionalAspect, ?TargetURI)
} .

tpl:ProductAspect [?SourceURI, xsd:anyURI ?TargetURI,] :: {
    ottr:Triple(?SourceURI, rds:productAspect, ?TargetURI)
} .

tpl:RDSSystem [?SystemURI, xsd:anyURI ?RDSType, ?Code, ?Label, ??Description] :: {
    ottr:Triple(?SystemURI, a, ?RDSType),
    ottr:Triple(?SystemURI, rds:code, ?Code),
    ottr:Triple(?SystemURI, rdfs:label, ?Label),
    ottr:Triple(?SystemURI, rdfs:comment, ?Description)
} .

tpl:StaticProperty [?ParentURI, ?ValueNodeURI, ?Label, ?Value] :: {
    ottr:Triple(?ParentURI, ct:hasStaticProperty, ?ValueNodeURI),
    ottr:Triple(?ValueNodeURI, rdfs:label, ?Label),
    ottr:Triple(?ValueNodeURI, ct:hasStaticValue, ?Value)
} .

tpl:SolarTimeseries [?ParentURI, ?TimeseriesURI, ?Name, ?ExternalId, ?Datatype, ?Resource, ?Description, ?Unit, ??Aggregation] :: {
    tpl:Timeseries(?ParentURI, ?TimeseriesURI, ?Name, ?ExternalId, ?Datatype, ?Resource),
    ottr:Triple(?TimeseriesURI, rds:unit, ?Unit),
    ottr:Triple(?TimeseriesURI, rdfs:comment, ?Description),
    ottr:Triple(?TimeseriesURI, rds:aggregation, ?Aggregation),
} .

tpl:Timeseries [?ParentURI, ?TimeseriesURI, ?Label, ?ExternalId, xsd:anyURI ?Datatype, ?Resource] :: {
    ottr:Triple(?ParentURI, ct:hasTimeseries, ?TimeseriesURI),
    ottr:Triple(?TimeseriesURI, ct:hasExternalId, ?ExternalId),
    ottr:Triple(?TimeseriesURI, ct:hasDatatype, ?Datatype),
    ottr:Triple(?TimeseriesURI, rdfs:label, ?Label),
    ottr:Triple(?TimeseriesURI, ct:hasResource, ?Resource)
} .
"""

### We instantiate a mapping object with our templates, and instantiate the first template, which does not need any arguments

In [None]:
# Create the mapping object from the template document defined above.
m = Mapping([mapping_doc])
# tpl:Types is declared with an empty parameter list, so it is expanded without a table.
m.expand("tpl:Types")

The following image displays the Site breakdown according to the functional aspect:

- Site
    - Block 1 (e.g. A1)
        - Generator 1 (e.g. RG1)
            - Inverter 1 (e.g. TBB1)
            - String 1 (e.g. RG1)
            - String 2 (e.g. RG2)

The image cannot be included in this public version of the notebook. 

![Alt text](function_aspect.png "a title")

### We load some functional system types from a CSV and process them slightly

In [None]:
# Read the functional system types lazily, derive each type's URI from its code,
# flatten the description to one line without double quotes, then materialise.
functional_systems = (
    pl.scan_csv("functional_systems.csv")
    .with_columns(
        [
            (rds + pl.col("Code")).alias("URI"),
            pl.col("Description")
            .str.replace_all("\n", " ", literal=True)
            .str.replace_all("\"", "", literal=True),
        ]
    )
    .collect()
)
functional_systems

### Add the functional system types to the graph

In [None]:
# Expand tpl:RDSFunctionalSystemType with the functional_systems table as arguments.
# NOTE(review): ["URI"] presumably names the column(s) that identify a row uniquely —
# confirm against the maplib `expand` signature.
m.expand("tpl:RDSFunctionalSystemType", functional_systems, ["URI"])

### Just checking that the functional system types are in the graph

In [None]:
# Select every subclass of rds:FunctionalSystem together with its code and label.
fs_qres = m.query("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX rds: <https://github.com/DataTreehouse/maplib_workshop/rds_power#> 
SELECT ?system ?code ?name WHERE {
    ?system rdfs:subClassOf rds:FunctionalSystem .
    ?system rds:code ?code .
    ?system rdfs:label ?name .
}
""")
fs_qres

### We assert that exactly the expected functional system codes are present

In [None]:
# The distinct codes returned by the query above must be exactly A, F and B.
assert set(fs_qres["code"]) == {"A", "F", "B"}

### We load some technical system types from a CSV and process them slightly

In [None]:
# Read the technical system types lazily, derive each type's URI from its code,
# flatten the description to one line without double quotes, then materialise.
technical_systems = (
    pl.scan_csv("technical_systems.csv")
    .with_columns(
        [
            (rds + pl.col("Code")).alias("URI"),
            pl.col("Description")
            .str.replace_all("\n", " ", literal=True)
            .str.replace_all("\"", "", literal=True),
        ]
    )
    .collect()
)
technical_systems.head(3)

### Now we can add the technical system types to the graph as well

In [None]:
# Expand tpl:RDSTechnicalSystemType with the technical_systems table as arguments.
# NOTE(review): ["URI"] presumably names the column(s) that identify a row uniquely —
# confirm against the maplib `expand` signature.
m.expand("tpl:RDSTechnicalSystemType", technical_systems, ["URI"])

In [None]:
### We load some component system types from a CSV and process them slightly

In [None]:
# Read the component system types lazily, derive each type's URI from its code,
# flatten the description to one line without double quotes, then materialise.
component_systems = (
    pl.scan_csv("component_systems.csv")
    .with_columns(
        [
            (rds + pl.col("Code")).alias("URI"),
            pl.col("Description")
            .str.replace_all("\n", " ", literal=True)
            .str.replace_all("\"", "", literal=True),
        ]
    )
    .collect()
)
component_systems.head(3)

### Finally, we can add the component system types to the graph

In [None]:
# Expand tpl:RDSComponentSystemType with the component_systems table as arguments.
# NOTE(review): ["URI"] presumably names the column(s) that identify a row uniquely —
# confirm against the maplib `expand` signature.
m.expand("tpl:RDSComponentSystemType", component_systems, ["URI"])

### Now we load several tables with the different sites and systems, and how they are connected. Ordinarily these come from a source system such as the maintenance system or a SCADA system.

In [None]:
# Load every table under solar/ eagerly (lazy scan followed directly by collect).
# Entity tables (sites, blocks, gens, invs, strings) are paired with relationship
# tables (*_has_*) that are later expanded with tpl:FunctionalAspect.
sites = pl.scan_parquet("solar/sites.parquet").collect()
blocks = pl.scan_parquet("solar/blocks.parquet").collect()
site_has_block = pl.scan_parquet("solar/site_has_block.parquet").collect()
gens = pl.scan_parquet("solar/generators.parquet").collect()
block_has_gen = pl.scan_parquet("solar/block_has_gen.parquet").collect()
invs = pl.scan_parquet("solar/inverters.parquet").collect()
gen_has_inv = pl.scan_parquet("solar/gen_has_inv.parquet").collect()
strings = pl.scan_parquet("solar/strings.parquet").collect()
gen_has_string = pl.scan_parquet("solar/gen_has_string.parquet").collect()
# Time-series metadata tables; both are expanded with tpl:SolarTimeseries at the end.
inv_timeseries = pl.scan_parquet("solar/inverter_timeseries.parquet").collect()
weather_timeseries = pl.scan_parquet("solar/weather_timeseries.parquet").collect()

### Just a peek inside the sites table, to see that these are entirely made up

In [None]:
# Bare expression: the notebook renders the sites table as this cell's output.
sites

### Add sites, blocks, and relationship between sites and blocks to the graph

In [None]:
# Add the sites and the blocks as nodes, then link them.
# NOTE(review): the list argument presumably names the unique key column(s) — confirm
# against the maplib `expand` signature.
m.expand("tpl:Site", sites, ["SiteURI"])
m.expand("tpl:RDSSystem", blocks, ["SystemURI"])
# tpl:FunctionalAspect emits one rds:functionalAspect triple from source to target.
m.expand("tpl:FunctionalAspect", site_has_block)

### Now we can check that the sites have blocks, and that these are of the appropriate type 

In [None]:
# List each site with the blocks it points to via rds:functionalAspect, keeping only
# blocks typed rds:A, ordered by site label and then block label.
block_qres = m.query("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX rds: <https://github.com/DataTreehouse/maplib_workshop/rds_power#> 
SELECT ?site ?site_name ?block ?block_name
WHERE {
    ?site a rds:Site .
    ?site rdfs:label ?site_name .
    ?site rds:functionalAspect ?block .
    ?block a rds:A .
    ?block rdfs:label ?block_name .
} 
ORDER BY ?site_name ?block_name
""")
block_qres

### Next, we add generators, inverters, strings and their relationships.

In [None]:
# For each level below the block, add the systems first (tpl:RDSSystem) and then the
# rds:functionalAspect links from their parents (tpl:FunctionalAspect).
# Generators under blocks:
m.expand("tpl:RDSSystem", gens, ["SystemURI"])
m.expand("tpl:FunctionalAspect", block_has_gen)
# Inverters under generators:
m.expand("tpl:RDSSystem", invs, ["SystemURI"])
m.expand("tpl:FunctionalAspect", gen_has_inv)
# Strings under generators:
m.expand("tpl:RDSSystem", strings, ["SystemURI"])
m.expand("tpl:FunctionalAspect", gen_has_string)

### We perform a similar check as above, but this time we just count the strings for each site

In [None]:
# Count, per site label, the strings reached by following rds:functionalAspect from
# site to block (rds:A), to generator (rds:RG), to string (also typed rds:RG).
string_qres = m.query("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX rds: <https://github.com/DataTreehouse/maplib_workshop/rds_power#> 
SELECT ?sitename (count(?string) as ?string_count)
WHERE {
    ?site a rds:Site.
    ?site rdfs:label ?sitename .
    ?site rds:functionalAspect ?block .
    ?block a rds:A .
    ?block rds:functionalAspect ?gen .
    ?gen a rds:RG .
    ?gen rds:functionalAspect ?string .
    ?string a rds:RG
} GROUP BY ?sitename
""")
string_qres

In [None]:
# Every site is expected to have 350 strings.
# Require at least one result row first: `.all()` over an empty column is True, so an
# empty query result (e.g. a broken mapping) would otherwise pass this check silently.
assert len(string_qres) > 0, "string count query returned no rows"
assert (string_qres["string_count"] == 350).all(), "expected 350 strings for every site"

### Finally, we attach the identifiers of some time series to the graph, which will lead to the next part of the demo

In [None]:
# Show the first three rows of the inverter time-series table.
inv_timeseries.head(3)

In [None]:
# Attach the inverter time-series identifiers and metadata via tpl:SolarTimeseries.
# NOTE(review): ["TimeseriesURI"] presumably names the unique key column — confirm
# against the maplib `expand` signature.
m.expand("tpl:SolarTimeseries", inv_timeseries, ["TimeseriesURI"])

In [None]:
# Attach the weather time-series identifiers and metadata via tpl:SolarTimeseries.
# NOTE(review): ["TimeseriesURI"] presumably names the unique key column — confirm
# against the maplib `expand` signature.
m.expand("tpl:SolarTimeseries", weather_timeseries, ["TimeseriesURI"])

### We write the resulting set of triples

In [None]:
# Write the mapping's triples to solar_model.nt in the current working directory.
m.write_ntriples("solar_model.nt")