In [25]:
from rdflib import Graph,URIRef,Namespace,BNode,Literal
from rdflib.namespace import XSD,RDF
import pandas as pd
import argparse
from collections import defaultdict
from datetime import datetime
import json
import os
import calendar


In [7]:
def load_graph(input_path, rdf_format="turtle", public_id="https://example.com/"):
    """Parse an RDF file into a fresh rdflib ``Graph`` and return it.

    Parameters
    ----------
    input_path : str
        Location of the RDF document to read.
    rdf_format : str, optional
        Serialization name handed to ``Graph.parse`` (defaults to ``"turtle"``,
        the value that used to be hard-coded).
    public_id : str, optional
        Value passed as ``publicID`` to ``Graph.parse`` (defaults to the
        previously hard-coded ``"https://example.com/"``).

    Returns
    -------
    Graph
        The newly created graph after parsing ``input_path`` into it.
    """
    graph = Graph()
    graph.parse(input_path, format=rdf_format, publicID=public_id)
    return graph
    

In [None]:
def unique_year_query(graph):
    """Run a SPARQL SELECT for the distinct years of all observation result times.

    The query matches every ``sosa:Observation`` that has a ``sosa:resultTime``,
    projects ``YEAR(?resultTime)`` as ``?year``, de-duplicates and orders by year.

    Parameters
    ----------
    graph
        Object exposing ``query(sparql_text)`` (an rdflib graph in this notebook).

    Returns
    -------
    Whatever ``graph.query`` returns for the query, unchanged.
    """
    # The ns1 and xsd prefixes are declared but not referenced by the query body.
    sparql_text = """
    PREFIX ns1: <https://example.com/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>

SELECT DISTINCT (YEAR(?resultTime) AS ?year)
WHERE {
  ?obs a sosa:Observation ;
       sosa:resultTime ?resultTime .
}
ORDER BY ?year
    """
    return graph.query(sparql_text)

In [18]:
# Load the source file once, then gather every year that has at least one observation.
input_path = "Mol_Sluis_Dessel_data_prettified.ttl"
original_graph = load_graph(input_path)
result = unique_year_query(original_graph)

# Each result row has a single column (?year); convert that term to a plain int.
years_set = {int(row[0].toPython()) for row in result}

    

In [19]:
# Show the distinct observation years collected in years_set.
print(years_set)

{2020, 2021, 2022, 2023, 2024, 2025}


In [29]:
# Pre-create an empty <base_path>/<year>/<month>/<day> folder for every calendar day
# of every year present in years_set.
base_path = "./data"  # root of the year/month/day folder tree
os.makedirs(base_path, exist_ok=True)

for year in sorted(years_set):
    for month in range(1, 13):
        # monthrange returns (weekday of the 1st, number of days); only the day count
        # is needed, and it accounts for leap years.
        days_in_month = calendar.monthrange(int(year), month)[1]

        for day in range(1, days_in_month + 1):
            # Month AND day are zero-padded (01..12, 01..31). The cell that writes
            # readings.nt builds its paths with a zero-padded month, so an unpadded
            # month folder here ("1".."9") would never be the one that gets used.
            day_path = os.path.join(base_path, str(year), f"{month:02d}", f"{day:02d}")
            # makedirs also creates the intermediate year and month folders;
            # exist_ok=True makes re-running this cell harmless.
            os.makedirs(day_path, exist_ok=True)

In [44]:
# SPARQL SELECT returning one row per sosa:Observation that has all of: an ns1:id,
# a sosa:hasSimpleResult, a sosa:observedProperty and a sosa:resultTime.
# Rows are ordered by ?time. The xsd prefix is declared but not used in the query body.
# NOTE(review): ns1 is bound to http://example.com/ here, whereas load_graph's publicID
# and the year query use https://example.com/ — confirm which scheme the data uses,
# because a mismatch would make `ns1:id` match nothing.
readings_query = """
    PREFIX ns1: <http://example.com/>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>

    SELECT ?obs ?id ?result ?property ?time
WHERE {
    ?obs a sosa:Observation ;
         ns1:id ?id ;
         sosa:hasSimpleResult ?result ;
         sosa:observedProperty ?property ;
         sosa:resultTime ?time .
}
ORDER BY ?time

"""

# Rebinds `result` (previously the year-query result) to the per-observation rows.
result = original_graph.query(readings_query)


In [43]:
# Preview only the first few observations: printing every row floods the notebook
# output and bloats the saved file.
PREVIEW_ROWS = 5

# Materialise the rows completely before slicing. Stopping a first iteration of an
# rdflib query result half-way can leave its internal generator partially consumed,
# which would make later cells silently miss the rows already read here.
preview_rows = list(result)
for row in preview_rows[:PREVIEW_ROWS]:
    print(row)
print(f"... {len(preview_rows)} observation rows in total")

In [49]:
# Write every observation row in `result` to <base_path>/<YYYY>/<MM>/<DD>/readings.nt
# (N-Triples), one file per calendar day of the observation's result time.
SOSA = Namespace("http://www.w3.org/ns/sosa/")
example = Namespace("http://example.com/")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

base_path = "./data"  # root of the year/month/day folder tree
os.makedirs(base_path, exist_ok=True)

# One graph per target file. Collecting all triples of a day first means each file is
# serialized and written once, instead of building a graph and reopening the file for
# every single observation. Prefix bindings are not needed: N-Triples has no prefixes.
graphs_by_file = defaultdict(Graph)

for row in result:
    obs = row["obs"]

    # A typed xsd:dateTime literal already converts to a datetime; anything else
    # (e.g. a plain string or a date) is parsed from its ISO text form.
    time_value = row["time"].toPython()
    if isinstance(time_value, datetime):
        dt = time_value
    else:
        dt = datetime.fromisoformat(str(time_value))

    file_path = os.path.join(
        base_path, str(dt.year), f"{dt.month:02d}", f"{dt.day:02d}", "readings.nt"
    )
    day_graph = graphs_by_file[file_path]

    day_graph.add((obs, RDF.type, SOSA.Observation))
    day_graph.add((obs, example.id, Literal(row["id"], datatype=xsd.int)))
    day_graph.add((obs, SOSA.hasSimpleResult, Literal(row["result"], datatype=xsd.float)))
    # Pass the observed property through unchanged. Wrapping it in Literal(...) would
    # turn an IRI-valued property into a plain string literal and lose the link.
    day_graph.add((obs, SOSA.observedProperty, row["property"]))
    day_graph.add((obs, SOSA.resultTime, Literal(time_value, datatype=xsd.dateTime)))

for file_path, day_graph in graphs_by_file.items():
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Merge with whatever is already stored for that day, then rewrite the file.
    # A graph is a set of triples, so re-running this cell no longer duplicates lines
    # (plain append mode did), while readings written by earlier runs are kept.
    if os.path.exists(file_path):
        day_graph.parse(file_path, format="nt")

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(day_graph.serialize(format="nt"))