In [1]:
# Workaround for running Jupyter from WSL: add the parent of the current
# working directory to the module search path.
import os
import sys

_parent_dir = os.path.abspath(os.path.join(os.getcwd(), "../"))
# Only append once, so re-running this cell does not keep growing sys.path
# with duplicate entries.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [2]:
from dotenv import load_dotenv

# Load environment variables via python-dotenv before the GRAPH_* settings
# are read with os.getenv in the next cell. The call is the last expression,
# so its return value is shown as the cell output.
load_dotenv()

True

In [3]:
# Base URI used as the prefix of every resource URI built in this notebook
uri = "http://ramp.uni-mannheim.de"

# Graph store connection settings, read from the environment.
# Each value is None when its variable is not set.
graph_endpoint, username, password = (
    os.environ.get(variable)
    for variable in ("GRAPH_ENDPOINT", "GRAPH_USERNAME", "GRAPH_PASSWORD")
)

In [4]:
# Create the in-memory RDFLib graph and seed it with the base ontology
from rdflib import Graph, Namespace

# Namespace object for RAMP terms; bound to the "ramp" prefix on the graph
RAMP = Namespace("http://ramp.uni-mannheim.de/")

graph = Graph()
graph.bind(prefix="ramp", namespace=RAMP)

# Print the working directory: the ontology path below is relative to it
print(os.getcwd())

# Parse the Turtle ontology file into the graph. This is the last expression
# of the cell, so the returned graph object is displayed as the cell result.
graph.parse(source="./ontology/ontology.ttl", format="turtle")

/Users/yannikhahn/Code/module-catalog-extractor/sparql


<Graph identifier=Nb685773527564f7fa59481b0b82a5ca6 (<class 'rdflib.graph.Graph'>)>

In [5]:
from scripts import parse_module_catalog, parse_overview, format_module_name
import glob
import os

output_dir = "../output/best"

# glob.glob returns matches in arbitrary, filesystem-dependent order. Sort them
# so the study programs are processed in the same order on every run.
overview_files = sorted(glob.glob(os.path.join(output_dir, "*/catalog_overview.json")))

# One dict per study program:
#   'study_program' -> result of parse_overview for the catalog overview file
#   'modules'       -> the .modules attribute of the parsed merged module catalog
study_program_data = []

for overview_file in overview_files:
    overview = parse_overview(overview_file)
    dir_path = os.path.dirname(overview_file)
    merged_file = os.path.join(dir_path, "merged_modules.json")

    # A program is only usable when its merged module file sits next to the
    # overview; report skipped directories instead of dropping them silently.
    if not os.path.exists(merged_file):
        print(f"Skipping {dir_path}: merged_modules.json not found")
        continue

    merged = parse_module_catalog(merged_file)
    study_program_data.append({
        'study_program': overview,
        'modules': merged.modules
    })

# study_program_data now contains all the study program and modules pairs

In [6]:
from rdflib import URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS
import requests
import hashlib

from scripts import format_degree_name

XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")


def _stable_id(*parts):
    """Return a 12-digit decimal identifier derived only from ``parts``.

    The parts are lower-cased, joined with a separator character and hashed
    with a *fresh* MD5 object. Reusing a single hash object across entities
    would make every identifier depend on all names hashed before it (and
    therefore on iteration order), because repeated ``update()`` calls hash
    the concatenation of everything fed so far.

    Args:
        *parts: Strings that together identify the entity.

    Returns:
        The first 12 characters of the digest written as a decimal integer.
    """
    key = "\x1f".join(part.lower() for part in parts)
    digest = hashlib.md5(key.encode()).hexdigest()
    return str(int(digest, 16))[0:12]


for study_program in study_program_data:
    overview = study_program['study_program']

    # Study program node: type and label
    study_program_uri = URIRef(f"{uri}/study_program/{_stable_id(overview.name)}")
    graph.add((study_program_uri, RDF.type, RAMP.StudyProgram))
    graph.add((study_program_uri, RDFS.label, Literal(overview.name)))

    # Degrees are identified by their upper-cased value
    for degree in overview.hasDegree:
        degree_url = URIRef(f"{uri}/degree/{degree.value.upper()}")
        graph.add((study_program_uri, RAMP.hasDegree, degree_url))
        graph.add((degree_url, RDF.type, RAMP.Degree))
        graph.add((degree_url, RDFS.label, Literal(format_degree_name(degree.value))))

    # Iterate through each study area and module to create triples
    for study_area in overview.studyArea:
        # The program name is part of the key so that equally named study
        # areas of different programs keep distinct URIs.
        study_area_uri = URIRef(
            f"{uri}/study_area/{_stable_id(overview.name, study_area.name)}"
        )
        graph.add((study_area_uri, RDF.type, RAMP.StudyArea))
        graph.add((study_area_uri, RAMP.isStudyAreaOf, study_program_uri))
        graph.add((study_area_uri, RDFS.label, Literal(study_area.name)))
        for ects in study_area.requiredEcts:
            graph.add((study_area_uri, RAMP.ects, Literal(ects, datatype=XSD.integer)))

        for module in study_area.modules:
            # Skip falsy entries in the module list
            if not module:
                continue

            # Modules are identified by their upper-cased id
            module_uri = URIRef(f"{uri}/module/{module.id.upper()}")
            graph.add((module_uri, RDF.type, RAMP.Module))

            # Link the module to both its study area and its study program
            graph.add((study_area_uri, RAMP.hasModule, module_uri))
            graph.add((study_program_uri, RAMP.hasModule, module_uri))

            # Literal properties for name and id
            graph.add((module_uri, RDFS.label, Literal(format_module_name(module.name))))
            graph.add((module_uri, RAMP.id, Literal(module.id)))

# N-Triples snapshot of the graph after the overview data has been added
rdf_data = graph.serialize(format="nt")


In [7]:
from rdflib import BNode


def _add_prerequisites(module_uri, prerequisites, predicate):
    """Add prerequisite triples for one module.

    An entry that splits into exactly two parts on "_" is linked as a module
    URI via ``predicate``; any other entry is stored as a plain literal under
    ``RAMP.hasAdditionalPrerequisite``.
    """
    for prerequisite in prerequisites:
        if len(prerequisite.split("_")) == 2:
            graph.add((module_uri, predicate, URIRef(f"{uri}/module/{prerequisite.upper()}")))
        else:
            graph.add((module_uri, RAMP.hasAdditionalPrerequisite, Literal(prerequisite)))


def _add_persons(module_uri, persons, predicate):
    """Link each person with a truthy ``hasName`` to the module via ``predicate``.

    The person URI is the upper-cased name with spaces replaced by underscores;
    every linked person is also typed as ``FOAF.Person``.
    """
    for person in persons:
        if person.hasName:
            person_uri = URIRef(f"{uri}/person/{person.name.upper().replace(' ', '_')}")
            graph.add((module_uri, predicate, person_uri))
            graph.add((person_uri, RDF.type, FOAF.Person))


for study_program in study_program_data:
    modules = study_program['modules']

    for module in modules:
        module_uri = URIRef(f"{uri}/module/{module.id.upper()}")

        # Scalar properties are only added when a value is present
        if module.name is not None:
            graph.add((module_uri, RDFS.label, Literal(format_module_name(module.name))))
        if module.ects is not None:
            graph.add((module_uri, RAMP.ects, Literal(module.ects, datatype=XSD.int)))
        if module.workloadInPerson is not None:
            graph.add((module_uri, RAMP.workloadInPerson, Literal(module.workloadInPerson, datatype=XSD.int)))
        if module.workloadSelfStudy is not None:
            graph.add((module_uri, RAMP.workloadSelfStudy, Literal(module.workloadSelfStudy, datatype=XSD.int)))

        # One blank node per assessment form. The "/"-separated distribution
        # string is split once; its i-th part belongs to the i-th form.
        if module.examinationDistribution:
            distribution_parts = module.examinationDistribution.split("/")
        else:
            distribution_parts = []
        for index, form in enumerate(module.assessmentForms or []):
            assessment_node = BNode()
            # The ontology declares this property as ramp:hasAssessment; the
            # previous spelling "hasAssesment" did not match it.
            graph.add((module_uri, RAMP.hasAssessment, assessment_node))
            graph.add((assessment_node, RAMP.hasAssessmentForm, Literal(form.value)))
            if index < len(distribution_parts):
                dist = distribution_parts[index]
                # Only purely numeric parts are stored
                if dist.isdigit():
                    graph.add((assessment_node, RAMP.examinationDistribution, Literal(dist, datatype=XSD.decimal)))
            if module.examinationDuration is not None:
                graph.add((assessment_node, RAMP.examinationDuration, Literal(module.examinationDuration, datatype=XSD.int)))

        if module.additionalPrerequisite:
            graph.add((module_uri, RAMP.hasAdditionalPrerequisite, Literal(module.additionalPrerequisite)))

        # Add lists of literals
        for semester in module.offeredIn:
            graph.add((module_uri, RAMP.offeredIn, Literal(semester.value)))
        for recommendedSemester in module.recommendedSemester:
            graph.add((module_uri, RAMP.recommendedSemester, Literal(recommendedSemester, datatype=XSD.int)))
        for literature in module.recommendedLiterature:
            graph.add((module_uri, RAMP.recommendedLiterature, Literal(literature)))

        # NOTE(review): confirm that hasOptionalPrerequisite matches the
        # property name declared in the ontology.
        _add_prerequisites(module_uri, module.requiredPrerequisiteModules, RAMP.hasMandatoryPrerequisite)
        _add_prerequisites(module_uri, module.optionalPrerequisiteModules, RAMP.hasOptionalPrerequisite)

        # NOTE(review): "hasFurtherModue" looks like a misspelling of
        # "hasFurtherModule" - verify against the ontology before renaming.
        for further in module.furtherModules:
            graph.add((module_uri, RAMP.hasFurtherModue, Literal(further)))

        # Add lecturer and person in charge as URI references
        _add_persons(module_uri, module.hasLecturer, RAMP.hasLecturer)
        _add_persons(module_uri, module.hasPersonInCharge, RAMP.hasPersonInCharge)
    


In [8]:
# Serialize the graph to Turtle once and reuse the resulting text below,
# instead of re-serializing the whole graph for every use.
rdf_data = graph.serialize(format="turtle")

# Store the populated ontology in a file
graph.serialize(destination="../ontology/ontology-full.ttl", format="turtle")

# Print the Turtle document (optional, for debugging). Kept as the last
# statement so the cell has no additional displayed result.
print(rdf_data)

@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix ramp: <http://ramp.uni-mannheim.de/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://ramp.uni-mannheim.de> a owl:Ontology .

ramp:ects a owl:DatatypeProperty ;
    rdfs:label "ECTS" ;
    rdfs:domain [ a owl:Class ;
            owl:unionOf ( ramp:Module ramp:StudyArea ) ] ;
    rdfs:range xsd:decimal .

ramp:hasAdditionalPrerequisite a owl:DatatypeProperty ;
    rdfs:domain ramp:Module .

ramp:hasAssessment a owl:ObjectProperty ;
    rdfs:domain ramp:Module ;
    rdfs:range ramp:Assessment .

ramp:hasDegree a owl:ObjectProperty ;
    rdfs:domain ramp:StudyProgram ;
    rdfs:range ramp:Degree .

ramp:hasLecturer a owl:ObjectProperty ;
    rdfs:domain ramp:Module ;
    rdfs:range foaf:Person .

ramp:hasMandatoryPrerequisite a owl:ObjectProperty ;
    rdfs:subP

In [9]:


from owlready2 import get_ontology, sync_reasoner
import io

# Hand the rdflib graph to Owlready2 through an in-memory RDF/XML document
rdf_data_xml = graph.serialize(format="xml")
rdf_file = io.BytesIO(rdf_data_xml.encode("utf-8"))

# Load the document from the file object as the ontology with the given IRI
onto = get_ontology("http://ramp.uni-mannheim.de").load(fileobj=rdf_file)

# Run Owlready2's reasoner inside the ontology context, with inference of
# property values enabled
with onto:
    sync_reasoner(infer_property_values=True)

# Write the world, which now includes the reasoning results, to disk
onto.world.save(file="../ontology/ontology-reasoned.owl")



* Owlready2 * Running HermiT...
    java -Xmx2000M -cp /Users/yannikhahn/Library/Caches/pypoetry/virtualenvs/module-catalog-extractor-49STR4rT-py3.12/lib/python3.12/site-packages/owlready2/hermit:/Users/yannikhahn/Library/Caches/pypoetry/virtualenvs/module-catalog-extractor-49STR4rT-py3.12/lib/python3.12/site-packages/owlready2/hermit/HermiT.jar org.semanticweb.HermiT.cli.CommandLine -c -O -D -I file:////var/folders/zx/63q9k_gd4nb5sbdwq12wb_km0000gn/T/tmp_lukgxiy -Y
* Owlready2 * HermiT took 1.094376802444458 seconds
* Owlready * Reparenting ramp.uni-mannheim.de.hasMandatoryPrerequisite: {owl.ObjectProperty, ramp.uni-mannheim.de.hasPrerequisite} => {ramp.uni-mannheim.de.hasPrerequisite}
* Owlready * Reparenting ramp.uni-mannheim.de.hasRecommnededPrerequisite: {owl.ObjectProperty, ramp.uni-mannheim.de.hasPrerequisite} => {ramp.uni-mannheim.de.hasPrerequisite}
* Owlready * Reparenting ramp.uni-mannheim.de.isMandatoryPrerequisiteOf: {owl.ObjectProperty, ramp.uni-mannheim.de.isPrerequisite

In [10]:
# Expose the Owlready2 world as an rdflib graph and bind the "ramp" prefix on it
reasoned_graph = onto.world.as_rdflib_graph()
reasoned_graph.bind("ramp", RAMP)


# Write the reasoned graph to disk as Turtle ...
reasoned_graph.serialize(destination="../ontology/ontology-reasoned.ttl", format="turtle")
# ... and keep the Turtle text in memory; the upload cell embeds it in its
# SPARQL update. The assignment is last, so the cell displays no result.
reasoned_turtle = reasoned_graph.serialize(format="turtle")

In [11]:
from blazegraph_client import create_blazegraph_namespace, delete_blazegraph_namespace

# Reset the graph store: delete the namespace, then create it again.
# The create call is the last expression, so its return value is displayed.
BLAZEGRAPH_NAMESPACE = "ramp"

delete_blazegraph_namespace(BLAZEGRAPH_NAMESPACE)
create_blazegraph_namespace(BLAZEGRAPH_NAMESPACE)

Namespace 'ramp' deleted successfully.
Namespace 'ramp' created successfully.


True

In [12]:
from requests.auth import HTTPBasicAuth

# Wrap the reasoned graph in a SPARQL INSERT DATA update.
# NOTE(review): reasoned_turtle is a Turtle document. If it contains "@prefix"
# directives, those are not part of the SPARQL 1.1 Update grammar for
# INSERT DATA - confirm the endpoint accepts this payload, or consider
# serializing the graph as N-Triples for the update body instead.
sparql_update = f"""
INSERT DATA {{
    {reasoned_turtle}
}}
"""

headers = {
    "Content-Type": "application/sparql-update"
}

# Send the update with Basic Authentication.
# The body is passed as explicit UTF-8 bytes: a str body is encoded by the
# underlying HTTP layer, which may use ISO-8859-1 (http.client's default for
# string bodies) and would corrupt or reject non-ASCII characters in labels.
response = requests.post(
    url=graph_endpoint,
    data=sparql_update.encode("utf-8"),
    headers=headers,
    auth=HTTPBasicAuth(username, password)
)

if response.status_code == 200:
    print("Data successfully inserted.")
else:
    print("Failed to insert data:", response.status_code, response.text)

Data successfully inserted.
