<a href="https://colab.research.google.com/github/BomiaoZh/bachelor/blob/main/bachelor1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

query_skos = """
SELECT (COUNT(DISTINCT ?s) AS ?count) WHERE {
  ?s a <http://www.w3.org/2004/02/skos/core#Concept> .
}
"""

query_owl = """
SELECT (COUNT(DISTINCT ?s) AS ?count) WHERE {
  ?s a <http://www.w3.org/2002/07/owl#Class> .
}
"""

query_dct = """
SELECT (COUNT(?s) AS ?count) WHERE {
  ?s <http://purl.org/dc/terms/title> ?title .
}
"""

def run_queries(endpoint_url, repo_name):
    result = {"Repository": repo_name}
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setReturnFormat(JSON)

    try:
        sparql.setQuery(query_skos)
        res = sparql.query().convert()
        result["skos:Concept"] = int(res["results"]["bindings"][0]["count"]["value"]) > 0
    except:
        result["skos:Concept"] = False

    try:
        sparql.setQuery(query_owl)
        res = sparql.query().convert()
        result["owl:Class"] = int(res["results"]["bindings"][0]["count"]["value"]) > 0
    except:
        result["owl:Class"] = False

    try:
        sparql.setQuery(query_dct)
        res = sparql.query().convert()
        result["dcterms:title"] = int(res["results"]["bindings"][0]["count"]["value"]) > 0
    except:
        result["dcterms:title"] = False

    return result

lov_results = run_queries("https://lov.linkeddata.es/dataset/lov/sparql", "LOV")
gesis_results = run_queries("https://data.gesis.org/gesiskg/sparql", "GESIS")

manual = [
    {"Repository": "BioPortal", "skos:Concept": True, "owl:Class": True, "dcterms:title": False},
    {"Repository": "OLS",       "skos:Concept": True, "owl:Class": True, "dcterms:title": True},
    {"Repository": "BARTOC",    "skos:Concept": True, "owl:Class": True, "dcterms:title": True},
    {"Repository": "CLARIAH",   "skos:Concept": True, "owl:Class": True, "dcterms:title": True}
]

result = pd.DataFrame([lov_results, gesis_results] + manual)

print("\n🧪 Interoperability – Use of SKOS / OWL / DCTERMS\n")
print(result)



🧪 Interoperability – Use of SKOS / OWL / DCTERMS

  Repository  skos:Concept  owl:Class  dcterms:title
0        LOV          True       True           True
1      GESIS         False       True          False
2  BioPortal          True       True          False
3        OLS          True       True           True
4     BARTOC          True       True           True
5    CLARIAH          True       True           True


In [None]:
!pip install SPARQLWrapper

from SPARQLWrapper import SPARQLWrapper, JSON

endpoints = {
    "LOV": "https://lov.linkeddata.es/dataset/lov/sparql",
    "GESIS": "https://data.gesis.org/gesiskg/sparql"
}

query = """
SELECT DISTINCT (lang(?label) AS ?lang) WHERE {
  {
    ?s <http://www.w3.org/2000/01/rdf-schema#label> ?label .
  } UNION {
    ?s <http://www.w3.org/2004/02/skos/core#prefLabel> ?label .
  }
  FILTER (lang(?label) != "")
}
"""

def check_multilingual_support(endpoint_url):
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        results = sparql.query().convert()
        languages = [result["lang"]["value"] for result in results["results"]["bindings"]]
        unique_langs = sorted(set(languages))
        return unique_langs
    except Exception as e:
        return f"Error querying {endpoint_url}: {str(e)}"

manual_language_support = {
    "BARTOC": ['bg', 'ca', 'hr', 'cs', 'da', 'nl', 'en', 'et', 'fi', 'fr', 'de', 'el', 'hu', 'it',
               'lv', 'lt', 'mk', 'mt', 'pl', 'pt', 'ro', 'sr', 'sk', 'sl', 'es', 'sv'],
    "OLS": ['en', 'fr', 'es', 'de', 'la'],
    "CLARIAH": ['en'],
    "BioPortal": ['en']
}

for repo, langs in manual_language_support.items():
    print(f"{repo} supports {len(langs)} languages: {langs}")

for name, url in endpoints.items():
    langs = check_multilingual_support(url)
    if isinstance(langs, list):
        print(f"{name} supports {len(langs)} languages: {langs}")
    else:
        print(f"{name} query failed: {langs}")


BARTOC supports 26 languages: ['bg', 'ca', 'hr', 'cs', 'da', 'nl', 'en', 'et', 'fi', 'fr', 'de', 'el', 'hu', 'it', 'lv', 'lt', 'mk', 'mt', 'pl', 'pt', 'ro', 'sr', 'sk', 'sl', 'es', 'sv']
OLS supports 5 languages: ['en', 'fr', 'es', 'de', 'la']
CLARIAH supports 1 languages: ['en']
BioPortal supports 1 languages: ['en']
LOV supports 111 languages: ['EN', 'af', 'aln', 'ar', 'arz', 'ast', 'az', 'be', 'be-tarask', 'bg', 'bn', 'br', 'ca', 'ca-ad', 'ca-es', 'cs', 'cz', 'da', 'de', 'de-DE', 'dk', 'dsb', 'ee', 'el', 'en', 'en-GB', 'en-US', 'en-au', 'en-gb', 'en-uk', 'en-us', 'eng', 'eo', 'es', 'es-ar', 'es-co', 'es-cu', 'es-es', 'es-mx', 'es-ve', 'et', 'eu', 'fa', 'fi', 'fr', 'fr-be', 'fr-ca', 'fr-ch', 'fr-fr', 'fur', 'ga', 'gl', 'gr', 'he', 'hi', 'hr', 'hsb', 'hu', 'hy', 'ia', 'in', 'is', 'it', 'ja', 'jp', 'ka', 'kn', 'ko', 'kr', 'ku-latn', 'la', 'lb', 'ln', 'lt', 'lv', 'mk', 'ms', 'mt', 'myv', 'mzn', 'nds', 'nl', 'no', 'pl', 'ps', 'pt', 'pt-ao', 'pt-br', 'pt-pt', 'ro', 'ru', 'rue', 'sk', 'sl'

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

def assess_lov_sustainability():
    endpoint = "https://lov.linkeddata.es/dataset/lov/sparql"
    sparql = SPARQLWrapper(endpoint)
    sparql.setReturnFormat(JSON)

    def count_query(query):
        sparql.setQuery(query)
        results = sparql.query().convert()
        return int(results["results"]["bindings"][0]["count"]["value"])

    total_ontologies_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> .
    }
    """

    license_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://purl.org/dc/terms/license> ?license .
    }
    """

    version_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://www.w3.org/2002/07/owl#versionInfo> ?v .
    }
    """

    try:
        total = count_query(total_ontologies_query)
        licensed = count_query(license_query)
        versioned = count_query(version_query)

        license_ratio = licensed / total if total else 0
        version_ratio = versioned / total if total else 0

        if license_ratio >= 0.5 and version_ratio >= 0.5:
            level = "High"
        elif license_ratio >= 0.2 or version_ratio >= 0.2:
            level = "Medium"
        else:
            level = "Low"

        return {
            "Number of ontologies": total,
            "ontology_with_license": licensed,
            "ontology_with_version": versioned,
            "license_ratio": round(license_ratio, 2),
            "version_ratio": round(version_ratio, 2),
            "sustainability_level": level
        }

    except Exception as e:
        return {"error": str(e)}

if __name__ == "__main__":
    result = assess_lov_sustainability()
    for k, v in result.items():
        print(f"{k}: {v}")


Number of ontologies: 814
ontology_with_license: 236
ontology_with_version: 529
license_ratio: 0.29
version_ratio: 0.65
sustainability_level: Medium


In [None]:
!pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

def assess_sustainability(endpoint_url):
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setReturnFormat(JSON)

    def count_query(query):
        sparql.setQuery(query)
        results = sparql.query().convert()
        return int(results["results"]["bindings"][0]["count"]["value"])

    total_ontologies_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> .
    }
    """

    license_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://purl.org/dc/terms/license> ?license .
    }
    """

    version_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://www.w3.org/2002/07/owl#versionInfo> ?v .
    }
    """

    try:
        total = count_query(total_ontologies_query)
        licensed = count_query(license_query)
        versioned = count_query(version_query)

        license_ratio = licensed / total if total else 0
        version_ratio = versioned / total if total else 0
        score = license_ratio + version_ratio

        if score >= 0.9:
            level = "High"
        elif score >= 0.5:
            level = "Medium"
        else:
            level = "Low"

        return {
            "Endpoint": endpoint_url,
            "Number of ontologies": total,
            "ontology_with_license": licensed,
            "ontology_with_version": versioned,
            "license_ratio": round(license_ratio, 2),
            "version_ratio": round(version_ratio, 2),
            "combined_score": round(score, 2),
            "sustainability_level": level
        }

    except Exception as e:
        return {"Endpoint": endpoint_url, "error": str(e)}

if __name__ == "__main__":
    lov_result = assess_sustainability("https://lov.linkeddata.es/dataset/lov/sparql")
    gesis_result = assess_sustainability("https://data.gesis.org/gesiskg/sparql")

    print("LOV:")
    for k, v in lov_result.items():
        print(f"{k}: {v}")

    print("\nGESIS:")
    for k, v in gesis_result.items():
        print(f"{k}: {v}")




Collecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)


In [4]:
from SPARQLWrapper import SPARQLWrapper, JSON

def assess_sustainability(endpoint_url):
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setReturnFormat(JSON)

    def count_query(query):
        sparql.setQuery(query)
        results = sparql.query().convert()
        return int(results["results"]["bindings"][0]["count"]["value"])

    total_ontologies_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> .
    }
    """

    license_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://purl.org/dc/terms/license> ?license .
    }
    """

    version_query = """
    SELECT (COUNT(DISTINCT ?ont) AS ?count) WHERE {
      ?ont a <http://www.w3.org/2002/07/owl#Ontology> ;
            <http://www.w3.org/2002/07/owl#versionInfo> ?v .
    }
    """

    try:
        total = count_query(total_ontologies_query)
        licensed = count_query(license_query)
        versioned = count_query(version_query)

        license_ratio = licensed / total if total else 0
        version_ratio = versioned / total if total else 0
        score = license_ratio + version_ratio

        if score >= 0.9:
            level = "High"
        elif score >= 0.5:
            level = "Medium"
        else:
            level = "Low"

        return {
            "Endpoint": endpoint_url,
            "Number of ontologies": total,
            "ontology_with_license": licensed,
            "ontology_with_version": versioned,
            "license_ratio": round(license_ratio, 2),
            "version_ratio": round(version_ratio, 2),
            "combined_score": round(score, 2),
            "sustainability_level": level
        }

    except Exception as e:
        return {"Endpoint": endpoint_url, "error": str(e)}


bioportal_result = {
    "Endpoint": "BioPortal (manual entry)",
    "Number of selected ontologies": 10,
    "ontology_with_license": 9,
    "ontology_with_version": 10,
    "license_ratio": 0.9,
    "version_ratio": 1.0,
    "combined_score": 1.9,
    "sustainability_level": "High"
}

clariah_result = {
    "Endpoint": "CLARIAH (manual entry)",
    "Number of selected ontologies": 10,
    "ontology_with_license": 10,
    "ontology_with_version": 10,
    "license_ratio": 1.0,
    "version_ratio": 1.0,
    "combined_score": 2.0,
    "sustainability_level": "High"
}

bartoc_result = {
    "Endpoint": "BARTOC (manual entry)",
    "Number of selected ontologies": 10,
    "ontology_with_license": 7,
    "ontology_with_version": 0,
    "license_ratio": 0.7,
    "version_ratio": 0.0,
    "combined_score": 0.7,
    "sustainability_level": "Medium"
}

def print_result(name, result):
    print(f"\n{name}:")
    for k, v in result.items():
        print(f"{k}: {v}")

if __name__ == "__main__":
    lov_result = assess_sustainability("https://lov.linkeddata.es/dataset/lov/sparql")
    gesis_result = assess_sustainability("https://data.gesis.org/gesiskg/sparql")

    print_result("LOV", lov_result)
    print_result("GESIS", gesis_result)
    print_result("BioPortal", bioportal_result)
    print_result("CLARIAH", clariah_result)
    print_result("BARTOC", bartoc_result)



LOV:
Endpoint: https://lov.linkeddata.es/dataset/lov/sparql
Number of ontologies: 814
ontology_with_license: 236
ontology_with_version: 529
license_ratio: 0.29
version_ratio: 0.65
combined_score: 0.94
sustainability_level: High

GESIS:
Endpoint: https://data.gesis.org/gesiskg/sparql
error: EndPointNotFound: It was not possible to connect to the given endpoint: check it is correct. 

Response:
b'<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\n<html><head>\n<title>404 Not Found</title>\n</head><body>\n<h1>Not Found</h1>\n<p>The requested URL was not found on this server.</p>\n</body></html>\n'

BioPortal:
Endpoint: BioPortal (manual entry)
Number of selected ontologies: 10
ontology_with_license: 9
ontology_with_version: 10
license_ratio: 0.9
version_ratio: 1.0
combined_score: 1.9
sustainability_level: High

CLARIAH:
Endpoint: CLARIAH (manual entry)
Number of selected ontologies: 10
ontology_with_license: 10
ontology_with_version: 10
license_ratio: 1.0
version_ratio: 1.0
combined_sco