In [None]:
# Install libraries
!pip install rdflib
!pip install requests
!pip install SPARQLWrapper

In [17]:
# Import libraries
import os
import requests
import warnings
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON
from urllib.parse import urlparse

In [18]:
# Configuration
SPARQL_ENDPOINT = "http://63.32.50.253:81/sparql"  # Replace with your Virtuoso SPARQL endpoint
GRAPH_URI = "http://semic.registry.eu"  # Replace with your graph URI
BYPASS_SSL = True  # Set to True to bypass SSL verification (use cautiously)

In [20]:
# SPARQL query to extract download links
SPARQL_QUERY = """
prefix dct: <http://purl.org/dc/terms/>
prefix dcat: <http://www.w3.org/ns/dcat#>
prefix prof: <http://www.w3.org/ns/dx/prof/>
SELECT ?s ?download
WHERE {
?s a dct:Standard .
?s prof:hasResource ?resource .
?resource dct:format <http://publications.europa.eu/resource/authority/file-type/RDF_TURTLE> .
?resource dcat:downloadURL ?download .
}
"""

def download_and_parse(url):
    """Download RDF data from a URL and parse it into an rdflib Graph."""
    try:
        # Bypass SSL verification if configured (not recommended for production)
        if BYPASS_SSL:
            warnings.warn(f"Bypassing SSL verification for {url}. This is insecure and should be used only for testing.")
            response = requests.get(url, verify=False)
        else:
            response = requests.get(url)

        if response.status_code == 200:
            # Parse RDF data into a Graph (assuming Turtle format)
            graph = Graph()
            graph.parse(data=response.text, format="turtle")
            print(f"Parsed RDF from: {url}")
            return graph
        else:
            print(f"Failed to download {url}: Status code {response.status_code}")
            return None
    except requests.exceptions.SSLError as ssl_err:
        print(f"SSL error downloading {url}: {str(ssl_err)}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {str(e)}")
        return None
    except Exception as e:
        print(f"Error parsing RDF from {url}: {str(e)}")
        return None

# Suppress SSL warnings if bypassing verification
if BYPASS_SSL:
    requests.packages.urllib3.disable_warnings()

In [21]:
# Query the SPARQL endpoint, download RDF files, and parse them into in-memory graphs. Stores individual graphs in `model_graphs` and combines them into `aggregated_graph`.
# Dictionary to store in-memory graphs (key: model URI, value: rdflib.Graph)
model_graphs = {}
# Single graph to aggregate all models
aggregated_graph = Graph()

# Initialize SPARQL wrapper
sparql = SPARQLWrapper(SPARQL_ENDPOINT)
sparql.setQuery(SPARQL_QUERY)
sparql.setReturnFormat(JSON)

# Add credentials if required (uncomment and configure if needed)
# sparql.setCredentials(user="your-username", passwd="your-password")

try:
    # Execute SPARQL query
    results = sparql.query().convert()

    # Process results
    for result in results["results"]["bindings"]:
        model_uri = result["s"]["value"]
        download_link = result["download"]["value"]

        # Download and parse RDF into a Graph
        graph = download_and_parse(download_link)
        if graph:
            # Store the individual graph
            model_graphs[model_uri] = graph
            # Add to aggregated graph
            aggregated_graph += graph

            # Extract filename for reference
            parsed_url = urlparse(download_link)
            filename = os.path.basename(parsed_url.path)
            if not filename:
                filename = model_uri.split('/')[-1] + ".ttl"
            print(f"Stored graph for {model_uri} (filename: {filename})")

    print(f"\nTotal models parsed: {len(model_graphs)}")
    print(f"Total triples in aggregated graph: {len(aggregated_graph)}")

except Exception as e:
    print(f"Error querying SPARQL endpoint: {str(e)}")



Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/AccessCondition/latest/AccessCondition-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/AccessCondition (filename: AccessCondition-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/ACCO/latest/ACCO-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/ACCO (filename: ACCO-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/AtlasOfPaths/latest/AtlasOfPaths-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/AtlasOfPaths (filename: AtlasOfPaths-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/CLV/latest/CLV-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/CLV (filename: CLV-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/COV/latest/COV-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/COV (filename: COV-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/CPEV/latest/CPEV-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/CPEV (filename: CPEV-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/CPSV/latest/CPSV-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/CPSV (filename: CPSV-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/CPV/latest/CPV-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/CPV (filename: CPV-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/DCAT/latest/DCAT-AP_IT.ttl
Stored graph for http://dati.gov.it/onto/dcatapit (filename: DCAT-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/HER/latest/HER-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/HER (filename: HER-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Indicator/latest/Indicator-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Indicator (filename: Indicator-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/IoT/latest/IoT-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/IoT (filename: IoT-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Language/latest/Language-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Language (filename: Language-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/MU/latest/MU-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/MU (filename: MU-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/PARK/latest/PARK-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/PARK (filename: PARK-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/PublicContract/latest/PublicContract-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/PublicContract (filename: PublicContract-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/POI/latest/POI-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/POI (filename: POI-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/POT/latest/POT-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/POT (filename: POT-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Project/latest/Project-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Project (filename: Project-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/RO/latest/RO-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/RO (filename: RO-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/RPO/latest/RPO-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/RPO (filename: RPO-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Route/latest/Route-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Route (filename: Route-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/SM/latest/SM-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/SM (filename: SM-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/TI/latest/TI-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/TI (filename: TI-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Transparency/latest/Transparency-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Transparency (filename: Transparency-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/l0/latest/l0-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/l0 (filename: l0-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/Learning/latest/Learning-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/Learning (filename: Learning-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/NDC/latest/NDC-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/NDC (filename: NDC-AP_IT.ttl)




Parsed RDF from: https://raw.githubusercontent.com/italia/daf-ontologie-vocabolari-controllati/master/Ontologie/ADMS/latest/ADMS-AP_IT.ttl
Stored graph for https://w3id.org/italia/onto/ADMS (filename: ADMS-AP_IT.ttl)

Total models parsed: 29
Total triples in aggregated graph: 24290


In [22]:
# Count the bound namespaces for each model and the total unique namespaces across all models.
def explore_namespaces(graph, label="Graph"):
    """Extract and display bound namespaces from an rdflib Graph, return count and namespaces."""
    namespaces = list(graph.namespaces())
    print(f"\nBound namespaces for {label}:")
    if namespaces:
        for prefix, uri in namespaces:
            print(f"  {prefix}: <{uri}>")
        print(f"Namespace count: {len(namespaces)}")
    else:
        print("  No namespaces bound.")
        print("Namespace count: 0")
    return len(namespaces), namespaces

# Count namespaces for each model
print("=== Namespace Counts per Model ===")
unique_namespaces = set()
for model_uri, graph in model_graphs.items():
    filename = os.path.basename(urlparse(model_graphs[model_uri].source).path) if hasattr(graph, 'source') else model_uri.split('/')[-1] + '.ttl'
    count, namespaces = explore_namespaces(graph, f"Model {model_uri} (filename: {filename})")
    # Add namespaces to unique set
    unique_namespaces.update((prefix, str(uri)) for prefix, uri in namespaces)

# Count unique namespaces across all models
print("\n=== Unique Namespaces Across All Models ===")
if unique_namespaces:
    for prefix, uri in sorted(unique_namespaces):
        print(f"  {prefix}: <{uri}>")
    print(f"Total unique namespaces: {len(unique_namespaces)}")
else:
    print("  No unique namespaces found.")
    print("Total unique namespaces: 0")

=== Namespace Counts per Model ===

Bound namespaces for Model https://w3id.org/italia/onto/AccessCondition (filename: AccessCondition.ttl):
  brick: <https://brickschema.org/schema/Brick#>
  csvw: <http://www.w3.org/ns/csvw#>
  dc: <http://purl.org/dc/elements/1.1/>
  dcat: <http://www.w3.org/ns/dcat#>
  dcmitype: <http://purl.org/dc/dcmitype/>
  dcam: <http://purl.org/dc/dcam/>
  doap: <http://usefulinc.com/ns/doap#>
  foaf: <http://xmlns.com/foaf/0.1/>
  geo: <http://www.opengis.net/ont/geosparql#>
  odrl: <http://www.w3.org/ns/odrl/2/>
  org: <http://www.w3.org/ns/org#>
  prof: <http://www.w3.org/ns/dx/prof/>
  prov: <http://www.w3.org/ns/prov#>
  qb: <http://purl.org/linked-data/cube#>
  schema: <https://schema.org/>
  sh: <http://www.w3.org/ns/shacl#>
  skos: <http://www.w3.org/2004/02/skos/core#>
  sosa: <http://www.w3.org/ns/sosa/>
  ssn: <http://www.w3.org/ns/ssn/>
  time: <http://www.w3.org/2006/time#>
  vann: <http://purl.org/vocab/vann/>
  void: <http://rdfs.org/ns/void#>
 

In [26]:
# Display the bound namespaces and their counts for a subset of models. Customize the `subset_models` list with model URIs or filenames to analyze.
# Define a subset of models to analyze (use model URIs or filenames)
subset_models = [
    "https://w3id.org/italia/onto/AccessCondition",
]

print("=== Namespaces for Subset of Models ===")
for model in subset_models:
    # Try to find the model by URI
    if model in model_graphs:
        graph = model_graphs[model]
        filename = os.path.basename(urlparse(model_graphs[model].source).path) if hasattr(graph, 'source') else model.split('/')[-1] + '.ttl'
        explore_namespaces(graph, f"Model {model} (filename: {filename})")
    else:
        # Try to find by filename
        found = False
        for model_uri, graph in model_graphs.items():
            parsed_url = urlparse(model_graphs[model_uri].source) if hasattr(graph, 'source') else urlparse(model_uri)
            filename = os.path.basename(parsed_url.path) or (model_uri.split('/')[-1] + '.ttl')
            if filename == model:
                explore_namespaces(graph, f"Model {model_uri} (filename: {filename})")
                found = True
                break
        if not found:
            print(f"Model not found: {model}")

=== Namespaces for Subset of Models ===

Bound namespaces for Model https://w3id.org/italia/onto/AccessCondition (filename: AccessCondition.ttl):
  brick: <https://brickschema.org/schema/Brick#>
  csvw: <http://www.w3.org/ns/csvw#>
  dc: <http://purl.org/dc/elements/1.1/>
  dcat: <http://www.w3.org/ns/dcat#>
  dcmitype: <http://purl.org/dc/dcmitype/>
  dcam: <http://purl.org/dc/dcam/>
  doap: <http://usefulinc.com/ns/doap#>
  foaf: <http://xmlns.com/foaf/0.1/>
  geo: <http://www.opengis.net/ont/geosparql#>
  odrl: <http://www.w3.org/ns/odrl/2/>
  org: <http://www.w3.org/ns/org#>
  prof: <http://www.w3.org/ns/dx/prof/>
  prov: <http://www.w3.org/ns/prov#>
  qb: <http://purl.org/linked-data/cube#>
  schema: <https://schema.org/>
  sh: <http://www.w3.org/ns/shacl#>
  skos: <http://www.w3.org/2004/02/skos/core#>
  sosa: <http://www.w3.org/ns/sosa/>
  ssn: <http://www.w3.org/ns/ssn/>
  time: <http://www.w3.org/2006/time#>
  vann: <http://purl.org/vocab/vann/>
  void: <http://rdfs.org/ns/voi