In [41]:
import os
import json
import pandas as pd
import csv
from lxml import etree
from rdflib import Graph, Namespace, URIRef, BNode, Literal, RDF, URIRef
from rdflib.namespace import RDF, RDFS, SKOS, DCTERMS, DC, OWL
import uuid

# Path segment inserted right after "ark:/" in every n2t.net URI built from
# this value (presumably the ARK name-assigning-authority number — TODO confirm).
ark_id = "1552"

 ## Language code

In [None]:
def generate_skos(_list, _voc):
    """Build a SKOS concept scheme from concept records and save it as RDF/XML.

    Every record becomes a ``skos:Concept`` that is registered as a top
    concept of one ``skos:ConceptScheme`` named after ``_voc``. The graph is
    serialized as pretty-xml, written to ``./skos/<_voc>.xml`` and returned.

    Parameters
    ----------
    _list : iterable of dict
        Concept records. Each record needs the keys ``'uri'`` and
        ``'prefLabel'``; ``'altLabel'`` is optional and is skipped when it is
        missing or empty.
    _voc : str
        Vocabulary name. Used as the scheme title (tagged "nl" and "en"),
        as part of the scheme URI and as the output file name.

    Returns
    -------
    str
        The pretty-xml serialization of the generated graph.
    """
    # Empty base namespace: NS[x] simply turns the full URI string x into a URIRef.
    NS = Namespace("")
    g = Graph()
    g.bind("ns", NS)
    g.bind("skos", SKOS)
    g.bind("dcterms", DCTERMS)
    g.bind("dc", DC)

    voc_name = _voc
    # NOTE(review): voc_name is embedded verbatim, so a name containing spaces
    # ends up unescaped inside the scheme URI — confirm this is intended.
    voc_uri = NS[f'https://n2t.net/ark:/{ark_id}/{voc_name}/{str(uuid.uuid5(uuid.NAMESPACE_DNS, _voc))}']

    g.add((voc_uri, RDF.type, SKOS['ConceptScheme']))
    g.add((voc_uri, DCTERMS.title, Literal(voc_name, lang="nl")))
    g.add((voc_uri, DCTERMS.title, Literal(voc_name, lang="en")))

    for row in _list:
        uri = NS[row['uri']]
        g.add((uri, RDF.type, SKOS.Concept))
        g.add((uri, SKOS.inScheme, voc_uri))
        g.add((voc_uri, SKOS.hasTopConcept, uri))
        g.add((uri, SKOS.prefLabel, Literal(row['prefLabel'], lang="nl")))

        # Only emit an altLabel when there is something to say; an empty
        # literal carries no information.
        alt_label = row.get('altLabel')
        if alt_label:
            g.add((uri, SKOS.altLabel, Literal(alt_label, lang="nl")))

    # Depending on the installed rdflib version serialize() hands back either
    # bytes or str when no destination is given; accept both.
    serialized = g.serialize(format='pretty-xml')
    skos_data = serialized.decode('utf-8') if isinstance(serialized, bytes) else serialized

    # Write with an explicit encoding so the file content matches the UTF-8
    # data regardless of the platform default.
    os.makedirs('./skos', exist_ok=True)
    with open(f'./skos/{voc_name}.xml', "w", encoding="utf-8") as f:
        f.write(skos_data)

    return skos_data

# Read the tab-separated language-code file and turn every row that has a
# value in its third column into a concept record for generate_skos().
source_file = './source/iso-languagecodes.txt'
concept_list = []

# NOTE(review): the file is read as UTF-8 — confirm against the source data.
with open(source_file, newline='', encoding='utf-8') as csvfile:
    concepts = csv.reader(csvfile, delimiter='\t')
    next(concepts, None)  # skip the header row

    for row in concepts:
        # csv.reader yields an empty list for blank lines and short lists for
        # truncated ones; indexing those directly would raise IndexError.
        if len(row) < 3 or not row[2]:
            continue

        concept_list.append({
            "uri": f'https://n2t.net/ark:/{ark_id}/{str(uuid.uuid5(uuid.NAMESPACE_DNS, row[2]))}',
            "prefLabel": row[2],
            "altLabel": row[3] if len(row) > 3 else ""
        })

voc_name = 'Language ISO Type'
# generate_skos() already writes ./skos/<voc_name>.xml, so the result is not
# written a second time here.
skos_data = generate_skos(concept_list, voc_name)



## Feature Codes

Terms from the GeoNames ontology (the cell below maps `Class` and `Code`):

- https://www.geonames.org/ontology#WikipediaArticle
- https://www.geonames.org/ontology#Map
- https://www.geonames.org/ontology#GeonamesFeature
- https://www.geonames.org/ontology#Code
- https://www.geonames.org/ontology#RDFData
- https://www.geonames.org/ontology#Feature
- https://www.geonames.org/ontology#Class

In [116]:


# Scratch/state variables; not referenced elsewhere in the visible cells
# except lookup_list, which the code-mapping loop further down fills.
bulle = ""
lookup_list = []
lookup_dict = {}

# Namespaces. NS is an empty base, so NS[x] just wraps the full URI string x.
# SKOS is rebound here to a plain Namespace with the SKOS core URI, replacing
# the object imported from rdflib.namespace for the rest of the session.
NS = Namespace("")
BC = Namespace("https://n2t.net/")
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
GEO = Namespace("https://www.geonames.org/ontology#")

# g holds the original ontology, gfc collects the derived SKOS scheme.
g = Graph()
g.parse('./source/geonames_onto.rdf')

gfc = Graph()
gfc.bind("gn", GEO)
gfc.bind("bc", BC)
gfc.bind("skos", SKOS)
gfc.bind("dcterms", DCTERMS)
gfc.bind("dc", DC)

# Define the ConceptScheme; its URI is derived from a name-based UUID.
voc_name = "Feature codes"
voc_uri = BC[f'{str(uuid.uuid5(uuid.NAMESPACE_DNS, voc_name))}']

gfc.add((voc_uri, RDF.type, SKOS['ConceptScheme']))
gfc.add((voc_uri, DCTERMS.title, Literal(voc_name, lang="nl")))
gfc.add((voc_uri, DCTERMS.title, Literal(voc_name, lang="en")))

# Map every gn:Class to a top concept of the scheme.
for cls in g.subjects(RDF.type, GEO.Class):
    gfc.add((cls, RDF.type, SKOS.Concept))
    gfc.add((cls, SKOS.inScheme, voc_uri))
    gfc.add((voc_uri, SKOS.hasTopConcept, cls))

    # Prefer rdfs:comment as the label and fall back to rdfs:label.
    top_label = next(g.objects(cls, RDFS.comment), None) or g.value(cls, RDFS.label)
    # g.value() returns None when the class has no rdfs:label either; wrapping
    # that None in a Literal would add a meaningless prefLabel, so skip it.
    if top_label is not None:
        gfc.add((cls, SKOS.prefLabel, Literal(top_label)))
    

def determine_language_for_prefLabel(code, g):
    """Return ``"en"`` when ``code`` has an English ``skos:prefLabel`` in ``g``.

    Parameters
    ----------
    code : node
        Subject whose ``skos:prefLabel`` values are inspected.
    g : Graph
        Graph that is queried through ``g.triples``.

    Returns
    -------
    str or None
        ``"en"`` if at least one prefLabel object carries the language tag
        ``"en"``, otherwise ``None``.
    """
    # Inspect every prefLabel instead of only the first triple the graph
    # happens to yield: with several labels per subject, which one comes
    # first is arbitrary, so checking a single triple gave unstable answers.
    for _, _, label in g.triples((code, SKOS.prefLabel, None)):
        # Objects without a language attribute are ignored rather than
        # raising AttributeError.
        if getattr(label, "language", None) == "en":
            return "en"
    return None

# Map every gn:Code to a concept that is narrower than the concept of its
# class. The class and the feature code are taken from the URI fragment,
# which is expected to look like "<class>.<feature code>".
skipped_codes = []
for code in g.subjects(RDF.type, GEO.Code):
    uri_parts = str(code).split("#")
    fragment_parts = uri_parts[1].split(".") if len(uri_parts) > 1 else []
    if len(fragment_parts) < 2:
        # Blank nodes or URIs without "#<class>.<code>" cannot be placed in
        # the hierarchy; remember them instead of failing with IndexError.
        skipped_codes.append(str(code))
        continue
    broader_cls, feature_code = fragment_parts[0], fragment_parts[1]

    broader_uri = NS[f'https://www.geonames.org/ontology#{broader_cls}']
    gfc.add((code, RDF.type, SKOS.Concept))
    gfc.add((code, SKOS.inScheme, voc_uri))
    gfc.add((broader_uri, SKOS.narrower, code))

    for pref_label in g.objects(code, SKOS.prefLabel):
        gfc.add((code, SKOS.prefLabel, pref_label))
        # English labels additionally feed the code -> concept lookup table.
        if getattr(pref_label, "language", None) == "en":
            lookup_list.append({"code": feature_code, "concept": str(pref_label)})

if skipped_codes:
    print(f"Skipped {len(skipped_codes)} code(s) with an unexpected URI form: {skipped_codes}")

# Save the extracted SKOS graph to a new RDF/XML file
gfc.serialize('test_skos.rdf', format='pretty-xml')

# Depending on the installed rdflib version serialize() hands back either
# bytes or str when no destination is given; accept both.
serialized = gfc.serialize(format='pretty-xml')
skos_data = serialized.decode('utf-8') if isinstance(serialized, bytes) else serialized

# Fixed column list keeps the CSV header stable even when no English label
# was found.
os.makedirs("./lookup", exist_ok=True)
df = pd.DataFrame(lookup_list, columns=["code", "concept"])
df.to_csv("./lookup/feature_codes.csv", index=False)