In [None]:
import numpy as np
import pandas as pd
import requests
#The SKOS Graph management libraries
import rdflib
from rdflib import URIRef, BNode, Literal
from rdflib.namespace import RDF, SKOS
#import the MarketPlace Library 
from sshmarketplacelib import MPData as mpd
from sshmarketplacelib import  eval as eva, helper as hel

In [None]:
# Client object from sshmarketplacelib used for every download below.
mpdata = mpd()

# One dataframe per Marketplace item category.
# NOTE(review): the second argument (True) presumably asks for the flattened
# representation, as the `_flat` suffix suggests -- confirm against sshmarketplacelib.
df_tool_flat = mpdata.getMPItems("toolsandservices", True)
df_publication_flat = mpdata.getMPItems("publications", True)
df_trainingmaterials_flat = mpdata.getMPItems("trainingmaterials", True)
df_workflows_flat = mpdata.getMPItems("workflows", True)
df_datasets_flat = mpdata.getMPItems("datasets", True)

#### Get the dataframe containing items and their properties

In [None]:
utils = hel.Util()

# Columns retained from the properties dataframe.
resultfields = [
    'persistentId',
    'MPUrl',
    'category',
    'label',
    'type.code',
    'type.label',
    'concept.code',
    'concept.label',
    'concept.uri',
    'concept.vocabulary.scheme',
]

# Fetch the dataframe of items and their properties, keeping every row but
# only the columns listed above.
udf_alprop = utils.getAllPropertiesBySources().loc[:, resultfields]

#### Get the keywords vocabulary as RDF graph

The following cell creates an RDF Graph containing the [vocabulary of the SSHOC keywords](https://vocabs-downloads.acdh.oeaw.ac.at/vocabs-common/SSHOpenMarketplace/sshomp-keyword/sshomp-keyword.ttl), downloaded in ttl format.

In [None]:
# Start from an empty RDF graph, then fill it with the SKOS keyword vocabulary
# (Turtle serialization) fetched from the SSH Vocabulary Commons server.
keywords_rdf = rdflib.Graph()
keywords_rdf.parse(
    "https://vocabs-downloads.acdh.oeaw.ac.at/vocabs-common/SSHOpenMarketplace/sshomp-keyword/sshomp-keyword.ttl",
    format="turtle",
)

Test whether the vocabulary was downloaded correctly by checking that two known keywords are present in it

In [None]:
# Two probes: one concept URI and one English preferred label.
k = URIRef("https://vocabs.sshopencloud.eu/vocabularies/sshomp-keyword/mushrooms")
kl = Literal('media', lang='en')

# Each probe prints a confirmation only when the matching triple exists;
# a probe that is absent from the graph produces no output.
if (k, RDF.type, SKOS.Concept) in keywords_rdf:
    print(f"This graph knows that {k} is a Concept!")

if (None, SKOS.prefLabel, kl) in keywords_rdf:
    print(f"This graph knows that {kl} is a preferred label for a Concept!")

Filter the dataframe of items selecting only keywords

In [None]:
# Keep only the rows whose property type is labelled 'Keyword'.
itemswkws = udf_alprop.loc[udf_alprop['type.label'].eq('Keyword')]

#### Navigate the RDF Graph and for every Concept print if it has a 'label', a 'notation' or neither

In [None]:
# Visit every subject typed as skos:Concept and report the first identifying
# property it carries: a prefLabel takes precedence over a notation.
for s in keywords_rdf.subjects(RDF.type, SKOS.Concept):
    if (s, SKOS.prefLabel, None) in keywords_rdf:
        print(f"The Concept {s} has a label property, value is:")
        print(keywords_rdf.value(s, SKOS.prefLabel, None))
    elif (s, SKOS.notation, None) in keywords_rdf:
        print(f"The Concept {s} has a notation property, value is:")
        print(keywords_rdf.value(s, SKOS.notation, None))
    else:
        print(f"The Concept: \n{s}\ndoes not have label or notation")

#### Iterate over the dataframe of keywords to check whether each one has a vocabulary entry with a matching label or notation, and build a dataframe of the keywords that have no such entry

In [None]:
# Build `itemsnokw`: one row per keyword item whose 'concept.label' matches
# neither an English skos:prefLabel nor a skos:notation in the vocabulary graph.
# The graph is only queried here, never modified.
#
# Rows are gathered in a plain list and turned into a dataframe once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
unmatched_records = []
for _idx, row in itemswkws.iterrows():
    pk = row['concept.label']
    # A missing label (NaN/None) is not a string and would make the substring
    # test below raise TypeError; such rows cannot be matched, so skip them.
    if not isinstance(pk, str):
        continue
    # Values containing 'http' are skipped (they look like URIs, not plain keywords).
    if 'http' in pk:
        continue

    kl = Literal(pk, lang='en')  # candidate preferred label, English-tagged
    kn = Literal(pk)             # candidate notation, plain literal
    if (None, SKOS.prefLabel, kl) in keywords_rdf or (None, SKOS.notation, kn) in keywords_rdf:
        continue

    print (f"Warning {pk} is a Concept URI not having label or notation")
    unmatched_records.append({
        'MPUrl': row['MPUrl'],
        # Bracket access: attribute access (row.label) would silently break if
        # pandas ever exposed a Series attribute with that name.
        'label': row['label'],
        'code': row['concept.code'],
        'code.uri': row['concept.uri'],
    })

# Explicit columns keep the frame well-formed even when every keyword matched.
itemsnokw = pd.DataFrame(unmatched_records, columns=['MPUrl', 'label', 'code', 'code.uri'])

In [None]:
# Preview the first five keywords that have no vocabulary entry.
itemsnokw.head(5)