In [1]:
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper

import re

In [2]:
def transform_label(label):
    return re.sub(r"(\w)([A-Z])", r"\1 \2", label)

In [3]:
g = Graph()
g.parse("schemaorg-current-https.ttl", encoding="utf-8")

<Graph identifier=N3cfef980fdba44e69f91ec94dbfa8793 (<class 'rdflib.graph.Graph'>)>

In [62]:
q = """
    SELECT distinct ?relation ?label ?comment
    WHERE {
      {?relation a rdfs:Property} UNION {?relation a rdf:Property}
      ?relation rdfs:label ?label.
      OPTIONAL{?relation rdfs:comment ?comment.}
    }
    """

dict_values = {}
for r in g.query(q):
    label = transform_label(str(r["label"]))
    
    if r["comment"]:
        comment = str(r["comment"]).replace("\t", " ").replace("\n", " ")
    else:
        comment = ""
        
    dict_values[(str(r["relation"]), label)] = (comment, set(), set())

In [63]:
q = """
    SELECT distinct ?relation ?label ?domain
    WHERE {
      {?relation a rdfs:Property.} UNION {?relation a rdf:Property}
      ?relation rdfs:label ?label.
      ?relation schema:domainIncludes ?domain.
    }
    """

for r in g.query(q):
    label = transform_label(str(r["label"]))
    
    dict_values[(str(r["relation"]), label)][1].add(str(r["domain"]))

In [64]:
import numpy as np
np.array(list(dict_values.keys()))[:,0]

array(['https://schema.org/abridged', 'https://schema.org/abstract',
       'https://schema.org/accelerationTime', ...,
       'https://schema.org/location', 'https://schema.org/participant',
       'https://schema.org/identifier'], dtype='<U58')

In [65]:
q = """
    SELECT distinct ?relation ?label  ?range 
    WHERE {
      {?relation a rdfs:Property.} UNION {?relation a rdf:Property}
      ?relation rdfs:label ?label.
      ?relation schema:rangeIncludes ?range.
    }
    """

for r in g.query(q):
    label = transform_label(str(r["label"]))
    
    dict_values[(str(r["relation"]), label)][2].add(str(r["range"]))

In [66]:
f = open("schema_P_describe.txt", "w", encoding="utf-8")

for line in dict_values:
    f.write("\t".join(line).replace("\n","")+"\t"+dict_values[line][0]+"\t"+",".join(dict_values[line][1])+"\n")

f.close()

# DataType

In [15]:
res = []

q = """
    SELECT distinct ?datatype ?label ?comment
    WHERE {
      ?datatype a ?type.
      VALUES ?type { rdfs:Class schema:DataType}
      ?datatype rdfs:label ?label.
      OPTIONAL{?datatype rdfs:comment ?comment.}
    }
    """

for r in g.query(q):
    label = transform_label(str(r["label"]))
    line_tp = str(r["datatype"])+"\t"+label+"\t"
    line_tp += str(r["comment"]).replace("\t", " ").replace("\n", " ")
    res.append(line_tp)

In [16]:
res

["https://schema.org/3DModel\t3 DModel\tA 3D model represents some kind of 3D content, which may have [[encoding]]s in one or more [[MediaObject]]s. Many 3D formats are available (e.g. see [Wikipedia](https://en.wikipedia.org/wiki/Category:3D_graphics_file_formats)); specific encoding formats can be represented using the [[encodingFormat]] property applied to the relevant [[MediaObject]]. For the case of a single file published after Zip compression, the convention of appending '+zip' to the [[encodingFormat]] can be used. Geospatial, AR/VR, artistic/animation, gaming, engineering and scientific content can all be represented using [[3DModel]].",
 'https://schema.org/AMRadioChannel\tA MRadio Channel\tA radio channel that uses AM.',
 'https://schema.org/APIReference\tA PI Reference\tReference documentation for application programming interfaces (APIs).',
 'https://schema.org/AboutPage\tAbout Page\tWeb page type: About page.',
 'https://schema.org/AcceptAction\tAccept Action\tThe act of 

In [17]:
f = open("schema_C_describe.txt", "w", encoding="utf-8")

for line in res:
    f.write(line.replace("\n", " ").replace("\\n"," ").replace("\\", " ")+"\n")

f.close()