From 9b0f075fb4721a69e5ae4e3304b476d84d2522c6 Mon Sep 17 00:00:00 2001
From: Richard Wallis
Date: Tue, 19 Feb 2019 08:29:41 +0000
Subject: [PATCH] Introduce Feb19 software tweaks (#2146)

* 1st pass

* First pass at a vocab independent version

* fix to canonical url

* Tweaks and additions to get full.html etc. working

* Fixed Enumeration display
  Suppress output of properties table when there are no properties to display

* Disable moreinfo block

* Temp bypass examples load

* Fix to table layout on term pages

* First pass at configurable data load:
  * Removed dependency on the Unit class (code still in there)
  * Added CONFIGFILE to pull term definitions, examples, docs, and templates from local/remote locations
  * added example config files

* Added SUBDOMAINS variable to .yamls. If set to False it will suppress extension terms being referenced by subdomain urls e.g. bib.schema.org/Audiobook. Only remaining subdomain pages being the extension home pages.

* Final adjustments following datacommons tests

* Tweaks to match schemaorg/datacommons requirements.

* Tweaks around vocabUri to ensure correct RDFa in pages.

* Tweak

* Added include file functionality to config files
  Now handle local locations such as '.' or './' in config file loads

* Added missing rdfas and example.txts to config

* Synchronise .yamls
  Add build termconfig script
---
 .gitignore                          |    1 +
 api.py                              |  826 ++++++++++++---
 apimarkdown.py                      |   15 +-
 apirdflib.py                        |  486 +++++----
 apirdfterm.py                       |  684 +++++++++++++
 app.yaml                            |   16 +-
 datacomconfig.json                  |   50 +
 datacomconfiglocation.json          |    7 +
 docs/schemaorg.css                  |   18 +-
 parsers.py                          |   16 +-
 schemaorg.yaml                      |   13 +-
 scripts/buildTermConfig.sh          |  129 +++
 sdoapp.py                           | 1458 ++++++++++++++-------
 sdocloudstore.py                    |    2 +-
 sdoconfig.json                      |   79 ++
 sdoconfigTermsData.json             |  292 ++++++
 sdodatacomconfig.json               |   43 +
 sdordf2csv.py                       |   47 +-
 sdoutil.py                          |   26 +
 templates/docsBasicPageHeader.tpl   |    2 +-
 templates/full.tpl                  |   35 +-
 templates/genericTermPageHeader.tpl |    3 +-
 templates/topnotes.tpl              |    8 +-
 webschemas.yaml                     |   11 +
 24 files changed, 3246 insertions(+), 1021 deletions(-)
 create mode 100755 apirdfterm.py
 create mode 100644 datacomconfig.json
 create mode 100644 datacomconfiglocation.json
 create mode 100755 scripts/buildTermConfig.sh
 create mode 100644 sdoconfig.json
 create mode 100644 sdoconfigTermsData.json
 create mode 100644 sdodatacomconfig.json

diff --git a/.gitignore b/.gitignore
index dbbd843733..c0a8e6bd04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@
 /scripts/rspec-failures.txt
 /rspec-failures.txt
 /admin/deploy_timestamp.txt
+/datacoms
diff --git a/api.py b/api.py
index ef2eeab43a..57e1e6d993 100755
--- a/api.py
+++ b/api.py
@@ -10,6 +10,7 @@
 import os
 import os.path
+import urllib
 import glob
 import re
 import threading
@@ -23,7 +24,8 @@
 from testharness import *
 import apirdflib
-from sdoutil import sdo_send_mail
+import apirdfterm
+from sdoutil import *
 #from apirdflib import rdfGetTargets, rdfGetSources
 from apimarkdown import Markdown
@@ -36,6 +38,11 @@ def getInstanceId(short=False):
        ret = ret[len(ret)-6:]
    return ret
+TIMESTAMPSTOREMODE = "CLOUDSTORE"
+if "TIMESTAMPSTOREMODE" in os.environ:
+    TIMESTAMPSTOREMODE = os.environ["TIMESTAMPSTOREMODE"]
+    log.info("TIMESTAMPSTOREMODE set to %s from .yaml file" % TIMESTAMPSTOREMODE)
+log.info("Initialised with TIMESTAMPSTOREMODE set to %s" % TIMESTAMPSTOREMODE)
 EXAMPLESTOREMODE = os.environ.get("EXAMPLESTOREMODE","INMEM")
 schemasInitialized = False
@@ -61,6 +68,8 @@ def getAllLayersList():
    global AllLayersList
    return AllLayersList
+VARSUBPATTERN = 
r'\[\[([\w0-9_ -]+)\]\]' + JSONLDCONTEXT = "jsonldcontext.json" EVERYLAYER = "!EVERYLAYER!" sitename = "schema.org" @@ -480,24 +489,51 @@ def enablePageStore(mode): def prepareCloudstoreDocs(): - if getInTestHarness() or "localhost" in os.environ['SERVER_NAME']: #Force new version logic for local versions and tests + #if getInTestHarness() or "localhost" in os.environ['SERVER_NAME']: #Force new version logic for local versions and tests + if getInTestHarness(): #Force new version logic for local versions and tests log.info("Skipping static docs copy for local/test instance") return - log.info("Preparing Cloudstorage - copying static docs") + log.info("Preparing Cloudstorage - copying static docs..") count = 0 - for root, dirs, files in os.walk("docs"): - for f in files: - count += 1 - fname = os.path.join(root, f) - try: - with open(fname, 'r') as f: - content = f.read() - SdoCloud.writeFormattedFile(fname,content=content, location="html", raw=True) - f.close() - except Exception as e: - log.info("ERROR reading: %s" % e) - pass + filesToCopy = [] + copiedFiles = [] + if SdoConfig.isValid(): + log.info("... from config defined sources") + for f in SdoConfig.docsFiles(): + ft = (f.get("location"),f.get("filePart")) + filesToCopy.append(ft) + else: + log.info("... from local docs location") + for root, dirs, files in os.walk("docs"): + for f in files: + count += 1 + fname = os.path.join(root, f) + ft = ("docs",fname) + filesToCopy.append(ft) + + for ft in filesToCopy: + try: + file = ft[0] + "/" + ft[1] + fname = "docs/" + ft[1] + if file.startswith("file://"): + file = file[7:] + + if "://" in file: + content = urllib.urlopen(file).read() + else: + fd = open(file, 'r') + content = fd.read() + fd.close() + + SdoCloud.writeFormattedFile(fname,content=content, location="html", raw=True) + copiedFiles.append(fname) + except Exception as e: + log.info("ERROR reading: %s" % e) + pass + info = "".join( ["%s\n" % fl for fl in copiedFiles] ) + storeTimestampedInfo("staticdocscopy-timestamp",info=info) + return #sdo_send_mail(to="rjw@dataliberate.com",subject="[SCHEMAINFO] from 'api'", msg="prepareCloudstoreDocs: %s" % (count)) def cloudstoreStoreContent(fname, content, location, raw=False, private=False): @@ -605,7 +641,7 @@ def isEnumeration(self, layers='core'): """Does this unit represent an enumerated type?""" if self.typeFlags.has_key('e'): return self.typeFlags['e'] - isE = self.subClassOf(Unit.GetUnit("Enumeration"), layers=EVERYLAYER) + isE = self.subClassOf(Unit.GetUnit("schema:Enumeration"), layers=EVERYLAYER) self.typeFlags['e'] = isE return isE @@ -617,7 +653,7 @@ def isEnumerationValue(self, layers='core'): #log.debug("isEnumerationValue() called on %s, found %s types. 
layers: %s" % (self.id, str( len( types ) ), layers ) ) found_enum = False for t in types: - if t.subClassOf(Unit.GetUnit("Enumeration"), layers=EVERYLAYER): + if t.subClassOf(Unit.GetUnit("schema:Enumeration"), layers=EVERYLAYER): found_enum = True break self.typeFlags['ev'] = found_enum @@ -737,27 +773,7 @@ def inverseproperty(self, layers="core"): return None def UsageStr (self) : - str = GetUsage(self.id) - if (str == '1') : - return "Between 10 and 100 domains" - elif (str == '2'): - return "Between 100 and 1000 domains" - elif (str == '3'): - return "Between 1000 and 10,000 domains" - elif (str == '4'): - return "Between 10,000 and 50,000 domains" - elif (str == '5'): - return "Between 50,000 and 100,000 domains" - elif (str == '7'): - return "Between 100,000 and 250,000 domains" - elif (str == '8'): - return "Between 250,000 and 500,000 domains" - elif (str == '9'): - return "Between 500,000 and 1,000,000 domains" - elif (str == '10'): - return "Over 1,000,000 domains" - else: - return "" + return GetUsage(self.id) # NOTE: each Triple is in exactly one layer, by default 'core'. When we # read_schemas() from data/ext/{x}/*.rdfa each schema triple is given a @@ -793,17 +809,19 @@ def __str__ (self): ret = "" if self.source != None: ret += "%s " % self.source - if self.target != None: - ret += "%s " % self.target if self.arc != None: ret += "%s " % self.arc + if self.target != None: + ret += "%s " % self.target + if self.text != None: + ret += "\"%s\" " % self.text return ret @staticmethod def AddTriple(source, arc, target, layer='core'): """AddTriple stores a thing-valued new Triple within source Unit.""" if (source == None or arc == None or target == None): - log.info("Bailing") + log.info("Bailing %s %s %s" % (source, arc, target)) return else: @@ -835,15 +853,18 @@ def AddTripleText(source, arc, text, layer='core'): def GetTargets(arc, source, layers='core'): """All values for a specified arc on specified graph node (within any of the specified layers).""" - # log.debug("GetTargets checking in layer: %s for unit: %s arc: %s" % (layers, source.id, arc.id)) + log.info("GetTargets checking in layer: %s for unit: %s arc: %s" % (layers, source, arc)) targets = {} fred = False try: for triple in source.arcsOut: + log.info("triple %s" % triple) if (triple.arc == arc): - if (triple.target != None and (layers == EVERYLAYER or triple.layer in layers)): + #if (triple.target != None and (layers == EVERYLAYER or triple.layer in layers)): + if (triple.target != None ): targets[triple.target] = 1 - elif (triple.text != None and (layers == EVERYLAYER or triple.layer in layers)): + #elif (triple.text != None and (layers == EVERYLAYER or triple.layer in layers)): + elif (triple.text != None): targets[triple.text] = 1 return targets.keys() except Exception as e: @@ -852,12 +873,15 @@ def GetTargets(arc, source, layers='core'): def GetSources(arc, target, layers='core'): """All source nodes for a specified arc pointing to a specified node (within any of the specified layers).""" - #log.debug("GetSources checking in layer: %s for unit: %s arc: %s" % (layers, target.id, arc.id)) + log.info("GetSources checking in layer: %s for unit: %s arc: %s" % (layers, target, arc)) if(target.sourced == False): - apirdflib.rdfGetSourceTriples(target) + apirdflib.rdfGetSourceTriples(target) + sources = {} for triple in target.arcsIn: - if (triple.arc == arc and (layers == EVERYLAYER or triple.layer in layers)): + #if (triple.arc == arc and (layers == EVERYLAYER or triple.layer in layers)): + log.info("arc %s 
triplearc: %s" %(arc,triple.arc)) + if (triple.arc == arc ): sources[triple.source] = 1 return sources.keys() @@ -885,7 +909,7 @@ def GetComment(node, layers='core') : if len(tx) > 0: return Markdown.parse(tx[0]) else: - return "No comment" + return "-" def GetComments(node, layers='core') : """Get the rdfs:comment(s) we find on this node within any of the specified layers.""" @@ -926,7 +950,30 @@ def GetAllTypes(layers='core'): return UtilCache.get(KEY,Utc) else: #logging.debug("DataCache MISS: %s" % KEY) - mynode = Unit.GetUnit("Thing", True) + mynode = Unit.GetUnit("schema:Thing", True) + subbed = {} + todo = [mynode] + while todo: + current = todo.pop() + subs = GetImmediateSubtypes(current, EVERYLAYER) + if inLayer(layers,current): + subbed[current] = 1 + for sc in subs: + if subbed.get(sc.id) == None: + todo.append(sc) + UtilCache.put(KEY,subbed.keys(),Utc) + return subbed.keys() + +def oldGetAllTypes(layers='core'): + global Utc + """Return all types in the graph.""" + KEY = "AllTypes:%s" % layers + if UtilCache.get(KEY+'x',Utc): + #logging.debug("DataCache HIT: %s" % KEY) + return UtilCache.get(KEY,Utc) + else: + #logging.debug("DataCache MISS: %s" % KEY) + mynode = Unit.GetUnit("schema:Thing", True) subbed = {} todo = [mynode] while todo: @@ -971,7 +1018,7 @@ def GetAllEnumerationValues(layers='core'): return UtilCache.get(KEY,Utc) else: #logging.debug("DataCache MISS: %s" % KEY) - mynode = Unit.GetUnit("Enumeration", True) + mynode = Unit.GetUnit("schema:Enumeration", True) enums = {} subbed = {} todo = [mynode] @@ -1116,27 +1163,27 @@ def __init__ (self, terms, original_html, microdata, rdfa, jsonld, egmeta, layer if not EXAMPLES.get(self.keyvalue): EXAMPLES[self.keyvalue] = self -def LoadNodeExamples(node, layers='core'): +def LoadTermExamples(term, layers='core'): """Returns the examples (if any) for some Unit node.""" #log.info("Getting examples for: %s %s" % (node.id,node.examples)) - if(node.examples == None): - node.examples = [] + if(term.examples == None): + term.examples = [] if getInTestHarness() or EXAMPLESTOREMODE != "NDBSHARED": #Get from local storage with exlock: - examples = EXAMPLESMAP.get(node.id) + examples = EXAMPLESMAP.get(term.id) if examples: for e in examples: ex = EXAMPLES.get(e) if ex: - node.examples.append(ex) + term.examples.append(ex) else: #Get from NDB shared storage - ids = ExampleMap.get(node.id) + ids = ExampleMap.get(term.id) if not ids: ids = [] for i in ids: - node.examples.append(ExampleStore.get_by_id(i)) - return node.examples + term.examples.append(ExampleStore.get_by_id(i)) + return term.examples USAGECOUNTS = {} @@ -1144,7 +1191,28 @@ def StoreUsage(id,count): USAGECOUNTS[id] = count def GetUsage(id): - return USAGECOUNTS.get(id,0) + str = USAGECOUNTS.get(id,0) + if (str == '1') : + return "Between 10 and 100 domains" + elif (str == '2'): + return "Between 100 and 1000 domains" + elif (str == '3'): + return "Between 1000 and 10,000 domains" + elif (str == '4'): + return "Between 10,000 and 50,000 domains" + elif (str == '5'): + return "Between 50,000 and 100,000 domains" + elif (str == '7'): + return "Between 100,000 and 250,000 domains" + elif (str == '8'): + return "Between 250,000 and 500,000 domains" + elif (str == '9'): + return "Between 500,000 and 1,000,000 domains" + elif (str == '10'): + return "Over 1,000,000 domains" + else: + return "" + def GetExtMappingsRDFa(node, layers='core'): """Self-contained chunk of RDFa HTML markup with mappings for this term.""" @@ -1178,44 +1246,55 @@ def GetJsonLdContext(layers='core'): 
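# A hedged sketch, not from the patch: the hunk below rebuilds the JSON-LD
# context from VTerm objects, mapping every term to its prefixed id and
# typing a property as "@id" when its range includes schema:URL, so that
# values parse as IRIs rather than strings. Simplified here to that single
# URL case; the function and its arguments are hypothetical stand-ins.
def context_entry(term_id, prefixed_id, is_property=False, range_ids=()):
    entry = '"%s": { "@id": "%s"' % (term_id, prefixed_id)
    if is_property and "URL" in range_ids:
        entry += ', "@type": "@id"'  # IRI-valued property
    return entry + ' },'

print(context_entry("Book", "schema:Book"))
print(context_entry("url", "schema:url", is_property=True, range_ids=["URL"]))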
jsonldcontext += " \"type\": \"@type\",\n" jsonldcontext += " \"id\": \"@id\",\n" jsonldcontext += " \"HTML\": { \"@id\": \"rdf:HTML\" },\n" - jsonldcontext += " \"@vocab\": \"http://schema.org/\",\n" - jsonldcontext += namespaces - - url = Unit.GetUnit("URL") - date = Unit.GetUnit("Date") - datetime = Unit.GetUnit("DateTime") - -# properties = sorted(GetSources(Unit.GetUnit("rdf:type",True), Unit.GetUnit("rdf:Property",True), layers=getAllLayersList()), key=lambda u: u.id) -# for p in properties: - for t in GetAllTerms(EVERYLAYER,includeDataTypes=True): - if t.isClass(EVERYLAYER) or t.isEnumeration(EVERYLAYER) or t.isEnumerationValue(EVERYLAYER) or t.isDataType(EVERYLAYER): - jsonldcontext += " \"" + t.id + "\": {\"@id\": \"schema:" + t.id + "\"}," - elif t.isAttribute(EVERYLAYER): - range = GetTargets(Unit.GetUnit("rangeIncludes"), t, layers=EVERYLAYER) - type = None - - if url in range: - type = "@id" - elif date in range: - type = "Date" - elif datetime in range: - type = "DateTime" - - typins = "" - if type: - typins = ", \"@type\": \"" + type + "\"" - - jsonldcontext += " \"" + t.id + "\": { \"@id\": \"schema:" + t.id + "\"" + typins + "}," + jsonldcontext += " \"@vocab\": \"%s\",\n" % SdoConfig.vocabUri() + ns = apirdflib.getNamespaces() + done = [] + for n in ns: + for n in ns: + pref, pth = n + pref = str(pref) + if not pref in done: + done.append(pref) + jsonldcontext += " \"%s\": \"%s\",\n" % (pref,pth) + + datatypepre = "" + if SdoConfig.vocabUri() != "http://schema.org/": + datatypepre = "schema:" + + vocablines = "" + externalines = "" + typins = "" + url = apirdfterm.VTerm.getTerm("schema:URL") + for t in apirdfterm.VTerm.getAllTerms(supressSourceLinks=True): + if t.isClass() or t.isEnumeration() or t.isEnumerationValue() or t.isDataType(): + line = " \"" + t.getId() + "\": {\"@id\": \"" + t.getPrefixedId() + "\"}," + elif t.isProperty(): + ranges = t.getRanges() + + for r in ranges: + + if r == url: + typins = ", \"@type\": \"@id\"" + break + elif r.isDataType(): + typins = "" + else: + typins = ", \"@type\": \"@id\"" + + line = " \"" + t.getId() + "\": { \"@id\": \"" + t.getPrefixedId() + "\"" + typins + "}," + + if t.getId().startswith("http"): + externalines += line + else: + vocablines += line + jsonldcontext += vocablines + jsonldcontext += externalines jsonldcontext += "}}\n" jsonldcontext = jsonldcontext.replace("},}}","}\n }\n}") jsonldcontext = jsonldcontext.replace("},","},\n") return str(jsonldcontext) - - - - #### UTILITIES @@ -1234,14 +1313,34 @@ def inLayer(layerlist, node): def read_file (filename): """Read a file from disk, return it as a single string.""" strs = [] - - file_path = full_path(filename) - - import codecs - #log.debug("READING FILE: filename=%s file_path=%s " % (filename, file_path ) ) - for line in codecs.open(file_path, 'r', encoding="utf8").readlines(): - strs.append(line) - return "".join(strs) + + if filename.startswith("file://"): + filename = filename[7:] + + if "://" in filename: + import urllib2 + log.info("URL: %s" % filename) + try: + fd = urllib2.urlopen(filename) + return fd.read() + except urllib2.URLError as e: + log.info("read_file URLError %s: %s" % (e,e.message)) + return None + except Exception as e: + log.info("read_file Exception %s: %s" % (e,e.message)) + return None + else: + file_path = full_path(filename) + import codecs + try: + #log.debug("READING FILE: filename=%s file_path=%s " % (filename, file_path ) ) + for line in codecs.open(file_path, 'r', encoding="utf8").readlines(): + strs.append(line) + ret = "".join(strs) 
+ except Exception as e: + log.info("read_file Exception %s: %s" % (e,e.message)) + return None + return ret def full_path(filename): """convert local file name to full path.""" @@ -1277,7 +1376,37 @@ def setHomeValues(items,layer='core',defaultToCore=False): log.info(msg) extensionLoadErrors += msg + '\n' -def read_schemas(loadExtensions=False): +def read_schemas(files): + """Read/parse/ingest schemas from files from config""" + load_start = datetime.datetime.now() + log.debug("[%s] (re)loading core and annotations." % getInstanceId(short=True)) + + for f in files: + try: + log.info("read_schema '%s' '%s'" %(f.get("ext"),f.get("file"))) + apirdflib.load_graph(f.get("ext"),f.get("file"),prefix=f.get("prefix"),vocab=f.get("vocaburi")) + except Exception as e: + log.error("exception loading schema file %s %s: %s" % (f.get("file"),e,e.message)) + pass + + + log.info("[%s] Loaded graphs in %s" % (getInstanceId(short=True),(datetime.datetime.now() - load_start))) + +def load_usage_data(files): + load_start = datetime.datetime.now() + for f in files: + try: + usage_data = read_file(f.get("file")) + parser = parsers.UsageFileParser(None) + parser.parse(usage_data) + except Exception as e: + log.error("exception loading usage data file %s %s: %s" % (f,e,e.message)) + pass + + log.debug("[%s]Loaded usage data in %s" % (getInstanceId(short=True),(datetime.datetime.now() - load_start))) + + +def read_local_schemas(loadExtensions=False): """Read/parse/ingest schemas from data/*.rdfa. Also data/*examples.txt""" load_start = datetime.datetime.now() @@ -1285,8 +1414,8 @@ def read_schemas(loadExtensions=False): schemasInitialized = True if (not schemasInitialized or DYNALOAD): log.debug("[%s] (re)loading core and annotations." % getInstanceId(short=True)) - files = glob.glob("data/*.rdfa") - jfiles = glob.glob("data/*.jsonld") + files = glob_from_dir("data","*.rdfa") + jfiles = glob_from_dir("data","*.jsonld") for jf in jfiles: rdfequiv = jf[:-7]+".rdfa" if not rdfequiv in files: #Only add .jsonld files if no equivalent .rdfa @@ -1299,7 +1428,7 @@ def read_schemas(loadExtensions=False): load_start = datetime.datetime.now() - files = glob.glob("data/2015-04-vocab_counts.txt") + files = glob_from_dir("data","2015-04-vocab_counts.txt") for file in files: usage_data = read_file(file) parser = parsers.UsageFileParser(None) @@ -1319,8 +1448,8 @@ def read_extensions(extensions): log.info("[%s] extensions %s " % (getInstanceId(short=True),extensions)) for i in extensions: all_layers[i] = "1" - extfiles = glob.glob("data/ext/%s/*.rdfa" % i) - jextfiles = glob.glob("data/ext/%s/*.jsonld" % i) + extfiles = glob_from_dir("data/ext/%s/" % i,"*.rdfa") + jextfiles = glob_from_dir("data/ext/%s/" % i,"*.jsonld") for jf in jextfiles: rdfequiv = jf[:-7]+".rdfa" if not rdfequiv in extfiles: #Only add .jsonld files if no equivalent .rdfa @@ -1334,6 +1463,29 @@ def read_extensions(extensions): extensionsLoaded = True def load_examples_data(extensions): + if SdoConfig.isValid(): + load_example_sources(SdoConfig.exampleFiles()) + if not getInTestHarness() and EXAMPLESTOREMODE == "NDBSHARED": #Use NDB Storage + ExampleStore.store(EXAMPLES) + ExampleMap.store(EXAMPLESMAP) + memcache.set("ExmplesLoaded",value=True) + else: + load_local_examples_data(extensions) + +def load_example_sources(files): + if files: + work = [] + for f in files: + #log.info("FILE: %s" % f) + work.append(f.get("file")) + + read_examples(work,f.get("extension")) + + +def load_local_examples_data(extensions): + log.info("Skipping examples load") + return + 
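# A hedged note, not from the patch: load_example_sources() above collects
# every configured file into one list, then calls read_examples() once with
# whichever row's "extension" value came last; the rows built by
# SdoConfig.files() actually carry an "ext" key, so that lookup may always
# return None. If the intent is one parse batch per extension, mirroring the
# per-directory loop in the local loader below, a grouped variant might look
# like this sketch, where parse is the module's read_examples(files, layer).
from collections import defaultdict

def load_example_sources_grouped(rows, parse):
    batches = defaultdict(list)
    for row in rows or []:
        batches[row.get("ext") or "core"].append(row.get("file"))
    for ext, files in batches.items():
        parse(files, ext)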
load = False if getInTestHarness(): load = True @@ -1342,10 +1494,10 @@ def load_examples_data(extensions): if load: load_start = datetime.datetime.now() - files = glob.glob("data/*examples.txt") + files = glob_from_dir("data","*examples.txt") read_examples(files,'core') for i in extensions: - expfiles = glob.glob("data/ext/%s/*examples.txt" % i) + expfiles = glob_from_dir("data/ext/%s" % i,"*examples.txt") read_examples(expfiles,i) if not getInTestHarness() and EXAMPLESTOREMODE == "NDBSHARED": #Use NDB Storage @@ -1356,16 +1508,21 @@ def load_examples_data(extensions): log.info("Loaded %s examples mapped to %s terms in %s" % (len(EXAMPLES),len(EXAMPLESMAP),(datetime.datetime.now() - load_start))) else: log.info("Examples already loaded") - + def read_examples(files, layer): first = True for f in files: - parser = parsers.ParseExampleFile(None,layer=layer) - #log.info("[%s] Reading: %s" % (getInstanceId(short=True),f)) - if first: - #log.info("[%s] Loading examples from %s" % (getInstanceId(short=True),layer)) - first = False - parser.parse(f) + try: + parser = parsers.ParseExampleFile(None,layer=layer) + #log.info("[%s] Reading: %s" % (getInstanceId(short=True),f)) + if first: + #log.info("[%s] Loading examples from %s" % (getInstanceId(short=True),layer)) + first = False + parser.parse(f) + except Exception as e: + log.error("exception loading examples file %s %s: %s" % (f,e,e.message)) + pass + EXAMPLESTORECACHE = [] class ExampleStore(ndb.Model): @@ -1546,4 +1703,445 @@ def ShortenOnSentence(source,lengthHint=250): return source log.info("[%s]api loaded" % (getInstanceId(short=True))) +############################### +class TimestampEntity(ndb.Model): + content = ndb.TextProperty() + info = ndb.TextProperty() + + +def storeTimestampedInfo(tag,stamp=None,info=None): + if not stamp: + stamp = datetime.datetime.utcnow().strftime("%a %d %b %Y %H:%M:%S UTC") + + if TIMESTAMPSTOREMODE == "INMEM": + log.info("Storing %s stamp: '%s'" % (tag,stamp)) + if info: + stamp = "%s\n%s" %(stamp,info) + memcache.set(key=tag,value=stamp) + + elif TIMESTAMPSTOREMODE == "NDBSHARED": + log.info("Storing ndbshared %s stamp: '%s'" % (tag,stamp)) + ent = TimestampEntity(id = tag, content = stamp, info = info) + ent.put() + + elif TIMESTAMPSTOREMODE == "CLOUDSTORE": + log.info("Storing cloudstore %s stamp: '%s' info: %s" % (tag,stamp,info)) + if info: + stamp = "%s\n%s" %(stamp,info) + val = cloudstoreStoreContent("%s.txt" % tag, stamp, ".status", private=True) + +def getTimestampedInfo(tag): + info = "" + val = "" + if TIMESTAMPSTOREMODE == "INMEM": + data = memcache.get(tag) + val = data.split('\n',1)[0] + info = data[len(val):] + log.info("%s mem version: '%s'" % (tag, val)) + + elif TIMESTAMPSTOREMODE == "NDBSHARED": + ent = TimestampEntity.get_by_id(tag) + if ent: + val = ent.content + info = ent.info + log.info("%s: ndbshared version: '%s'" % (tag, val)) + + elif TIMESTAMPSTOREMODE == "CLOUDSTORE": + tag += ".txt" + data = cloudstoreGetContent(tag, ".status") + if data: + val = data.split('\n',1)[0] + info = data[len(val):] + else: + val="-1" + log.info("%s: cloudstore version: '%s'" % (tag, val)) + + return val, info + + +############################### +class SdoConfig(): + configFile = "" + nested = 0 + valid = False + loaded = False + myconf = None + name = None + attic = None + varslist = None + descs = {} + + @classmethod + def clear(cls): + if cls.myconf: + cls.myconf.close() + cls.valid = False + cls.loaded = False + cls.myconf = None + cls.name = None + cls.attic = None + cls.varslist = None + 
cls.descs = {} + + + @classmethod + def load(cls, conffile): + log.info("Loading config file from %s" % conffile) + if cls.myconf: + log.info("Found previous config load graph - closing it!") + cls.myconf.close() + cls.myconf = None + + config = conffile + while config: + try: + SdoConfig.myconf = apirdflib.graphFromFiles(config,prefix="scc",path="http://configfiles.schema.org/") + config = cls.loadData(configFile=config)#Returns new config file if a redirect + except Exception as e: + log.info("Configuration file (%s) read/load Exception %s: %s" % (config,e,e.message)) + pass + + if config: + log.info("Found previous config load graph - closing it!") + cls.myconf.close() #dump previous graphs to start next on clean. + + cls.configFile = config + + if len(cls.myconf) > 0: + cls.valid = True + log.info("SdoConfig.myconf valid:%s %s triple count: %s" % (cls.valid, cls.myconf, len(cls.myconf))) + + else: + cls.valid = False + log.info("No config detected!!!") + + @classmethod + def isValid(cls): + return cls.valid + + @classmethod + def getConfigFile(cls): + return cls.configFile + + @classmethod + def getname(cls): + return cls.name + + @classmethod + def prefix(cls): + return cls.pre + + @classmethod + def loadData(cls,configFile=None): + redirectq= """SELECT ?loc WHERE { + ?s a scc:ConfigurationRedirect; + scc:configurationLocation ?loc + }""" + + q = """SELECT ?name ?url ?voc ?pre ?attic ?include WHERE { + ?s a scc:DataFeed; + scc:siteurl ?url; + scc:vocaburl ?voc; + scc:prefix ?pre; + scc:name ?name. + OPTIONAL { + ?s scc:atticurl ?attic. + } + }""" + + if cls.nested > 5: + log.error("Too many nested redirects (%s) - aborting" % cls.nested + 1) + return False + newconfig = None + if not cls.loaded: + res = apirdflib.rdfQueryStore(redirectq,cls.myconf) + if len(res): + for row in res: + loc = row.loc + log.info("Found Redirect to config file: %s" % loc) + cls.nested += 1 + newconfig = str(loc) + + if not newconfig: + res = apirdflib.rdfQueryStore(q,cls.myconf) + if len(res) > 1: + log.error("More than one DataFeed in config file!!") + cls.valid = False + for row in res: + cls.name = row.name + cls.pre = row.pre + cls.url = str(row.url) + cls.voc = str(row.voc) + cls.attic = str(row.attic) + cls.loaded = True + break + if cls.loaded: + cls.loadIncludes(configFile) + + return newconfig + + @classmethod + def loadIncludes(cls,configFile): + if not configFile: + return + q = """SELECT DISTINCT ?inc WHERE { + ?s a scc:DataFeed; + scc:name "%s"; + scc:include ?inc. + } + """ % (cls.name) + nameq = """SELECT DISTINCT ?obj WHERE { + ?s a scc:DataFeed; + scc:name "%s"; + ?p ?obj. + } + """ % (cls.name) + res = apirdflib.rdfQueryStore(q,cls.myconf) + inc = "" + if len(res): + try: + for row in list(res): + inc = str(row.inc) + #Include files are placed in same location as main config + if os.path.basename(inc) != inc: + log.error("No path allowed in include file names! 
%s" % inc) + inc = None + elif os.path.basename(configFile) != configFile: + inc = os.path.dirname(configFile) + "/" + inc + + if inc: + log.info("Loading Include file: %s" % inc) + graph = apirdflib.graphFromFiles(inc,prefix="scc",path="http://configfiles.schema.org/") + objs = apirdflib.rdfQueryStore(nameq,graph) + if not len(objs): + log.error("No triples for DataFeed '%s' in include file: %s" % (cls.name,inc)) + else: + log.info("Include triple count: %s" % len(graph)) + cls.myconf += graph + except Exception as e: + log.info("Configuration include file (%s) read/load Exception %s: %s" % (inc,e,e.message)) + cls.valid = False + pass + + @classmethod + def templateDir(cls): + ret = None + temps = cls.files("templates") + if temps and len(temps): + ret = temps[0].get("location") + log.info("Templates dir: %s " % ret) + return ret + + @classmethod + def baseUri(cls): + return cls.url + + @classmethod + def siteUri(cls): + ret = cls.baseUri() + if ret.endswith("/"): + ret = ret[:len(ret)-1] + return ret + + @classmethod + def siteUriRoot(cls): + m = re.search("^(http[s]*:\/\/)(.*)",cls.siteUri) + root = None + if m: + prto = m.group(1) + root = m.group(2) + return root + + @classmethod + def vocabUri(cls): + return cls.voc + + @classmethod + def atticUri(cls): + return cls.attic + + + @classmethod + def termFiles(cls): + return cls.files(filetype="TERMS") + + @classmethod + def exampleFiles(cls): + return cls.files(filetype="EXAMPLES") + + @classmethod + def countsFiles(cls): + return cls.files(filetype="COUNTS") + + @classmethod + def docsFiles(cls): + return cls.files(filetype="DOCS") + + @classmethod + def loadVars(cls): + q = """SELECT ?var ?val WHERE { + ?s scc:dataFeedVar ?o. + ?o ?var ?val. + }""" + + cls.varslist = {} + res = apirdflib.rdfQueryStore(q,cls.myconf) + for row in res: + #log.info(">>> %s ==== %s <<<<<"% (row.var,row.val)) + cls.varslist[os.path.basename(row.var)] = row.val #the var value will come back as a URI + + @classmethod + def varsub(cls,s): + if not cls.varslist: + cls.loadVars() + return re.sub(VARSUBPATTERN, cls.varsubReplace, s) + + @classmethod + def varsubReplace(cls,match): + ret = "" + var = match.group(1) + val = cls.varslist.get(var,None) + if val: + ret = val + return ret + + @classmethod + def descriptor(cls,extension=None): + ret = cls.descs.get(extension,None) + if ret: + return ret + + ex="" + fil="" + if extension and len(extension): + ex='?v scc:extension "%s".' % extension + else: + fil="""FILTER ( strlen(?ext) < 1 || NOT EXISTS {?v scc:extension ?ext. } ) """ + + + q = """SELECT ?id ?name ?ver ?disam ?com ?ex WHERE { + ?s a scc:DataFeed; + scc:name "%s"; + scc:extensiondescription ?v. + %s + ?v scc:id ?id; + scc:name ?name; + scc:comment ?com. + OPTIONAL { + ?v scc:extension ?ext. + } OPTIONAL { + ?v scc:disambiguatingDescription ?disam. + } OPTIONAL { + ?v scc:softwareVersion ?ver. 
+ } + %s + } + """ % (cls.name,ex,fil) + #log.info("%s" % q) + res = apirdflib.rdfQueryStore(q,cls.myconf) + #log.info("%s" % len(res)) + + ret = [] + for row in res: + r = { + "id": row.id, + "name": row.name, + "version": row.ver, + "brief": row.disam, + "comment": row.com, + "extension": row.ex + } + ret.append(r) + cls.descs[extension] = ret + return ret + + @classmethod + def stripLocalPathPrefix(cls,path): + ret = path + if ret and len(ret): + if ret.startswith('./'): + ret = ret[2:] + elif ret.startswith('.'): + ret = ret[1:] + return ret + + @classmethod + def files(cls,filetype=None): + filter = "" + if filetype: + filter ='FILTER regex(?type, "%s", "i")' % filetype + + q = """SELECT DISTINCT ?file ?dir ?ext ?type ?addpre ?addvoc WHERE { + ?s a scc:DataFeed; + scc:name "%s"; + scc:dataFeedElement ?d. + + ?d scc:fileContent ?type. + + OPTIONAL { + ?d scc:extension ?ext. + } OPTIONAL { + ?d scc:contentLocation ?dir. + }OPTIONAL { + ?d scc:contentFile ?file. + }OPTIONAL { + ?d scc:addPrefix ?addpre; + scc:addVocaburl ?addvoc. + } + %s + } + ORDER BY ?ext ?type ?file + """ % (cls.name,filter) + + res = apirdflib.rdfQueryStore(q,cls.myconf) + ret = [] + for row in res: + d = row.dir + if d: + d = cls.stripLocalPathPrefix(str(cls.varsub(d))) + loc = d + f = row.file + if f: + f = cls.stripLocalPathPrefix(str(cls.varsub(f))) + fpath = f + if d and f: + if not d.endswith('/'): + d += '/' + f = d + f + if f and "://" not in f: + if f.startswith('./'): + f = f[2:] + elif f.startswith('/'): + f = f[1:] + f = "file://%s" % full_path(f) + t = str(row.type) + t = t.upper() + e = row.ext + if e: + e = str(e) + else: + e = "" + + + p = row.addpre + if not p or not len(p): + p = None + v = row.addvoc + if not v or not len(v): + v = None + + + + r = { + "ext": e, + "type": t, + "file": f, + "location": loc, + "filePart": fpath, + "prefix": p, + "vocaburi": v + } + ret.append(r) + return ret + diff --git a/apimarkdown.py b/apimarkdown.py index ad0f94b836..042b9f87fc 100644 --- a/apimarkdown.py +++ b/apimarkdown.py @@ -6,6 +6,7 @@ from markdown2 import Markdown import re import threading +import sdoutil WIKILINKPATTERN = r'\[\[([\w0-9_ -]+)\]\]' class MarkdownTool(): @@ -18,13 +19,13 @@ def __init__ (self): self.wpost = "" self.parselock = threading.Lock() - def setPre(self,pre="/"): + def setPre(self,pre="./"): self.wpre = pre def setPost(self,post=""): self.wpost = post - def parse(self,source,preservePara=False): + def parse(self,source,preservePara=False,wpre=None): if not source or len(source) == 0: return "" source = source.strip() @@ -45,14 +46,18 @@ def parse(self,source,preservePara=False): if ret.endswith("
<br/><br/>
"): ret = ret[:len(ret)-10] - return self.parseWiklinks(ret) + return self.parseWiklinks(ret,wpre=wpre) - def parseWiklinks(self,source): + def parseWiklinks(self,source,wpre=None): + sdoutil.setAppVar("MKDOWNWPRE",wpre) return re.sub(WIKILINKPATTERN, self.wikilinkReplace, source) def wikilinkReplace(self,match): + wpre = sdoutil.getAppVar("MKDOWNWPRE") + if not wpre: + wpre = self.wpre t = match.group(1) - return '%s' % (self.wclass,self.wpre,t,self.wpost,t) + return '%s' % (self.wclass,wpre,t,self.wpost,t) Markdown = MarkdownTool() diff --git a/apirdflib.py b/apirdflib.py index d812e680e0..57b6def002 100755 --- a/apirdflib.py +++ b/apirdflib.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- +from __future__ import with_statement + import logging logging.basicConfig(level=logging.INFO) # dev_appserver.py --log_level debug . log = logging.getLogger(__name__) @@ -15,6 +17,7 @@ from rdflib.plugins.sparql import prepareQuery import threading from testharness import * +from sdoutil import * import api from apimarkdown import Markdown import StringIO @@ -23,8 +26,8 @@ rdflib.plugin.register("json-ld", Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer") ATTIC = 'attic' -VOCAB = "http://schema.org" -VOCABLEN = len(VOCAB) +VOCAB = None +VOCABLEN = 0 ALTVOCAB = "https://schema.org" STORE = rdflib.Dataset() #Namespace mapping############# @@ -44,31 +47,33 @@ def queryGraph(): global QUERYGRAPH if not QUERYGRAPH: - try: - RDFLIBLOCK.acquire() + with RDFLIBLOCK: if not QUERYGRAPH: QUERYGRAPH = rdflib.Graph() gs = list(STORE.graphs()) for g in gs: id = str(g.identifier) - if not id.startswith("http://"):#skip some internal graphs + if not id.startswith("http://") and not id.startswith("https://"):#skip some internal graphs continue QUERYGRAPH += g QUERYGRAPH.bind('owl', 'http://www.w3.org/2002/07/owl#') QUERYGRAPH.bind('rdfa', 'http://www.w3.org/ns/rdfa#') - QUERYGRAPH.bind('dct', 'http://purl.org/dc/terms/') + QUERYGRAPH.bind('dc', 'http://purl.org/dc/terms/') QUERYGRAPH.bind('schema', 'http://schema.org/') - altSameAs(QUERYGRAPH) - finally: - RDFLIBLOCK.release() + pre = api.SdoConfig.prefix() + path = api.SdoConfig.vocabUri() + if pre and path: + QUERYGRAPH.bind(pre, path) return QUERYGRAPH def altSameAs(graph): - sameAs = URIRef("%s/sameAs" % VOCAB) - for sub in graph.subjects(None,None): - if sub.startswith(VOCAB): + vocab = api.SdoConfig.baseUri() + sameAs = URIRef("%s/sameAs" % vocab) + #for sub in graph.subjects(None,None): + #if sub.startswith(api.SdoConfig.baseUri()): #log.info("%s >>>> %s " % (sub,"%s%s" % (ALTVOCAB,sub[VOCABLEN:]))) - graph.add( (sub,sameAs,URIRef("%s%s" % (ALTVOCAB,sub[VOCABLEN:]))) ) + #graph.add( (sub,sameAs,URIRef("%s%s" % (ALTVOCAB,sub[VOCABLEN:]))) ) + def loadNss(): global NSSLoaded @@ -100,14 +105,18 @@ def getRevNss(val): return "" ############################## -def load_graph(context, files): +def load_graph(context, files,prefix=None,vocab=None): """Read/parse/ingest schemas from data/*.rdfa.""" import os.path import glob import re + if not isinstance(files,list): + files = [files] - log.debug("Loading %s graph." % context) + #log.info("Loading %s graph." 
% context) for f in files: + if f.startswith("file://"): + f = f[7:] if(f[-5:] == ".rdfa"): format = "rdfa" elif(f[-7:] == ".jsonld"): @@ -118,17 +127,23 @@ def load_graph(context, files): if(format == "rdfa"): uri = getNss(context) g = STORE.graph(URIRef(uri)) - g.parse(file=open(full_path(f),"r"),format=format) - STORE.bind(context,uri) + g.parse(f,format=format) + if len(context) and context != "core": + STORE.bind(context,uri) elif(format == "json-ld"): - STORE.parse(file=open(full_path(f),"r"),format=format, context=context_data) + STORE.parse(f,format=format, context=context_data) + + namespaceAdd(STORE,prefix=prefix,path=vocab) + namespaceAdd(STORE,prefix=api.SdoConfig.prefix(),path=api.SdoConfig.vocabUri()) + + nss = STORE.namespaces() QUERYGRAPH = None #In case we have loaded graphs since the last time QUERYGRAPH was set def rdfQueryStore(q,graph): - res = [] - try: - RDFLIBLOCK.acquire() + res = [] + + with RDFLIBLOCK: retrys = 0 #Under very heavy loads rdflib has been know to throw exceptions - hense the retry loop while True: @@ -143,92 +158,98 @@ def rdfQueryStore(q,graph): else: log.error("Retrying again after %s retrys" % retrys) retrys += 1 - finally: - RDFLIBLOCK.release() - return res + return res def rdfGetTriples(id): - """All triples with node as subject.""" - targets = [] - fullId = id - -# log.info("rdfgetTriples(%s)" % fullId) - if ':' in id: #Includes full path or namespaces - fullId = id - else: - fullId = VOCAB + "/" + id - #log.info("rdfgetTriples(%s)" % source) - - first = True - unit = None - - homeSetTo = None - typeOfInLayers = [] - - q = "SELECT ?g ?p ?o WHERE {GRAPH ?g {<%s> ?p ?o }}" % fullId - - res = rdfQueryStore(q,STORE) + """All triples with node as subject.""" + targets = [] + fullId = id - for row in res: -# if source == "http://meta.schema.org/": -# log.info("Triple: %s %s %s %s" % (source, row.p, row.o, row.g)) - layer = str(getRevNss(str(row.g))) - if first: - first = False - unit = api.Unit.GetUnitNoLoad(id,True) - s = stripID(fullId) - p = stripID(row.p) - if p == "rdf:type": - typeOfInLayers.append(layer) - elif(p == "isPartOf"): - if(unit.home != None and unit.home != layer): - log.info("WARNING Cannot set %s home to %s - already set to: %s" % (s,layer,unit.home)) - unit.home = layer - homeSetTo = layer - elif(p == "category"): - unit.category = row.o - - prop = api.Unit.GetUnit(p,True) - - if isinstance(row.o,rdflib.Literal): - api.Triple.AddTripleText(unit, prop, row.o, layer) - else: - api.Triple.AddTriple(unit, prop, api.Unit.GetUnit(stripID(row.o),True), layer) - - """ Default Unit.home to core if not specificly set with an 'isPartOf' triple """ - if(unit and homeSetTo == None): - if('core' in typeOfInLayers or len(typeOfInLayers) == 0): - unit.home = 'core' - else: - log.info("WARNING: %s defined in extensions %s but has no 'isPartOf' triple - cannot default home to core!" 
% (id,typeOfInLayers)) - return unit + log.info("rdfgetTriples(%s)" % fullId) + if ':' in id: #Includes full path or namespaces + fullId = id + else: + #fullId = api.SdoConfig.baseUri() + "/" + id + fullId = api.SdoConfig.baseUri() + id + log.info("rdfgetTriples(%s)" % fullId) -def rdfGetSourceTriples(target): - """All source nodes for a specified arc pointing to a specified node (within any of the specified layers).""" - id = target.id - target.sourced = True - sources = [] - fullId = id - if ':' in id: #Includes full path or namespaces - fullId = id - else: - fullId = VOCAB + "/" + id - - targ = fullId - if fullId.startswith('http://'): - targ = "<%s>" % fullId - - q = "SELECT ?g ?s ?p WHERE {GRAPH ?g {?s ?p %s }}" % targ - - res = rdfQueryStore(q,STORE) + first = True + unit = None - for row in res: - layer = str(getRevNss(str(row.g))) - unit = api.Unit.GetUnit(stripID(row.s)) - p = stripID(row.p) - prop = api.Unit.GetUnit(p,True) - obj = api.Unit.GetUnit(stripID(fullId),True) - api.Triple.AddTriple(unit, prop, obj, layer) + homeSetTo = None + typeOfInLayers = [] + + q = "SELECT ?g ?p ?o WHERE {GRAPH ?g {<%s> ?p ?o }}" % fullId + + log.info("%s" % q) + + res = rdfQueryStore(q,STORE) + + log.info("rdfgetTriples RES: %s: %s" % (len(res), res)) + for row in res: + # if source == "http://meta.schema.org/": + # log.info("Triple: %s %s %s %s" % (source, row.p, row.o, row.g)) + layer = str(getRevNss(str(row.g))) + if first: + first = False + unit = api.Unit.GetUnitNoLoad(id,True) + s = stripID(fullId) + p = stripID(row.p) + if p == "rdf:type": + typeOfInLayers.append(layer) + elif(p == "isPartOf"): + if(unit.home != None and unit.home != layer): + log.info("WARNING Cannot set %s home to %s - already set to: %s" % (s,layer,unit.home)) + unit.home = layer + homeSetTo = layer + elif(p == "category"): + unit.category = row.o + + prop = api.Unit.GetUnit(p,True) + + if isinstance(row.o,rdflib.Literal): + api.Triple.AddTripleText(unit, prop, row.o, layer) + else: + api.Triple.AddTriple(unit, prop, api.Unit.GetUnit(stripID(row.o),True), layer) + + """ Default Unit.home to core if not specificly set with an 'isPartOf' triple """ + if(unit and homeSetTo == None): + if('core' in typeOfInLayers or len(typeOfInLayers) == 0): + unit.home = 'core' + else: + log.info("WARNING: %s defined in extensions %s but has no 'isPartOf' triple - cannot default home to core!" 
% (id,typeOfInLayers)) + return unit + +def rdfGetSourceTriples(target): + """All source nodes for a specified arc pointing to a specified node (within any of the specified layers).""" + id = target.id + target.sourced = True + sources = [] + log.info("rdfGetSourceTriples(%s)" % id) + if ':' in id: #Includes full path or namespaces + fullId = id + else: + #fullId = api.SdoConfig.baseUri() + "/" + id + fullId = api.SdoConfig.baseUri() + id + targ = fullId + if fullId.startswith('http://') or fullId.startswith('https://'): + targ = "<%s>" % fullId + log.info("rdfGetSourceTriples(%s)" % targ) + + q = "SELECT ?g ?s ?p WHERE {GRAPH ?g {?s ?p %s }}" % targ + log.info("%s" % q) + + res = rdfQueryStore(q,STORE) + log.info("rdfGetSourceTriples: res: %s %s" % (len(res),res)) + + for row in res: + log.info("SUB: %s PRED: %s OBJ: %s" % (stripID(row.s),stripID(row.p),stripID(fullId))) + layer = str(getRevNss(str(row.g))) + unit = api.Unit.GetUnit(stripID(row.s),True) + p = stripID(row.p) + prop = api.Unit.GetUnit(p,True) + obj = api.Unit.GetUnit(stripID(fullId),True) + api.Triple.AddTriple(unit, prop, obj, layer) def countFilter(extension="ALL",includeAttic=False): excludeAttic = "FILTER NOT EXISTS {?term schema:isPartOf }." @@ -246,19 +267,60 @@ def countFilter(extension="ALL",includeAttic=False): return extensionSel + "\n" + excludeAttic +TOPSTERMS = None +def rdfgettops(): + global TOPSTERMS + #Terms that are Classes AND have no superclass OR have a superclass from another vocab + #plus Terms that of another type (not rdfs:Class or rdf:Property) and that type is from another vocab + #Note: In schema.org this will also return DataTypes + if TOPSTERMS: + return TOPSTERMS + + TOPSTERMS = [] + query= '''select ?term where { + { + ?term a rdfs:Class; + rdfs:subClassOf ?super. + FILTER (!strstarts(str(?super),"%s")) + } UNION { + ?term a rdfs:Class. + FILTER NOT EXISTS { ?term rdfs:subClassOf ?p } + } UNION { + ?term a ?type. + FILTER NOT EXISTS { ?term a rdfs:Class } + FILTER NOT EXISTS { ?term a rdf:Property } + FILTER (!strstarts(str(?type),"%s")) + } + FILTER (strstarts(str(?term),"%s")) + } + ORDER BY ?term + ''' % (api.SdoConfig.vocabUri(),api.SdoConfig.vocabUri(),api.SdoConfig.vocabUri()) + + #log.info("%s"%query) + res = rdfQueryStore(query,queryGraph()) + #log.info("[%s]"%len(res)) + for row in res: + TOPSTERMS.append(str(row.term)) + return TOPSTERMS + def countTypes(extension="ALL",includeAttic=False): - filter = countFilter(extension=extension, includeAttic=includeAttic) - query= ('''select (count (?term) as ?cnt) where { - ?term a rdfs:Class. - ?term rdfs:subClassOf* schema:Thing. - %s - }''') % filter - graph = queryGraph() - count = 0 - res = rdfQueryStore(query,graph) - for row in res: - count = row.cnt - return count + log.info("countTypes()") + filter = countFilter(extension=extension, includeAttic=includeAttic) + log.info("countTypes 1") + query= ('''select (count (?term) as ?cnt) where { + ?term a rdfs:Class. + ?term rdfs:subClassOf* schema:Thing. 
+ %s + }''') % filter + log.info("countTypes 2") + graph = queryGraph() + log.info("countTypes 3") + count = 0 + log.info ("QUERY %s" % query) + res = rdfQueryStore(query,graph) + for row in res: + count = row.cnt + return count def countProperties(extension="ALL",includeAttic=False): filter = countFilter(extension=extension, includeAttic=includeAttic) @@ -290,6 +352,22 @@ def countEnums(extension="ALL",includeAttic=False): count = row.cnt return count +def getPathForPrefix(pre): + ns = STORE.namespaces() + for n in ns: + pref, path = n + if str(pre) == str(pref): + return path + return None + +def getPrefixForPath(pth): + ns = STORE.namespaces() + for n in ns: + pref, path = n + if str(path) == str(pth): + return pref + return None + def serializeSingleTermGrapth(node,format="json-ld",excludeAttic=True,markdown=True): graph = buildSingleTermGraph(node=node,excludeAttic=excludeAttic,markdown=markdown) file = StringIO.StringIO() @@ -301,82 +379,103 @@ def serializeSingleTermGrapth(node,format="json-ld",excludeAttic=True,markdown=T def buildSingleTermGraph(node,excludeAttic=True,markdown=True): + q = queryGraph() g = rdflib.Graph() - g.bind('owl', 'http://www.w3.org/2002/07/owl#') - g.bind('rdfa', 'http://www.w3.org/ns/rdfa#') - g.bind('dct', 'http://purl.org/dc/terms/') - g.bind('schema', 'http://schema.org/') + ns = q.namespaces() + for n in ns: + prefix, path = n + namespaceAdd(g,prefix=prefix,path=path) + namespaceAdd(g,api.SdoConfig.prefix(),api.SdoConfig.vocabUri()) - full = "http://schema.org/" + node - #n = URIRef(full) - n = SCHEMA.term(node) - n = n + full = "%s%s" % (api.SdoConfig.vocabUri(), node) + n = URIRef(full) full = str(n) - q = queryGraph() ret = None - #log.info("NAME %s %s"% (n,full)) + log.info("NAME %s %s"% (n,full)) atts = None - try: - RDFLIBLOCK.acquire() - atts = list(q.triples((n,SCHEMA.isPartOf,URIRef("http://attic.schema.org")))) - finally: - RDFLIBLOCK.release() + attic = api.SdoConfig.atticUri() + if attic: + with RDFLIBLOCK: + atts = list(q.triples((n,SCHEMA.isPartOf,URIRef(attic)))) if len(atts): #log.info("ATTIC TERM %s" % n) excludeAttic = False + #Outgoing triples - try: - RDFLIBLOCK.acquire() + with RDFLIBLOCK: ret = list(q.triples((n,None,None))) - finally: - RDFLIBLOCK.release() + for (s,p,o) in ret: #log.info("adding %s %s %s" % (s,p,o)) g.add((s,p,o)) #Incoming triples - ret = list(q.triples((None,None,n))) + with RDFLIBLOCK: + ret = list(q.triples((None,None,n))) for (s,p,o) in ret: #log.info("adding %s %s %s" % (s,p,o)) g.add((s,p,o)) #super classes - query='''select * where { - ?term (^rdfs:subClassOf*) <%s>. - ?term rdfs:subClassOf ?super. - ?super ?pred ?obj. - }''' % n - - ret = rdfQueryStore(query,q) + query='''select * where { + ?term (^rdfs:subClassOf*) <%s>. + ?term rdfs:subClassOf ?super. + OPTIONAL { + ?super ?pred ?obj. + FILTER (strstarts(str(?super),'%s')) + } + } + ''' % (n,api.SdoConfig.vocabUri()) + log.info("Query: %s" % query) + + ret = rdfQueryStore(query,q) for row in ret: #log.info("adding %s %s %s" % (row.term,RDFS.subClassOf,row.super)) g.add((row.term,RDFS.subClassOf,row.super)) - g.add((row.super,row.pred,row.obj)) + pred = row.pred + obj = row.obj + if pred and obj: + g.add((row.super,row.pred,row.obj)) #poperties with superclasses in domain query='''select * where{ ?term (^rdfs:subClassOf*) <%s>. ?prop ?term. - ?prop ?pred ?obj. + OPTIONAL { + ?prop ?pred ?obj. 
+ FILTER (strstarts(str(?prop),'%s')) + } } - ''' % n - ret = rdfQueryStore(query,q) + ''' % (n,api.SdoConfig.vocabUri()) + log.info("Query: %s" % query) + ret = rdfQueryStore(query,q) for row in ret: g.add((row.prop,SCHEMA.domainIncludes,row.term)) - g.add((row.prop,row.pred,row.obj)) + pred = row.pred + obj = row.obj + if pred and obj: + g.add((row.prop,row.pred,row.obj)) #super properties query='''select * where { ?term (^rdfs:subPropertyOf*) <%s>. ?term rdfs:subPropertyOf ?super. - ?super ?pred ?obj. - }''' % n - ret = rdfQueryStore(query,q) + OPTIONAL { + ?super ?pred ?obj. + FILTER (strstarts(str(?super),'%s')) + } + } + ''' % (n,api.SdoConfig.vocabUri()) + log.info("Query: %s" % query) + ret = rdfQueryStore(query,q) for row in ret: #log.info("adding %s %s %s" % (row.term,RDFS.subPropertyOf,row.super)) g.add((row.term,RDFS.subPropertyOf,row.super)) - g.add((row.super,row.pred,row.obj)) + pred = row.pred + obj = row.obj + if pred and obj: + g.add((row.super,row.pred,row.obj)) #Enumeration for an enumeration value query='''select * where { @@ -391,34 +490,36 @@ def buildSingleTermGraph(node,excludeAttic=True,markdown=True): if excludeAttic: #Remove triples referencing terms part of http://attic.schema.org trips = list(g.triples((None,None,None))) - try: - RDFLIBLOCK.acquire() + with RDFLIBLOCK: for (s,p,o) in trips: - atts = list(q.triples((s,SCHEMA.isPartOf,URIRef("http://attic.schema.org")))) + atts = list(q.triples((s,SCHEMA.isPartOf,URIRef(attic)))) if isinstance(o, URIRef): - atts.extend(q.triples((o,SCHEMA.isPartOf,URIRef("http://attic.schema.org")))) + atts.extend(q.triples((o,SCHEMA.isPartOf,URIRef(attic)))) for (rs,rp,ro) in atts: #log.info("Removing %s" % rs) g.remove((rs,None,None)) g.remove((None,None,rs)) - finally: - RDFLIBLOCK.release() if markdown: - try: - RDFLIBLOCK.acquire() + with RDFLIBLOCK: trips = list(g.triples((None,RDFS.comment,None))) - Markdown.setPre("http://schema.org/") + Markdown.setPre(api.SdoConfig.vocabUri()) for (s,p,com) in trips: mcom = Markdown.parse(com) g.remove((s,p,com)) g.add((s,p,Literal(mcom))) - finally: - RDFLIBLOCK.release() - Markdown.setPre() + Markdown.setPre() return g def stripID (str): l = len(str) + vocab = api.SdoConfig.vocabUri() + if vocab != 'http://schema.org/' and vocab != 'https://schema.org/': + if l > len(vocab) and str.startswith(vocab): + return str[len(vocab):] + else: + if (l > 17 and (str[:18] == 'http://schema.org/')): + return "schema:" + str[18:] + if (l > 17 and (str[:18] == 'http://schema.org/')): return str[18:] elif (l > 24 and (str[:25] == 'http://purl.org/dc/terms/')): @@ -431,10 +532,57 @@ def stripID (str): return "owl:" + str[30:] else: return str - - -def full_path(filename): - """convert local file name to full path.""" - import os.path - folder = os.path.dirname(os.path.realpath(__file__)) - return os.path.join(folder, filename) + +def graphFromFiles(files,prefix=None,path=None): + if not isinstance(files,list): + files = [files] + g = rdflib.Graph() + ns = namespaceAdd(g,prefix=prefix,path=path) + for f in files: + if f.startswith("file://"): + f = f[7:] + + if not "://" in f: + f = full_path(f) + + log.info("Trying %s" % f) + try: + g.parse(f,format='json-ld') + msg = "" + if ns: + msg = "with added namespace(%s: \"%s\")" % ns + log.info("graphFromFiles loaded : %s %s" % (f,msg)) + except Exception as e: + log.error("graphFromFiles exception %s: %s" % (e,e.message)) + pass + return g + +NSLIST = {} +def getNamespaces(g=None): + + if g == None: + g = queryGraph() + + ns = NSLIST.get(g,None) + if not 
ns: + ns = list(g.namespaces()) + NSLIST[g] = ns + return ns + +def namespaceAdd(g,prefix=None,path=None): + if prefix and path: + with RDFLIBLOCK: + ns = getNamespaces(g) + for n in ns: + pref, pth = n + + if str(prefix) == str(pref): #Already bound + return n + ns1 = rdflib.Namespace(path) + g.bind(prefix,ns1) + return prefix, path + return None + + + + \ No newline at end of file diff --git a/apirdfterm.py b/apirdfterm.py new file mode 100755 index 0000000000..36fbefc6e8 --- /dev/null +++ b/apirdfterm.py @@ -0,0 +1,684 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +from __future__ import with_statement + +import logging +logging.basicConfig(level=logging.INFO) # dev_appserver.py --log_level debug . +log = logging.getLogger(__name__) + +from testharness import * + +import threading +import re +import api +import apirdflib +import rdflib +from rdflib import URIRef +from sdoutil import * + +#from apirdflib import rdfGetTargets, rdfGetSources +from apimarkdown import Markdown + +CORELAYER = "core" +VTERMS={} +TERMSLOCK = threading.Lock() +from apirdflib import RDFLIBLOCK + +DATATYPEURI = URIRef("http://schema.org/DataType") +ENUMERATIONURI = URIRef("http://schema.org/Enumeration") +class VTerm(): + + CLASS = "Class" + PROPERTY = "Property" + DATATYPE = "Datatype" + ENUMERATION = "Enumeration" + ENUMERATIONVALUE = "Enumerationvalue" + REFERENCE = "Reference" + + + def __init__(self,uri,ttype=None,label=None,layer=None,cat=None): + #log.info('%s %s "%s" %s %s' % (uri,ttype,label, layer, cat)) + uri = str(uri) + self.uri = uri + self.id = uri + if uri.startswith(api.SdoConfig.vocabUri()): + self.id = uri[len(api.SdoConfig.vocabUri()):] + self.label = label + self.layer = CORELAYER + if layer: + self.layer = layer + self.category = cat + if not cat: + self.category = "" + self.parent = None + self.checkedDataTypeParents = False + self.supersededBy = None + self.supersedes = None + self.supers = None + self.termStack = None + self.subs = None + self.props = None + self.propUsedOn = None + self.ranges = None + self.domains = None + self.targetOf = None + self.equivalents = None + self.inverseOf = None + self.comments = None + self.comment = None + self.srcaks = None + self.sources = None + self.aks = None + self.examples = None + + VTERMS[self.uri] = self + + if ttype == rdflib.RDFS.Class: + self.ttype = VTerm.CLASS + elif ttype == rdflib.RDF.Property: + self.ttype = VTerm.PROPERTY + elif ttype == ENUMERATIONURI: + self.ttype = VTerm.ENUMERATION + elif ttype == DATATYPEURI: + self.ttype = VTerm.DATATYPE + elif not ttype: + self.ttype = VTerm.REFERENCE + self.label = id + else: + #log.info("checking parent %s" % ttype) + self.parent = str(ttype) + p = VTerm._getTerm(self.parent) + if p.isEnumeration(): + self.ttype = VTerm.ENUMERATIONVALUE + else: + self.ttype = p.getType() + + #log.info("VTerm %s %s" %(self.ttype,self.id)) + + def __str__(self): + return ("<%s: '%s'>") % (self.ttype,self.id) + def getType(self): + return self.ttype + def isClass(self): + return self.ttype == VTerm.CLASS + def isProperty(self): + return self.ttype == VTerm.PROPERTY + def isDataType(self): + if self.ttype == VTerm.DATATYPE: + return True + if self.isClass() and not self.checkedDataTypeParents: + self.checkedDataTypeParents = True + for super in self.getSupers(): + if super.isDataType(): + self.ttype = VTerm.DATATYPE + return True + return False + + def isEnumeration(self): + return self.ttype == VTerm.ENUMERATION + def isEnumerationValue(self): + return self.ttype == VTerm.ENUMERATIONVALUE + def 
isReference(self): + return self.ttype == VTerm.REFERENCE + def getId(self): + return self.id + def getPrefixedId(self): + return prefixedIdFromUri(self.uri) + def getUri(self): + return self.uri + def getLabel(self): + return self.label + def getComments(self): + if not self.comments: + self.comments = [] + comms = self.loadObjects(rdflib.RDFS.comment) + for c in comms: + self.comments.append(c) + return self.comments + def getComment(self): + if not self.comment: + self.loadComment() + return self.comment + def getSupersededBy(self): + if not self.supersededBy: + tmp = [] + ss = self.loadObjects("schema:supersededBy") + for s in ss: + t = VTerm._getTerm(str(s)) + if t: + tmp.append(t) + + if len(tmp) > 1: + log.debug("Warning '%s' supersededBy more than 1 term ()%s" % (self.id,len(tmp))) + if len(tmp): + self.supersededBy = tmp[0] + return self.supersededBy + def superseded(self): + return self.getSupersededBy() != None + def getSupersedes(self): + if not self.supersedes: + self.supersedes = [] + subs = self.loadSubjects("schema:supersededBy") + for sub in subs: + term = VTerm._getTerm(sub,createReference=True) + sortedAddUnique(self.supersedes,term) + return self.supersedes + def getSourcesAndAcks(self): + if not self.srcaks: + self.srcaks = [] + objs = self.loadObjects("dc:source") + for obj in objs: + term = VTerm._getTerm(obj,createReference=True) + sortedAddUnique(self.srcaks,term) + + self.sources = [] + self.aks = [] + #An aknowledgement is a 'source' with a comment + #A source is a source without a comment + if len(self.srcaks): + for ao in self.srcaks: + acks = ao.getComments() + if len(acks): + for ack in acks: + self.aks.append(ack) + else: + self.sources.append(ao.getUri()) + + return self.srcaks + def getSources(self): + if not self.sources: + self.getSourcesAndAcks() + return self.sources + def getAcknowledgements(self): + if not self.aks: + self.getSourcesAndAcks() + return self.aks + def getCategory(self): + return self.category + def getLayer(self): + return self.layer + def getInverseOf(self): + if not self.inverseOf: + self.inverseOf = VTerm._getTerm(self.loadValue("schema:inverseOf")) + return self.inverseOf + def getSupers(self): + if not self.supers: + self.loadsupers() + return self.supers + def getTermStack(self): + if not self.termStack: + self.termStack = [self] + for s in self.getSupers(): + self.termStack.extend(s.getTermStack()) + return self.termStack + def getSubs(self): + if not self.subs: + self.loadsubs() + return self.subs + def getProperties(self): + if not self.props: + self.props = [] + subs = self.loadSubjects("schema:domainIncludes") + for sub in subs: + term = VTerm._getTerm(sub,createReference=True) + sortedAddUnique(self.props,term) + return self.props + def getPropUsedOn(self): + raise Exception("Not implemented yet") + return self.propUsedOn + def getRanges(self): + if not self.ranges: + self.ranges = [] + objs = self.loadObjects("schema:rangeIncludes") + for obj in objs: + term = VTerm._getTerm(obj,createReference=True) + sortedAddUnique(self.ranges,term) + return self.ranges + def getDomains(self): + if not self.domains: + self.domains = [] + objs = self.loadObjects("schema:domainIncludes") + for obj in objs: + term = VTerm._getTerm(obj,createReference=True) + sortedAddUnique(self.domains,term) + return self.domains + def getTargetOf(self): + if not self.targetOf: + self.targetOf = [] + subs = self.loadSubjects("schema:rangeIncludes") + for sub in subs: + term = VTerm._getTerm(sub,createReference=True) + sortedAddUnique(self.targetOf,term) + 
return self.targetOf + def getEquivalents(self): + if not self.equivalents: + self.equivalents = self.loadObjects("owl:equivalentClass") + self.equivalents.extend(self.loadObjects("owl:equivalentProperty")) + log.info("equivalents: %s" % self.equivalents) + return self.equivalents + def inLayers(self,layers): + return self.layer in layers + + + def loadComment(self): + comments = self.getComments() + wpre = None + name = self.getId() + if name.startswith("http"): #Wikilinks in markdown default to current site - externals need overriding + val = os.path.basename(name) + wpre = name[:len(name) - len(val)] + + first = True + buf = sdoStringIO() + for com in comments: + if not first: + buf.write(" ") + else: + first = False + buf.write(Markdown.parse(com,wpre=wpre)) + ret = buf.getvalue() + if not len(ret): + ret = "-" + self.comment = ret + + + def loadValue(self,valType): + ret = self.loadObjects(valType) + if not ret or len(ret) == 0: + return None + return ret[0] + + def loadObjects(self,pred): + query = """ + SELECT ?val WHERE { + %s %s ?val. + }""" % (uriWrap(toFullId(self.id)),uriWrap(pred)) + ret = [] + #log.info("query %s" % query) + res = VTerm.query(query) + #log.info("res %d" % len(res)) + for row in res: + #log.info(">%s<" % row.val) + ret.append(row.val) + return ret + + def loadSubjects(self,pred): + query = """ + SELECT ?sub WHERE { + ?sub %s %s. + }""" % (uriWrap(pred),uriWrap(toFullId(self.id))) + ret = [] + #log.info("query %s" % query) + res = VTerm.query(query) + #log.info("res %d" % len(res)) + for row in res: + #log.info(">%s<" % row.sub) + ret.append(row.sub) + return ret + + def loadsupers(self): + fullId = toFullId(self.id) + #log.info("checksupers(%s)" % self.id) + if self.ttype == VTerm.CLASS or self.ttype == VTerm.DATATYPE: + sel = "rdfs:subClassOf" + else: + sel = "rdfs:subPropertyOf" + query = """ + SELECT ?sup WHERE { + { + %s rdfs:subClassOf ?sup . + }UNION{ + %s rdfs:subPropertyOf ?sup . + } + }""" % (uriWrap(fullId),uriWrap(fullId)) + + #log.info("query %s" % query) + res = VTerm.query(query) + #log.info("res %d" % len(res)) + self.supers = [] + for row in res: + super = VTerm._getTerm(row.sup,createReference=True) + if not super: + log.debug("Failed to get term for %s" % row.sup) + continue + sortedAddUnique(self.supers,super) + if self.isEnumerationValue(): + sortedAddUnique(self.supers,VTerm._getTerm(self.parent)) + + + def loadsubs(self): + fullId = toFullId(self.id) + #log.info("checksupers(%s)" % self.id) + if self.ttype == VTerm.CLASS or self.ttype == VTerm.DATATYPE: + sel = "rdfs:subClassOf" + else: + sel = "rdfs:subPropertyOf" + query = """ + SELECT ?sub WHERE { + ?sub %s %s. 
+ }""" % (uriWrap(sel),uriWrap(fullId)) + + #log.info("query %s" % query) + res = VTerm.query(query) + #log.info("res %d" % len(res)) + self.subs = [] + for row in res: + sub = VTerm._getTerm(row.sub) + if not sub: + log.debug("Failed to get term for %s" % row.sub) + continue + sortedAddUnique(self.subs,sub) + + if self.ttype == VTerm.ENUMERATION: + subjects = self.loadSubjects("a") #Enumerationvalues have an Enumeration as a type + for child in subjects: + sub = VTerm._getTerm(str(child)) + sortedAddUnique(self.subs,sub) + + + def checkEnumerations(self): + for t in self.supers: #Is parent a schema:Enumeration + if t.id == "http://schema.org/Enumeration": + self.ttype = VTerm.ENUMERATION + return + + if VTerm.checkForEnumVal(self): + self.ttype = VTerm.ENUMERATIONVALUE + + + @staticmethod + def checkForEnumVal(term): + if term.ttype == VTerm.ENUMERATION: + return True + + for t in term.supers: + if VTerm.checkForEnumVal(t): + return True + return False + + + @staticmethod + def getTerm(termId,refresh=False,createReference=False): + log.info("getTerm(%s,%s,%s)" % (termId,refresh,createReference)) + with TERMSLOCK: + return VTerm._getTerm(termId,refresh=refresh,createReference=createReference) + + @staticmethod + def _getTerm(termId,refresh=False,createReference=False): + + if not termId: + return None + termId = str(termId) + fullId = toFullId(termId) + term = VTERMS.get(fullId,None) + #if term: + #log.info("GOT %s" % fullId) + + if term and refresh: + del VTERMS[termId] + log.info("Term '%s' found and removed" % termId) + term = None + + query = """ + SELECT ?term ?type ?label ?layer ?sup ?cat WHERE { + %s a ?type; + rdfs:label ?label. + OPTIONAL { + %s schema:isPartOf ?layer. + } + OPTIONAL { + %s rdfs:subClassOf ?sup. + } + OPTIONAL { + %s rdfs:subPropertyOf ?sup. + } + OPTIONAL { + %s schema:category ?cat. + } + + }""" % (uriWrap(fullId),uriWrap(fullId),uriWrap(fullId),uriWrap(fullId),uriWrap(fullId)) + + if not term: + #log.info("query %s" % query) + res = VTerm.query(query) + if len(res): + term = VTerm.termsFromResults(res,termId=fullId) + elif createReference: + term = VTerm(fullId) + else: + log.debug("No definition of term %s" % fullId) + return term + + @staticmethod + def termsFromResults(res,termId=None): + ret = [] + single = False + if termId: + single = True + tmp = VTerm.TmpTerm(termId) + count = 0 + for row in res: #Assumes termdefinition rows are ordered by termId + if not single: + termId = str(row.term) + if tmp.id != termId: #New term definition starts on this row + if tmp.id: + term = VTerm.createTerm(tmp) + if term: + ret.append(term) + count += 1 + tmp = VTerm.TmpTerm(termId) + tmp.types.append(row.type) + tmp.sups.append(row.sup) + tmp.tt = row.type + tmp.lab = row.label + tmp.cat = row.cat + tmp.layer = layerFromUri(row.layer) + + term = VTerm.createTerm(tmp) + if term: + ret.append(term) + count += 1 + + if single: + return ret[0] + else: + return ret + + @staticmethod + def createTerm(tmp): + if not tmp or not tmp.id: + return None + + if DATATYPEURI in tmp.types: + tmp.tt = DATATYPEURI + elif ENUMERATIONURI in tmp.sups: + tmp.tt = ENUMERATIONURI + + term = VTERMS.get(tmp.id,None) + if not term: #Already created this term ? 
+ term = VTerm(tmp.id,ttype=tmp.tt,label=tmp.lab,layer=tmp.layer,cat=tmp.cat) + return term + + class TmpTerm: + def __init__(self, id): + self.id = id + self.types = [] + self.sups = [] + self.lab = None + self.layer = None + self.cat = None + self.tt = "" + + + @staticmethod + def getAllTypes(layer=None): + return VTerm.getAllTerms(ttype = VTerm.CLASS,layer=layer) + + @staticmethod + def getAllProperties(layer=None): + return VTerm.getAllTerms(ttype = VTerm.PROPERTY,layer=layer) + + @staticmethod + def getAllTerms(ttype=None,layer=None,supressSourceLinks=False): + typsel = "" + if ttype == VTerm.CLASS: + typsel = "a <%s>;" % rdflib.RDFS.Class + elif ttype == VTerm.PROPERTY: + typsel = "a <%s>;" % rdflib.RDF.Property + elif ttype == VTerm.DATATYPE: + typsel = "a <%s>;" % DATATYPEURI + elif ttype == VTerm.ENUMERATION: + typsel = "a <%s>;" % ENUMERATIONURI + #elif ttype == VTerm.ENUMERATIONVALUE: + #typsel = "?type <%s>;" % ENUMERATIONURI + elif not ttype: + typesel = "" + else: + log.debug("Invalid type value '%s'" % ttype) + + laysel = "" + fil = "" + supress = "" + if layer: + if layer == "core": + fil = "FILTER NOT EXISTS { ?term schema:isPartOf ?x. }" + else: + laysel = "schema:isPartOf <%s>;" % uriFromLayer(layer) + + if supressSourceLinks: + supress = "FILTER NOT EXISTS { ?s dc:source ?term. }" + + + query = """SELECT ?term ?type ?label ?layer ?sup ?cat WHERE { + ?term a ?type; + %s + %s + rdfs:label ?label. + OPTIONAL { + ?term schema:isPartOf ?layer. + } + OPTIONAL { + ?term rdfs:subClassOf ?sup. + } + OPTIONAL { + ?term rdfs:subPropertyOf ?sup. + } + OPTIONAL { + ?term schema:category ?cat. + } + %s + %s + } + ORDER BY ?term + """ % (typsel,laysel,fil,supress) + + #log.info("query %s" % query) + res = VTerm.query(query) + #log.info("res %d" % len(res)) + terms = VTerm.termsFromResults(res,termId=None) + log.info("count %s VTERMS %s" % (len(terms),len(VTERMS))) + return terms + + @staticmethod + def query(q): + graph = apirdflib.queryGraph() + with RDFLIBLOCK: + ret = list(graph.query(q)) + return ret + + + +############################################# +def toFullId(termId): + + if not ':' in termId: #Includes full path or namespaces + fullId = api.SdoConfig.vocabUri() + termId + elif termId.startswith("http"): + fullId = termId + else: + sp = termId.split(':') + pre = sp[0] + id = sp[1] + fullId = "%s%s" % (uriForPrefix(pre),id) + return fullId + +def uriWrap(id): + if id.startswith('http://') or id.startswith('https://'): + id = "<%s>" % id + return id + +def sortedAddUnique(lst,term): + if term not in lst: + lst.append(term) + lst.sort(key=lambda u: u.getId(),reverse=False) + +LAYERPATTERN = None +def layerFromUri(uri): + global LAYERPATTERN + if uri: + if not LAYERPATTERN: + voc = api.SdoConfig.vocabUri() + if voc.endswith("/") or voc.endswith('#'): + voc = voc[:len(voc) - 1] + prto,root = getProtoAndRoot(voc) + LAYERPATTERN = "^%s([\w]*)\.%s" % (prto,root) + + if LAYERPATTERN: + m = re.search(LAYERPATTERN,str(uri)) + if m: + return m.group(1) + return None + +def uriFromLayer(layer=None): + voc = api.SdoConfig.vocabUri() + if voc.endswith("/") or voc.endswith('#'): + voc = voc[:len(voc) - 1] + if not layer: + return voc + prto,root = getProtoAndRoot(voc) + return "%s%s.%s" % (prto,layer,root) + +def getProtoAndRoot(uri): + m = re.search("^(http[s]*:\/\/)(.*)",uri) + if m: + prto = m.group(1) + root = m.group(2) + return prto,root + return None,None + + + +def prefixFromUri(uri): + uri = str(uri) + ns = apirdflib.getNamespaces() + for n in ns: + pref, pth = n + if 
uri.startswith(str(pth)): + return pref + log.debug("Requested unknown namespace uri %s" % uri) + return None + +def uriForPrefix(pre): + pre = str(pre) + ns = apirdflib.getNamespaces() + for n in ns: + pref, pth = n + if pre == pref: + return pth + log.debug("Requested unknown prefix %s:" % pre) + return None + + +def prefixedIdFromUri(uri): + prefix = prefixFromUri(uri) + if prefix: + return "%s:%s" % (prefix,os.path.basename(uri)) + return uri + + + + + + + \ No newline at end of file diff --git a/app.yaml b/app.yaml index 2667e3f7fd..4dff4f4525 100644 --- a/app.yaml +++ b/app.yaml @@ -21,7 +21,11 @@ inbound_services: env_variables: # TARGETSITE: 'localhost' + PRODSITEDEBUG: 'False' + #CONFIGFILE: 'https://raw.githubusercontent.com/RichardWallis/datacommons/appupdate/schema/datacomconfig.json' + CONFIGFILE: 'sdoconfig.json' + MOREBLOCK: 'True' WARMUPSTATE: 'Auto' # 'Off', 'On', 'Auto' - Off for localhost, On elsewhere STAYINEXTENTION: 'False' PAGESTOREMODE: 'CLOUDSTORE' # 'INMEM' (In instance memory), 'NDBSHARED' (NDB shared - accross instances), 'CLOUDSTORE' (Cloudstorage files) @@ -29,6 +33,7 @@ env_variables: TIMESTAMPSTOREMODE: 'CLOUDSTORE' # 'INMEM', 'NDBSHARED', 'CLOUDSTORE' # CACHE_CONTROL: 'public, max-age=600' CACHE_CONTROL: 'no-cache' + SUBDOMAINS: 'False' #'False' no subdomains, 'True' for pending.schema.org, bib.schema.org etc. handlers: @@ -72,9 +77,16 @@ handlers: - url: /docs/tree.json.* script: sdoapp.app +- url: /admin/refresh + login: required + script: sdoapp.app + +#- url: /docs + #static_dir: datacoms/docs + #application_readable: True + - url: /docs - static_dir: docs - application_readable: True + script: sdoapp.app - url: /admin static_dir: admin diff --git a/datacomconfig.json b/datacomconfig.json new file mode 100644 index 0000000000..e009052880 --- /dev/null +++ b/datacomconfig.json @@ -0,0 +1,50 @@ +{ + "@context": { + "@vocab": "http://configfiles.schema.org/" + }, + "@type": "DataFeed", + "name": "schema.dataCommons.org", + "prefix": "schemadc", + "siteurl": "https://schema.datacommons.org", + "vocaburl": "https://schema.datacommons.org/", + "atticurl": "https://attic.schema.datacommons.org/", + "dataFeedVar": [ + {"SCHEMAORGLOC": "https://raw.githubusercontent.com/schemaorg/schemaorg/master"}, + {"DATACOMMLOC": "https://raw.githubusercontent.com/RichardWallis/datacommons/appupdate/schema"}, + {"GITDATACOMMLOC": "https://raw.githubusercontent.com/schemaorg/schemaorg/master"} + ], + "dataFeedElement": [ + { + "@type": "DataDownload", + "contentLocation": "[[DATACOMMLOC]]/docs", + "contentFile": [ + "favicon.ico", + "prettify.css", + "prettify.js", + "schemaorg.css" + ], + "fileContent": "docs" + }, + { + "@type": "DataDownload", + "contentLocation": "[[DATACOMMLOC]]", + "contentFile": "datacommons.rdfa", + "fileContent": "terms" + }, + { + "@type": "DataDownload", + "contentLocation": "[[DATACOMMLOC]]/templates", + "fileContent": "templates" + }, + { + "@type": "DataDownload", + "addPrefix": "schema", + "addVocaburl": "http://schema.org/", + "contentFile": [ + "[[SCHEMAORGLOC]]/data/schema.rdfa", + "[[SCHEMAORGLOC]]/data/ext/meta/meta.rdfa" + ], + "fileContent": "terms" + } + ] +} \ No newline at end of file diff --git a/datacomconfiglocation.json b/datacomconfiglocation.json new file mode 100644 index 0000000000..df5820cd77 --- /dev/null +++ b/datacomconfiglocation.json @@ -0,0 +1,7 @@ +{ + "@context": { + "@vocab": "http://configfiles.schema.org/" + }, + "@type": "ConfigurationRedirect", + "configurationLocation": 
"https://s3.amazonaws.com/rjwPublicData/datacomconfig.json" +} diff --git a/docs/schemaorg.css b/docs/schemaorg.css index 2cca9f4d99..53c6fd670c 100644 --- a/docs/schemaorg.css +++ b/docs/schemaorg.css @@ -453,6 +453,22 @@ a.ext:hover { background-color: #0000cc; text-decoration: none; } +/* External links */ +a.externlink:link { + color: #000 !important; + border-bottom: dotted 1px #000 !important; + text-decoration: none; +} +a.externlink:visited { + color: #000 !important; + border-bottom: dotted 1px #000 !important; + text-decoration: none; +} +a.externlink:hover { + color: #fff !important; + background-color: #000; + text-decoration: none; +} /* Attic extension links overriding default 'ext' values */ a.ext.ext-attic:link{ @@ -540,6 +556,6 @@ a.ext.ext-attic:hover { @media (min-width: 960px) { #mainContent, #footer, .wrapper { max-width: 960px; - padding: 0 1em; + padding: 0 1em 1em 1em } } diff --git a/parsers.py b/parsers.py index acaf238ffc..f83fb027e8 100755 --- a/parsers.py +++ b/parsers.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -# +# coding=UTF-8 import webapp2 +import urllib import re from google.appengine.ext import db from google.appengine.ext import blobstore @@ -65,9 +66,16 @@ def parse (self, file): count = 0 start = datetime.datetime.now() - fd = codecs.open(file, 'r', encoding="utf8") - content = fd.read() - fd.close() + if file.startswith("file://"): + file = file[7:] + + if "://" in file: + content = urllib.urlopen(file).read() + else: + fd = codecs.open(file, 'r', encoding="utf8") + content = fd.read() + fd.close() + lines = re.split('\n|\r', content) for line in lines: diff --git a/schemaorg.yaml b/schemaorg.yaml index 4dcd9b07f7..d5b825de93 100644 --- a/schemaorg.yaml +++ b/schemaorg.yaml @@ -19,13 +19,16 @@ inbound_services: env_variables: TARGETSITE: 'schema.org' PRODSITEDEBUG: 'False' + CONFIGFILE: 'sdoconfig.json' + MOREBLOCK: 'True' WARMUPSTATE: 'Auto' # 'Off', 'On', 'Auto' - Off for localhost, On elsewhere STAYINEXTENTION: 'False' PAGESTOREMODE: 'CLOUDSTORE' # 'INMEM' (In instance memory), 'NDBSHARED' (NDB shared - accross instances), 'CLOUDSTORE' (Cloudstorage files) EXAMPLESTOREMODE: 'INMEM' # 'INMEM', 'NDBSHARED' TIMESTAMPSTOREMODE: 'CLOUDSTORE' # 'INMEM', 'NDBSHARED', 'CLOUDSTORE' CACHE_CONTROL: 'public, max-age=600' -# CACHE_CONTROL: 'no-cache' + #CACHE_CONTROL: 'no-cache' + SUBDOMAINS: 'False' #'False' no subdomains, 'True' for pending.schema.org, bib.schema.org etc. handlers: @@ -101,6 +104,14 @@ handlers: # upload: static/index.html # application_readable: True +- url: /admin/refresh + login: required + script: sdoapp.app + +- url: /admin + static_dir: admin + application_readable: True + - url: /search_files static_dir: static/search_files secure: always diff --git a/scripts/buildTermConfig.sh b/scripts/buildTermConfig.sh new file mode 100755 index 0000000000..1ff1eafe4d --- /dev/null +++ b/scripts/buildTermConfig.sh @@ -0,0 +1,129 @@ +#!/bin/sh +PWD=`pwd` +PROG="`basename $0`" +if [ `basename $PWD` != "schemaorg" ] +then + echo "$PROG: Not in the schemaorg directory! 
Aborting" + exit 1 +fi + +TARGET="${PWD}/sdoconfigTermsData.json" +LOCVARIABLE='[[SCHEMAORGLOC]]/' +Header="{ + \"@context\": { + \"@vocab\": \"http://configfiles.schema.org/\" + }, + \"@type\": \"DataFeed\", + \"name\": \"schema.org\"," + +function doHead { + printf ",\n { + \"@type\": \"DataDownload\", + \"fileContent\": \"${2}\", + \"contentLocation\": \"${LOCVARIABLE}${1}\", + \"contentFile\": [\n" >> $TARGET + + } + +function doExtension { + dir=$1 + ( + cd $dir + count=0 + output=0 + for rdf in *.rdfa + do + if [ "$rdf" != '*.rdfa' ] + then + sep=",\n" + if [ $count -eq 0 ] + then + doHead $dir "terms" + sep="" + output=1 + fi + printf "$sep \"$rdf\"" >> $TARGET + count=$((count+1)) + fi + done + if [ $count -ne 0 ] + then + printf "\n ]\n + }" >> $TARGET + fi + + count=0 + for ex in *-examples.txt + do + if [ "$ex" != '*-examples.txt' ] + then + sep=",\n" + if [ $count -eq 0 ] + then + doHead $dir "examples" + output=1 + sep="" + fi + printf "$sep \"$ex\"" >> $TARGET + count=$((count+1)) + fi + done + if [ $count -ne 0 ] + then + printf "\n ]\n + }" >> $TARGET + fi + ) + return $output +} + +function doDocs { + echo " { + \"@type\": \"DataDownload\", + \"fileContent\": \"docs\", + \"contentLocation\": \"${LOCVARIABLE}docs\", + \"contentFile\": [" >> $TARGET + + + count=0 + for i in `(cd docs;find * -type f -print)` + do + sep=",\n" + if [ $count -eq 0 ] + then + sep="" + fi + count=$((count+1)) + printf "$sep \"$i\"" >> $TARGET + + + done + printf '\n ] + }' >> $TARGET + +} + +function doElements { + echo " \"dataFeedElement\": [" >> $TARGET + doDocs + doExtension data + for ext in `ls data/ext` + do + doExtension "data/ext/$ext" + done + echo '\n ] +}' >> $TARGET + +} + +printf "{ + \"@context\": { + \"@vocab\": \"http://configfiles.schema.org/\" + }, + \"@type\": \"DataFeed\", + \"name\": \"schema.org\"," > $TARGET +echo >> $TARGET + +doElements + + diff --git a/sdoapp.py b/sdoapp.py index 147dea107b..2cebb2b6fd 100755 --- a/sdoapp.py +++ b/sdoapp.py @@ -9,6 +9,8 @@ import os import re import webapp2 +import urllib2 +import mimetypes import jinja2 import logging import StringIO @@ -43,19 +45,25 @@ from testharness import * from sdoutil import * from api import * +from apirdfterm import * from apirdflib import load_graph, getNss, getRevNss, buildSingleTermGraph, serializeSingleTermGrapth -from apirdflib import countTypes, countProperties, countEnums +from apirdflib import countTypes, countProperties, countEnums, graphFromFiles, getPathForPrefix, getPrefixForPath, rdfgettops from apimarkdown import Markdown from sdordf2csv import sdordf2csv +CONFIGFILE = os.environ.get("CONFIGFILE","sdoconfig.json") +SdoConfig.load(CONFIGFILE) +if not SdoConfig.valid: + log.error("Invalid config from '%s' or its includes !!" % CONFIGFILE) + os.exit() SCHEMA_VERSION="3.4" if not getInTestHarness(): GAE_APP_ID = app_identity.get_application_id() GAE_VERSION_ID = modules.get_current_version_name() - + FEEDBACK_FORM_BASE_URL='https://docs.google.com/a/google.com/forms/d/1krxHlWJAO3JgvHRZV9Rugkr9VYnMdrI10xbGsWt733c/viewform?entry.1174568178&entry.41124795={0}&entry.882602760={1}' # {0}: term URL, {1} category of term. @@ -76,16 +84,47 @@ # webschemadev # known extension (not skiplist'd, eg. 
demo1 on schema.org) -JINJA_ENVIRONMENT = jinja2.Environment( - loader=jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')), +TEMPLATESDIR = SdoConfig.templateDir() +FileBasedTemplates = True + +def urlTemplateLoader(name): + log.info("TEMPLATE LOADER LOOKING FOR: %s" % name) + url = TEMPLATESDIR + "/" + name + log.info("URL: %s" % url) + try: + fd = urllib2.urlopen(url) + res = fd.read() + except urllib2.URLError as e: + log.info("URLError %s" % e) + return None + return res + + + +if TEMPLATESDIR: + if TEMPLATESDIR.startswith("file://"): + TEMPLATESDIR = TEMPLATESDIR[7:] + if "://" in TEMPLATESDIR: + FileBasedTemplates = False +else: + TEMPLATESDIR = os.path.join(os.path.dirname(__file__), 'templates') + log.info("No Templates directory defined - defaulting to %s" % TEMPLATESDIR) + +if FileBasedTemplates: + JINJA_ENVIRONMENT = jinja2.Environment(loader=jinja2.FileSystemLoader(TEMPLATESDIR), + extensions=['jinja2.ext.autoescape'], autoescape=True, cache_size=0) +else: + JINJA_ENVIRONMENT = jinja2.Environment(loader=jinja2.FunctionLoader(urlTemplateLoader), extensions=['jinja2.ext.autoescape'], autoescape=True, cache_size=0) + + CANONICALSCHEME = "http" ENABLE_JSONLD_CONTEXT = True ENABLE_CORS = True ENABLE_HOSTED_EXTENSIONS = True DISABLE_NDB_FOR_LOCALHOST = True - +ENABLEMOREINFO = True WORKINGHOSTS = ["schema.org","schemaorg.appspot.com", "webschemas.org","webschemas-g.appspot.com", "sdo-test.appspot.com", @@ -116,6 +155,18 @@ noindexpages = True +SUBDOMAINS = True +subs = os.environ.get("SUBDOMAINS",None) +if subs: + if subs.lower() == "true": + SUBDOMAINS = True + elif subs.lower() == "false": + SUBDOMAINS = False + else: + log.info("SUBDOMAINS set to invalid value %s - defaulting to %s" %(subs,SUBDOMAINS)) +log.info("SUBDOMAINS set to %s" % SUBDOMAINS) + + ############# Warmup Control ######## WarmedUp = False WarmupState = "Auto" @@ -193,15 +244,6 @@ def tick(): #Keep memcache values fresh so they don't expire memcache.set(key="SysStart", value=systarttime) memcache.set(key="static-version", value=appver) -TIMESTAMPSTOREMODE = "CLOUDSTORE" -if "TIMESTAMPSTOREMODE" in os.environ: - TIMESTAMPSTOREMODE = os.environ["TIMESTAMPSTOREMODE"] - log.info("TIMESTAMPSTOREMODE set to %s from .yaml file" % TIMESTAMPSTOREMODE) -log.info("Initialised with TIMESTAMPSTOREMODE set to %s" % TIMESTAMPSTOREMODE) - -class TimestampEntity(ndb.Model): - content = ndb.TextProperty() - def check4NewVersion(): ret = False dep = None @@ -214,66 +256,33 @@ def check4NewVersion(): f.close() except Exception as e: log.info("ERROR reading: %s" % e) - pass + pass if getInTestHarness() or "localhost" in os.environ['SERVER_NAME']: #Force new version logic for local versions and tests - ret = True + ret = True log.info("Assuming new version for local/test instance") else: - - if TIMESTAMPSTOREMODE == "INMEM": - log.info("deployed-timestamp: '%s' mem version: '%s'" % (dep, memcache.get("deployed-timestamp"))) - if dep != memcache.get("deployed-timestamp"): - ret = True - - elif TIMESTAMPSTOREMODE == "NDBSHARED": - ent = TimestampEntity.get_by_id("deployed-timestamp") - val = "" - if ent: - val = ent.content - log.info("deployed-timestamp: '%s' ndbshared version: '%s'" % (dep, val)) - if dep != val: - ret = True - - elif TIMESTAMPSTOREMODE == "CLOUDSTORE": - val = cloudstoreGetContent("deployed-timestamp.txt", ".status") - log.info("deployed-timestamp: '%s' cloudstore version: '%s'" % (dep, val)) - if dep != val: - ret = True + stored,info = getTimestampedInfo("deployed-timestamp") + if stored != 
dep: + ret = True return ret, dep - + def storeNewTimestamp(stamp=None): - storeTimestampInfo("deployed-timestamp",stamp) + storeTimestampedInfo("deployed-timestamp",stamp) def storeInitialisedTimestamp(stamp=None): - storeTimestampInfo("initialised-timestamp",stamp) - -def storeTimestampInfo(tag,stamp=None): - if not stamp: - stamp = datetime.datetime.utcnow().strftime("%a %d %b %Y %H:%M:%S UTC") - - if TIMESTAMPSTOREMODE == "INMEM": - log.info("Storing %s version: '%s'" % (tag,stamp)) - memcache.set(key=tag,value=stamp) - - elif TIMESTAMPSTOREMODE == "NDBSHARED": - log.info("Storing ndbshared %s version: '%s'" % (tag,stamp)) - ent = TimestampEntity(id = tag, content = stamp) - ent.put() - - elif TIMESTAMPSTOREMODE == "CLOUDSTORE": - log.info("Storing cloudstore %s version: '%s'" % (tag,stamp)) - val = cloudstoreStoreContent("%s.txt" % tag, stamp, ".status", private=True) - - + storeTimestampedInfo("initialised-timestamp",stamp) + + + if getInTestHarness(): load_examples_data(ENABLED_EXTENSIONS) else: #Ensure clean start for any memcached or ndb store values... - + changed, dep = check4NewVersion() if changed: #We are a new instance of the app - msg = "New app instance [%s:%s] detected - FLUSHING CACHES. (deploy_timestamp='%s')" % (GAE_VERSION_ID,GAE_APP_ID,dep) + msg = "New app instance [%s:%s] detected - FLUSHING CACHES. (deploy_timestamp='%s')\nLoaded Config file from: %s" % (GAE_VERSION_ID,GAE_APP_ID,dep,CONFIGFILE) memcache.flush_all() storeNewTimestamp(dep) @@ -366,13 +375,16 @@ def toHTML(self): def toJSON(self): return self.txt - def traverseForHTML(self, node, depth = 1, hashorslash="/", layers='core', idprefix="", urlprefix="", traverseAllLayers=False, buff=None): + def traverseForHTML(self, term, depth = 1, hashorslash="/", layers='core', idprefix="", urlprefix="", traverseAllLayers=False, buff=None): """Generate a hierarchical tree view of the types. hashorslash is used for relative link prefixing.""" - #log.info("traverseForHTML: node=%s hashorslash=%s" % ( node.id, hashorslash )) + #log.info("traverseForHTML: node=%s hashorslash=%s" % ( term, hashorslash )) + + if not term: + return False - if node.superseded(layers=layers): + if term.superseded() or term.getLayer() == ATTIC: return False localBuff = False @@ -380,17 +392,13 @@ def traverseForHTML(self, node, depth = 1, hashorslash="/", layers='core', idpre localBuff = True buff = StringIO.StringIO() - home = node.getHomeLayer() - gotOutput = False + home = term.getLayer() + gotOutput = True - if not traverseAllLayers and home not in layers: - return False - else: - gotOutput = True if home in ENABLED_EXTENSIONS and home != getHostExt(): urlprefix = makeUrl(home) - + extclass = "" extflag = "" tooltip="" @@ -400,12 +408,12 @@ def traverseForHTML(self, node, depth = 1, hashorslash="/", layers='core', idpre tooltip = "title=\"Extended schema: %s.schema.org\" " % home # we are a supertype of some kind - subTypes = node.GetImmediateSubtypes(layers=ALL_LAYERS) - idstring = idprefix + node.id + subTypes = term.getSubs() + idstring = idprefix + term.getId() if len(subTypes) > 0: # and we haven't been here before - if node.id not in self.visited: - self.emit2buff(buff, ' %s
  • %s%s' % (" " * 4 * depth, idstring, tooltip, extclass, urlprefix, hashorslash, node.id, node.id, extflag) ) + if term.getId() not in self.visited: + self.emit2buff(buff, ' %s
  • %s%s' % (" " * 4 * depth, idstring, tooltip, extclass, urlprefix, hashorslash, term.getId(), term.getId(), extflag) ) self.emit2buff(buff, ' %s
      ' % (" " * 4 * depth)) # handle our subtypes @@ -418,27 +426,22 @@ def traverseForHTML(self, node, depth = 1, hashorslash="/", layers='core', idpre self.emit2buff(buff, ' %s
    ' % (" " * 4 * depth)) else: # we are a supertype but we visited this type before, e.g. saw Restaurant via Place then via Organization - seencount = self.visited.count(node.id) + seencount = self.visited.count(term.getId()) idstring = "%s%s" % (idstring, "+" * seencount) - seen = ' + ' % node.id - self.emit2buff(buff, ' %s
  • %s%s%s' % (" " * 4 * depth, idstring, tooltip, extclass, urlprefix, hashorslash, node.id, node.id, extflag, seen) ) + seen = ' + ' % term.getId() + self.emit2buff(buff, ' %s
  • %s%s%s' % (" " * 4 * depth, idstring, tooltip, extclass, urlprefix, hashorslash, term.getId(), term.getId(), extflag, seen) ) # leaf nodes if len(subTypes) == 0: - if home in layers: - gotOutput = True - seen = "" - if node.id in self.visited: - seencount = self.visited.count(node.id) - idstring = "%s%s" % (idstring, "+" * seencount) - seen = ' + ' % node.id - - self.emit2buff(buff, '%s
  • %s%s%s' % (" " * depth, idstring, tooltip, extclass, urlprefix, hashorslash, node.id, node.id, extflag, seen )) - #else: - #self.visited[node.id] = True # never... - # we tolerate "VideoGame" appearing under both Game and SoftwareApplication - # and would only suppress it if it had its own subtypes. Seems legit. - - self.visited.append(node.id) # remember our visit + gotOutput = True + seen = "" + if term.getId() in self.visited: + seencount = self.visited.count(term.getId()) + idstring = "%s%s" % (idstring, "+" * seencount) + seen = ' + ' % term.getId() + + self.emit2buff(buff, '%s
  • %s%s%s' % (" " * depth, idstring, tooltip, extclass, urlprefix, hashorslash, term.getId(), term.getId(), extflag, seen )) + + self.visited.append(term.getId()) # remember our visit self.emit2buff(buff, ' %s
  • ' % (" " * 4 * depth) ) if localBuff: @@ -450,6 +453,9 @@ def traverseForHTML(self, node, depth = 1, hashorslash="/", layers='core', idpre # based on http://danbri.org/2013/SchemaD3/examples/4063550/hackathon-schema.js - thanks @gregg, @sandro def traverseForJSONLD(self, node, depth = 0, last_at_this_level = True, supertype="None", layers='core'): emit_debug = False + if not node or not node.id: + log.error("Error None value passed to traverseForJSONLD()") + return if node.id in self.visited: # self.emit("skipping %s - already visited" % node.id) return @@ -505,30 +511,29 @@ def encode4json(s): -def GetExamples(node, layers='core'): +def GetExamples(term, layers='core'): """Returns the examples (if any) for some Unit node.""" - return LoadNodeExamples(node,layers) + return LoadTermExamples(term) -def GetExtMappingsRDFa(node, layers='core'): +def GetExtMappingsRDFa(term): """Self-contained chunk of RDFa HTML markup with mappings for this term.""" - if (node.isClass()): - equivs = GetTargets(Unit.GetUnit("owl:equivalentClass"), node, layers=layers) + equivs = term.getEquivalents() + if (term.isClass()): if len(equivs) > 0: markup = '' for c in equivs: - if (c.id.startswith('http')): - markup = markup + "\n" % c.id + if (c.startswith('http')): + markup = markup + "\n" % c else: - markup = markup + "\n" % c.id + markup = markup + "\n" % c return markup - if (node.isAttribute()): - equivs = GetTargets(Unit.GetUnit("owl:equivalentProperty"), node, layers) + if (term.isProperty()): if len(equivs) > 0: markup = '' for c in equivs: - markup = markup + "\n" % c.id + markup = markup + "\n" % c return markup return "" @@ -540,8 +545,8 @@ class ShowUnit (webapp2.RequestHandler): def emitCacheHeaders(self): """Send cache-related headers via HTTP.""" if "CACHE_CONTROL" in os.environ: - log.info("Setting http cache control to '%s' from .yaml" % os.environ["CACHE_CONTROL"]) - self.response.headers['Cache-Control'] = os.environ["CACHE_CONTROL"] + log.info("Setting http cache control to '%s' from .yaml" % os.environ["CACHE_CONTROL"]) + self.response.headers['Cache-Control'] = os.environ["CACHE_CONTROL"] else: self.response.headers['Cache-Control'] = "public, max-age=600" # 10m self.response.headers['Vary'] = "Accept, Accept-Encoding" @@ -551,44 +556,29 @@ def write(self, str): self.outputStrings.append(str) - def moreInfoBlock(self, node, layer='core'): + def moreInfoBlock(self, term, layer='core'): # if we think we have more info on this term, show a bulleted list of extra items. 
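# A minimal sketch of the revisit bookkeeping traverseForHTML() uses above:
# a term reached via several supertypes is listed each time with one "+" per
# earlier visit, and its subtree is expanded only on the first encounter.
# Node and render are illustrative names, not the patch's own.
class Node(object):
    def __init__(self, id, subs=None):
        self.id = id
        self.subs = subs or []

def render(node, visited, depth=0):
    marker = "+" * visited.count(node.id)  # e.g. "Restaurant+" on a 2nd visit
    print("%s%s%s" % ("  " * depth, node.id, marker))
    first_visit = node.id not in visited
    visited.append(node.id)                # remember our visit
    if first_visit:                        # expand the subtree once only
        for child in node.subs:
            render(child, visited, depth + 1)

# e.g. render(Node("Thing", [Node("Place", [Node("Restaurant")])]), [])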
+ moreblock = os.environ.get("MOREBLOCK") + if not moreblock or (moreblock.lower() == "false"): + return "" + # defaults bugs = ["No known open issues."] mappings = ["No recorded schema mappings."] items = bugs + mappings - nodetype="Misc" - if node.isEnumeration(): - nodetype = "enumeration" - elif node.isDataType(layers=layer): - nodetype = "datatype" - elif node.isClass(layers=layer): - nodetype = "type" - elif node.isAttribute(layers=layer): - nodetype = "property" - elif node.isEnumerationValue(layers=layer): - nodetype = "enumeratedvalue" - - feedback_url = FEEDBACK_FORM_BASE_URL.format("http://schema.org/{0}".format(node.id), nodetype) + feedback_url = FEEDBACK_FORM_BASE_URL.format(term.getUri(), term.getType()) items = [ "Leave public feedback on this term 💬".format(feedback_url), - "Check for open issues.".format(node.id) + "Check for open issues.".format(term.getId()) ] - - for l in all_terms[node.id]: - l = l.replace("#","") - if l == "core": - ext = "" - else: - ext = "extension " - if ENABLE_HOSTED_EXTENSIONS: - items.append("'{0}' is mentioned in {1}layer: {3}".format( node.id, ext, makeUrl(l,node.id), l )) + if term.getLayer() != "core": + items.append("'{0}' is mentioned in the {2} extension.".format( term.getId(), makeUrl(term.getLayer(),"",full=True), term.getLayer() )) moreinfo = """
    @@ -604,15 +594,32 @@ def moreInfoBlock(self, node, layer='core'): moreinfo += "\n
    \n\n" return moreinfo + def getParentNames(self, nodeName, layers): + + ret = [nodeName] + node = Unit.GetUnit(nodeName) + if node and node.id: + sc = Unit.GetUnit("rdfs:subClassOf") + targs = GetTargets(sc, node, layers=layers) + if targs: + for p in targs: + ret.extend(self.getParentNames(p.id, layers=layers)) + + + return ret + def GetParentStack(self, node, layers='core'): """Returns a hiearchical structured used for site breadcrumbs.""" - thing = Unit.GetUnit("Thing") + thing = Unit.GetUnit("schema:Thing") #log.info("GetParentStack for: %s",node) + if not node: + return + if (node not in self.parentStack): self.parentStack.append(node) if (Unit.isAttribute(node, layers=layers)): - self.parentStack.append(Unit.GetUnit("Property")) + self.parentStack.append(Unit.GetUnit("schema:Property")) self.parentStack.append(thing) sc = Unit.GetUnit("rdfs:subClassOf") @@ -631,7 +638,7 @@ def GetParentStack(self, node, layers='core'): self.parentStack.append(thing) - def ml(self, node, label='', title='', prop='', hashorslash='/'): + def ml(self, term, label='', title='', prop='', hashorslash='/'): """ml ('make link') Returns an HTML-formatted link to the class or property URL @@ -639,9 +646,14 @@ def ml(self, node, label='', title='', prop='', hashorslash='/'): * title = optional title attribute on the link * prop = an optional property value to apply to the A element """ + if not term: + return "" + + if ":" in term.getId(): + return self.external_ml(term) if label=='': - label = node.id + label = term.getLabel() if title != '': title = " title=\"%s\"" % (title) if prop: @@ -649,19 +661,19 @@ def ml(self, node, label='', title='', prop='', hashorslash='/'): rdfalink = '' if prop: - rdfalink = '' % (prop,label) + rdfalink = '' % (prop,api.SdoConfig.vocabUri(),label) - if(node.id == "DataType"): #Special case - return "%s%s" % (rdfalink,node.id, node.id) + if(term.id == "DataType"): #Special case + return "%s%s" % (rdfalink,term.getId(), term.getId()) urlprefix = "." - home = node.getHomeLayer() + home = term.getLayer() -# if home in ENABLED_EXTENSIONS and home != getHostExt(): -# port = "" -# if getHostPort() != "80": -# port = ":%s" % getHostPort() -# urlprefix = makeUrl(home,full=True) + if home in ENABLED_EXTENSIONS and home != getHostExt(): + port = "" + if getHostPort() != "80": + port = ":%s" % getHostPort() + urlprefix = makeUrl(home,full=True) extclass = "" extflag = "" @@ -672,9 +684,41 @@ def ml(self, node, label='', title='', prop='', hashorslash='/'): extflag = EXTENSION_SUFFIX tooltip = "title=\"Defined in extension: %s.schema.org\" " % home - return "%s%s%s" % (rdfalink,tooltip, extclass, urlprefix, hashorslash, node.id, title, label, extflag) + return "%s%s%s" % (rdfalink,tooltip, extclass, urlprefix, hashorslash, term.getId(), title, label, extflag) #return "%s%s" % (tooltip, extclass, urlprefix, hashorslash, node.id, prop, title, label, extflag) + def external_ml(self, term): + #log.info("EXTERNAL!!!! 
%s %s " % (term.getLabel(),term.getId())) + + name = term.getId() + + if not ":" in name: + return name + + if name.startswith("http") and '#' in name: + x = name.split("#") + path = x[0] + "#" + val = x[1] + voc = getPrefixForPath(path) + + + elif name.startswith("http"): + val = os.path.basename(name) + path = name[:len(name) - len(val)] + voc = getPrefixForPath(path) + + else: + x = name.split(":") + voc = x[0] + val = x[1] + path = getPathForPrefix(voc) + if path: + if not path.endswith("#") and not path.endswith("/"): + path += "/" + return "%s:%s" % (path,val,voc,val) + + + def makeLinksFromArray(self, nodearray, tooltip=''): """Make a comma separate list of links via ml() function. @@ -685,75 +729,82 @@ def makeLinksFromArray(self, nodearray, tooltip=''): hyperlinks.append(self.ml(f, f.id, tooltip)) return (", ".join(hyperlinks)) - def emitUnitHeaders(self, node, layers='core'): + def emitUnitHeaders(self, term, layers='core'): """Write out the HTML page headers for this node.""" self.write("

    ") - self.write(node.id) + self.write(term.getLabel()) self.write("

    \n") - home = node.home + home = term.getLayer() if home != "core" and home != "": + exthome = "%s.schema.org" % home + exthomeurl = uri = makeUrl(home,"/",full=True) if home == ATTIC: - self.write("Defined in the %s.schema.org archive area.
    Use of this term is not advised
    " % home) + self.write("Defined in the %s archive area.
    Use of this term is not advised
    " % (exthomeurl,exthome)) else: - self.write("Defined in the %s.schema.org extension.
    " % home) - self.emitCanonicalURL(node) + self.write("Defined in the %s extension.
    " % (exthomeurl,exthome)) + self.emitCanonicalURL(term) - self.BreadCrumbs(node, layers=self.appropriateLayers(layers=layers)) + self.BreadCrumbs(term) - comment = GetComment(node, layers) + comment = term.getComment() self.write("
    %s
    \n\n" % (comment) + "\n") - usage = node.UsageStr() + usage = GetUsage(term.getId()) if len(usage): self.write("
    Usage: %s
    \n\n" % (usage) + "\n") - self.write(self.moreInfoBlock(node)) - - if (node.isClass(layers=layers) and not node.isDataType(layers=layers) and node.id != "DataType"): - self.write("\n \n \n \n \n\n") + if ENABLEMOREINFO: + self.write(self.moreInfoBlock(term)) - def emitCanonicalURL(self,node): - cURL = "%s://schema.org/%s" % (CANONICALSCHEME,node.id) - if CANONICALSCHEME == "http": - other = "https" + def emitCanonicalURL(self,term): + site = SdoConfig.vocabUri() + if site != "http://schema.org": + cURL = "%s%s" % (site,term.getId()) + self.write(" Canonical URL: %s " % (cURL)) else: - other = "http" - sa = '\n' % (other,node.id) - self.write(" Canonical URL: %s " % (cURL, cURL)) - #self.write(" (?)") - self.write(sa) + cURL = "%s://schema.org/%s" % (CANONICALSCHEME,term.getId()) + if CANONICALSCHEME == "http": + other = "https" + else: + other = "http" + sa = '\n' % (other,term.getId()) + + self.write(" Canonical URL: %s " % (cURL, cURL)) + self.write(sa) # Stacks to support multiple inheritance crumbStacks = [] - def BreadCrumbs(self, node, layers): + def BreadCrumbs(self, term): self.crumbStacks = [] cstack = [] self.crumbStacks.append(cstack) - self.WalkCrumbs(node,cstack,layers=layers) - if (node.isAttribute(layers=layers)): - cstack.append(Unit.GetUnit("Property")) - cstack.append(Unit.GetUnit("Thing")) - elif(node.isDataType(layers=layers) and node.id != "DataType"): - cstack.append(Unit.GetUnit("DataType")) + self.WalkCrumbs(term,cstack) + + if term.isProperty(): + cstack.append(VTerm.getTerm("http://schema.org/Property")) + cstack.append(VTerm.getTerm("http://schema.org/Thing")) + elif term.isDataType(): + cstack.append(VTerm.getTerm("http://schema.org/DataType")) - enuma = node.isEnumerationValue(layers=layers) + enuma = term.isEnumerationValue() crumbsout = [] for row in range(len(self.crumbStacks)): thisrow = "" - if(":" in self.crumbStacks[row][len(self.crumbStacks[row])-1].id): + targ = self.crumbStacks[row][len(self.crumbStacks[row])-1] + if not targ: continue count = 0 while(len(self.crumbStacks[row]) > 0): propertyval = None n = self.crumbStacks[row].pop() - if((len(self.crumbStacks[row]) == 1) and + if((len(self.crumbStacks[row]) == 1) and n and not ":" in n.id) : #penultimate crumb that is not a non-schema reference - if node.isAttribute(layers=layers): - if n.isAttribute(layers=layers): #Can only be a subproperty of a property + if term.isProperty(): + if n.isProperty(): #Can only be a subproperty of a property propertyval = "rdfs:subPropertyOf" else: propertyval = "rdfs:subClassOf" @@ -763,8 +814,6 @@ def BreadCrumbs(self, node, layers): thisrow += " :: " else: thisrow += " > " - elif n.id == "Class": # If Class is first breadcrum suppress it - continue count += 1 thisrow += "%s" % (self.ml(n,prop=propertyval)) crumbsout.append(thisrow) @@ -779,35 +828,25 @@ def BreadCrumbs(self, node, layers): self.write("\n") #Walk up the stack, appending crumbs & create new (duplicating crumbs already identified) if more than one parent found - def WalkCrumbs(self, node, cstack, layers): - if "http://" in node.id or "https://" in node.id: #Suppress external class references + def WalkCrumbs(self, term, cstack): + if ":" in term.getId(): #Suppress external class references return - cstack.append(node) + cstack.append(term) tmpStacks = [] tmpStacks.append(cstack) - subs = [] - - if(node.isDataType(layers=layers)): - #subs = GetTargets(Unit.GetUnit("rdf:type"), node, layers=layers) - subs += GetTargets(Unit.GetUnit("rdfs:subClassOf"), node, layers=layers) - elif 
node.isClass(layers=layers): - subs = GetTargets(Unit.GetUnit("rdfs:subClassOf"), node, layers=layers) - elif(node.isAttribute(layers=layers)): - subs = GetTargets(Unit.GetUnit("rdfs:subPropertyOf"), node, layers=layers) - else: - subs = GetTargets(Unit.GetUnit("rdf:type"), node, layers=layers)# Enumerations are classes that have no declared subclasses - - for i in range(len(subs)): + supers = term.getSupers() + + for i in range(len(supers)): if(i > 0): t = cstack[:] tmpStacks.append(t) self.crumbStacks.append(t) x = 0 - for p in subs: - self.WalkCrumbs(p,tmpStacks[x],layers=layers) - x += 1 + for p in supers: + self.WalkCrumbs(p,tmpStacks[x]) + x += 1 def emitSimplePropertiesPerType(self, cl, layers="core", out=None, hashorslash="/"): """Emits a simple list of properties applicable to the specified type.""" @@ -816,7 +855,7 @@ def emitSimplePropertiesPerType(self, cl, layers="core", out=None, hashorslash=" out = self out.write("
      ") - for prop in sorted(GetSources( Unit.GetUnit("domainIncludes"), cl, layers=layers), key=lambda u: u.id): + for prop in sorted(GetSources( Unit.GetUnit("schema:domainIncludes"), cl, layers=layers), key=lambda u: u.id): if (prop.superseded(layers=layers)): continue out.write("
    • %s
    • " % ( hashorslash, prop.id, prop.id )) @@ -829,53 +868,63 @@ def emitSimplePropertiesIntoType(self, cl, layers="core", out=None, hashorslash= out = self out.write("
        ") - for prop in sorted(GetSources( Unit.GetUnit("rangeIncludes"), cl, layers=layers), key=lambda u: u.id): + for prop in sorted(GetSources( Unit.GetUnit("schema:rangeIncludes"), cl, layers=layers), key=lambda u: u.id): if (prop.superseded(layers=layers)): continue out.write("
      • %s
      • " % ( hashorslash, prop.id, prop.id )) out.write("
      \n\n") + + def hideAtticTerm(self,term): + if getHostExt() == ATTIC: + return False + if term.inLayers([ATTIC]): + return True + return False - def ClassProperties (self, cl, subclass=False, layers="core", out=None, hashorslash="/"): + def ClassProperties (self, cl, subclass=False, term=None, out=None, hashorslash="/"): """Write out a table of properties for a per-type page.""" if not out: out = self - + propcount = 0 headerPrinted = False - di = Unit.GetUnit("domainIncludes") - ri = Unit.GetUnit("rangeIncludes") + props = cl.getProperties() - for prop in sorted(GetSources(di, cl, layers=layers), key=lambda u: u.id): - if (prop.superseded(layers=layers)): + for prop in props: + if prop.superseded() or self.hideAtticTerm(prop): continue - olderprops = prop.supersedes_all(layers=layers) - inverseprop = prop.inverseproperty(layers=layers) - ranges = sorted(GetTargets(ri, prop, layers=layers),key=lambda u: u.id) - doms = sorted(GetTargets(di, prop, layers=layers), key=lambda u: u.id) - comment = GetComment(prop, layers=layers) + olderprops = prop.getSupersedes() + inverseprop = prop.getInverseOf() + ranges = prop.getRanges() + doms = prop.getDomains() + comment = prop.getComment() + if ":" in prop.id and comment == "-": + comment = "Term from external vocabulary" + if not getAppVar("tableHdr"): + setAppVar("tableHdr",True) + if (term.isClass() and not term.isDataType() and term.id != "DataType"): + self.write("
    PropertyExpected TypeDescription
    \n \n \n \n \n\n") + self.tablehdr = True if (not headerPrinted): class_head = self.ml(cl) - if subclass: - class_head = self.ml(cl) out.write("\n \n \n\n\n\n " % (class_head)) headerPrinted = True - out.write("\n \n \n " % (prop.id, self.ml(prop))) + out.write("\n \n \n " % (prop.getUri(), self.ml(prop))) out.write("") out.write("\n \n \n \n\n") for o in olderprops: - c = ShortenOnSentence(StripHtmlTags( GetComment(o,layers=layers) ),60) - tt = "%s: ''%s''" % ( o.id, c) - out.write("\n \n" % (self.ml(o, o.id, tt))) - log.info("Super %s" % o.id) + c = ShortenOnSentence(StripHtmlTags( o.getComment() ),60) + tt = "%s: ''%s''" % ( o.getId(), c) + out.write("\n \n" % (self.ml(o, o.getId(), tt))) + log.info("Super %s" % o.getId()) out.write("\n
    PropertyExpected TypeDescription
    Properties from %s
    \n\n%s\n
    \n\n%s\n \n") first_range = True - for r in sorted(ranges,key=lambda u: u.id): + for r in ranges: if (not first_range): out.write(" or
    ") first_range = False out.write(self.ml(r, prop='rangeIncludes')) out.write(" ") for d in doms: - out.write("" % d.id) + out.write("" % d.getUri()) out.write("
    %s" % (comment)) if (olderprops and len(olderprops) > 0): - olderprops = sorted(olderprops,key=lambda u: u.id) olderlinks = ", ".join([self.ml(o) for o in olderprops]) out.write(" Supersedes %s." % olderlinks ) if (inverseprop != None): @@ -923,7 +972,7 @@ def emitClassExtensionSuperclasses (self, cl, layers="core", out=None): if cl.id == "DataType": self.write("

    Subclass of:

    ") else: - self.write("

    Available supertypes defined in extensions

    ") + self.write("

    Available supertypes defined elsewhere

    ") self.write("
      ") self.write(content) self.write("
    ") @@ -949,13 +998,15 @@ def emitClassExtensionProperties (self, cl, layers="core", out=None): def _ClassExtensionProperties (self, out, cl, layers="core"): """Write out a list of properties not displayed as they are in extensions for a per-type page.""" - di = Unit.GetUnit("domainIncludes") + di = Unit.GetUnit("schema:domainIncludes") targetlayers=self.appropriateLayers(layers) #log.info("Appropriate targets %s" % targetlayers) exts = {} for prop in sorted(GetSources(di, cl, targetlayers), key=lambda u: u.id): + if ":" in prop.id: + continue if (prop.superseded(layers=targetlayers)): continue if inLayer(layers,prop): #Already in the correct layer - no need to report @@ -984,52 +1035,55 @@ def _ClassExtensionProperties (self, out, cl, layers="core"): out.write("\n") - def emitClassIncomingProperties (self, cl, layers="core", out=None, hashorslash="/"): + def emitClassIncomingProperties (self, term, out=None, hashorslash="/"): """Write out a table of incoming properties for a per-type page.""" if not out: out = self - targetlayers=self.appropriateLayers(layers) # Show incomming properties from all layers - headerPrinted = False - di = Unit.GetUnit("domainIncludes") - ri = Unit.GetUnit("rangeIncludes") - #log.info("Incomming for %s" % cl.id) - for prop in sorted(GetSources(ri, cl, layers=layers), key=lambda u: u.id): - if (prop.superseded(layers=layers)): + props = term.getTargetOf() + for prop in props: + if (prop.superseded()): continue - supersedes = prop.supersedes(layers=targetlayers) - inverseprop = prop.inverseproperty(layers=targetlayers) - ranges = sorted(GetTargets(di, prop, layers=targetlayers),key=lambda u: u.id) - comment = GetComment(prop, layers=targetlayers) + supersedes = prop.getSupersedes() + inverseprop = prop.getInverseOf() + ranges = prop.getRanges() + domains = prop.getDomains() + comment = prop.getComment() if (not headerPrinted): - self.write("

    Instances of %s may appear as values for the following properties

    " % (self.ml(cl))) + self.write("

    Instances of %s may appear as values for the following properties

    " % (self.ml(term))) self.write("\n \n \n\n \n \n\n\n") headerPrinted = True self.write("\n\n " % (self.ml(prop)) + "\n") self.write("") self.write("") if (headerPrinted): self.write("
    PropertyOn TypesDescription
    \n %s\n\n") - first_range = True - for r in ranges: - if (not first_range): + first_dom = True + for d in domains: + if (not first_dom): self.write(" or
    ") - first_range = False - self.write(self.ml(r)) + first_dom = False + self.write(self.ml(d)) self.write(" ") self.write("
    %s " % (comment)) - if (supersedes != None): - self.write(" Supersedes %s." % (self.ml(supersedes))) - if (inverseprop != None): + if supersedes: + self.write(" Supersedes") + first = True + for s in supersedes: + if first: + first = False + self.write(",") + self.write(" %s" % self.ml(s)) + self.write(". ") + if inverseprop: self.write("
    inverse property: %s." % (self.ml(inverseprop)) ) self.write("
    \n") - - + def emitRangeTypesForProperty(self, node, layers="core", out=None, hashorslash="/"): """Write out simple HTML summary of this property's expected types.""" if not out: @@ -1047,44 +1101,27 @@ def emitDomainTypesForProperty(self, node, layers="core", out=None, hashorslash= out = self out.write("
      ") - for dt in sorted(GetTargets(Unit.GetUnit("domainIncludes"), node, layers=layers), key=lambda u: u.id): + for dt in sorted(GetTargets(Unit.GetUnit("schema:domainIncludes"), node, layers=layers), key=lambda u: u.id): out.write("
    • %s
    • " % ( hashorslash, dt.id, dt.id )) out.write("
    \n\n") - - def emitAttributeProperties(self, node, layers="core", out=None, hashorslash="/"): + def emitAttributeProperties(self, term, out=None, hashorslash="/"): """Write out properties of this property, for a per-property page.""" if not out: out = self - targetLayers = self.appropriateLayers(layers) - di = Unit.GetUnit("domainIncludes") - ri = Unit.GetUnit("rangeIncludes") - rges = sorted(GetTargets(ri, node, layers=targetLayers), key=lambda u: u.id) - doms = sorted(GetTargets(di, node, layers=targetLayers), key=lambda u: u.id) - ranges = [] - eranges = [] - for r in rges: - if inLayer(layers, r): - ranges.append(r) - else: - eranges.append(r) - domains = [] - edomains = [] - for d in doms: - if inLayer(layers, d): - domains.append(d) - else: - edomains.append(d) - inverseprop = node.inverseproperty(layers=targetLayers) - subprops = sorted(node.subproperties(layers=targetLayers),key=lambda u: u.id) - superprops = sorted(node.superproperties(layers=targetLayers),key=lambda u: u.id) + ranges = term.getRanges() + domains =term.getDomains() + + inverseprop = term.getInverseOf() + subprops = term.getSubs() + superprops = term.getSupers() if (inverseprop != None): tt = "This means the same thing, but with the relationship direction reversed." - out.write("

    Inverse-property: %s.

    " % (self.ml(inverseprop, inverseprop.id,tt, prop=False, hashorslash=hashorslash)) ) + out.write("

    Inverse-property: %s.

    " % (self.ml(inverseprop, inverseprop.getId(),tt, prop=False, hashorslash=hashorslash)) ) out.write("\n") out.write("\n \n \n \n\n\n \n \n \n
    Values expected to be one of these types
    \n ") @@ -1094,23 +1131,10 @@ def emitAttributeProperties(self, node, layers="core", out=None, hashorslash="/" if (not first_range): out.write("
    ") first_range = False - tt = "The '%s' property has values that include instances of the '%s' type." % (node.id, r.id) - out.write(" %s " % (self.ml(r, r.id, tt, prop="rangeIncludes", hashorslash=hashorslash) +"\n")) + tt = "The '%s' property has values that include instances of the '%s' type." % (term.getId(), r.getId()) + out.write(" %s " % (self.ml(r, r.getId(), tt, prop="rangeIncludes", hashorslash=hashorslash) +"\n")) out.write("
    \n\n") - if len(eranges) > 0: - first_range = True - out.write("\n") - out.write(" \n \n \n \n\n\n \n \n
    Expected values defined in extensions
    ") - for r in eranges: - if (not first_range): - out.write("
    ") - first_range = False - defin = "defined in the %s extension" % (makeUrl(r.getHomeLayer(),""),r.getHomeLayer()) - tt = "The '%s' property has values that include instances of the '%s' type." % (node.id, r.id) - out.write("\n %s - %s" % (self.ml(r, r.id, tt, prop="domainIncludes",hashorslash=hashorslash),defin )) - out.write("
    \n\n") - first_domain = True out.write("\n") out.write(" \n \n \n \n\n\n \n \n
    Used on these types
    ") @@ -1118,31 +1142,18 @@ def emitAttributeProperties(self, node, layers="core", out=None, hashorslash="/" if (not first_domain): out.write("
    ") first_domain = False - tt = "The '%s' property is used on the '%s' type." % (node.id, d.id) - out.write("\n %s " % (self.ml(d, d.id, tt, prop="domainIncludes",hashorslash=hashorslash)+"\n" )) + tt = "The '%s' property is used on the '%s' type." % (term.getId(), d.getId()) + out.write("\n %s " % (self.ml(d, d.getId(), tt, prop="domainIncludes",hashorslash=hashorslash)+"\n" )) out.write("
    \n\n") - if len(edomains) > 0: - first_domain = True - out.write("\n") - out.write(" \n \n \n \n\n\n \n \n
    Used on types defined in extensions
    ") - for d in edomains: - if (not first_domain): - out.write("
    ") - first_domain = False - defin = "defined in the %s extension" % (makeUrl(d.getHomeLayer(),""),d.getHomeLayer()) - tt = "The '%s' property is used on the '%s' type." % (node.id, d.id) - out.write("\n %s - %s" % (self.ml(d, d.id, tt, prop="domainIncludes",hashorslash=hashorslash),defin )) - out.write("
    \n\n") - # Sub-properties if (subprops != None and len(subprops) > 0): out.write("\n") out.write(" \n \n \n \n\n") for sp in subprops: - c = ShortenOnSentence(StripHtmlTags( GetComment(sp,layers=layers) ),60) - tt = "%s: ''%s''" % ( sp.id, c) - out.write("\n \n" % (self.ml(sp, sp.id, tt, hashorslash=hashorslash))) + c = ShortenOnSentence(StripHtmlTags( sp.getComment(s) ),60) + tt = "%s: ''%s''" % ( sp.getId(), c) + out.write("\n \n" % (self.ml(sp, sp.getId(), tt, hashorslash=hashorslash))) out.write("\n
    Sub-properties
    %s
    %s
    \n\n") # Super-properties @@ -1150,21 +1161,19 @@ def emitAttributeProperties(self, node, layers="core", out=None, hashorslash="/" out.write("\n") out.write(" \n \n \n \n\n") for sp in superprops: - c = ShortenOnSentence(StripHtmlTags( GetComment(sp,layers=layers) ),60) - tt = "%s: ''%s''" % ( sp.id, c) - out.write("\n \n" % (self.ml(sp, sp.id, tt, hashorslash=hashorslash))) + c = ShortenOnSentence(StripHtmlTags( sp.getComment() ),60) + tt = "%s: ''%s''" % ( sp.getId(), c) + out.write("\n \n" % (self.ml(sp, sp.getId(), tt, hashorslash=hashorslash))) out.write("\n
    Super-properties
    %s
    %s
    \n\n") - self.emitSupersedes(node,layers=layers,out=out,hashorslash=hashorslash) - - def emitSupersedes(self, node, layers="core", out=None, hashorslash="/"): + def emitSupersedes(self, term, out=None, hashorslash="/"): """Write out Supersedes and/or Superseded by for this term""" if not out: out = self - newerprop = node.supersededBy(layers=layers) # None of one. e.g. we're on 'seller'(new) page, we get 'vendor'(old) + newerprop = term.getSupersededBy() # None of one. e.g. we're on 'seller'(new) page, we get 'vendor'(old) #olderprop = node.supersedes(layers=layers) # None or one - olderprops = sorted(node.supersedes_all(layers=layers),key=lambda u: u.id) # list, e.g. 'seller' has 'vendor', 'merchant'. + olderprops = term.getSupersedes() # Supersedes @@ -1173,19 +1182,19 @@ def emitSupersedes(self, node, layers="core", out=None, hashorslash="/"): out.write("
    Supersedes
    %s
    %s
    \n\n") # supersededBy (at most one direct successor) if (newerprop != None): out.write("\n") out.write(" \n \n \n \n\n") - c = ShortenOnSentence(StripHtmlTags( GetComment(newerprop,layers=layers) ),60) - tt = "%s: ''%s''" % ( newerprop.id, c) - out.write("\n \n" % (self.ml(newerprop, newerprop.id, tt))) + c = ShortenOnSentence(StripHtmlTags( newerprop.getComment() ),60) + tt = "%s: ''%s''" % ( newerprop.getId(), c) + out.write("\n \n" % (self.ml(newerprop, newerprop.getId(), tt))) out.write("\n
    supersededBy
    %s
    %s
    \n\n") def rep(self, markup): @@ -1232,17 +1241,17 @@ def handleHomepage(self, node): # TODO: pass in extension, base_domain etc. #sitekeyedhomepage = "homepage %s" % getSiteName() ext = getHostExt() - + if ext == "core": ext = "" - + if len(ext): ext += "." sitekeyedhomepage = "%sindex.html" % ext hp = getPageFromStore(sitekeyedhomepage) self.response.headers['Content-Type'] = "text/html" self.emitCacheHeaders() - if hp != None: + if hp: self.response.out.write( hp ) #log.info("Served datacache homepage.tpl key: %s" % sitekeyedhomepage) else: @@ -1254,7 +1263,7 @@ def handleHomepage(self, node): self.response.out.write( page ) log.debug("Served and cached fresh homepage.tpl key: %s " % sitekeyedhomepage) - setAppVar(CLOUDEXTRAMETA,{'x-goog-meta-sdotermlayer': getHostExt()}) + setAppVar(CLOUDEXTRAMETA,{'x-goog-meta-sdotermlayer': getHostExt()}) PageStore.put(sitekeyedhomepage, page) # self.response.out.write( open("static/index.html", 'r').read() ) return False # - Not caching homepage @@ -1271,8 +1280,39 @@ def getExtendedSiteName(self, layers): def emitSchemaorgHeaders(self, node, ext_mappings='', sitemode="default", sitename="schema.org", layers="core"): self.response.out.write(self.buildSchemaorgHeaders(node, ext_mappings, sitemode, sitename, layers)) - - + + + def buildSiteHeaders(self, term, ext_mappings='', sitemode="default", sitename="schema.org"): + """ + Generates, caches and emits HTML headers for class, property and enumeration pages. Leaves open. + + * entry = name of the class or property + """ + buff = sdoStringIO() + + rdfs_type = 'rdfs:Class' + entry = term.id + if term.isProperty(): + rdfs_type = 'rdfs:Property' + + desc = entry + desc = self.getMetaDescription(term, lengthHint=200) + + template_values = { + 'entry': str(entry), + 'desc' : desc, + 'menu_sel': "Schemas", + 'rdfs_type': rdfs_type, + 'ext_mappings': ext_mappings, + 'noindexpage': noindexpages + } + out = templateRender('genericTermPageHeader.tpl', term, template_values) + buff.write(out) + + ret = buff.getvalue() + buff.close() + return ret + def buildSchemaorgHeaders(self, node, ext_mappings='', sitemode="default", sitename="schema.org", layers="core"): """ Generates, caches and emits HTML headers for class, property and enumeration pages. Leaves open. 
@@ -1325,22 +1365,22 @@ def buildSchemaorgHeaders(self, node, ext_mappings='', sitemode="default", siten buff.close() return ret - def getMetaDescription(self, node, layers="core",lengthHint=250): + def getMetaDescription(self, term, layers="core",lengthHint=250): ins = "" - if node.isEnumeration(): + if term.isEnumeration(): ins += " Enumeration Type" - elif node.isClass(): + elif term.isClass(): ins += " Type" - elif node.isAttribute(): + elif term.isProperty(): ins += " Property" - elif node.isEnumerationValue(): + elif term.isEnumerationValue(): ins += " Enumeration Value" - desc = "Schema.org%s: %s - " % (ins, node.id) + desc = "Schema.org%s: %s - " % (ins, term.id) lengthHint -= len(desc) - comment = GetComment(node, layers) + comment = term.getComment() desc += ShortenOnSentence(StripHtmlTags(comment),lengthHint) @@ -1351,151 +1391,60 @@ def appropriateLayers(self,layers="core"): return ALL_LAYERS return ALL_LAYERS_NO_ATTIC - def emitExactTermPage(self, node, layers="core"): + def emitExactTermPage(self, term, layers="core"): """Emit a Web page that exactly matches this node.""" - log.debug("EXACT PAGE: %s" % node.id) + log.info("EXACT PAGE: %s" % term.getId()) self.outputStrings = [] # blank slate - ext_mappings = GetExtMappingsRDFa(node, layers=layers) - - global sitemode #,sitename - if ("schema.org" not in self.request.host and sitemode == "mainsite"): - sitemode = "mainsite testsite" - -# self.emitSchemaorgHeaders(node, ext_mappings, sitemode, getSiteName(), layers) - - cached = getPageFromStore(node.id) + cached = getPageFromStore(term.getId()) if (cached != None): - log.info("GOT CACHED page for %s" % node.id) + log.info("GOT CACHED page for %s" % term.getId()) self.response.write(cached) return log.info("Building page") - - self.write(self.buildSchemaorgHeaders(node, ext_mappings, sitemode, getSiteName(), layers)) - - self.parentStack = [] - self.GetParentStack(node, layers=self.appropriateLayers(layers=layers)) - - self.emitUnitHeaders(node, layers=layers) # writes

    ... - - if (node.isClass(layers=layers)): - subclass = True - for p in self.parentStack: - self.ClassProperties(p, p==self.parentStack[0], layers=self.appropriateLayers(layers=layers)) - if (not node.isDataType(layers=layers) and node.id != "DataType"): + + ext_mappings = GetExtMappingsRDFa(term) + self.write(self.buildSiteHeaders(term, ext_mappings, sitemode, getSiteName())) + + log.info("Done buildSiteHeaders") + + self.emitUnitHeaders(term) # writes

    ...
+        stack = self._removeStackDupes(term.getTermStack())
+        setAppVar("tableHdr",False)
+        if term.isClass():
+            for p in stack:
+                self.ClassProperties(p, p==stack[0], out=self, term=term)
+            if getAppVar("tableHdr"):
+                self.write("\n\n
    \n\n") - self.emitClassIncomingProperties(node, layers=layers) - - self.emitClassExtensionSuperclasses(node,layers) - - self.emitClassExtensionProperties(p,layers) - - self.emitSupersedes(node,layers=layers) - - - elif (Unit.isAttribute(node, layers=layers)): - self.emitAttributeProperties(node, layers=layers) - - if (node.isClass(layers=layers)): - children = [] - children = GetSources(Unit.GetUnit("rdfs:subClassOf"), node, self.appropriateLayers(layers))# Normal subclasses - if(node.isDataType() or node.id == "DataType"): - children += GetSources(Unit.GetUnit("rdf:type"), node, self.appropriateLayers(layers))# Datatypes - children = sorted(children, key=lambda u: u.id) - - if (len(children) > 0): - buff = StringIO.StringIO() - extbuff = StringIO.StringIO() - - firstext=True - for c in children: - if c.superseded(layers=layers): - continue - if inLayer(layers, c): - buff.write("
  • %s
  • " % (self.ml(c))) - else: - sep = ", " - if firstext: - sep = "" - firstext=False - extbuff.write("%s%s" % (sep,self.ml(c)) ) - - if (len(buff.getvalue()) > 0): - if node.isDataType(): - self.write("
    More specific DataTypes
      ") - else: - self.write("
      More specific Types
        ") - self.write(buff.getvalue()) - self.write("
      ") + + + self.emitClassIncomingProperties(term) - if (len(extbuff.getvalue()) > 0): - self.write("

      More specific Types available in extensions

      • ") - self.write(extbuff.getvalue()) - self.write("
      ") - buff.close() - extbuff.close() + #self.emitClassExtensionSuperclasses(node,layers) - if (node.isEnumeration(layers=layers)): + #self.emitClassExtensionProperties(p,layers) - children = sorted(GetSources(Unit.GetUnit("rdf:type"), node, self.appropriateLayers(layers)), key=lambda u: u.id) - if (len(children) > 0): - buff = StringIO.StringIO() - extbuff = StringIO.StringIO() - - firstext=True - for c in children: - if inLayer(layers, c): - buff.write("
    • %s
    • " % (self.ml(c))) - else: - sep = "," - if firstext: - sep = "" - firstext=False - extbuff.write("%s%s" % (sep,self.ml(c)) ) + elif term.isProperty(): + self.emitAttributeProperties(term) + + elif term.isDataType(): + self.emitClassIncomingProperties(term) + + self.emitSupersedes(term) + self.emitchildren(term) + self.emitAcksAndSources(term) + self.emitTermExamples(term) + + self.write("
      \n\n\n\n\n" % (getAppEngineVersion(),appver)) - if (len(buff.getvalue()) > 0): - self.write("

      Enumeration members
        ") - self.write(buff.getvalue()) - self.write("
      ") + page = "".join(self.outputStrings) + setAppVar(CLOUDEXTRAMETA,{'x-goog-meta-sdotermlayer': term.getLayer()}) + PageStore.put(term.getId(),page) - if (len(extbuff.getvalue()) > 0): - self.write("

      Enumeration members available in extensions

      • ") - self.write(extbuff.getvalue()) - self.write("
      ") - buff.close() - extbuff.close() - - ackorgs = GetTargets(Unit.GetUnit("dc:source"), node, layers=layers) - if (len(ackorgs) > 0): - sources = [] - acknowledgements =[] - for ao in ackorgs: - acks = sorted(GetTargets(Unit.GetUnit("rdfs:comment"), ao, layers)) - if len(acks) == 0: - val = str(ao) - if val.startswith("http://") or val.startswith("https://"): - val = "[%s](%s)" % (val,val) #Put into markdown format - sources.append(val) - else: - for ack in acks: - acknowledgements.append(ack) - - if len(sources) > 0: - s = "" - if len(sources) > 1: - s = "s" - self.write("

      Source%s

      \n" % s) - for so in sorted(sources): - self.write(Markdown.parse(so,True)) - if len(acknowledgements) > 0: - s = "" - if len(acknowledgements) > 1: - s = "s" - self.write("

      Acknowledgement%s

      \n" % s) - for ack in sorted(acknowledgements): - self.write(Markdown.parse(str(ack),True)) - - examples = GetExamples(node, layers=layers) + self.response.write(page) + + def emitTermExamples(self,term): + examples = GetExamples(term) log.debug("Rendering n=%s examples" % len(examples)) if (len(examples) > 0): example_labels = [ @@ -1508,8 +1457,8 @@ def emitExactTermPage(self, node, layers="core"): exNum = 0 for ex in sorted(examples, key=lambda u: u.keyvalue): - if not ex.egmeta["layer"] in layers: #Example defined in extension we are not in - continue + #if not ex.egmeta["layer"] in layers: #Example defined in extension we are not in + #continue exNum += 1 id="example-%s" % exNum if "id" in ex.egmeta: @@ -1525,27 +1474,66 @@ def emitExactTermPage(self, node, layers="core"): self.write("
      %s
      \n\n" % (example_type, selected, self.rep(ex.get(example_type)))) self.write("\n\n") + + + def _removeStackDupes(self,stack): + cleanstack = [] + i = len(stack) + while i: + i -= 1 + if not stack[i] in cleanstack: + cleanstack.insert(0,stack[i]) + + return cleanstack + + def emitAcksAndSources(self,term): + sources = term.getSources() + if len(sources): + s = "" + if len(sources) > 1: + s = "s" + self.write("

      Source%s

      \n" % s) + for val in sources: + if val.startswith("http://") or val.startswith("https://"): + val = "[%s](%s)" % (val,val) #Put into markdown format + self.write(Markdown.parse(val,True)) + + acknowledgements = term.getAcknowledgements() + if len(acknowledgements): + s = "" + if len(acknowledgements) > 1: + s = "s" + self.write("

      Acknowledgement%s

      \n" % s) + for ack in sorted(acknowledgements): + self.write(Markdown.parse(str(ack),True)) + + + def emitchildren(self,term): + children = term.getSubs() + + log.info("CILDREN: %s" % children) - self.write("

      Schema Version %s.

      \n\n" % SCHEMA_VERSION) - # TODO: add some version info regarding the extension - - # Analytics - self.write("""""") - - - self.write(" \n\n\n\n\n" % (getAppEngineVersion(),appver)) - - page = "".join(self.outputStrings) - setAppVar(CLOUDEXTRAMETA,{'x-goog-meta-sdotermlayer': node.home}) - PageStore.put(node.id,page) + if (len(children) > 0): + buff = StringIO.StringIO() + for c in children: + if c.superseded() or self.hideAtticTerm(c): + continue + buff.write("
    • %s
    • " % (self.ml(c))) -# self.response.write(self.AddCachedText(node, self.outputStrings, layers)) - self.response.write(page) + if (len(buff.getvalue()) > 0): + if term.isDataType(): + self.write("
      More specific DataTypes
        ") + elif term.isClass() or term.isEnumerationValue(): + self.write("
        More specific Types
          ") + elif term.isProperty(): + self.write("
          Sub-properties
            ") + elif term.isEnumeration(): + self.write("
            Enumeration members
              ") + self.write(buff.getvalue()) + self.write("
            ") + buff.close() + def emitHTTPHeaders(self, node): if ENABLE_CORS: self.response.headers.add_header("Access-Control-Allow-Origin", "*") # entire site is public. @@ -1590,9 +1578,9 @@ def handleJSONContext(self, node): jsonldcontext = getPageFromStore(label) if not jsonldcontext: jsonldcontext = GetJsonLdContext(layers=ALL_LAYERS) - + PageStore.put(label,jsonldcontext) - + if PAGESTOREMODE == "CLOUDSTORE": cloudstoreStoreContent("docs/jsonldcontext.json", jsonldcontext, "html") cloudstoreStoreContent("docs/jsonldcontext.json.txt", jsonldcontext, "html") @@ -1639,7 +1627,7 @@ def handleSchemasPage(self, node, layerlist='core'): def handleDumpsPage(self, node, layerlist='core'): self.response.headers['Content-Type'] = "text/html" self.emitCacheHeaders() - + page = getPageFromStore(node) if page: @@ -1660,9 +1648,13 @@ def handleDumpsPage(self, node, layerlist='core'): return True def getCounts(self): + log.info("counts") typesCount = str(countTypes(extension="core")) + log.info("TYPES %s" % typesCount) propsCount = str(countProperties(extension="core")) + log.info("PROPS %s" % propsCount) enumCount = str(countEnums(extension="core")) + log.info("ENUMS %s" % enumCount) text = "" text += "The core vocabulary currently consists of %s Types, " % typesCount @@ -1700,51 +1692,54 @@ def handleFullHierarchyPage(self, node, layerlist='core'): extonlylist.append(i) count += 1 local_button = "" - local_label = "

            Core vocabulary

            " - if count == 0: - local_button = "Core vocabulary" - elif count == 1: - local_button = "Core plus %s extension" % extlist - else: - local_button = "Core plus %s extensions" % extlist - + #local_label = "

            Core vocabulary

            " + local_label = "" ext_button = "" - if count == 1: - ext_button = "Extension %s" % extlist - elif count > 1: - ext_button = "Extensions %s" % extlist - - uThing = Unit.GetUnit("Thing") - uDataType = Unit.GetUnit("DataType") + tops = self.gettops() + full_thing_tree = "" + thing_tree = "" + datatype_tree = "" + first = True + dtcount = 0 + tcount = 0 mainroot = TypeHierarchyTree(local_label) - mainroot.traverseForHTML(uThing, layers=layerlist, idprefix="C.", urlprefix=urlprefix) - thing_tree = mainroot.toHTML() - - fullmainroot = TypeHierarchyTree("

            Core plus all extension vocabularies

            ") - fullmainroot.traverseForHTML(uThing, layers=ALL_LAYERS_NO_ATTIC, idprefix="CE.", urlprefix=urlprefix) - full_thing_tree = fullmainroot.toHTML() - - ext_thing_tree = None - if len(extonlylist) > 0: - extroot = TypeHierarchyTree("

            Extension: %s

            " % extlist) - extroot.traverseForHTML(uThing, layers=extonlylist, traverseAllLayers=True, idprefix="E.", urlprefix=urlprefix) - ext_thing_tree = extroot.toHTML() - dtroot = TypeHierarchyTree("

            Data Types

            ") - dtroot.traverseForHTML(uDataType, layers=layerlist, idprefix="D.", urlprefix=urlprefix) - datatype_tree = dtroot.toHTML() + for t in tops: + if not first: + local_label = "" + first = False + top = VTerm.getTerm(t) + if top.isDataType() or top.getUri() == "http://schema.org/DataType": + dtcount += 1 + dtroot.traverseForHTML(top, layers=layerlist, idprefix="D.", urlprefix=urlprefix) + else: + tcount += 1 + mainroot.traverseForHTML(top, layers=layerlist, idprefix="C.", urlprefix=urlprefix, traverseAllLayers=True) + if dtcount: + datatype_tree += dtroot.toHTML() + if tcount: + full_thing_tree += mainroot.toHTML() + + #fullmainroot = TypeHierarchyTree("

            Core plus all extension vocabularies

            ") + #fullmainroot.traverseForHTML(uThing, layers=ALL_LAYERS_NO_ATTIC, idprefix="CE.", urlprefix=urlprefix) + #full_thing_tree = fullmainroot.toHTML() + + ext_thing_tree = "" + #if len(extonlylist) > 0: + #extroot = TypeHierarchyTree("

            Extension: %s

            " % extlist) + #extroot.traverseForHTML(uThing, layers=extonlylist, traverseAllLayers=True, idprefix="E.", urlprefix=urlprefix) + #ext_thing_tree = extroot.toHTML() + + #dtroot = TypeHierarchyTree("

            Data Types

            ") + #dtroot.traverseForHTML(uDataType, layers=layerlist, idprefix="D.", urlprefix=urlprefix) + #datatype_tree = dtroot.toHTML() full_button = "Core plus all extension vocabularies" - page = templateRender('full.tpl', node, { 'thing_tree': thing_tree, - 'full_thing_tree': full_thing_tree, - 'ext_thing_tree': ext_thing_tree, + page = templateRender('full.tpl', node, { 'full_thing_tree': full_thing_tree, 'datatype_tree': datatype_tree, - 'local_button': local_button, - 'full_button': full_button, - 'ext_button': ext_button, 'menu_sel': "Schemas"}) self.response.out.write( page ) @@ -1753,12 +1748,15 @@ def handleFullHierarchyPage(self, node, layerlist='core'): return True + def gettops(self): + return rdfgettops() + def handleJSONSchemaTree(self, node, layerlist='core'): """Handle a request for a JSON-LD tree representation of the schemas (RDFS-based).""" - + if isinstance(node, Unit): node = node.id - + self.response.headers['Content-Type'] = "application/ld+json" self.emitCacheHeaders() @@ -1811,9 +1809,11 @@ def checkConneg(self,node): def handleExactTermPage(self, node, layers='core'): - if node.startswith("http://schema.org/"): #Special case will map full schema URI to the term name - node = node[18:] - + baseuri = SdoConfig.baseUri() + + if node.startswith(baseuri): #Special case will map full schema URI to the term name + node = node[len(baseuri):] + """Handle with requests for specific terms like /Person, /fooBar. """ dataext = os.path.splitext(node) if dataext[1] in OUTPUTDATATYPES: @@ -1822,77 +1822,57 @@ def handleExactTermPage(self, node, layers='core'): return True if self.checkConneg(node): return True + log.info("GETTING TERM: %s" % node) + term = VTerm.getTerm(node) - schema_node = Unit.GetUnit(node) # e.g. "Person", "CreativeWork". - if not schema_node: #Not a recognised term + if not term: return False - - if not self.checkNodeExt(schema_node): + + if not self.checkNodeExt(term): return False - - self.response.headers['Content-Type'] = "text/html" - self.emitCacheHeaders() - - - if inLayer(layers, schema_node): - self.emitExactTermPage(schema_node, layers=layers) + + if not SUBDOMAINS or term.inLayers(layers): + self.emitExactTermPage(term, layers=layers) return True - else: - # log.info("Looking for node: %s in layers: %s" % (node.id, ",".join(all_layers.keys() )) ) - if not ENABLE_HOSTED_EXTENSIONS: - return False - if schema_node is not None and schema_node.id in all_terms:# look for it in other layers - extensions = [] - ext = {} - ext['href'] = makeUrl(schema_node.getHomeLayer(),schema_node.id,full=True) - ext['text'] = schema_node.getHomeLayer() - extensions.append(ext) - #self.response.out.write("
          • %s
          • " % (makeUrl(x,schema_node.id), x) ) - - template = JINJA_ENVIRONMENT.get_template('wrongExt.tpl') - page = templateRender('wrongExt.tpl', node, - {'target': schema_node.id, - 'targetext': schema_node.getHomeLayer(), - 'extensions': extensions, - 'sitename': "schema.org"}) - - self.response.out.write( page ) - log.debug("Serving fresh wrongExtPage.") - return True - else: - log.debug("No unit identified for node: %s" % node) - return False - - log.info("Should not have reached here!!") - def checkNodeExt(self,node): - if os.environ['STAYINEXTENTION'] and os.environ['STAYINEXTENTION'] == "True": + def checkNodeExt(self,term): + if os.environ.get('STAYINEXTENTION',"False").lower() == "true": return True - - home = node.home + + home = term.getLayer() ext = getHostExt() - log.info("node: '%s' home: '%s' ext: '%s'" % (node,home,ext)) + log.info("term: '%s' home: '%s' ext: '%s'" % (term,home,ext)) + log.info("Supers: %s" % term.getSupers()) if home == CORE and ext == '': return True - if home == ext: - return True - - if home == CORE: - log.info("Redirecting to core entity") - self.redirectToBase(node.id,full=True) - else: - log.info("Redirecting to '%s' entity" % home) - self.redirectToExt(node.id,ext=home, full=True) - return False + if SUBDOMAINS: + log.info("Checking for correct subdomain") + if home == ext: + return True + + if home == CORE: + log.info("Redirecting to core entity") + self.redirectToBase(term.getId(),full=True) + else: + log.info("Redirecting to '%s' entity" % home) + self.redirectToExt(term.getId(),ext=home, full=True) + return False + else: #SUBDOMAINS == False + if ext == '': + return True + else: + log.info("SUBDOMAINS dissabled - Redirecting to core entity") + self.redirectToBase(term.getId(),full=True) + return False def handleExactTermDataOutput(self, node=None, outputtype=None): log.info("handleExactTermDataOutput Node: '%s' Outputtype: '%s'" % (node, outputtype)) ret = False file = None if node and outputtype: - schema_node = Unit.GetUnit(node) - if schema_node: + term = VTerm.getTerm(node) + if term: ret = True index = "%s:%s" % (outputtype,node) data = getPageFromStore(index) @@ -1903,7 +1883,7 @@ def handleExactTermDataOutput(self, node=None, outputtype=None): if outputtype == ".csv": self.response.headers['Content-Type'] = "text/csv; charset=utf-8" if not data: - data = self.emitcsvTerm(schema_node,excludeAttic) + data = self.emitcsvTerm(term,excludeAttic) PageStore.put(index,data) else: format = None @@ -1930,16 +1910,16 @@ def handleExactTermDataOutput(self, node=None, outputtype=None): ret = True return ret - def emitcsvTerm(self,schema_node,excludeAttic=True): + def emitcsvTerm(self,term,excludeAttic=True): csv = sdordf2csv(queryGraph=getQueryGraph(),fullGraph=getQueryGraph(),markdownComments=True,excludeAttic=excludeAttic) file = StringIO.StringIO() - term = "http://schema.org/" + schema_node.id - if schema_node.isClass() or schema_node.isEnumerationValue(): + termUri = term.getUri() + if term.isClass() or term.isEnumerationValue(): csv.type2CSV(header=True,out=file) - csv.type2CSV(term=term,header=False,out=file) - elif schema_node.isAttribute(): + csv.type2CSV(term=termUri,header=False,out=file) + elif term.isProperty(): csv.prop2CSV(header=True,out=file) - csv.prop2CSV(term=term,header=False,out=file) + csv.prop2CSV(term=termUri,header=False,out=file) data = file.getvalue() file.close() return data @@ -1952,7 +1932,7 @@ def handle404Failure(self, node, layers="core", extrainfo=None, suggest=True): self.response.out.write('') 
self.response.out.write('') self.response.out.write('') - + self.response.out.write('

            404 Not Found.


            Page not found. Please try the homepage.

            ') if suggest: @@ -1960,13 +1940,13 @@ def handle404Failure(self, node, layers="core", extrainfo=None, suggest=True): log.debug("404: clean_node: clean_node: %s node: %s" % (clean_node, node)) - base_term = Unit.GetUnit( node.rsplit('/')[0] ) + base_term = VTerm.getTerm( node.rsplit('/')[0] ) if base_term != None : - self.response.out.write('
            Perhaps you meant: %s


            ' % ( base_term.id, base_term.id )) + self.response.out.write('
            Perhaps you meant: %s


            ' % ( base_term.getId(), base_term.getId() )) - base_actionprop = Unit.GetUnit( node.rsplit('-')[0] ) + base_actionprop = VTerm.getTerm( node.rsplit('-')[0] ) if base_actionprop != None : - self.response.out.write('
            Looking for an Action-related property? Note that xyz-input and xyz-output have special meaning. See also: %s


            ' % ( base_actionprop.id, base_actionprop.id )) + self.response.out.write('
            Looking for an Action-related property? Note that xyz-input and xyz-output have special meaning. See also: %s


            ' % ( base_actionprop.getId(), base_actionprop.getId() )) if extrainfo: self.response.out.write("
            %s
            " % extrainfo) @@ -2004,7 +1984,7 @@ def handleFullReleasePage(self, node, layerlist='core'): return True else: log.debug("Serving tocversionPage from cache.") - page = templateRender('tocVersionPage.tpl', node, + page = templateRender('tocVersionPage.tpl', node, {"releases": sorted(releaselog.iterkeys()), "menu_sel": "Schemas"}) @@ -2068,8 +2048,6 @@ def handleFullReleasePage(self, node, layerlist='core'): az_prop_meta = {} -#TODO: ClassProperties (self, cl, subclass=False, layers="core", out=None, hashorslash="/"): - # TYPES for t in az_types: props4type = HTMLOutput() # properties applicable for a type @@ -2078,7 +2056,6 @@ def handleFullReleasePage(self, node, layerlist='core'): self.emitSimplePropertiesPerType(t, out=props4type, hashorslash="#term_" ) self.emitSimplePropertiesIntoType(t, out=props2type, hashorslash="#term_" ) - #self.ClassProperties(t, out=typeInfo, hashorslash="#term_" ) tcmt = Markup(GetComment(t)) az_type_meta[t]={} az_type_meta[t]['comment'] = tcmt @@ -2109,7 +2086,7 @@ def handleFullReleasePage(self, node, layerlist='core'): else: releasedate = releaselog[str(SCHEMA_VERSION)] - page = templateRender('fullReleasePage.tpl', node, + page = templateRender('fullReleasePage.tpl', node, {"base_href": base_href, 'thing_tree': thing_tree, 'liveversion': SCHEMA_VERSION, @@ -2132,12 +2109,12 @@ def handleExtensionContents(self,ext): # return getPageFromStore('ExtensionContents',ext) buff = StringIO.StringIO() - - az_terms = GetAllTerms(ext) #Returns sorted by id results. + + az_terms = VTerm.getAllTerms(layer=ext) #Returns sorted by id results. az_terms.sort(key = lambda u: u.category) if len(az_terms) > 0: - buff.write("

            Terms defined or referenced in the '%s' extension.

            " % ext) + buff.write("

            Terms defined in the '%s' extension.

            " % ext) keys = [] groups = [] @@ -2171,11 +2148,11 @@ def handleExtensionContents(self,ext): def countTypes(self,interms,select="",layers='core'): ret = 0 for t in interms: - if select == "type" and t.isClass(layers): + if select == "type" and t.isClass(): ret += 1 - elif select == "prop" and t.isAttribute(layers): + elif select == "prop" and t.isProperty(): ret += 1 - elif select == "enum" and t.isEnumerationValue(layers): + elif select == "enum" and t.isEnumerationValue(): ret +=1 elif select == "": ret += 1 @@ -2190,11 +2167,11 @@ def listTerms(self,interms,prefix="",select=None,layers='core'): for t in interms: use = False if select == "type": - use = t.isClass(layers) + use = t.isClass() elif select == "prop": - use = t.isAttribute(layers) + use = t.isProperty() elif select == "enum": - use = t.isEnumerationValue(layers) + use = t.isEnumerationValue() if use: terms.append(t) @@ -2252,7 +2229,7 @@ def setupHostinfo(self, node, test=""): log.info("setupHostinfo: data: scheme='%s' hoststring='%s' initial host_ext='%s'" % (scheme, hostString, str(host_ext) )) - + ver=None if not getInTestHarness(): from google.appengine.api.modules.modules import get_current_version_name @@ -2261,7 +2238,7 @@ def setupHostinfo(self, node, test=""): if host_ext != "": if host_ext in ENABLED_EXTENSIONS: mybasehost = mybasehost[len(host_ext) + 1:] - + elif host_ext == "www": mybasehost = mybasehost[4:] setBaseHost(mybasehost) @@ -2350,25 +2327,25 @@ def get(self, node): if not node or node == "": node = "/" - + if not validNode_re.search(str(node)): #invalid node name log.warning("Invalid node name '%s'" % str(node)) self.handle404Failure(node,suggest=False) return - NotModified = False + NotModified = False matchTag = self.request.headers.get("If-None-Match",None) unMod = self.request.headers.get("If-Unmodified-Since",None) - + #log.info("matchTag '%s' unMod '%s'" % (matchTag,unMod)) - + hdrIndex = getHostExt() if len(hdrIndex): hdrIndex += ":" hdrIndex += node - + hdrs = HeaderStore.get(hdrIndex) - + if hdrs: etag = hdrs.get("ETag",None) mod = hdrs.get("Last-Modified",None) @@ -2399,11 +2376,16 @@ def get(self, node): if self.response.status.startswith("200"): stat = getAppVar(CLOUDSTAT) log.info("CLOUDSTAT %s" % stat) - + if stat: #Use values from cloud storage self.response.headers.add_header("ETag", stat.etag) self.response.headers['Last-Modified'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(stat.st_ctime)) - else: + self.response.headers['Content-Type'] = stat.content_type + else: + if not self.response.headers.get('Content-Type',None): + mimetype, contentType = mimetypes.guess_type(node) + self.response.headers['Content-Type'] = mimetype + self.response.headers.add_header("ETag", getslug() + str(hash(hdrIndex))) self.response.headers['Last-Modified'] = getmodiftime().strftime("%a, %d %b %Y %H:%M:%S GMT") @@ -2455,6 +2437,14 @@ def _get(self, node, doWarm=True): setSiteName(self.getExtendedSiteName(layerlist)) # e.g. 
'bib.schema.org', 'schema.org'
         log.debug("EXT: set sitename to %s " % getSiteName())

+        if not LOADEDSOURCES:
+            log.info("Instance[%s] received request for not stored page: %s" % (getInstanceId(short=True), node) )
+            log.info("Instance[%s] needs to load sources to create it" % (getInstanceId(short=True)) )
+            load_sources() #Get Examples files and schema definitions
+
+        if node.startswith("docs/"):
+            return self._getDocs(node,layerlist=layerlist)
+
         if(node == "_ah/warmup"):
             if "localhost" in os.environ['SERVER_NAME'] and WarmupState.lower() == "auto":
                 log.info("[%s] Warmup dissabled for localhost instance" % getInstanceId(short=True))
@@ -2473,8 +2463,30 @@ def _get(self, node, doWarm=True):
             #global Warmer
             #if not WarmedUp:
                 #Warmer.stepWarm(self)
+
+        self.emitHTTPHeaders(node) #Ensure we have the right basic header values
+
+        if(node == "admin/refresh"):
+            log.info("Processing refresh request")
+            load_start = datetime.datetime.now()
+            memcache.flush_all()
+            memcache.set(key="app_initialising", value=True, time=300) #Give the system 5 mins - auto remove flag in case of crash
+            cleanmsg = CacheControl.clean()
+            log.info("Clean count(s): %s" % cleanmsg)
+            log.info(("[%s] Cache clean took %s " % (getInstanceId(short=True),(datetime.datetime.now() - load_start))))
+            memcache.set(key="app_initialising", value=False)
+            storeInitialisedTimestamp()
+            self.emitSchemaorgHeaders("Refresh")
+            #404 could be called from any path, so output all potential locations of schemaorg.css
+            self.response.out.write('')
+            self.response.out.write('')
+            self.response.out.write('')
+
+            self.response.out.write('

            Refresh Completed

            Took: %s

            ' % (datetime.datetime.now() - load_start)) + return False + if(node == "_ah/start"): log.info("Instance[%s] received Start request at %s" % (modules.get_current_instance_id(), global_vars.time_start) ) @@ -2497,56 +2509,11 @@ def _get(self, node, doWarm=True): memcache.flush_all() return False - if not LOADEDSOURCES: - log.info("Instance[%s] received request for not stored page: %s" % (getInstanceId(short=True), node) ) - log.info("Instance[%s] needs to load sources to create it" % (getInstanceId(short=True)) ) - load_sources() #Get Examples files and schema definitions - if (node in ["", "/"]): return self.handleHomepage(node) - hstext = getHostExt() - if hstext == "": - hstext = "core" - - if (node.startswith("docs/") and hstext != "core"): #All docs should operate in core - self.redirectToBase(node,True) - - if node in ["docs/jsonldcontext.json.txt", "docs/jsonldcontext.json"]: - if self.handleJSONContext(node): - return True - else: - log.info("Error handling JSON-LD context: %s" % node) - return False - - if (node == "docs/full.html"): - if self.handleFullHierarchyPage(node, layerlist=layerlist): - return True - else: - log.info("Error handling full.html : %s " % node) - return False - - if (node == "docs/schemas.html"): - if self.handleSchemasPage(node, layerlist=layerlist): - return True - else: - log.info("Error handling schemas.html : %s " % node) - return False - if (node == "docs/developers.html"): - if self.handleDumpsPage(node, layerlist=layerlist): - return True - else: - log.info("Error handling developers.html : %s " % node) - return False - - if (node == "docs/tree.jsonld" or node == "docs/tree.json"): - if self.handleJSONSchemaTree(node, layerlist=ALL_LAYERS): - return True - else: - log.info("Error handling JSON-LD schema tree: %s " % node) - return False currentVerPath = "version/%s" % SCHEMA_VERSION - + if(node.startswith("version/latest")): newurl = "%s%s" % (currentVerPath,node[14:]) log.info("REDIRECTING TO: %s" % newurl) @@ -2581,7 +2548,7 @@ def _get(self, node, doWarm=True): self.handle404Failure(node,extrainfo=inf) return False - + # Pages based on request path matching a Unit in the term graph: if self.handleExactTermPage(node, layers=layerlist): return True @@ -2595,6 +2562,65 @@ def _get(self, node, doWarm=True): log.info("Error handling 404.") return False + def _getDocs(self, node, layerlist=""): + hstext = getHostExt() + if hstext == "": + hstext = "core" + + if (node.startswith("docs/") and hstext != "core"): #All docs should operate in core + return self.redirectToBase(node,True) + + if node in ["docs/jsonldcontext.json.txt", "docs/jsonldcontext.json"]: + if self.handleJSONContext(node): + return True + else: + log.info("Error handling JSON-LD context: %s" % node) + return False + + elif (node == "docs/full.html"): + if self.handleFullHierarchyPage(node, layerlist=layerlist): + return True + else: + log.info("Error handling full.html : %s " % node) + return False + + elif (node == "docs/schemas.html"): + if self.handleSchemasPage(node, layerlist=layerlist): + return True + else: + log.info("Error handling schemas.html : %s " % node) + return False + elif (node == "docs/developers.html"): + if self.handleDumpsPage(node, layerlist=layerlist): + return True + else: + log.info("Error handling developers.html : %s " % node) + return False + + elif (node == "docs/tree.jsonld" or node == "docs/tree.json"): + if self.handleJSONSchemaTree(node, layerlist=ALL_LAYERS): + return True + else: + log.info("Error handling JSON-LD schema tree: %s " % node) + 
return False
+        else: #Asking for a static file under docs
+            return self.handleStaticDoc(node)
+
+    def handleStaticDoc(self,node):
+        if PAGESTOREMODE == "CLOUDSTORE":
+            log.info("Asking for: %s" % node)
+            page = getPageFromStore(node,enableFlush=False)
+            if page:
+                self.response.out.write( page )
+                log.debug("Serving static page: %s" % node)
+                return True
+            else:
+                self.handle404Failure(node)
+                return False
+
+
+        return False
+
     def siteDebug(self):
         global STATS
         page = templateRender('siteDebug.tpl', "_siteDebug" )
@@ -2714,11 +2740,11 @@ def stepWarm(self, unit=None, layer=None):
         realHostExt = getHostExt()
         if layer:
             setHostExt(layer)
-
+
         self._stepWarm(unit=unit, layer=layer)
-
+
         setHostExt(realHostExt)
-
+
     def _stepWarm(self, unit=None, layer=None):
         global WarmedUp
@@ -2765,13 +2791,78 @@ def warmAll(self,unit):
 Warmer = WarmupTool()

+def getExtensionDescriptions():
+    extComment = ""
+    extVers = ""
+    extName = ""
+    extDD = ""
+    ex = getHostExt()
+    if ex and len(ex):
+        descs = api.SdoConfig.descriptor(ex)
+        if descs and len(descs):
+            extName = descs[0].get("name")
+            extDD = Markdown.parse(descs[0].get("brief"))
+            extVers = Markdown.parse(descs[0].get("version"))
+            extComment = Markdown.parse(descs[0].get("comment"))
+
+    return extName, extDD, extVers, extComment
+
 def templateRender(templateName, node, values=None):
     global sitemode #,sitename
     #log.info("templateRender(%s,%s,%s)" % (templateName, node, values))
+    #log.info("getHostExt %s" % getHostExt())
+
+
+    if isinstance(node, Unit):
+        node = node.id
+    if isinstance(node, VTerm):
+        node = node.getId()
+
+    extName, extDD, extVers, extComment = getExtensionDescriptions()
+
+    if node.startswith("docs/"):
+        docsdir = "./"
+        homedir = ".."
+    else:
+        docsdir = "docs/"
+        homedir = "."
+    defvars = {
+        'ENABLE_HOSTED_EXTENSIONS': ENABLE_HOSTED_EXTENSIONS,
+        'SCHEMA_VERSION': SCHEMA_VERSION,
+        'appengineVersion': getAppEngineVersion(),
+        'debugging': getAppVar('debugging'),
+        'docsdir': docsdir,
+        'extComment': extComment,
+        'extDD': extDD,
+        'extName': extName,
+        'extVers': extVers,
+        'extensionPath': makeUrl(getHostExt(),"",full=True),
+        'homedir': homedir,
+        'host_ext': getHostExt(),
+        'mybasehost': getBaseHost(),
+        'myhost': getHost(),
+        'myport': getHostPort(),
+        'sitemode': sitemode,
+        'sitename': SdoConfig.getname(),
+        'staticPath': homedir,
+        'targethost': makeUrl("","",full=True),
+        'vocabUri': SdoConfig.vocabUri()
+    }
+
+    if values:
+        defvars.update(values)
+    template = JINJA_ENVIRONMENT.get_template(templateName)
+    return template.render(defvars)
+def oldtemplateRender(templateName, node, values=None):
+    global sitemode #,sitename
+    log.info("templateRender(%s,%s,%s)" % (templateName, node, values))
+    log.info("getHostExt %s" % getHostExt())
+
+
     if isinstance(node, Unit):
         node = node.id
-
+
     extDef = Unit.GetUnit(getNss(getHostExt()),True)
     extComment = ""
     extVers = ""
@@ -2779,9 +2870,9 @@ def templateRender(templateName, node, values=None):
     #log.info("EXDEF '%s'" % extDef)
     if extDef:
         extComment = GetComment(extDef,ALL_LAYERS)
-        if extComment == "No comment":
+        if extComment == "-":
             extComment = ""
-        extDDs = GetTargets(Unit.GetUnit("disambiguatingDescription", True), extDef, layers=ALL_LAYERS )
+        extDDs = GetTargets(Unit.GetUnit("schema:disambiguatingDescription", True), extDef, layers=ALL_LAYERS )
         if len(extDDs) > 0:
             extDD = Markdown.parse(extDDs[0])
         else:
@@ -2796,7 +2887,7 @@ def templateRender(templateName, node, values=None):
             extVers += Markdown.parse(ver)
         if len(extVers) :
             extVers += ")"
-        nms = GetTargets(Unit.GetUnit("name",
True), extDef, layers=ALL_LAYERS ) + nms = GetTargets(Unit.GetUnit("schema:name", True), extDef, layers=ALL_LAYERS ) if len(nms) > 0: extName = nms[0] if node.startswith("docs/"): @@ -2808,8 +2899,9 @@ def templateRender(templateName, node, values=None): defvars = { 'ENABLE_HOSTED_EXTENSIONS': ENABLE_HOSTED_EXTENSIONS, 'SCHEMA_VERSION': SCHEMA_VERSION, + 'SUBDOMAINS': SUBDOMAINS, 'sitemode': sitemode, - 'sitename': getSiteName(), + 'sitename': SdoConfig.getname(), 'staticPath': homedir, 'extensionPath': makeUrl(getHostExt(),"",full=True), 'myhost': getHost(), @@ -2822,7 +2914,7 @@ def templateRender(templateName, node, values=None): 'extDD': extDD, 'extVers': extVers, 'extName': extName, - 'targethost': makeUrl("","",full=True), + 'targethost': makeUrl("","",full=True), 'debugging': getAppVar('debugging'), 'appengineVersion': getAppEngineVersion() } @@ -2906,7 +2998,7 @@ def makeUrl(ext="",path="",full=False,scheme=None): if full: if not scheme: scheme = getHttpScheme() - + targethost = os.environ.get("TARGETSITE",getBaseHost()) url = "%s://%s%s%s%s" % (scheme,sub,targethost,port,p) @@ -2914,9 +3006,9 @@ def makeUrl(ext="",path="",full=False,scheme=None): url = "%s" % (p) return url -def getPageFromStore(id,ext=None): +def getPageFromStore(id,ext=None,enableFlush=True): cached = PageStore.get(id,ext) - if cached and "_pageFlush" in getArguments(): + if enableFlush and cached and "_pageFlush" in getArguments(): log.info("Reloading page for %s" % id) PageStore.remove(id,ext) cached = None @@ -2924,12 +3016,16 @@ def getPageFromStore(id,ext=None): schemasInitialized = False def load_schema_definitions(): - #log.info("STARTING UP... reading schemas.") + log.info("STARTING UP... reading schemas.") #load_graph(loadExtensions=ENABLE_HOSTED_EXTENSIONS) global schemasInitialized - read_schemas(loadExtensions=ENABLE_HOSTED_EXTENSIONS) - if ENABLE_HOSTED_EXTENSIONS: - read_extensions(ENABLED_EXTENSIONS) + if SdoConfig.isValid(): + read_schemas(SdoConfig.termFiles()) + load_usage_data(SdoConfig.countsFiles()) + else: + read_local_schemas(loadExtensions=ENABLE_HOSTED_EXTENSIONS) + if ENABLE_HOSTED_EXTENSIONS: + read_extensions(ENABLED_EXTENSIONS) schemasInitialized = True LOADINGSOURCE = None @@ -2953,9 +3049,11 @@ def load_sources(): if not LOADEDSOURCES and not LOADINGSOURCE: # Check again in case things have changed in above loop LOADINGSOURCE = datetime.datetime.now() load_start = datetime.datetime.now() + load_schema_definitions() + log.info(("[%s] Term definitions load took %s " % (getInstanceId(short=True),(datetime.datetime.now() - load_start)))) + load_start = datetime.datetime.now() load_examples_data(ENABLED_EXTENSIONS) log.info(("[%s] Examples load took %s " % (getInstanceId(short=True),(datetime.datetime.now() - load_start)))) - load_schema_definitions() LOADEDSOURCES=True LOADINGSOURCE=None diff --git a/sdocloudstore.py b/sdocloudstore.py index 98e532d875..52800a8708 100644 --- a/sdocloudstore.py +++ b/sdocloudstore.py @@ -26,7 +26,7 @@ initial_delay=0.2, max_delay=5.0, backoff_factor=2, max_retry_period=15 )) # [END retries] -BUCKETROOT = "schemaorg" +BUCKETROOT = "sdoapp" DEFAULTCURRENT = "TestData" CLOUDCACHEENABLE = False CLOUDAUTOAPPENDHTMLEXT = False diff --git a/sdoconfig.json b/sdoconfig.json new file mode 100644 index 0000000000..29a487c8b4 --- /dev/null +++ b/sdoconfig.json @@ -0,0 +1,79 @@ +{ + "@context": { + "@vocab": "http://configfiles.schema.org/" + }, + "@type": "DataFeed", + "name": "schema.org", + "prefix": "schema", + "siteurl": "https://schema.org", + "vocaburl": 
"http://schema.org/", + "atticurl": "http://attic.schema.org/", + "dataFeedVar": [{"SCHEMAORGLOC": "."}], + "include": "sdoconfigTermsData.json", + "extensiondescription": [ + { + "@type": "VocabDescriptor", + "id": "http://bib.schema.org", + "extension": "bib", + "name": "Bibliographic Extension", + "softwareVersion": "1.0", + "disambiguatingDescription": "terms relating to books and bibliography.", + "comment": "The Bibliographic Extension within [schema.org](http://schema.org) defines terms such as [[Audiobook]], [[Thesis]], [[ComicStory]], and [[workTranslation]]. For more details see the W3C BibExtend Community Group's [wiki](http://www.w3.org/community/schemabibex/wiki/Bib.schema.org-1.0)." + }, + { + "@type": "VocabDescriptor", + "id": "http://meta.schema.org", + "extension": "meta", + "name": "Meta Extension", + "softwareVersion": "1.0", + "disambiguatingDescription": "terms used within schema.org for schema definitions.", + "comment": "The meta extension contains terms primarily designed to support the implementation of the Schema.org vocabulary itself. It includes terms such as [[Class]], [[Property]], [[domainIncludes]] and [[supersededBy]]. They are not currently advocated for widespread use across the web." + }, + { + "@type": "VocabDescriptor", + "id": "http://attic.schema.org", + "extension": "attic", + "name": "Attic Area", + "disambiguatingDescription": "these terms have been either deprecated from the core or extentions, or removed from [pending](/docs/howwework.html#pending) as not accepted into the full vocabulary.", + "comment": "The attic area is an archive area for terms which are no longer part of the core vocabulary or its extensions. _Attic_ terms are preserved here to satisfy previous links to them.\n\nImplementors and publishers are gently encouraged not to use terms in the attic area." + }, + { + "@type": "VocabDescriptor", + "id": "http://pending.schema.org", + "extension": "pending", + "name": "Pending Extension", + "disambiguatingDescription": "these terms are [pending](/docs/howwework.html#pending) wider review. Feedback is welcomed!", + "comment": "The pending extension is a staging area for work-in-progress terms which have yet to be accepted into the core vocabulary. _Pending_ terms are subject to change and should be used with caution.\n\nImplementors and publishers are cautioned that terms in the pending extension may lack consensus and that terminology and definitions could still change significantly after community and steering group review. Consumers of schema.org data who encourage use of such terms are strongly encouraged to update implementations and documentation to track any evolving changes, and to share early implementation feedback with the wider community." + }, + { + "@type": "VocabDescriptor", + "id": "http://auto.schema.org", + "extension": "auto", + "name": "Auto Extension", + "softwareVersion": "1.0", + "disambiguatingDescription": "terms relating to automobiles.", + "comment": "You are viewing the Auto Extension within [schema.org](http://schema.org/). It defines terms such as [[MotorizedBicycle]] and adds terms to [[Car]]. For more details see the W3C [Automotive Ontology Working Group](https://www.w3.org/community/gao/)." 
+ }, + { + "@type": "VocabDescriptor", + "id": "http://health-lifesci.schema.org", + "extension": "health-lifesci", + "name": "Health and Lifesciences Extension", + "softwareVersion": "0.8", + "disambiguatingDescription": "terms relating to healthcare, medicine and the life sciences.", + "comment": "You are viewing the Health and Lifesciences extension within [schema.org](http://schema.org). It defines many medical terms such as [[MedicalCondition]], [[AnatomicalStructure]] and [[MedicalSpecialty]]. See full list below. \n\nThe terms defined in this extension may be considered moderately stable, but some changes are still likely (including renaming and restructuring) through ongoing community collaboration. Organizations using this data are encouraged to join the community discussions linked here, and share their usage scenarios to help improve the schemas.\n\nThe schema.org [medical vocabulary](/docs/meddocs.html) was [originally created](http://blog.schema.org/2012/06/health-and-medical-vocabulary-for.html) in collaboration with [WikiDoc](http://www.wikidoc.org/index.php/Main_Page) and others. Recent improvements including the migration into the health-lifesci extension has been led by the Healthcare Schema ([schemed](https://www.w3.org/community/schemed/)) community group at W3C. The health-lifesci schema.org extension is [available](https://github.com/schemaorg/schemaorg/issues/1116) for other collaborative schema.org extensions in topics centred on healthcare, medicine and the lifesciences (e.g. see also [Bioschemas](https://github.com/schemaorg/schemaorg/issues/1028), and the [pending](http://pending.schema.org/) proposals for describing [US healthcare insurance networks](https://github.com/schemaorg/schemaorg/issues/1062))." + } + ], + "dataFeedElement": [ + { + "@type": "DataDownload", + "fileContent": "templates", + "contentLocation": "[[SCHEMAORGLOC]]/templates" + }, + { + "@type": "DataDownload", + "fileContent": "counts", + "contentFile": "[[SCHEMAORGLOC]]/data/2015-04-vocab_counts.txt" + } + ] +} \ No newline at end of file diff --git a/sdoconfigTermsData.json b/sdoconfigTermsData.json new file mode 100644 index 0000000000..00c216aef7 --- /dev/null +++ b/sdoconfigTermsData.json @@ -0,0 +1,292 @@ +{ + "@context": { + "@vocab": "http://configfiles.schema.org/" + }, + "@type": "DataFeed", + "name": "schema.org", + "dataFeedElement": [ + { + "@type": "DataDownload", + "fileContent": "docs", + "contentLocation": "[[SCHEMAORGLOC]]/docs", + "contentFile": [ + "about.html", + "actions.html", + "automotive-img/VehicleHierarchy.png", + "automotive-img/VehicleEngineSteeringPositionAndCarUsageSpecification.png", + "automotive-img/VehicleTypeAndItsProperties.png", + "automotive-img/CarHierarchy.png", + "automotive-img/VehicleOffer.png", + "automotive-img/CarAcrissCodeAndRoofLoad.png", + "automotive-img/VehicleTypeAndItsSubtypes.png", + "automotive-img/VehicleDriveAndWheelConfiguration.png", + "automotive.html", + "cg/sdo-wot-tpac-2016-09.txt", + "cg/sdo-wot-tpac-2016-09.pdf", + "datamodel.html", + "documents.html", + "extension.html", + "faq.html", + "favicon.ico", + "feedback.html", + "financial-img/8.png", + "financial-img/4.png", + "financial-img/5.png", + "financial-img/7.png", + "financial-img/6.png", + "financial-img/2.png", + "financial-img/3.png", + "financial-img/1.png", + "financial.html", + "full_md.html", + "gs.html", + "hotels.html", + "howwework.html", + "iot-gettingstarted.html", + "kickoff-workshop/sw1109_Vocabulary_LRMI.pdf", + "kickoff-workshop/rnews_schema.pdf", + 
"kickoff-workshop/sw1109_Vocabulary_GoodRelations.pdf", + "kickoff-workshop/sw1109_Implementation.pdf", + "kickoff-workshop/rdfa_lite.html", + "kickoff-workshop/sw1109_Welcome.pdf", + "kickoff-workshop/sw1109_Vocabulary.pdf", + "kickoff-workshop/sw1109_Vocabulary_MSTechNet.pdf", + "kickoff.html", + "meddocs.html", + "news.html", + "old_extension.html", + "prettify.css", + "prettify.js", + "qa.html", + "releases.html", + "robots-blockall.txt", + "robots.txt", + "schema_hotels_1.png", + "schemaicon.png", + "schemaorg.css", + "schemaorg.owl", + "search_results.html", + "sitemap.xml", + "terms.html" + ] + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data", + "contentFile": [ + "gr-property-acks.rdfa", + "mappings.rdfa", + "schema.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "examples", + "contentLocation": "[[SCHEMAORGLOC]]/data", + "contentFile": [ + "issue-1004-examples.txt", + "issue-1100-examples.txt", + "sdo-ClaimReview-issue-1061-examples.txt", + "sdo-automobile-examples.txt", + "sdo-course-examples.txt", + "sdo-creativework-examples.txt", + "sdo-datafeed-examples.txt", + "sdo-digital-document-examples.txt", + "sdo-exhibitionevent-examples.txt", + "sdo-fibo-examples.txt", + "sdo-hotels-examples.txt", + "sdo-howto-examples.txt", + "sdo-identifier-examples.txt", + "sdo-invoice-examples.txt", + "sdo-itemlist-examples.txt", + "sdo-library-examples.txt", + "sdo-lrmi-examples.txt", + "sdo-mainEntity-examples.txt", + "sdo-map-examples.txt", + "sdo-menu-examples.txt", + "sdo-music-examples.txt", + "sdo-offeredby-examples.txt", + "sdo-periodical-examples.txt", + "sdo-property-value-examples.txt", + "sdo-screeningevent-examples.txt", + "sdo-service-examples.txt", + "sdo-social-media-examples.txt", + "sdo-sponsor-examples.txt", + "sdo-sports-examples.txt", + "sdo-tourism-examples.txt", + "sdo-trip-examples.txt", + "sdo-tv-listing-examples.txt", + "sdo-userinteraction-examples.txt", + "sdo-videogame-examples.txt", + "sdo-visualartwork-examples.txt", + "sdo-website-examples.txt" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/attic", + "contentFile": [ + "attic.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/auto", + "contentFile": [ + "auto.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/bib", + "contentFile": [ + "bsdo-1.0.rdfa", + "comics.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "examples", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/bib", + "contentFile": [ + "bsdo-agent-examples.txt", + "bsdo-atlas-examples.txt", + "bsdo-audiobook-examples.txt", + "bsdo-chapter-examples.txt", + "bsdo-collection-examples.txt", + "bsdo-newspaper-examples.txt", + "bsdo-thesis-examples.txt", + "bsdo-translation-examples.txt", + "comics-examples.txt" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/health-lifesci", + "contentFile": [ + "med-health-core.rdfa", + "physical-activity-and-exercise.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "examples", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/health-lifesci", + "contentFile": [ + "MedicalScholarlyArticle-examples.txt", + "medicalCondition-examples.txt", + "medicalGuideline-examples.txt", + "medicalWebpage-examples.txt" + ] + + }, + { + "@type": "DataDownload", + 
"fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/meta", + "contentFile": [ + "meta.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "terms", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/pending", + "contentFile": [ + "boilerplate-1375.rdfa", + "boilerplate-195.rdfa", + "issue-1045.rdfa", + "issue-1050.rdfa", + "issue-1062.rdfa", + "issue-1083.rdfa", + "issue-1125.rdfa", + "issue-1156.rdfa", + "issue-1253.rdfa", + "issue-1262.rdfa", + "issue-1293.rdfa", + "issue-1375.rdfa", + "issue-1389.rdfa", + "issue-1397.rdfa", + "issue-1401.rdfa", + "issue-1423.rdfa", + "issue-1425.rdfa", + "issue-1457.rdfa", + "issue-1495.rdfa", + "issue-1525.rdfa", + "issue-1541.rdfa", + "issue-1559.rdfa", + "issue-1576.rdfa", + "issue-1577.rdfa", + "issue-1589.rdfa", + "issue-1591.rdfa", + "issue-1624.rdfa", + "issue-1670.rdfa", + "issue-1672.rdfa", + "issue-1688.rdfa", + "issue-1689.rdfa", + "issue-1698.rdfa", + "issue-1723.rdfa", + "issue-1741.rdfa", + "issue-1779.rdfa", + "issue-1810.rdfa", + "issue-1828.rdfa", + "issue-1886.rdfa", + "issue-195.rdfa", + "issue-1950.rdfa", + "issue-2083.rdfa", + "issue-2085.rdfa", + "issue-243.rdfa", + "issue-271.rdfa", + "issue-383.rdfa", + "issue-447.rdfa", + "issue-743.rdfa", + "issue-894.rdfa" + ] + + }, + { + "@type": "DataDownload", + "fileContent": "examples", + "contentLocation": "[[SCHEMAORGLOC]]/data/ext/pending", + "contentFile": [ + "issue-1045-examples.txt", + "issue-1050-examples.txt", + "issue-1062-examples.txt", + "issue-1156-examples.txt", + "issue-1253-examples.txt", + "issue-1389-examples.txt", + "issue-1423-examples.txt", + "issue-1457-examples.txt", + "issue-1525-examples.txt", + "issue-1670-examples.txt", + "issue-1689-examples.txt", + "issue-1698-examples.txt", + "issue-1741-examples.txt", + "issue-1779-examples.txt", + "issue-1810-examples.txt", + "issue-1950-examples.txt", + "issue-2083-examples.txt", + "issue-2085-examples.txt", + "issue-271-examples.txt", + "issue-383-examples.txt", + "issue-447-examples.txt", + "issue-894-examples.txt", + "issue-template-examples.txt" + ] + + } + ] +} diff --git a/sdodatacomconfig.json b/sdodatacomconfig.json new file mode 100644 index 0000000000..4d7ebece2d --- /dev/null +++ b/sdodatacomconfig.json @@ -0,0 +1,43 @@ +{ + "@context": { + "@vocab": "http://configfiles.schema.org/" + }, + "@type": "DataFeed", + "name": "schema.dataCommons.org", + "prefix": "schemadc", + "siteurl": "https://schema.datacommons.org", + "vocaburl": "https://schema.datacommons.org/", + "dataFeedElement": [ + { + "@type": "DataDownload", + "contentLocation": "https://raw.githubusercontent.com/schemaorg/schemaorg/vocabindi/datacoms/docs", + "contentFile": [ + "favicon.ico", + "prettify.css", + "prettify.js", + "schemaorg.css", + "about.html", + "documents.html" + ], + "fileContent": "docs" + }, + { + "@type": "DataDownload", + "contentLocation": "https://raw.githubusercontent.com/google/datacommons/master/schema", + "contentFile": "datacommons.rdfa", + "fileContent": "terms" + }, + { + "@type": "DataDownload", + "contentLocation": "https://raw.githubusercontent.com/google/datacommons/master/schema/templates", + "fileContent": "templates" + }, + { + "@type": "DataDownload", + "addPrefix": "schema", + "addVocaburl": "http://schema.org/", + "contentFile": "https://raw.githubusercontent.com/schemaorg/schemaorg/master/data/schema.rdfa", + "fileContent": "terms" + } + ] +} \ No newline at end of file diff --git a/sdordf2csv.py b/sdordf2csv.py index 0b09d306b2..e7ac430edb 100644 --- a/sdordf2csv.py +++ 
b/sdordf2csv.py @@ -4,6 +4,7 @@ from rdflib.term import URIRef import threading +import api from apimarkdown import Markdown from apirdflib import RDFLIBLOCK @@ -27,6 +28,10 @@ def setfullGraph(self,graph=None): def setexcludeAttic(self,state): self.excludeAttic = state + self.attic = api.SdoConfig.atticUri() + if not self.attic: + self.excludeAttic = False + def setmarkdownComments(self,state): self.markdown = state @@ -43,16 +48,15 @@ def doQuery(self,graph=None,query=None): def outputCSVtypes(self,file): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf }." + atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf <%s>}." % self.attic query= ('''select ?term where { ?term a ?type. - BIND(STR(?term) AS ?strVal) FILTER NOT EXISTS {?term a rdf:Property}. - FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/"). + FILTER (strstarts(str(?term),'%s')). %s } ORDER BY ?term - ''') % atticfilter + ''') % (api.SdoConfig.vocabUri(),atticfilter) try: RDFLIBLOCK.acquire() types = list(self.queryGraph.query(query)) @@ -66,15 +70,14 @@ def outputCSVtypes(self,file): def outputCSVproperties(self,file): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf }." + atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf <%s>}." % self.attic query= ('''select ?term where { ?term a rdf:Property. FILTER EXISTS {?term rdfs:label ?l}. - BIND(STR(?term) AS ?strVal). - FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/"). + FILTER (strstarts(str(?term),'%s')). %s } - ORDER BY ?term''') % atticfilter + ORDER BY ?term''') % (api.SdoConfig.vocabUri(),atticfilter) props = list(self.queryGraph.query(query)) self.prop2CSV(header=True,out=file) for t in props: @@ -160,10 +163,8 @@ def graphValueToCSV(self, subject=None, predicate= None, object= None, graph=Non def getCSVSupertypes(self,term=None,graph=None): query='''select ?sup where{ <%s> rdfs:subClassOf ?sup. - BIND(STR(?sup) AS ?strVal) - FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") - } - ORDER BY ?sup''' % term + FILTER (strstarts(str(?sup),'%s')). } + ORDER BY ?sup''' % (term,api.SdoConfig.vocabUri()) res = self.doQuery(graph,query) ret = ', '.join([x.sup for x in res]) @@ -172,7 +173,7 @@ def getCSVSupertypes(self,term=None,graph=None): def getCSVTypeProperties(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?prop schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?prop schema:isPartOf <%s>.}" % self.attic query='''select DISTINCT ?prop where{ ?term (^rdfs:subClassOf*) <%s>. ?prop ?term. @@ -187,7 +188,7 @@ def getCSVTypeProperties(self,term=None,graph=None): def getCSVSubtypes(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf <%s>.}" % self.attic query='''select ?sub where{ ?sub rdfs:subClassOf <%s>. %s @@ -201,7 +202,7 @@ def getCSVSubtypes(self,term=None,graph=None): def getCSVSupersededBy(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf <%s>.}" % self.attic query='''select ?sup where{ <%s> schema:supersededBy ?sup. 
%s @@ -215,7 +216,7 @@ def getCSVSupersededBy(self,term=None,graph=None): def getCSVsuperseds(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?sup schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?sup schema:isPartOf <%s>.}" % self.attic query='''select ?sup where{ ?sup schema:supersededBy <%s>. %s @@ -229,10 +230,8 @@ def getCSVsuperseds(self,term=None,graph=None): def getCSVSuperProperties(self,term=None,graph=None): query='''select ?sup where{ <%s> rdfs:subPropertyOf ?sup. - BIND(STR(?sup) AS ?strVal) - FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") - } - ORDER BY ?sup''' % term + FILTER (strstarts(str(?sup),'%s')) } + ORDER BY ?sup''' % (term,api.SdoConfig.vocabUri()) res = self.doQuery(graph,query) ret = ', '.join([x.sup for x in res]) #print "%s subtypeof: '%s'" % (term,ret) @@ -241,7 +240,7 @@ def getCSVSuperProperties(self,term=None,graph=None): def getCSVSubProperties(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?sub schema:isPartOf <%s>.}" % self.attic query='''select ?sub where{ ?sub rdfs:subPropertyOf <%s>. %s @@ -255,7 +254,7 @@ def getCSVSubProperties(self,term=None,graph=None): def getCSVDomainIncludes(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?type schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?type schema:isPartOf <%s>.}" % self.attic query='''select ?type where{ <%s> ?type. %s @@ -269,7 +268,7 @@ def getCSVDomainIncludes(self,term=None,graph=None): def getCSVRangeIncludes(self,term=None,graph=None): atticfilter = "" if self.excludeAttic: - atticfilter = "FILTER NOT EXISTS {?type schema:isPartOf .}" + atticfilter = "FILTER NOT EXISTS {?type schema:isPartOf <%s>.}" % self.attic query='''select ?type where{ <%s> ?type. %s @@ -288,7 +287,7 @@ def getCSVComment(self,term=None,graph=None): ret = ', '.join([x.com for x in res]) #print "SUBTYPES of %s: '%s'" % (term,ret) if self.markdown: - Markdown.setPre("http://schema.org/") + Markdown.setPre(api.SdoConfig.vocabUri()) ret = Markdown.parse(ret) Markdown.setPre() return ret diff --git a/sdoutil.py b/sdoutil.py index 3ca5ed240b..37f3b37656 100644 --- a/sdoutil.py +++ b/sdoutil.py @@ -4,7 +4,11 @@ import io import threading +import os +import os.path +import fnmatch +SDOCONFIG=None from google.appengine.api import app_identity from google.appengine.api import mail @@ -48,4 +52,26 @@ def setAppVar(var,val): CLOUDEXTRAMETA = "CloudExtraMeta" +def full_path(filename): + """convert local file name to full path.""" + import os.path + folder = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(folder, filename) + +def glob_from_dir(adir, pattern, source="local"): + log.info("glob-from-dir '%s', '%s', %s" % (adir,pattern,source)) + files = [] + try: + if source == "local": + for file in os.listdir(adir): + if fnmatch.fnmatch(file,pattern): + files.append(adir + "/" + file) + except Exception as e: + log.error("Exception from within glob_from_dir: %s: %s" % (e,e.message)) + + return files + + + + diff --git a/templates/docsBasicPageHeader.tpl b/templates/docsBasicPageHeader.tpl index 45536663df..9996b790d8 100644 --- a/templates/docsBasicPageHeader.tpl +++ b/templates/docsBasicPageHeader.tpl @@ -6,7 +6,7 @@
            diff --git a/templates/full.tpl b/templates/full.tpl index eefa3f8e69..5474830a17 100644 --- a/templates/full.tpl +++ b/templates/full.tpl @@ -46,11 +46,10 @@ $(document).ready(function(){ Schema.org is defined as two hierarchies: one for textual property values, and one for the things that they describe.
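The template changes below pair with the reworked handleFullHierarchyPage(): each tree block is now emitted only when the corresponding value is non-empty. A toy illustration of that contract (hypothetical values; the real strings are HTML built by TypeHierarchyTree):

    # Empty strings make the matching {% if %} block in full.tpl render nothing.
    trees = {
        "full_thing_tree": "<ul>...Thing hierarchy...</ul>",  # hypothetical HTML
        "datatype_tree": "",  # suppressed: no DataType tops were found
    }
    rendered = [name for name, html in trees.items() if html != ""]
    print(rendered)  # -> ['full_thing_tree']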

            -

            Thing

            -

            This is the main schema.org hierarchy: a collection of types (or "classes"), each of which has one or more parent types. Although a type may have more than one super-type, here we show each type in one branch of the tree only. There is also a parallel hierarchy for data types.


            + -
            -{{ thing_tree | safe }} -
            -
            +{% if thing_tree != "" %} +
            + {{ thing_tree | safe }} +
            +{% endif %} +{% if full_thing_tree != "" %} + +
            {{ full_thing_tree | safe }}
            -{% if ext_button != "" %} -
            - {{ ext_thing_tree | safe }} -
            {% endif %} -
            -{{ datatype_tree | safe }} -
            +{% if ext_thing_tree != "" %} + {% if ext_button != "" %} +
            + {{ ext_thing_tree | safe }} +
            + {% endif %} +{% endif %} +{% if datatype_tree != "" %} +
            + {{ datatype_tree | safe }} +
            +{% endif %} diff --git a/templates/genericTermPageHeader.tpl b/templates/genericTermPageHeader.tpl index c76aa2aed7..8481655982 100644 --- a/templates/genericTermPageHeader.tpl +++ b/templates/genericTermPageHeader.tpl @@ -62,8 +62,7 @@ {% include 'basicPageHeader.tpl' with context %} - -
            +
{{ ext_mappings | safe }}
diff --git a/templates/topnotes.tpl b/templates/topnotes.tpl
index 38c45b7523..7bb318c990 100644
--- a/templates/topnotes.tpl
+++ b/templates/topnotes.tpl
@@ -1,14 +1,14 @@
-{% if mybasehost in [ "webschemas.org", "localhost"] %}
+{% if mybasehost in [ "localhost"] %}
      Note: you are viewing the
-     webschemas.org development
-     version of schema.org.
+     development
+     version of {{ sitename }}.
      See How we work for more details.
{% endif %}
-{% if sitename != "schema.org" %}
+{% if sitename != "schema.org" and host_ext != "" and extName != "" and extDD != "" and SUBDOMAINS == True %}
            core + {{host_ext}} ({{extName}}): {{extDD|safe}}
diff --git a/webschemas.yaml b/webschemas.yaml
index 5fbd6c92e8..357daf08a5 100644
--- a/webschemas.yaml
+++ b/webschemas.yaml
@@ -19,6 +19,8 @@ inbound_services:
 env_variables:
   TARGETSITE: 'webschemas.org'
   PRODSITEDEBUG: 'False'
+  CONFIGFILE: 'sdoconfig.json'
+  MOREBLOCK: 'True'
   WARMUPSTATE: 'Auto' # 'Off', 'On', 'Auto' - Off for localhost, On elsewhere
   STAYINEXTENTION: 'False'
   PAGESTOREMODE: 'CLOUDSTORE' # 'INMEM' (In instance memory), 'NDBSHARED' (NDB shared - accross instances), 'CLOUDSTORE' (Cloudstorage files)
@@ -26,6 +28,7 @@ env_variables:
   TIMESTAMPSTOREMODE: 'CLOUDSTORE' # 'INMEM', 'NDBSHARED', 'CLOUDSTORE'
#  CACHE_CONTROL: 'public, max-age=600'
   CACHE_CONTROL: 'no-cache'
+  SUBDOMAINS: 'False' #'False' no subdomains, 'True' for pending.schema.org, bib.schema.org etc.

 handlers:
@@ -101,6 +104,14 @@ handlers:
 #  upload: static/index.html
 #  application_readable: True

+- url: /admin/refresh
+  login: required
+  script: sdoapp.app
+
+- url: /admin
+  static_dir: admin
+  application_readable: True
+
 - url: /search_files
   static_dir: static/search_files
   secure: always
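Taken together with sdoconfig.json above: CONFIGFILE points the instance at a DataFeed config, and locations in that config may carry [[VAR]] placeholders resolved from dataFeedVar. A sketch of how such a loader could behave; the regex and helper names here are assumptions for illustration, not code from this patch:

    import json, os, re

    VARSUB = re.compile(r"\[\[([\w -]+)\]\]")  # matches [[SCHEMAORGLOC]]-style tokens

    def expand(value, feed_vars):
        # Replace known [[VAR]] tokens; leave unknown ones untouched.
        return VARSUB.sub(lambda m: feed_vars.get(m.group(1), m.group(0)), value)

    def load_config():
        # CONFIGFILE and SUBDOMAINS mirror the env_variables added above.
        subdomains = os.environ.get("SUBDOMAINS", "True") == "True"
        with open(os.environ.get("CONFIGFILE", "sdoconfig.json")) as f:
            config = json.load(f)
        feed_vars = {}
        for entry in config.get("dataFeedVar", []):
            feed_vars.update(entry)
        locations = [expand(d.get("contentLocation", ""), feed_vars)
                     for d in config.get("dataFeedElement", [])]
        return subdomains, locations

    # With dataFeedVar [{"SCHEMAORGLOC": "."}], "[[SCHEMAORGLOC]]/templates"
    # expands to "./templates".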