Skip to content

Commit

Permalink
Syntactically upgraded the CGI script; version bumped
Browse files Browse the repository at this point in the history
  • Loading branch information
iherman committed Jan 22, 2020
1 parent c8c3864 commit 03080ca
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 52 deletions.
4 changes: 2 additions & 2 deletions pyRdfa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
@summary: RDFa parser (distiller)
@requires: Python version 2.5 or up; 2.7 is preferred
@requires: Python version 2.7 or python 3.8 or up
@requires: U{RDFLib<http://rdflib.net>}; version 3.X is preferred.
@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing (note that version 1.0b1 and 1.0b2 should be avoided, it may lead to unicode encoding problems)
@requires: U{httpheader<http://deron.meranda.us/python/httpheader/>}; however, a small modification had to be made to the original file, so for this reason, and to make distribution easier, this module (single file) is added to the package.
Expand All @@ -156,7 +156,7 @@
@var uri_schemes: List of registered (or widely used) URI schemes; used for warnings...
"""

__version__ = "3.5.3"
__version__ = "4.0.0"
__author__ = 'Ivan Herman'
__contact__ = 'Ivan Herman, ivan@w3.org'
__license__ = 'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
Expand Down
162 changes: 112 additions & 50 deletions scripts/CGI_RDFa.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python2.5
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
# Maintainer: Ivan Herman <ivan@w3.org>

Expand All @@ -15,30 +15,78 @@
"""

"""
$Id: RDFa.py,v 1.9 2012/03/12 11:06:47 ivan Exp $
$Id: RDFa.py,v 1.27 2018/05/23 08:57:19 carcone Exp $
"""

__version__ = "3.0"
__version__ = "4.0.0"
import cgi
import cgitb; cgitb.enable()
import cgitb
import sys, os
import StringIO
#import StringIO
#cgi.print_environ()

if sys.platform == "darwin" :
# this is my local machine
sys.path.insert(0,"/Users/ivan/W3C/dev/2004/PythonLib-IH")
sys.path.insert(0,"/Users/ivan/Library/Python")
sys.path.insert(0,"/Users/ivan/W3C/dev/2004/PythonLib-IH/rdfa-1.1")
sys.path.insert(0,'/Users/ivan/Library/Python')
sys.path.insert(0,'/Users/ivan/Library/Python/RDFa')
os.environ['PyRdfaCacheDir'] = '/Users/ivan/.pyrdfa-cache'
cgitb.enable()

else :
# this is the server on W3C
sys.path.insert(0,"/usr/local/lib/python2.4/site-packages/PythonLib-IH")
sys.path.insert(0,"/usr/local/lib/python2.4/site-packages/PythonLib-IH/rdfa-1.1")
# This will have to be updated for the Python3 installation!!!
# webencodings pip3 should also be done!!!
sys.path.insert(0,"/usr/lib/python2.7/dist-packages")
sys.path.insert(0,'/home/ivan/lib/python')
os.environ['PyRdfaCacheDir'] = '/usr/local/apache/cgi/cgi-bin-other/RDFa/data-local'
cgitb.enable(display=0, logdir="/home/nobody/tracebacks/")

from pyRdfa import processURI, RDFaError

# Register the RDFa JSON-LD serializer; for some reason installing via pip did not work
from rdflib.plugin import register, Serializer
register('json', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')

def err_message(msg) :
    """Print a complete "400 Invalid Input" HTML error page and exit.

    The page names the URI being processed, which is read from the
    module-level ``uri`` variable (not passed as an argument), followed by
    ``msg`` in its own paragraph when ``msg`` is non-empty.

    @param msg: additional detail to display; skipped when its length is 0
    @raise SystemExit: always; the function ends with ``sys.exit(1)``
    """
    # Imported before anything is printed, exactly as in the original, so a
    # missing module fails before any partial page is emitted.
    # NOTE(review): clean_print presumably escapes its arguments for HTML
    # output -- confirm against the cleanhtml module.
    from cleanhtml import clean_print

    # CGI headers, the blank separator line, then the fixed top of the page.
    opening_lines = (
        'Content-type: text/html; charset=utf-8',
        'Status: 400 Invalid Input',
        '',
        "<html>",
        "<head>",
        "<title>Error in RDFa processing</title>",
        "</head><body>",
        "<h1>Error in distilling RDFa</h1>",
        "<p>",
    )
    for line in opening_lines :
        print(line)

    clean_print("pyRdfa cannot process this URI: %s", uri)
    print("</p>")

    has_detail = len(msg) > 0
    if has_detail :
        print("<p>")
        clean_print(msg)
        print("</p>")

    for line in ("</body>", "</html>") :
        print(line)

    sys.exit(1)


def brett_test(uri) :
    """Run the remote-URL safety check on ``uri`` before it is dereferenced.

    On the local development machine (``sys.platform == "darwin"``) no check
    is made. Elsewhere ``checkremote.check_url_safety`` is called; any
    exception it raises is turned into an HTML error page via
    ``err_message``, which terminates the script.

    @param uri: the URI the user asked to have processed
    @return: None when the check passes (or is skipped)
    @raise SystemExit: indirectly, through ``err_message``, when the check fails
    """
    if sys.platform == "darwin" :
        return

    from checkremote import check_url_safety, UnsupportedResourceError
    # urllib2 only exists on Python 2; on Python 3 the same exception
    # classes live in urllib.error.
    try :
        from urllib.error import HTTPError, URLError
    except ImportError :
        from urllib2 import HTTPError, URLError

    try :
        check_url_safety(uri)
    except HTTPError as e :
        # HTTPError must be handled before URLError: it is a subclass of it.
        err_message('HTTP Error with the error code: %s and the error message: "%s"' % (e.code, e.reason))
    except URLError as e :
        # One-element tuple: the reason is formatted as a single value
        # whatever its type.
        err_message('URL Error with the error message: "%s"' % (e.reason,))
    except UnsupportedResourceError as e :
        # Join whatever arguments are present instead of assuming exactly
        # two string arguments.
        msg = ": ".join(str(arg) for arg in e.args)
        err_message('Unsupported Resource Error with the error message "%s"' % (msg,))
    except Exception as e :
        # Last-resort boundary: anything else is reported to the user too.
        l = len(e.args)
        msg = "" if l == 0 else (e.args[0] if l == 1 else e.args)
        # msg may itself be a tuple; wrapping it keeps "%s" from trying to
        # spread its elements over several format fields.
        err_message('Exception raised: "%s"' % (msg,))

#
# to make this thing exist...
uri = ""
Expand All @@ -50,53 +98,67 @@
uri = "text:"
else :
if not "uri" in form :
print 'Content-type: text/html; charset=utf-8'
print 'Status: 400 Invalid Input'
print
print "<html>"
print "<head>"
print "<title>Error in RDFa processing</title>"
print "</head><body>"
print "<h1>Error in distilling RDFa</h1>"
print "No URI has been specified"
print "</body>"
print "</html>"
print('Content-type: text/html; charset=utf-8')
print('Status: 400 Invalid Input')
print()
print("<html>")
print("<head>")
print("<title>Error in RDFa processing</title>")
print("</head><body>")
print("<h1>Error in distilling RDFa</h1>")
print("<p>No URI has been specified</p>")
print("</body>")
print("</html>")
sys.exit(1)

try :
#uri = form["uri"].value
uri = form.getfirst("uri")
except :
print 'Content-type: text/html; charset=utf-8'
print 'Status: 400 Invalid Input'
print
print "<html>"
print "<head>"
print "<title>Error in RDFa processing</title>"
print "</head><body>"
print "<h1>Error in distilling RDFa</h1>"
print "No URI has been specified"
print "</body>"
print "</html>"
print('Content-type: text/html; charset=utf-8')
print('Status: 400 Invalid Input')
print()
print("<html>")
print("<head>")
print("<title>Error in RDFa processing</title>")
print("</head><body>")
print("<h1>Error in distilling RDFa</h1>")
print("<p>No URI has been specified</p>")
print("</body>")
print("</html>")
sys.exit(1)

if "validate" in form :
from rdfavalidator import validateURI
print 'Content-Type: text/html; charset=utf-8'
print
print validateURI(uri, form)
if not (uri == 'text:' or uri == 'uploaded:') :
brett_test(uri)
print('Content-Type: text/html; charset=utf-8')
print()
print(validateURI(uri, form))
else :
# Thanks to Sergio and Diego for the idea and code for the referer branch
if uri == "referer" :
uri = os.getenv('HTTP_REFERER')
newuri = "http://www.w3.org/2012/pyRdfa/extract?uri=" + uri
print "Status: 302 Moved"
print "Location: " + newuri
print
else :
if "format" in form.keys() :
format = form.getfirst("format")
try :
# Thanks to Sergio and Diego for the idea and code for the referer branch
if uri == "referer" :
uri = os.getenv('HTTP_REFERER')
if uri is None:
newuri = "http://www.w3.org/2012/pyRdfa/no_referer.html"
else:
brett_test(uri)
newuri = "http://www.w3.org/2012/pyRdfa/extract?uri=" + uri
print("Status: 307 Moved Temporarily")
print("Location: " + newuri)
print()
else :
format = "turtle"
retval = processURI(uri, format, form)
print retval
# last point of check: use Brett's script to check the validity of the URI
if not (uri == 'text:' or uri == 'uploaded:') :
brett_test(uri)

if "format" in form.keys() :
format = form.getfirst("format")
else :
format = "turtle"
retval = processURI(uri, format, form)
print(retval)
except Exception as e :
l = len(e.args)
msg = "" if l == 0 else (e.args[0] if l == 1 else e.args)
err_message('Exception raised: "%s"' % msg)

0 comments on commit 03080ca

Please sign in to comment.