From ee5e637eed70e154451f210138b0b077f2f576e4 Mon Sep 17 00:00:00 2001 From: Filipe De Sousa Date: Tue, 7 May 2013 22:29:51 +0100 Subject: [PATCH] Working implementation using the wsgiref.handlers.CGIHandler implementation over the previous CGI implementation. Still only working for detailed predictions. More generic handling of the namespace-qualified tags, now in the initialiser of the base class. New base class for Status queries, StatusQuery: LineStatusQuery and StationStatusQuery now only implement the _parse_xml method, while StatusQuery implements the common methods _make_filename and _process_request. --- py/query.py | 237 +++++++++++++++++++++------------------------------ py/status.py | 9 +- py/tfl.py | 121 +++++++++++++++++--------- 3 files changed, 186 insertions(+), 181 deletions(-) diff --git a/py/query.py b/py/query.py index 3cd7de4..a2f85e4 100755 --- a/py/query.py +++ b/py/query.py @@ -8,6 +8,7 @@ import os import status import urllib2 +import logging try: import xml.etree.cElementTree as etree @@ -58,12 +59,16 @@ class BaseQuery(object): __metaclass__ = ABCMeta query = "" - cache_expiry_time = 0 + cache_expiry_time = 30 xmlns = "http://trackernet.lul.co.uk" + params = (REQUEST, ) + tags = {} def __init__(self, form): - self.request_url = self._process_request(form) - self.cache_filename = self._make_filename(form) + self.form = form + # Get strings for the namespace-qualified tags + for key, val in self.tags.items(): + self.tags[key] = etree.QName(self.xmlns, val).text @abstractmethod def _process_request(self, form): @@ -96,8 +101,7 @@ def _get_cache(self): except (IOError, OSError) as e: if e.errno == errno.ENOENT: return None - else: - raise + raise e def _make_folders(self, filename): foldername = os.path.dirname(filename) @@ -126,22 +130,27 @@ def _write_json(self, json): raise def fetch(self): - cached = self._get_cache() + self.request_url = self._process_request(self.form) + self.cache_filename = self._make_filename(self.form) + + resp_json = 
self._get_cache() - if cached is None: - res = self._request() + if not resp_json: + try: + res = self._request() - statuscode = status.StatusCodes.getstatuscode(int(res.getcode())) - if statuscode.iserror: - raise status.ResponseError(statuscode, 'Failed to fetch XML') - elif not statuscode.canhavebody: - raise status.ResponseError(statuscode, 'No content in response') + statuscode = status.StatusCodes.getstatuscode(int(res.getcode())) + if statuscode.iserror: + raise status.ResponseError(statuscode, 'Failed to fetch XML') + elif not statuscode.canhavebody: + raise status.ResponseError(statuscode, 'No content in response') - resp = self._get_xml(res) - resp_json = json.dumps(resp) - self._write_json(resp_json) - else: - resp_json = cached + resp = self._get_xml(res) + resp_json = json.dumps(resp) + self._write_json(resp_json) + except urllib2.HTTPError as httpe: + statuscode = status.StatusCodes.getstatuscode(int(httpe.code)) + raise status.RequestError(statuscode) return resp_json @@ -149,174 +158,124 @@ def fetch(self): class DetailedPredictionQuery(BaseQuery): """DetailedPredictionQuery""" query = PREDICTION_DETAILED - cache_expiry_time = 30 - - def __init__(self, form): - super(DetailedPredictionQuery, self).__init__(form) - # Namespace-qualified tags - self.created_tag = etree.QName(self.xmlns, 'WhenCreated').text - self.line_tag = etree.QName(self.xmlns, 'Line').text - self.linename_tag = etree.QName(self.xmlns, 'LineName').text - self.station_tag = etree.QName(self.xmlns, 'S').text - self.platform_tag = etree.QName(self.xmlns, 'P').text - self.train_tag = etree.QName(self.xmlns, 'T').text + params = (REQUEST, LINE, STATION) + tags = { + 'created_tag': 'WhenCreated', + 'line_tag': 'Line', + 'linename_tag': 'LineName', + 'station_tag': 'S', + 'platform_tag': 'P', + 'train_tag': 'T' + } def _process_request(self, form): - line = form.getfirst(LINE) - station = form.getfirst(STATION) + self.line = form[LINE] + self.station = form[STATION] - if line not in 
LINES_LIST: - raise ValueError("Line code '%s' is not valid" % line) - if not station: + if self.line not in LINES_LIST: + raise ValueError("Line code '{}' is not valid".format(self.line)) + if not self.station: raise ValueError("Station code is empty") - request_url = "{0}/{1}/{2}/{3}".format(BASE_URL, self.query, - line, station) - return request_url + url = "{0}/{1}/{2}/{3}".format(BASE_URL, self.query, + self.line, self.station) + return url def _make_filename(self, form): - line = form.getfirst(LINE) - station = form.getfirst(STATION) - - if line not in LINES_LIST: - raise ValueError("Line code '%s' is not valid" % line) - if not station: - raise ValueError("Station code is empty") - - cache_filename = os.path.join('.', BASE_FILE, self.query, line, station) - + cache_filename = os.path.join('.', BASE_FILE, self.query, + self.line, self.station) return cache_filename + FILE_EXTENSION def _parse_xml(self, xml): root = xml.getroot() - - resp = { - 'information': { - # Informational parts of response - 'created': root.find(self.created_tag).text, - 'linecode': root.find(self.line_tag).text, - 'linename': root.find(self.linename_tag).text, - 'stations': [ - # List of Stations - { - 'stationcode': station.attrib['Code'], - 'stationname': station.attrib['N'], - 'platforms': [ - # List of Platforms for a Station - { - 'platformname': platform.attrib['N'], - 'platformnumber': platform.attrib['Num'], - 'trains': [ - # List of Trains for a Platform - { - 'lcid': train.attrib['LCID'], - 'timeto': train.attrib['SecondsTo'], - 'location': train.attrib['TimeTo'], - 'destination': train.attrib['Destination'], - 'destcode': train.attrib['DestCode'], - 'tripno': train.attrib['TripNo'] - } - for train in platform.findall(self.train_tag) - ] # End of trains list comprehension - } - for platform in station.findall(self.platform_tag) - ] # End of platforms list comprehension - } - for station in root.findall(self.station_tag) - ] # End of stations list comprehension - } + resp = 
{'information': { + # Informational parts of response + 'created': root.find(self.tags['created_tag']).text, + 'linecode': root.find(self.tags['line_tag']).text, + 'linename': root.find(self.tags['linename_tag']).text, + 'stations': [{ + # List of Stations + 'stationcode': station.attrib['Code'], + 'stationname': station.attrib['N'], + 'platforms': [{ + # List of Platforms for a Station + 'platformname': platform.attrib['N'], + 'platformnumber': platform.attrib['Num'], + 'trains': [{ + # List of Trains for a Platform + 'lcid': train.attrib['LCID'], + 'timeto': train.attrib['SecondsTo'], + 'location': train.attrib['TimeTo'], + 'destination': train.attrib['Destination'], + 'destcode': train.attrib['DestCode'], + 'tripno': train.attrib['TripNo'] + } for train in platform.findall(self.tags['train_tag'])]} + # End of trains list comprehension + for platform in station.findall(self.tags['platform_tag'])]} + # End of platforms list comprehension + for station in root.findall(self.tags['station_tag'])]} + # End of stations list comprehension } - return resp class SummaryPredictionQuery(BaseQuery): """SummaryPredictionQuery""" query = PREDICTION_SUMMARY - cache_expiry_time = 30 - - def __init__(self, form): - super(SummaryPredictionQuery, self).__init__(form) + params = (REQUEST, LINE) def _process_request(self, form): - request = form.getfirst(REQUEST) - line = form.getfirst(LINE) + self.line = form[LINE] - if line not in LINES_LIST: - raise ValueError("Line code '%s' is not valid" % line) + if self.line not in LINES_LIST: + raise ValueError("Line code '{}' is not valid".format(self.line)) - request_url = "{0}/{1}/{2}".format(BASE_URL, PREDICTION_SUMMARY, line) + request_url = "{0}/{1}/{2}".format(BASE_URL, self.query, self.line) return request_url def _make_filename(self, form): - line = form.getfirst(LINE) - - if line not in LINES_LIST: - raise ValueError("Line code '%s' is not valid" % line) - - cache_filename = os.path.join('.', BASE_FILE, self.query, line) - + 
cache_filename = os.path.join('.', BASE_FILE, self.query, self.line) return cache_filename + FILE_EXTENSION def _parse_xml(self, xml): pass -class LineStatusQuery(BaseQuery): - """LineStatusQuery""" - query = LINE_STATUS - cache_expiry_time = 30 +class StatusQuery(BaseQuery): + __metaclass__ = ABCMeta - def __init__(self, form): - super(LineStatusQuery, self).__init__(form) + params = (REQUEST, INCIDENTS_ONLY) def _process_request(self, form): - incidents_only = form.getfirst(INCIDENTS_ONLY) - incidents_only = _parse_boolean(incidents_only) + self.incidents_only = _parse_boolean(form[INCIDENTS_ONLY]) - request_url = "{0}/{1}".format(BASE_URL, LINE_STATUS) - if incidents_only: + request_url = "{0}/{1}".format(BASE_URL, self.query) + if self.incidents_only: request_url = "{0}/{1}".format(request_url, INCIDENTS_ONLY) return request_url def _make_filename(self, form): - incidents_only = form.getfirst(INCIDENTS_ONLY) - incidents_only = _parse_boolean(incidents_only) - cache_filename = os.path.join('.', BASE_FILE, self.query, - INCIDENTS_ONLY if incidents_only else 'full') - + INCIDENTS_ONLY if self.incidents_only + else 'full') return cache_filename + FILE_EXTENSION + @abstractmethod def _parse_xml(self, xml): pass -class StationStatusQuery(BaseQuery): - """StationStatusQuery""" - query = STATION_STATUS - cache_expiry_time = 30 - - def __init__(self, form): - super(StationStatusQuery, self).__init__(form) - - def _process_request(self, form): - incidents_only = form.getfirst(INCIDENTS_ONLY) - incidents_only = _parse_boolean(incidents_only) - - request_url = "{0}/{1}".format(BASE_URL, STATION_STATUS) - if incidents_only: - request_url = "{0}/{1}".format(request_url, INCIDENTS_ONLY) - return request_url +class LineStatusQuery(StatusQuery): + """LineStatusQuery""" + query = LINE_STATUS - def _make_filename(self, form): - incidents_only = form.getfirst(INCIDENTS_ONLY) - incidents_only = _parse_boolean(incidents_only) + def _parse_xml(self, xml): + pass - cache_filename = 
os.path.join('.', BASE_FILE, self.query, - INCIDENTS_ONLY if incidents_only else 'full') - return cache_filename + FILE_EXTENSION +class StationStatusQuery(StatusQuery): + """StationStatusQuery""" + query = STATION_STATUS def _parse_xml(self, xml): pass @@ -326,9 +285,7 @@ class StationListQuery(BaseQuery): """StationListQuery""" query = "stationslist" cache_expiry_time = 2419200 # Four weeks in seconds - - def __init__(self, form): - super(StationListQuery, self).__init__(form) + params = (REQUEST, ) def _process_request(self, form): pass diff --git a/py/status.py b/py/status.py index 95ade21..9ce9220 100755 --- a/py/status.py +++ b/py/status.py @@ -102,6 +102,12 @@ def gethttpheader(cls, code): header = 'HTTP/1.1 {0.code} {0.message}'.format(statuscode) return header + @classmethod + def gethttpstatus(cls, code): + statuscode = cls.getstatuscode(code) + http_status = '{0.code} {0.message}'.format(statuscode) + return http_status + # Exceptions class BaseStatusError(Exception): @@ -109,7 +115,8 @@ def __init__(self, status=None, message=None): self.status = (StatusCodes.getstatuscode(status) or StatusCodes.HTTP_BAD_REQUEST) self.httpheader = StatusCodes.gethttpheader(self.status) - self.message = (message or self.httpheader) + self.httpstatus = StatusCodes.gethttpstatus(self.status) + self.message = message if self.status.canhavebody else None class RequestError(BaseStatusError): diff --git a/py/tfl.py b/py/tfl.py index 9d695a5..2b471d1 100755 --- a/py/tfl.py +++ b/py/tfl.py @@ -3,13 +3,12 @@ from __future__ import print_function from datetime import datetime -import cgi -import cgitb +from status import StatusCodes, RequestError, ResponseError +from wsgiref.handlers import CGIHandler import query -import status - -# cgitb.enable() -cgitb.enable(display=1) +import types +import urlparse +import logging # Queries for parse_args QUERIES = { @@ -20,41 +19,83 @@ query.STATIONS_LIST: query.StationListQuery } -def parse_query(form): - request = 
form.getfirst(query.REQUEST) - if request: - start_time = datetime.now() - query_class = QUERIES.get(request) - - if query_class: - query_inst = query_class(form) - return query_inst - else: - raise status.RequestError(status.StatusCodes.HTTP_BAD_REQUEST, - "Invalid request '{}'".format(request)) - else: - raise status.RequestError(status.StatusCodes.HTTP_BAD_REQUEST, - "Invalid empty request") - -def print_big(content, buffersize=8192): - content = str(content) - - for l in range(0, len(content) + 1, buffersize): - print(content[l:l + buffersize], end='') - -def main(): +SEQUENCES_TYPE = (set, dict, list, tuple) + +def parse_query(environ): + form = {} + query_string = environ['QUERY_STRING'].lower() + query_params = urlparse.parse_qs(query_string) + query_class = None + + # Resolve the query class to instantiate and return + try: + # Falls back to an empty list so the exception handler catches it + request = query_params.get(query.REQUEST, [])[0] + query_class = QUERIES[request] + except KeyError as ke: + raise RequestError(StatusCodes.HTTP_BAD_REQUEST, + "Invalid request '{}'".format(ke.message) + if ke.message else "Empty request") + except IndexError as ie: + raise RequestError(StatusCodes.HTTP_BAD_REQUEST, "Empty request") + + # Resolve the parameters that exist try: - form = cgi.FieldStorage() - req = parse_query(form) - resp = req.fetch() - - print_big("Content-Type: application/json\n\n" + resp) - except (status.RequestError, status.ResponseError) as re: - print_big("Content-Type: text/html\n" + re.httpheader + "\n") - if re.canhavebody and re.message: - print_big(re.message) + for param in query_class.params: + form[param] = query_params[param][0] + except KeyError as ke: + raise RequestError(StatusCodes.HTTP_BAD_REQUEST, + "Missing parameter '{}'".format(ke.message)) + + logging.info('Request: %s', form) + query_instance = query_class(form) + return query_instance + +def main(environ, start_response): + logging.basicConfig(filename='tfl.py.log', 
level=logging.DEBUG, + format='%(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + + status_code = StatusCodes.gethttpstatus(StatusCodes.HTTP_INTERNAL_SERVER_ERROR) + response_headers = [("Content-Type", "application/json; charset=UTF-8")] + response_body = [] + + start_time = datetime.now() + + try: + req = parse_query(environ) + response_body = req.fetch() + status_code = StatusCodes.gethttpstatus(StatusCodes.HTTP_OK) + except (RequestError, ResponseError) as re: + if re.status.iserror: + logging.exception('Error in request or response') + status_code = re.httpstatus + response_body = re.message if re.status.canhavebody else '' + except Exception as e: + logging.exception('Unknown exception processing request') + + end_time = datetime.now() + + try: + # Make sure we're passing a sensible sequence + if not isinstance(response_body, SEQUENCES_TYPE): + response_body = [response_body] + + # Make sure all elements of body are strings and total the length + content_length = 0 + for elem in response_body: + if not isinstance(elem, types.StringTypes): + elem = str(elem) + content_length += len(elem) + response_headers.append(("Content-Length", str(content_length))) + + logging.info('Response: %s; Start time: %s; End time: %s', + status_code, start_time, end_time) + start_response(status_code, response_headers) + return response_body except Exception as e: - cgitb.handler() + logging.exception('Unknown exception sending response') + finally: + logging.shutdown() if __name__ == '__main__': - main() + CGIHandler().run(main)