from itertools import chain
import sys
from urlparse import urlsplit
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.core.handlers.modpython import ModPythonRequest
from django.core.handlers.wsgi import WSGIRequest
from django.http import HttpResponse, QueryDict
from django.utils.text import unescape_entities
from django.utils.encoding import force_unicode
from django.utils.datastructures import MultiValueDict
from django.wsgi.django_to_wsgi import wsgi_app_from_django_view
from django.http import multipartparser as mpp
class _ClosingBody(object):
    """Iterable response body that forwards close() to the WSGI iterable.

    PEP 333 requires the gateway to call close() on the iterable returned
    by the application, if it has one.  The response body handed to Django
    is a chain of several iterables, which has no close() of its own, so
    this thin wrapper exposes one that delegates to the application's
    original iterable.
    """

    def __init__(self, chunks, app_iterable):
        self._chunks = chunks
        self._app_iterable = app_iterable

    def __iter__(self):
        return iter(self._chunks)

    def close(self):
        close = getattr(self._app_iterable, 'close', None)
        if close is not None:
            close()


def django_view_from_wsgi_app(wsgiapp,script_name=None):
    '''
    decorator to create a django view from a WSGI application

    The returned view builds a WSGI environ from the Django request
    (optionally rooted at 'script_name'), calls 'wsgiapp' with it, and
    wraps the status, headers and body produced by the application in an
    HttpResponse.
    '''
    def view(request):
        prefix_data = []
        output_headers = MultiValueDict()
        status_holder = ['']  # needs to be accessible from a closure. GAH

        def write(data):
            prefix_data.append(data)

        def start_response(status, headers, exc_info=None):
            # we actually don't have to do anything with exc_info here, because
            # response headers can't have been sent until at least the response
            # object is returned, which won't happen until the wsgi app is
            # finished executing.  so we actually want to do exactly the same
            # thing whether or not exc_info is included.
            status_holder[0] = int(status.split(' ', 1)[0])
            output_headers.clear()
            for name, value in headers:
                output_headers.appendlist(name, value)
            return write

        environ = _environ_from_request(request,script_name)
        output_iterable = wsgiapp(environ, start_response)
        body_iter = iter(output_iterable)

        # An application written as a generator does not run any of its
        # code (and so does not call start_response) until it is first
        # iterated.  WSGI only guarantees that start_response has been
        # called by the time the first body string is produced, so pull one
        # item before looking at the status and headers.
        first_chunks = []
        if not status_holder[0]:
            for chunk in body_iter:
                first_chunks.append(chunk)
                break

        # the data from all the write() calls followed by
        # the iterable of strings returned from the wsgi app
        fullbody = chain(prefix_data, first_chunks, body_iter)
        response = HttpResponse(_ClosingBody(fullbody, output_iterable))
        if status_holder[0]:
            response.status_code = status_holder[0]
        for key in output_headers:
            values = output_headers.getlist(key)
            if key.lower() == 'set-cookie':
                # NOTE(review): Set-Cookie values cannot be comma-joined and
                # the response holds one value per header name, so only the
                # last cookie header survives here (as it always has).
                for value in values:
                    response[key] = value
            else:
                # Repeated headers are equivalent to a single header with a
                # comma-separated value, so fold them instead of dropping
                # all but the last one.
                response[key] = ', '.join(values)
        return response
    return view
def django_view_dec_from_wsgi_middleware(wsgimware):
    """Build a Django view decorator out of a piece of WSGI middleware.

    The decorated view is first exposed as a WSGI application, then wrapped
    by 'wsgimware', and the result is finally converted back into a Django
    view.
    """
    def view_decorator(view_func):
        inner_app = wsgi_app_from_django_view(view_func)
        wrapped_app = wsgimware(inner_app)
        return django_view_from_wsgi_app(wrapped_app)
    return view_decorator
def _environ_from_request(request,script_name=None):
    """Construct a WSGI environment dict from a Django request object.

    If Django happens to be hosted under a WSGI server, a copy of the
    existing environment is used.  Otherwise a fresh environ dict is
    created and populated, using sensible defaults for any keys that
    cannot be determined from the Django request.

    If the optional argument 'script_name' is specified, this is used
    as the value of SCRIPT_NAME, with PATH_INFO adjusted accordingly.

    Raises ValueError if 'script_name' does not extend the existing
    SCRIPT_NAME, or if the extra portion is not a prefix of PATH_INFO.
    """
    if isinstance(request, WSGIRequest):
        # if django is being hosted by WSGI then we already have an environ
        environ = request.environ.copy()
    else:
        # start with fallbacks
        multithread = multiprocess = True
        protocol = 'HTTP/1.1'
        if isinstance(request, ModPythonRequest):
            # in mod_python we also have ways to get info about the server
            protocol = request._req.protocol
            try:
                from mod_python import apache
                multithread = bool(apache.mpm_query(apache.AP_MPMQ_IS_THREADED))
                multiprocess = bool(apache.mpm_query(apache.AP_MPMQ_IS_FORKED))
            except Exception:
                # best effort only: keep the conservative fallbacks
                pass
        meta = request.META
        path = urlsplit(request.path)
        # request.path normally carries neither a scheme nor a query string,
        # so fall back on what the request itself can tell us.
        is_secure = getattr(request, 'is_secure', None)
        if is_secure is not None and is_secure():
            default_scheme, default_port = 'https', '443'
        else:
            default_scheme, default_port = 'http', '80'
        # WSGI requires these values to be strings, never None.
        environ = {
            'SCRIPT_NAME': '',
            'SERVER_NAME': meta.get('SERVER_NAME') or 'localhost',
            'SERVER_PORT': str(meta.get('SERVER_PORT') or default_port),
            'CONTENT_TYPE': meta.get('CONTENT_TYPE') or '',
            'CONTENT_LENGTH': str(meta.get('CONTENT_LENGTH') or ''),
            'REQUEST_METHOD': request.method,
            'SERVER_PROTOCOL': protocol,
            'PATH_INFO': path.path,
            'QUERY_STRING': path.query or meta.get('QUERY_STRING') or '',
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': path.scheme or default_scheme,
            'wsgi.errors': sys.stderr,
            'wsgi.multithread': multithread,
            'wsgi.multiprocess': multiprocess,
            'wsgi.run_once': False,
        }
        for name, value in meta.items():
            if name.startswith('HTTP_'):
                environ[name] = value
    # adjust SCRIPT_NAME and PATH_INFO if needed
    if script_name is not None:
        if not script_name.startswith(environ["SCRIPT_NAME"]):
            raise ValueError("WSGI environ SCRIPT_NAME cannot be outside the Django root")
        pop_from_path = script_name[len(environ["SCRIPT_NAME"]):]
        if not environ["PATH_INFO"].startswith(pop_from_path):
            raise ValueError("WSGI environ SCRIPT_NAME must be part of the path.")
        environ["SCRIPT_NAME"] = script_name
        environ["PATH_INFO"] = environ["PATH_INFO"][len(pop_from_path):]
    # pass through the input stream as efficiently as possible
    environ['wsgi.input'] = _input_from_request(request)
    # A reconstructed stream may differ in length from the original body;
    # if the stream advertises its size, that overrides CONTENT_LENGTH.
    try:
        size = environ['wsgi.input'].size
    except AttributeError:
        pass
    else:
        environ['CONTENT_LENGTH'] = str(size)
    return environ
def _input_from_request(request):
    """Pick the object to use as 'wsgi.input' for the given request.

    Multipart POST requests get special treatment so that Django's file
    upload handling stays consistent with what the WSGI app reads:

      * body already consumed:  the stream is rebuilt from the parsed
        POST and FILES data (_input_from_files);
      * body not yet consumed:  the upload is parsed while the WSGI app
        reads it (_input_from_upload).

    For every other request, an unread body is passed straight through
    from the hosting server where possible, to allow streaming reads.
    Otherwise a StringIO over request.raw_post_data is returned.
    """
    body_consumed = hasattr(request,"_raw_post_data")
    if request.method == "POST":
        meta = request.META
        content_type = meta.get("HTTP_CONTENT_TYPE") or meta.get("CONTENT_TYPE","")
        if content_type.startswith("multipart"):
            if not body_consumed:
                # parse POST and FILES as the input stream is read
                return _input_from_upload(request)
            # reconstruct the input stream from the stored files
            return _input_from_files(request)
    if not body_consumed:
        # hand over the host's own stream when we know how to find it
        if isinstance(request, WSGIRequest):
            return request.environ['wsgi.input']
        if isinstance(request, ModPythonRequest):
            return request._req
    return StringIO(request.raw_post_data)
class _InputStreamBase(object):
"""Base class for simulated input streams.
This class provides an implementation of read() and friends,
as required for a WSGI input stream. These methods will all
read strings from an underlying iterator self._chunks, which
subclasses are expected to provide.
"""
def __init__(self):
self._buffer = ""
self._chunks = []
def read(self,size=None):
if size is None:
return "".join(self._chunks)
if size <= len(self._buffer):
data = self._buffer[:size]
self._buffer = self._buffer[size:]
return data
data = self._buffer
while len(data) < size:
try:
data = data + self._chunks.next()
except StopIteration:
break
self._buffer = data[size:]
return data
def readline(self,size=None):
if size is None:
size = 100
data = self.read(size)
pos = data.find("\n")
if pos >= 0:
self._buffer = data[pos+1:] + self._buffer
data = data[:pos+1]
return data
def readlines(self,hint):
lines = []
line = self.readline()
while line:
lines.append(line)
line = self.readline()
return lines
def __iter__(self):
line = self.readline()
while line:
yield line
line = self.readline()
class _input_from_files(_InputStreamBase):
    """Reconstruct the request body from POST and FILE data.

    This basically reverses the effect of Django's MultiPartParser,
    producing a file-like object that streams the contents of each POST
    and FILE variable in rfc2388 format.

    Since this may not format the data exactly as it was in the original
    request body, the original Content-Length header might be made invalid.
    The property 'size' is provided to give the appropriate new length.
    """

    def __init__(self,request):
        super(_input_from_files,self).__init__()
        self.request = request
        # Use logic inside MultiPartParser to determine boundary & encoding
        parser = mpp.MultiPartParser(request.META,"",request.upload_handlers,request.encoding)
        self._boundary = parser._boundary
        self._encoding = parser._encoding
        self._chunks = self._generate_chunks()

    def _encode(self,text):
        """Return 'text' as a native string in the request's encoding.

        Anything that is not already a 'str' is encoded.  This keeps every
        piece of the body the same string type, so that pieces can be
        joined safely and len() matches what is actually sent.
        """
        if isinstance(text,str):
            return text
        return text.encode(self._encoding)

    def _part_head(self,header_lines):
        """Yield the boundary line and header block that open one part."""
        yield "--" + self._boundary
        yield "\r\n"
        for line in header_lines:
            yield self._encode(line)
            yield "\r\n"
        # blank line separating the headers from the part's content
        yield "\r\n"

    def _generate_contents(self):
        """Generator yielding the contents of this stream.

        Each item yielded is either a string or an uploaded file object.
        This is a separate method so that we can use it to calculate the
        'size' property as well as to generate the actual data.
        """
        for name in self.request.POST:
            for val in self.request.POST.getlist(name):
                disp = 'Content-Disposition: form-data; name="%s"' % (name,)
                for piece in self._part_head([disp]):
                    yield piece
                yield self._encode(val)
                yield "\r\n"
        for name in self.request.FILES:
            for val in self.request.FILES.getlist(name):
                disp = 'Content-Disposition: form-data; name="%s"; filename="%s"'
                headers = [disp % (name,val.name,)]
                # Carry the upload's content type across when it is known,
                # so the WSGI app does not see every file as untyped.
                content_type = getattr(val,"content_type",None)
                if content_type:
                    headers.append('Content-Type: %s' % (content_type,))
                for piece in self._part_head(headers):
                    yield piece
                yield val
                yield "\r\n"
        yield "--%s--\r\n" % (self._boundary,)

    def _generate_chunks(self):
        """Generator yielding the stream as strings, file data included."""
        for c in self._generate_contents():
            if isinstance(c,basestring):
                yield c
            else:
                for chunk in c.chunks():
                    yield chunk

    def _get_size(self):
        """Return the total length of the reconstructed body."""
        size = 0
        for c in self._generate_contents():
            if isinstance(c,basestring):
                size += len(c)
            else:
                size += c.size
        return size
    size = property(_get_size)
class _input_from_upload(_InputStreamBase):
    """WSGI input stream over a multipart request Django has not yet parsed.

    Reading from this stream drives Django's file upload handlers at the
    same time as it hands the raw body to the WSGI application.  Django
    parses uploads in a push-based fashion, whereas a stream is consumed
    in a pull-based fashion, so the parsing is delegated to
    _TeeMultiPartParser: its tee_parse() generator runs the upload parsing
    loop and yields the original data as it goes.  Those chunks are simply
    concatenated to form the WSGI input stream.
    """

    def __init__(self,request):
        super(_input_from_upload,self).__init__()
        self.request = request
        self._chunks = self._generate_chunks()
        # Swap in our own parse_file_upload and then touch request.FILES,
        # so that the request hands us its raw input stream itself.  This
        # avoids digging through the request's private members for it.
        request.parse_file_upload = self.parse_file_upload
        len(request.FILES)

    def parse_file_upload(self,META,post_data):
        """Stand-in for self.request.parse_file_upload.

        Nothing is parsed here.  The parser is merely created and stored
        on self.parser, and the (initially empty) POST and FILES containers
        it will fill are returned.  The actual parsing happens as the
        stream is read.
        """
        tee_parser = _TeeMultiPartParser(
            META,
            post_data,
            self.request.upload_handlers,
            self.request.encoding,
        )
        tee_parser._post = QueryDict('', mutable=True)
        tee_parser._files = MultiValueDict()
        self.parser = tee_parser
        return (tee_parser._post,tee_parser._files)

    def _generate_chunks(self):
        """Yield an empty string, then each chunk the parser tees off."""
        yield ""
        tee_chunks = self.parser.tee_parse()
        for piece in tee_chunks:
            yield piece
class _TeeMultiPartParser(mpp.MultiPartParser):
    """MultiPartParser subclass that can tee the original data.

    This subclass re-implements the logic in MultiPartParser.parse(),
    with the additional ability to yield each chunk of the original
    stream data after it has been processed.

    The _post and _files containers are not created here; tee_parse()
    expects them to have been assigned on the instance beforehand.
    """
    def tee_parse(self):
        """Generator that parses the upload and yields the raw data read.

        The input stream is wrapped in a _TeeStream, which records
        everything read from it.  After each step of the parse, whatever
        has been recorded so far is yielded (this may be an empty string).
        Parsed fields are added to self._post, and file data is passed to
        the upload handlers, as the generator is advanced.
        """
        # Copied almost verbatim from MultiPartParser.parse().
        # The only changes are to yield chunks of the original data
        # after they have been processed.  It would be nicer to
        # patch MultiPartParser to support pull-based parsing, but
        # I didn't want to touch such a core component at this stage.
        encoding = self._encoding
        handlers = self._upload_handlers
        # Every read made through input_data is also recorded for tee_read().
        input_data = _TeeStream(self._input_data)
        limited_input_data = mpp.LimitBytes(input_data, self._content_length)
        # See if the handler wants to take care of the parsing.
        for handler in handlers:
            result = handler.handle_raw_input(limited_input_data,
                                              self._meta,
                                              self._content_length,
                                              self._boundary,
                                              encoding)
            if result is not None:
                # The handler parsed everything itself: copy its results
                # into our containers and emit whatever it read.
                for k in result[0]:
                    self._post.setlist(k,result[0].getlist(k))
                for k in result[1]:
                    self._files.setlist(k,result[1].getlist(k))
                yield input_data.tee_read()
                return
        # Instantiate the parser and stream:
        stream = mpp.LazyStream(mpp.ChunkIter(limited_input_data, self._chunk_size))
        # Whether or not to signal a file-completion at the beginning of the loop.
        old_field_name = None
        counters = [0] * len(handlers)
        try:
            for item_type, meta_data, field_stream in mpp.Parser(stream, self._boundary):
                if old_field_name:
                    # We run this at the beginning of the next loop
                    # since we cannot be sure a file is complete until
                    # we hit the next boundary/part of the multipart content.
                    self.handle_file_complete(old_field_name, counters)
                    old_field_name = None
                try:
                    disposition = meta_data['content-disposition'][1]
                    field_name = disposition['name'].strip()
                except (KeyError, IndexError, AttributeError):
                    # Parts without a usable name are skipped.
                    continue
                transfer_encoding = meta_data.get('content-transfer-encoding')
                field_name = force_unicode(field_name, encoding, errors='replace')
                if item_type == mpp.FIELD:
                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        try:
                            data = str(raw_data).decode('base64')
                        except:
                            # Fall back to the undecoded data.
                            data = raw_data
                    else:
                        data = field_stream.read()
                    self._post.appendlist(field_name,
                                          force_unicode(data, encoding, errors='replace'))
                    yield input_data.tee_read()
                elif item_type == mpp.FILE:
                    # This is a file, use the handler...
                    file_name = disposition.get('filename')
                    if not file_name:
                        continue
                    file_name = force_unicode(file_name, encoding, errors='replace')
                    file_name = self.IE_sanitize(unescape_entities(file_name))
                    content_type = meta_data.get('content-type', ('',))[0].strip()
                    try:
                        charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
                    except:
                        charset = None
                    try:
                        content_length = int(meta_data.get('content-length')[0])
                    except (IndexError, TypeError, ValueError):
                        content_length = None
                    # One running offset per handler, restarted for each file.
                    counters = [0] * len(handlers)
                    try:
                        for handler in handlers:
                            try:
                                handler.new_file(field_name, file_name,
                                                 content_type, content_length,
                                                 charset)
                            except mpp.StopFutureHandlers:
                                break
                        yield input_data.tee_read()
                        for chunk in field_stream:
                            if transfer_encoding == 'base64':
                                # We only special-case base64 transfer encoding
                                try:
                                    chunk = str(chunk).decode('base64')
                                except Exception, e:
                                    # Since this is only a chunk, any error is an unfixable error.
                                    raise mpp.MultiPartParserError("Could not decode base64 data: %r" % e)
                            # Each handler receives the chunk returned by
                            # the handler before it.
                            for i, handler in enumerate(handlers):
                                chunk_length = len(chunk)
                                chunk = handler.receive_data_chunk(chunk,
                                                                   counters[i])
                                counters[i] += chunk_length
                                if chunk is None:
                                    # If the chunk received by the handler is None, then don't continue.
                                    break
                            yield input_data.tee_read()
                    except mpp.SkipFile, e:
                        # Just use up the rest of this file...
                        for chunk in _tee_exhaust(field_stream,input_data):
                            yield chunk
                    else:
                        # Handle file upload completions on next iteration.
                        old_field_name = field_name
                else:
                    # If this is neither a FIELD or a FILE, just exhaust the stream.
                    for chunk in _tee_exhaust(field_stream,input_data):
                        yield chunk
        except mpp.StopUpload, e:
            # Drain the remaining input unless the handler asked for the
            # connection to be reset.
            if not e.connection_reset:
                for chunk in _tee_exhaust(limited_input_data,input_data):
                    yield chunk
        else:
            # Make sure that the request data is all fed
            for chunk in _tee_exhaust(limited_input_data,input_data):
                yield chunk
        # Signal that the upload has completed.
        for handler in handlers:
            retval = handler.upload_complete()
            if retval:
                break
class _TeeStream(object):
"""Readable stream that copies its data to another destination.
As data is read from the stream, a copy is collected in a buffer.
Calling the method tee_read() returns the contents of this buffer
and resets it to empty.
"""
def __init__(self,stream):
self._stream = stream
self._tee = []
def read(self,size=None):
data = self._stream.read()
self._tee.append(data)
return data
def tee_read(self):
data = "".join(self._tee)
self._tee = []
return data
def _tee_exhaust(stream_or_iterable,tee_stream):
"""
Completely exhausts an iterator or stream.
For each chunk reaf from the input stream, yield the next data from
the tee_stream.
"""
iterator = None
try:
iterator = iter(stream_or_iterable)
except TypeError:
iterator = mpp.ChunkIter(stream_or_iterable, 16384)
if iterator is None:
raise mpp.MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')
for __ in iterator:
yield tee_stream.tee_read()