public
Fork of teepark/django
Description: django project public repo
Homepage: http://www.djangoproject.com/
Clone URL: git://github.com/rfk/django.git
django / django / wsgi / wsgi_to_django.py
100644 537 lines (457 sloc) 21.387 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
from itertools import chain
import sys
from urlparse import urlsplit
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
 
from django.core.handlers.modpython import ModPythonRequest
from django.core.handlers.wsgi import WSGIRequest
from django.http import HttpResponse, QueryDict
from django.utils.text import unescape_entities
from django.utils.encoding import force_unicode
from django.utils.datastructures import MultiValueDict
from django.wsgi.django_to_wsgi import wsgi_app_from_django_view
from django.http import multipartparser as mpp
 
 
def django_view_from_wsgi_app(wsgiapp,script_name=None):
    """Create a Django view that delegates to a WSGI application.

    The returned view builds a WSGI environ with _environ_from_request(),
    calls ``wsgiapp`` with it and a local start_response(), and copies the
    resulting status, headers and body into an HttpResponse.

    If ``script_name`` is given it is passed on to _environ_from_request().
    """
    def view(request):
        prefix_data = []
        output_headers = MultiValueDict()
        # One-element list so the nested start_response() can rebind it.
        status_holder = ['']

        def write(data):
            prefix_data.append(data)

        def start_response(status, headers, exc_info=None):
            # we actually don't have to do anything with exc_info here, because
            # response headers can't have been sent until at least the response
            # object is returned, which won't happen until the wsgi app is
            # finished executing. so we actually want to do exactly the same
            # thing whether or not exc_info is included.
            status_holder[0] = int(status.split(' ', 1)[0])
            output_headers.clear()
            for name, value in headers:
                output_headers.appendlist(name, value)
            return write

        environ = _environ_from_request(request,script_name)
        output_iterable = wsgiapp(environ, start_response)

        # A WSGI app may defer start_response() until its iterable is first
        # advanced (e.g. a generator). Pull chunks until the status is known
        # so the status and headers copied below are actually populated.
        body_iter = iter(output_iterable)
        leading_chunks = []
        for chunk in body_iter:
            leading_chunks.append(chunk)
            if status_holder[0]:
                break

        # the data from all the write() calls, followed by the chunks pulled
        # above, followed by the rest of the iterable returned by the wsgi app
        fullbody = chain(prefix_data, leading_chunks, body_iter)

        response = HttpResponse(fullbody)
        if status_holder[0]:
            response.status_code = status_holder[0]
        for key in output_headers:
            values = output_headers.getlist(key)
            if len(values) > 1 and key.lower() != 'set-cookie':
                # Repeated header fields are equivalent to a single field
                # with comma-separated values; assigning them one at a time
                # to the same key would keep only one of them.
                response[key] = ', '.join(values)
            else:
                # NOTE(review): Set-Cookie values cannot be comma-joined, so
                # they are still assigned one by one to the same key;
                # presumably only the last one survives - confirm.
                for value in values:
                    response[key] = value

        return response
    return view
 
def django_view_dec_from_wsgi_middleware(wsgimware):
    """Turn a WSGI middleware into a decorator for Django views.

    The decorated view is converted to a WSGI app, wrapped by ``wsgimware``,
    and the wrapped app is converted back into a Django view.
    """
    def view_decorator(view_func):
        inner_app = wsgi_app_from_django_view(view_func)
        wrapped_app = wsgimware(inner_app)
        return django_view_from_wsgi_app(wrapped_app)
    return view_decorator
 
def _environ_from_request(request,script_name=None):
    """Construct a WSGI environment dict from a Django request object.

    If Django happens to be hosted under a WSGI server, a copy of the
    existing environment is used. Otherwise a fresh environ dict is
    created and populated, using fallbacks for keys that cannot be
    determined from the Django request.

    If the optional argument 'script_name' is specified, this is used
    as the value of SCRIPT_NAME, with PATH_INFO adjusted accordingly.
    A ValueError is raised if 'script_name' does not extend the existing
    SCRIPT_NAME, or if the remainder is not a prefix of PATH_INFO.

    The 'wsgi.input' key is always replaced with the stream returned by
    _input_from_request(); if that stream exposes a 'size' attribute,
    CONTENT_LENGTH is replaced to match.
    """
    if isinstance(request, WSGIRequest):
        # if django is being hosted by WSGI then we already have an environ
        environ = request.environ.copy()
    else:
        # start with fallbacks
        multithread = multiprocess = True
        # SERVER_PROTOCOL uses the CGI form, e.g. "HTTP/1.1"
        protocol = 'HTTP/1.1'
        if isinstance(request, ModPythonRequest):
            # in mod_python we also have ways to get info about the server
            protocol = request._req.protocol
            try:
                from mod_python import apache
                multithread = bool(apache.mpm_query(apache.AP_MPMQ_IS_THREADED))
                multiprocess = bool(apache.mpm_query(apache.AP_MPMQ_IS_FORKED))
            except Exception:
                # best effort only: keep the fallbacks if the query fails
                pass
        path = urlsplit(request.path)
        # request.path is normally a bare path, so urlsplit() rarely finds a
        # scheme or query string in it; prefer what the request itself reports.
        query_string = request.META.get('QUERY_STRING') or path.query
        url_scheme = path.scheme
        if not url_scheme:
            if request.is_secure():
                url_scheme = 'https'
            else:
                url_scheme = 'http'
        environ = {
            'SCRIPT_NAME': '',
            'SERVER_NAME': request.META.get('SERVER_NAME'),
            'SERVER_PORT': request.META.get('SERVER_PORT'),
            # CGI variables must be strings; use '' rather than None if absent
            'CONTENT_TYPE': request.META.get('CONTENT_TYPE', ''),
            'CONTENT_LENGTH': request.META.get('CONTENT_LENGTH', ''),
            'REQUEST_METHOD': request.method,
            'SERVER_PROTOCOL': protocol,
            'PATH_INFO': path.path,
            'QUERY_STRING': query_string,
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': url_scheme,
            'wsgi.errors': sys.stderr,
            'wsgi.multithread': multithread,
            'wsgi.multiprocess': multiprocess,
            'wsgi.run_once': False,
        }

        for name, value in request.META.iteritems():
            if name.startswith('HTTP_'):
                environ[name] = value

    # adjust SCRIPT_NAME and PATH_INFO if needed
    if script_name is not None:
        if not script_name.startswith(environ["SCRIPT_NAME"]):
            raise ValueError("WSGI environ SCRIPT_NAME cannot be outside the Django root")
        pop_from_path = script_name[len(environ["SCRIPT_NAME"]):]
        if not environ["PATH_INFO"].startswith(pop_from_path):
            raise ValueError("WSGI environ SCRIPT_NAME must be part of the path.")
        environ["SCRIPT_NAME"] = script_name
        environ["PATH_INFO"] = environ["PATH_INFO"][len(pop_from_path):]

    # pass through the input stream as efficiently as possible
    environ['wsgi.input'] = _input_from_request(request)
    try:
        size = environ['wsgi.input'].size
    except AttributeError:
        pass
    else:
        # a re-generated body may differ in length from the original one
        environ['CONTENT_LENGTH'] = str(size)

    return environ
 
def _input_from_request(request):
    """Construct the WSGI input stream from the request data.
 
If the request body has not been read then the underlying input
stream is passed through to WSGI, to allow efficient streaming
reads. If the stream has already been read (e.g. by some Django
middleware that accesses request.POST) then a simulated stream is
constructed based on the data recorded in the request.
 
Special consideration is given to multipart POST requests, where
Django's file upload handlers need to be correctly invoked as the
input stream is read by the WSGI app. Conversely, if the file
upload has already been processed, the data needs to be passed
into the WSGI app in appropriately-sized chunks.
"""
    already_read = hasattr(request,"_raw_post_data")
    if request.method == "POST":
        content_type = request.META.get("HTTP_CONTENT_TYPE")
        if not content_type:
            content_type = request.META.get("CONTENT_TYPE","")
        if content_type.startswith("multipart"):
            if already_read:
                # reconstruct the input stream from the stored files
                return _input_from_files(request)
            else:
                # parse POST and FILES as the input stream is read
                return _input_from_upload(request)
    if not already_read:
        if isinstance(request, WSGIRequest):
            return request.environ['wsgi.input']
        if isinstance(request, ModPythonRequest):
            return request._req
    return StringIO(request.raw_post_data)
 
 
 
class _InputStreamBase(object):
    """Base class for simulated input streams.
 
This class provides an implementation of read() and friends,
as required for a WSGI input stream. These methods will all
read strings from an underlying iterator self._chunks, which
subclasses are expected to provide.
"""
 
    def __init__(self):
        self._buffer = ""
        self._chunks = []
 
    def read(self,size=None):
        if size is None:
            return "".join(self._chunks)
        if size <= len(self._buffer):
            data = self._buffer[:size]
            self._buffer = self._buffer[size:]
            return data
        data = self._buffer
        while len(data) < size:
            try:
                data = data + self._chunks.next()
            except StopIteration:
                break
        self._buffer = data[size:]
        return data
 
    def readline(self,size=None):
        if size is None:
            size = 100
        data = self.read(size)
        pos = data.find("\n")
        if pos >= 0:
            self._buffer = data[pos+1:] + self._buffer
            data = data[:pos+1]
        return data
 
    def readlines(self,hint):
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines
 
    def __iter__(self):
        line = self.readline()
        while line:
            yield line
            line = self.readline()
 
 
class _input_from_files(_InputStreamBase):
    """Reconstruct the request body from POST and FILE data.

    This basically reverses the effect of Django's MultiPartParser,
    producing a file-like object that streams the contents of each POST
    and FILE variable in rfc2388 format.

    Since this may not format the data exactly as it was in the original
    request body, the original Content-Length header might be made invalid.
    The property 'size' is provided to give the appropriate new length.
    """

    def __init__(self,request):
        super(_input_from_files,self).__init__()
        self.request = request
        # Use logic inside MultiPartParser to determine boundary & encoding
        parser = mpp.MultiPartParser(request.META,"",request.upload_handlers,request.encoding)
        self._boundary = parser._boundary
        self._encoding = parser._encoding
        self._chunks = self._generate_chunks()

    def _to_bytes(self,text):
        """Encode unicode ``text`` with the request encoding.

        Byte strings are returned unchanged. Every string put on the
        stream goes through here where it may be unicode, so that the
        stream never mixes unicode with the raw bytes of uploaded files
        and so that 'size' counts encoded bytes.
        """
        if isinstance(text,unicode):
            return text.encode(self._encoding)
        return text

    def _generate_contents(self):
        """Generator yielding the contents of this stream.

        Each item yielded is either a string or an uploaded file object.
        This is a separate method so that we can use it to calculate the
        'size' property as well as to generate the actual data.
        """
        delimiter = "--" + self._boundary
        for name in self.request.POST:
            for val in self.request.POST.getlist(name):
                disp = 'Content-Disposition: form-data; name="%s"' % (name,)
                yield delimiter
                yield "\r\n"
                yield self._to_bytes(disp)
                yield "\r\n"
                yield "\r\n"
                yield self._to_bytes(val)
                yield "\r\n"
        for name in self.request.FILES:
            for val in self.request.FILES.getlist(name):
                disp='Content-Disposition: form-data; name="%s"; filename="%s"'
                disp = disp % (name,val.name,)
                yield delimiter
                yield "\r\n"
                yield self._to_bytes(disp)
                yield "\r\n"
                # Pass the recorded content type along when there is one, so
                # the consumer of the rebuilt body does not lose it.
                content_type = getattr(val,"content_type",None)
                if content_type:
                    yield self._to_bytes("Content-Type: %s" % (content_type,))
                    yield "\r\n"
                yield "\r\n"
                yield val
                yield "\r\n"
        yield "--%s--\r\n" % (self._boundary,)

    def _generate_chunks(self):
        """Generator yielding the stream as strings, expanding file objects."""
        for c in self._generate_contents():
            if isinstance(c,basestring):
                yield c
            else:
                for chunk in c.chunks():
                    yield chunk

    def _get_size(self):
        """Return the total length of the regenerated body."""
        size = 0
        for c in self._generate_contents():
            if isinstance(c,basestring):
                size += len(c)
            else:
                size += c.size
        return size
    size = property(_get_size)
 
 
class _input_from_upload(_InputStreamBase):
    """WSGI input stream over a Django request whose body is still unread.

    Reading from this stream drives Django's file upload handlers: the
    request's parse_file_upload hook is replaced so that, instead of
    parsing the upload immediately (push-based), a _TeeMultiPartParser is
    created and kept. Its tee_parse() generator is then consumed as the
    stream is read (pull-based), and the chunks of original data it yields
    are concatenated to form the stream contents.
    """

    def __init__(self,request):
        super(_input_from_upload,self).__init__()
        self.request = request
        self._chunks = self._generate_chunks()
        # Install our hook, then touch request.FILES so the request calls it
        # with the raw input data. This avoids reaching into the request's
        # private members to find the stream.
        request.parse_file_upload = self.parse_file_upload
        len(request.FILES)

    def parse_file_upload(self,META,post_data):
        """Stand-in for request.parse_file_upload.

        Creates and records the parser without running it, and returns the
        (initially empty) POST and FILES containers that the parser fills
        in later, while the stream is being read.
        """
        request = self.request
        self.parser = tee_parser = _TeeMultiPartParser(
            META,post_data,request.upload_handlers,request.encoding)
        tee_parser._post = QueryDict('', mutable=True)
        tee_parser._files = MultiValueDict()
        return (tee_parser._post,tee_parser._files)

    def _generate_chunks(self):
        """Yield an empty string, then every chunk produced by the parser."""
        yield ""
        for piece in self.parser.tee_parse():
            yield piece
 
 
class _TeeMultiPartParser(mpp.MultiPartParser):
    """MultiPartParser subclass that can tee the original data.

    This subclass re-implements the logic in MultiPartParser.parse(),
    with the additional ability to yield each chunk of the original
    stream data after it has been processed.

    tee_parse() stores its results in self._post and self._files, which
    are not created here; the caller must assign them before iterating
    (in this file, _input_from_upload.parse_file_upload does so).
    """

    def tee_parse(self):
        """Generator that parses the upload and yields the raw data consumed.

        Each yielded item is whatever the wrapping _TeeStream has collected
        since the previous yield. Parsed fields are appended to self._post
        and the upload handlers are driven for file parts, mirroring
        MultiPartParser.parse().
        """
        # Copied almost verbatim from MultiPartParser.parse().
        # The only changes are to yield chunks of the original data
        # after they have been processed. It would be nicer to
        # patch MultiPartParser to support pull-based parsing, but
        # I didn't want to touch such a core component at this stage.
        encoding = self._encoding
        handlers = self._upload_handlers
        # Every read of the request body goes through the tee, so the raw
        # bytes can be handed back out via input_data.tee_read().
        input_data = _TeeStream(self._input_data)
        limited_input_data = mpp.LimitBytes(input_data, self._content_length)

        # See if the handler wants to take care of the parsing.
        for handler in handlers:
            result = handler.handle_raw_input(limited_input_data,
                                              self._meta,
                                              self._content_length,
                                              self._boundary,
                                              encoding)
            if result is not None:
                # The handler did all the work: copy its (POST, FILES) pair
                # into our containers, emit what it read, and stop.
                for k in result[0]:
                    self._post.setlist(k,result[0].getlist(k))
                for k in result[1]:
                    self._files.setlist(k,result[1].getlist(k))
                yield input_data.tee_read()
                return

        # Instantiate the parser and stream:
        stream = mpp.LazyStream(mpp.ChunkIter(limited_input_data, self._chunk_size))

        # Whether or not to signal a file-completion at the beginning of the loop.
        old_field_name = None
        counters = [0] * len(handlers)

        try:
            for item_type, meta_data, field_stream in mpp.Parser(stream, self._boundary):

                if old_field_name:
                    # We run this at the beginning of the next loop
                    # since we cannot be sure a file is complete until
                    # we hit the next boundary/part of the multipart content.
                    self.handle_file_complete(old_field_name, counters)
                    old_field_name = None

                try:
                    disposition = meta_data['content-disposition'][1]
                    field_name = disposition['name'].strip()
                except (KeyError, IndexError, AttributeError):
                    # Parts without a usable name are skipped.
                    continue

                transfer_encoding = meta_data.get('content-transfer-encoding')
                field_name = force_unicode(field_name, encoding, errors='replace')
                if item_type == mpp.FIELD:
                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        try:
                            data = str(raw_data).decode('base64')
                        except:
                            # Fall back to the undecoded value.
                            data = raw_data
                    else:
                        data = field_stream.read()

                    self._post.appendlist(field_name,
                                          force_unicode(data, encoding, errors='replace'))
                    # Emit the raw bytes consumed while reading this field.
                    yield input_data.tee_read()

                elif item_type == mpp.FILE:
                    # This is a file, use the handler...
                    file_name = disposition.get('filename')
                    if not file_name:
                        continue
                    file_name = force_unicode(file_name, encoding, errors='replace')
                    file_name = self.IE_sanitize(unescape_entities(file_name))

                    content_type = meta_data.get('content-type', ('',))[0].strip()
                    try:
                        charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
                    except:
                        charset = None

                    try:
                        content_length = int(meta_data.get('content-length')[0])
                    except (IndexError, TypeError, ValueError):
                        content_length = None

                    # One running byte offset per handler for this file.
                    counters = [0] * len(handlers)
                    try:
                        for handler in handlers:
                            try:
                                handler.new_file(field_name, file_name,
                                                 content_type, content_length,
                                                 charset)
                            except mpp.StopFutureHandlers:
                                break

                        # Emit the raw bytes consumed so far (part headers).
                        yield input_data.tee_read()
                        for chunk in field_stream:
                            if transfer_encoding == 'base64':
                                # We only special-case base64 transfer encoding
                                try:
                                    chunk = str(chunk).decode('base64')
                                except Exception, e:
                                    # Since this is only a chunk, any error is an unfixable error.
                                    raise mpp.MultiPartParserError("Could not decode base64 data: %r" % e)

                            for i, handler in enumerate(handlers):
                                chunk_length = len(chunk)
                                chunk = handler.receive_data_chunk(chunk,
                                                                   counters[i])
                                counters[i] += chunk_length
                                if chunk is None:
                                    # If the chunk received by the handler is None, then don't continue.
                                    break
                            # Emit the raw bytes read for this file chunk.
                            yield input_data.tee_read()

                    except mpp.SkipFile, e:
                        # Just use up the rest of this file...
                        for chunk in _tee_exhaust(field_stream,input_data):
                            yield chunk
                    else:
                        # Handle file upload completions on next iteration.
                        old_field_name = field_name
                else:
                    # If this is neither a FIELD or a FILE, just exhaust the stream.
                    for chunk in _tee_exhaust(field_stream,input_data):
                        yield chunk
        except mpp.StopUpload, e:
            # Unless the handler asked for a connection reset, still drain
            # (and tee) the remainder of the request body.
            if not e.connection_reset:
                for chunk in _tee_exhaust(limited_input_data,input_data):
                    yield chunk
        else:
            # Make sure that the request data is all fed
            for chunk in _tee_exhaust(limited_input_data,input_data):
                yield chunk

        # Signal that the upload has completed.
        for handler in handlers:
            retval = handler.upload_complete()
            if retval:
                break
 
 
class _TeeStream(object):
    """Readable stream that copies its data to another destination.
 
As data is read from the stream, a copy is collected in a buffer.
Calling the method tee_read() returns the contents of this buffer
and resets it to empty.
"""
 
    def __init__(self,stream):
        self._stream = stream
        self._tee = []
 
    def read(self,size=None):
        data = self._stream.read()
        self._tee.append(data)
        return data
 
    def tee_read(self):
        data = "".join(self._tee)
        self._tee = []
        return data
 
def _tee_exhaust(stream_or_iterable,tee_stream):
    """
Completely exhausts an iterator or stream.
 
For each chunk reaf from the input stream, yield the next data from
the tee_stream.
"""
    iterator = None
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        iterator = mpp.ChunkIter(stream_or_iterable, 16384)
 
    if iterator is None:
        raise mpp.MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')
 
    for __ in iterator:
        yield tee_stream.tee_read()