add change notes, reduce spurious diffs

Pylons · Jan 28, 2012 · 32764a0 · 32764a0
1 parent 3517083
commit 32764a0
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 30 deletions.
diff --git a/docs/news.txt b/docs/news.txt
@@ -13,6 +13,53 @@ Next release
 * Fix corner-case ``response.status_int`` and ``response.status`` mutation
   bug on py3 (use explicit floor division).
 
+* Backwards incompatibility: Request and BaseRequest objects now return
+  Unicode for ``request.path_info`` and ``request.script_name`` under Python
+  2.  Rationale: the legacy behavior of returning the respective raw environ
+  values was nonsensical on Python 3.  Working with non-ascii encoded environ
+  variables as raw WSGI values under Python 3 makes no sense, as PEP 3333
+  specifies that environ variables are bytes-tunneled-as-latin-1 strings, and
+  therefore their values are not sensible to use if there are high-order
+  characters in them.
+
+  If you don't care about Python 3 and you need backwards compatibility, use
+  ``webob.LegacyRequest`` instead of ``webob.Request`` to get the legacy
+  behavior of returning bytes for these attributes on Python 2.  Although
+  it's possible to use ``webob.LegacyRequest`` under Python 3, it makes no
+  sense at all to do so, so don't.
+
+* The above backwards-incompatible change fixed nonsensical behavior of
+  ``request.host_url``, ``request.application_url``, ``request.path_url``,
+  ``request.path``, ``request.path_qs``, ``request.url``,
+  ``request.relative_url``, ``request.path_info_peek``, and
+  ``request.path_info_pop`` under Python 3.
+
+* The WebOb Request object now respects two WSGI environment variables,
+  ``webob.url_encoding`` and ``webob.remote_user_encoding``.  The former is
+  used to decode the raw WSGI PATH_INFO and SCRIPT_NAME variables when the
+  ``request.path_info`` and ``request.script_name`` APIs are used; the
+  latter is used to decode the raw WSGI REMOTE_USER variable when the
+  ``request.remote_user`` API is used.
+
+* Request objects now accept two additional constructor parameters:
+  ``url_encoding`` and ``remote_user_encoding``.  These will be used to
+  decode PATH_INFO/SCRIPT_NAME and REMOTE_USER respectively from their
+  WSGI-encoded values.
+
+  Note that passing ``url_encoding`` will cause the WSGI environment variable
+  ``webob.url_encoding`` to be set, and passing ``remote_user_encoding`` will
+  cause the WSGI environment variable ``webob.remote_user_encoding`` to be
+  set.
+
+* Fix the internal ``webob.response._request_uri`` function to generate a
+  sensible request URI under Python 3.  This fixed a problem under Python 3
+  if you were using non-absolute Location headers in responses.
+
+* Internal: a cached version of EnvironHeaders is no longer kept as a
+  ``_headers`` attribute of the request when ``request.headers`` is used,
+  because changes to EnvironHeaders meant the caching would have created a
+  circular reference.
+
 1.2b2
 ------
 

diff --git a/webob/request.py b/webob/request.py
@@ -208,7 +208,7 @@ def decode(self, charset=None, errors='strict'):
 
         new_content_type = CHARSET_RE.sub('; charset="UTF-8"',
                                           self._content_type_raw)
-        content_type = native_(self.content_type)
+        content_type = self.content_type
         r = self.__class__(
             self.environ.copy(),
             query_string=t.transcode_query(self.query_string),
@@ -228,7 +228,7 @@ def decode(self, charset=None, errors='strict'):
             fs = cgi.FieldStorage(fp=self.body_file,
                                   environ=fs_environ,
                                   keep_blank_values=True,
-                                  encoding=native_(charset),
+                                  encoding=charset,
                                   errors=errors)
         else:
             fs = cgi.FieldStorage(fp=self.body_file,
@@ -308,7 +308,7 @@ def body_file_seekable(self):
         return self.body_file_raw
 
     url_encoding = environ_getter('webob.url_encoding', 'UTF-8')
-    remote_user_encoding = environ_getter('webob.remote_user_encoding', 'UTF-8')
+    remote_user_encoding = environ_getter('webob.remote_user_encoding', 'ascii')
     scheme = environ_getter('wsgi.url_scheme')
     method = environ_getter('REQUEST_METHOD', 'GET')
     http_version = environ_getter('SERVER_PROTOCOL')
@@ -345,7 +345,7 @@ def _content_type__get(self):
         return self._content_type_raw.split(';', 1)[0]
     def _content_type__set(self, value=None):
         if value is not None:
-            value = native_(value)
+            value = str(value)
             if ';' not in value:
                 content_type = self._content_type_raw
                 if ';' in content_type:
@@ -393,11 +393,12 @@ def client_addr(self):
            ``HTTP_X_FORWARDED_FOR`` has the correct values.  The WSGI server
            must be behind a trusted proxy for this to be true.
         """
-        xff = self.environ.get('HTTP_X_FORWARDED_FOR', None)
+        e = self.environ
+        xff = e.get('HTTP_X_FORWARDED_FOR', None)
         if xff is not None:
             addr = xff.split(',')[0].strip()
         else:
-            addr = self.environ.get('REMOTE_ADDR', None)
+            addr = e.get('REMOTE_ADDR', None)
         return addr
 
     @property
@@ -412,36 +413,38 @@ def host_port(self):
         the environ at all, this attribute will return the value of the
         ``SERVER_PORT`` header (which is guaranteed to be present).
         """
-        host = self.environ.get('HTTP_HOST', None)
+        e = self.environ
+        host = e.get('HTTP_HOST', None)
         if host is not None:
             if ':' in host:
                 host, port = host.split(':', 1)
             else:
-                url_scheme = self.environ['wsgi.url_scheme']
+                url_scheme = e['wsgi.url_scheme']
                 if url_scheme == 'https':
                     port = '443'
                 else:
                     port = '80'
         else:
-            port = self.environ['SERVER_PORT']
+            port = e['SERVER_PORT']
         return port
 
     @property
     def host_url(self):
         """
         The URL through the host (no path)
         """
-        scheme = self.environ.get('wsgi.url_scheme')
+        e = self.environ
+        scheme = e.get('wsgi.url_scheme')
         url = scheme + '://'
-        host = self.environ.get('HTTP_HOST', None)
+        host = e.get('HTTP_HOST', None)
         if host is not None:
             if ':' in host:
                 host, port = host.split(':', 1)
             else:
                 port = None
         else:
-            host = self.environ.get('SERVER_NAME')
-            port = self.environ.get('SERVER_PORT')
+            host = e.get('SERVER_NAME')
+            port = e.get('SERVER_PORT')
         if scheme == 'https':
             if port == '443':
                 port = None
@@ -484,7 +487,7 @@ def path_qs(self):
         The path of the request, without host but with query string
         """
         path = self.path
-        qs = self.encget('QUERY_STRING', None)
+        qs = self.environ.get('QUERY_STRING', None)
         if qs:
             path += '?' + qs
         return path
@@ -495,7 +498,7 @@ def url(self):
         The full request URL, including QUERY_STRING
         """
         url = self.path_url
-        qs = self.encget('QUERY_STRING', None)
+        qs = self.environ.get('QUERY_STRING', None)
         if qs:
             url += '?' + qs
         return url
@@ -649,12 +652,10 @@ def is_xhr(self):
 
     def _host__get(self):
         """Host name provided in HTTP_HOST, with fall-back to SERVER_NAME"""
-        host = self.environ.get('HTTP_HOST')
-        if host is None:
-            h = self.environ.get('SERVER_NAME')
-            p = self.environ.get('SERVER_PORT')
-            host = h + ':' + p
-        return host
+        if 'HTTP_HOST' in self.environ:
+            return self.environ['HTTP_HOST']
+        else:
+            return '%(SERVER_NAME)s:%(SERVER_PORT)s' % self.environ
     def _host__set(self, value):
         self.environ['HTTP_HOST'] = value
     def _host__del(self):
@@ -987,7 +988,7 @@ def _cache_control__get(self):
         <http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9>`_)
         """
         env = self.environ
-        value = self.environ.get('HTTP_CACHE_CONTROL', '')
+        value = env.get('HTTP_CACHE_CONTROL', '')
         cache_header, cache_obj = env.get('webob._cache_control', (None, None))
         if cache_obj is not None and cache_header == value:
             return cache_obj
@@ -1004,10 +1005,10 @@ def _cache_control__set(self, value):
             value = CacheControl(value, type='request')
         if isinstance(value, CacheControl):
             str_value = str(value)
-            self.environ['HTTP_CACHE_CONTROL'] = str_value
+            env['HTTP_CACHE_CONTROL'] = str_value
             env['webob._cache_control'] = (str_value, value)
         else:
-            self.environ['HTTP_CACHE_CONTROL'] = value
+            env['HTTP_CACHE_CONTROL'] = value
             env['webob._cache_control'] = (None, None)
 
     def _cache_control__del(self):
@@ -1076,15 +1077,13 @@ def as_bytes(self, skip_body=False):
         host = self.host_url
         assert url.startswith(host)
         url = url[len(host):]
-        method = self.method
-        http_version = self.http_version
-        parts = [bytes_('%s %s %s' % (method, url, http_version))]
+        parts = [bytes_('%s %s %s' % (self.method, url, self.http_version))]
         #self.headers.setdefault('Host', self.host)
 
         # acquire body before we handle headers so that
         # content-length will be set
         body = None
-        if self.method in ('POST', 'PUT'):
+        if self.method in ('PUT', 'POST'):
             if skip_body > 1:
                 if len(self.body) > skip_body:
                     body = bytes_('<body skipped (len=%s)>' % len(self.body))
@@ -1186,7 +1185,7 @@ def from_file(cls, fp):
             if hname in r.headers:
                 hval = r.headers[hname] + ', ' + hval
             r.headers[hname] = hval
-        if r.method in ('POST', 'PUT'):
+        if r.method in ('PUT', 'POST'):
             clen = r.content_length
             if clen is None:
                 body = fp.read()
@@ -1590,7 +1589,6 @@ def _encode_multipart(vars, content_type, fout=None):
         return content_type, f.getvalue()
 
 def detect_charset(ctype):
-    ctype = native_(ctype)
     m = CHARSET_RE.search(ctype)
     if m:
         return m.group(1).strip('"').strip()