Browse files

Reverted the pulling out of various middleware:

RateLimit
StaticWeb
TempURL/FormPOST

Change-Id: I988e93e6f4aacb817a2e354d43a04e47516fdf88
  • Loading branch information...
1 parent 3d3ed34 commit 1c3b75c29140939350807bf0e5faa2d35e7257a8 @gholt gholt committed May 16, 2012
View
70 bin/swift-form-signature
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+import hmac
+from hashlib import sha1
+from os.path import basename
+from sys import argv, exit
+from time import time
+
+
def compute_expires(seconds, now=None):
    """
    Turn the <seconds> command line argument into an absolute expiry.

    :param seconds: Number of seconds from now, as given on the command
                    line (a string or anything int() accepts).
    :param now: Reference Unix time; defaults to the current time. Only
                meant to be overridden when testing.
    :returns: The Unix timestamp ``int(now + seconds)``, or 0 if
              <seconds> is not an integer or is not positive.
    """
    try:
        delta = int(seconds)
    except ValueError:
        return 0
    # Reject zero/negative offsets explicitly; checking only the resulting
    # timestamp would accept them because "now - 5" is still a large
    # positive number.
    if delta < 1:
        return 0
    if now is None:
        now = time()
    return int(now + delta)


# NOTE: every print below is written as a single-argument print(...) call,
# which produces the same output under the Python 2 print statement and the
# Python 3 print function.
if __name__ == '__main__':
    if len(argv) != 7:
        prog = basename(argv[0])
        print('Syntax: %s <path> <redirect> <max_file_size> '
              '<max_file_count> <seconds> <key>' % prog)
        print('')
        print('Where:')
        print(' <path> The prefix to use for form uploaded')
        print(' objects. For example:')
        print(' /v1/account/container/object_prefix_ would')
        print(' ensure all form uploads have that path')
        print(' prepended to the browser-given file name.')
        print(' <redirect> The URL to redirect the browser to after')
        print(' the uploads have completed.')
        print(' <max_file_size> The maximum file size per file uploaded.')
        print(' <max_file_count> The maximum number of uploaded files')
        print(' allowed.')
        print(' <seconds> The number of seconds from now to allow')
        print(' the form post to begin.')
        print(' <key> The X-Account-Meta-Temp-URL-Key for the')
        print(' account.')
        print('')
        print('Example output:')
        print(' Expires: 1323842228')
        print(' Signature: 18de97e47345a82c4dbfb3b06a640dbb')
        exit(1)
    path, redirect, max_file_size, max_file_count, seconds, key = argv[1:]
    # Non-numeric values are mapped onto an out-of-range sentinel so a
    # single check below reports both kinds of bad input.
    try:
        max_file_size = int(max_file_size)
    except ValueError:
        max_file_size = -1
    if max_file_size < 0:
        print('Please use a <max_file_size> value greater than or '
              'equal to 0.')
        exit(1)
    try:
        max_file_count = int(max_file_count)
    except ValueError:
        max_file_count = 0
    if max_file_count < 1:
        print('Please use a positive <max_file_count> value.')
        exit(1)
    expires = compute_expires(seconds)
    if expires < 1:
        print('Please use a positive <seconds> value.')
        exit(1)
    parts = path.split('/', 4)
    # Must be at least four parts, ['', 'v1', 'a', 'c'], must be a v1
    # request, have account and container values, and optionally have an
    # object prefix (which lands in a fifth part).
    if len(parts) < 4 or parts[0] or parts[1] != 'v1' or not parts[2] or \
            not parts[3]:
        print('<path> must point to a container at least.')
        print('For example: /v1/account/container')
        print(' Or: /v1/account/container/object_prefix')
        exit(1)
    # The signed body is the five form values joined by newlines, in the
    # same order the FormPost middleware documents.
    hmac_body = '%s\n%s\n%s\n%s\n%s' % (path, redirect, max_file_size,
                                        max_file_count, expires)
    sig = hmac.new(key, hmac_body, sha1).hexdigest()
    print(' Expires: %s' % expires)
    print('Signature: %s' % sig)
View
59 bin/swift-temp-url
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+import hmac
+from hashlib import sha1
+from os.path import basename
+from sys import argv, exit
+from time import time
+
+
def compute_expires(seconds, now=None):
    """
    Turn the <seconds> command line argument into an absolute expiry.

    :param seconds: Number of seconds from now, as given on the command
                    line (a string or anything int() accepts).
    :param now: Reference Unix time; defaults to the current time. Only
                meant to be overridden when testing.
    :returns: The Unix timestamp ``int(now + seconds)``, or 0 if
              <seconds> is not an integer or is not positive.
    """
    try:
        delta = int(seconds)
    except ValueError:
        return 0
    # Reject zero/negative offsets explicitly; checking only the resulting
    # timestamp would accept them because "now - 5" is still a large
    # positive number.
    if delta < 1:
        return 0
    if now is None:
        now = time()
    return int(now + delta)


# NOTE: every print below is written as a single-argument print(...) call,
# which produces the same output under the Python 2 print statement and the
# Python 3 print function.
if __name__ == '__main__':
    if len(argv) != 5:
        prog = basename(argv[0])
        print('Syntax: %s <method> <seconds> <path> <key>' % prog)
        print('')
        print('Where:')
        print(' <method> The method to allow, GET or PUT.')
        print(' Note: HEAD will also be allowed.')
        print(' <seconds> The number of seconds from now to allow requests.')
        print(' <path> The full path to the resource.')
        print(' Example: /v1/AUTH_account/c/o')
        print(' <key> The X-Account-Meta-Temp-URL-Key for the account.')
        print('')
        print('Example output:')
        print(' /v1/AUTH_account/c/o?temp_url_sig=34d49efc32fe6e3082e411e'
              'eeb85bd8a&temp_url_expires=1323482948')
        print('')
        print('This can be used to form a URL to give out for the access ')
        print('allowed. For example:')
        print(' echo https://swift-cluster.example.com`%s GET 60 '
              '/v1/AUTH_account/c/o mykey`' % prog)
        print('')
        print('Might output:')
        print(' https://swift-cluster.example.com/v1/AUTH_account/c/o?'
              'temp_url_sig=34d49efc32fe6e3082e411eeeb85bd8a&'
              'temp_url_expires=1323482948')
        exit(1)
    method, seconds, path, key = argv[1:]
    if method not in ('GET', 'PUT'):
        print('Please use either the GET or PUT method.')
        exit(1)
    expires = compute_expires(seconds)
    if expires < 1:
        print('Please use a positive <seconds> value.')
        exit(1)
    parts = path.split('/', 4)
    # Must be five parts, ['', 'v1', 'a', 'c', 'o'], must be a v1 request,
    # have account, container, and object values, and the object value
    # can't just have '/'s.
    if len(parts) != 5 or parts[0] or parts[1] != 'v1' or not parts[2] or \
            not parts[3] or not parts[4].strip('/'):
        print('<path> must point to an object.')
        print('For example: /v1/account/container/object')
        exit(1)
    # The signed body is method, expiry and path joined by newlines.
    hmac_body = '%s\n%s\n%s' % (method, expires, path)
    sig = hmac.new(key, hmac_body, sha1).hexdigest()
    print('%s?temp_url_sig=%s&temp_url_expires=%s' % (path, sig, expires))
View
140 doc/manpages/proxy-server.conf.5
@@ -90,7 +90,7 @@ are acceptable within this section.
.IP "\fBpipeline\fR"
It is used when you need to apply a number of filters. It is a list of filters
ended by an application. The default should be \fB"catch_errors healthcheck
-cache tempauth proxy-server"\fR
+cache ratelimit tempauth proxy-server"\fR
.RE
.PD
@@ -209,6 +209,53 @@ Default for memcache_servers is to try to read the property from /etc/swift/memc
+.RS 0
+.IP "\fB[filter:ratelimit]\fR"
+.RE
+
+Rate limits requests on both an Account and Container level. Limits are configurable.
+
+.RS 3
+.IP \fBuse\fR
+Entry point for paste.deploy for the ratelimit middleware. This is the reference to the installed python egg.
+The default is \fBegg:swift#ratelimit\fR.
+.IP "\fBset log_name\fR"
+Label used when logging. The default is ratelimit.
+.IP "\fBset log_facility\fR"
+Syslog log facility. The default is LOG_LOCAL0.
+.IP "\fBset log_level\fR "
+Logging level. The default is INFO.
+.IP "\fBset log_headers\fR "
+Enables the ability to log request headers. The default is False.
+.IP \fBclock_accuracy\fR
+This should represent how accurate the proxy servers' system clocks are with each other.
+1000 means that all the proxies' clocks are accurate to each other within 1 millisecond.
+No ratelimit should be higher than the clock accuracy. The default is 1000.
+.IP \fBmax_sleep_time_seconds\fR
+App will immediately return a 498 response if the necessary sleep time ever exceeds
+the given max_sleep_time_seconds. The default is 60 seconds.
+.IP \fBlog_sleep_time_seconds\fR
+To allow visibility into rate limiting set this value > 0 and all sleeps greater than
+the number will be logged. A value of 0 disables this logging. The default is 0.
+.IP \fBrate_buffer_seconds\fR
+Number of seconds the rate counter can drop and be allowed to catch up
+(at a faster than listed rate). A larger number will result in larger spikes in
+rate but better average accuracy. The default is 5.
+.IP \fBaccount_ratelimit\fR
+If set, will limit PUT and DELETE requests to /account_name/container_name. Number is
+in requests per second. A value of 0 disables the limit. The default is 0.
+.IP \fBaccount_whitelist\fR
+Comma separated lists of account names that will not be rate limited. The default is ''.
+.IP \fBaccount_blacklist\fR
+Comma separated lists of account names that will not be allowed. Returns a 497 response.
+The default is ''.
+.IP \fBcontainer_ratelimit_size\fR
+When set with container_ratelimit_x = r: for containers of size x, limit requests per second
+to r. Will limit PUT, DELETE, and POST requests to /a/c/o. The default is ''.
+.RE
+
+
+
.RS 0
.IP "\fB[filter:catch_errors]\fR"
.RE
@@ -229,6 +276,97 @@ Enables the ability to log request headers. The default is False.
.RS 0
+.IP "\fB[filter:cname_lookup]\fR"
+.RE
+
+Note: this middleware requires python-dnspython
+
+.RS 3
+.IP \fBuse\fR
+Entry point for paste.deploy for the cname_lookup middleware. This is the reference to the installed python egg.
+The default is \fBegg:swift#cname_lookup\fR.
+.IP "\fBset log_name\fR"
+Label used when logging. The default is cname_lookup.
+.IP "\fBset log_facility\fR"
+Syslog log facility. The default is LOG_LOCAL0.
+.IP "\fBset log_level\fR "
+Logging level. The default is INFO.
+.IP "\fBset log_headers\fR"
+Enables the ability to log request headers. The default is False.
+.IP \fBstorage_domain\fR
+The domain to be used by the middleware.
+.IP \fBlookup_depth\fR
+How deep in the CNAME chain to look for something that matches the storage domain.
+The default is 1.
+.RE
+
+
+
+.RS 0
+.IP "\fB[filter:staticweb]\fR"
+.RE
+
+Note: Put staticweb just after your auth filter(s) in the pipeline
+
+.RS 3
+.IP \fBuse\fR
+Entry point for paste.deploy for the staticweb middleware. This is the reference to the installed python egg.
+The default is \fBegg:swift#staticweb\fR.
+.IP \fBcache_timeout\fR
+Seconds to cache container x-container-meta-web-* header values. The default is 300 seconds.
+.IP "\fBset log_name\fR"
+Label used when logging. The default is staticweb.
+.IP "\fBset log_facility\fR"
+Syslog log facility. The default is LOG_LOCAL0.
+.IP "\fBset log_level\fR "
+Logging level. The default is INFO.
+.IP "\fBset log_headers\fR"
+Enables the ability to log request headers. The default is False.
+.IP "\fBset access_log_name\fR"
+Label used when logging. The default is staticweb.
+.IP "\fBset access_log_facility\fR"
+Syslog log facility. The default is LOG_LOCAL0.
+.IP "\fBset access_log_level\fR "
+Logging level. The default is INFO.
+.RE
+
+
+
+.RS 0
+.IP "\fB[filter:tempurl]\fR"
+.RE
+
+Note: Put tempurl just before your auth filter(s) in the pipeline
+
+.RS 3
+.IP \fBincoming_remove_headers\fR
+The headers to remove from incoming requests. Simply a whitespace delimited list of header names and names can optionally end with '*' to indicate a prefix match. incoming_allow_headers is a list of exceptions to these removals.
+.IP \fBincoming_allow_headers\fR
+The headers allowed as exceptions to incoming_remove_headers. Simply a whitespace delimited list of header names and names can optionally end with '*' to indicate a prefix match.
+.IP "\fBoutgoing_remove_headers\fR"
+The headers to remove from outgoing responses. Simply a whitespace delimited list of header names and names can optionally end with '*' to indicate a prefix match. outgoing_allow_headers is a list of exceptions to these removals.
+.IP "\fBoutgoing_allow_headers\fR"
+The headers allowed as exceptions to outgoing_remove_headers. Simply a whitespace delimited list of header names and names can optionally end with '*' to indicate a prefix match.
+.IP "\fBset log_level\fR "
+.RE
+
+
+
+.RS 0
+.IP "\fB[filter:formpost]\fR"
+.RE
+
+Note: Put formpost just before your auth filter(s) in the pipeline
+
+.RS 3
+.IP \fBuse\fR
+Entry point for paste.deploy for the formpost middleware. This is the reference to the installed python egg.
+The default is \fBegg:swift#formpost\fR.
+.RE
+
+
+
+.RS 0
.IP "\fB[filter:name_check]\fR"
.RE
View
4 doc/source/associated_projects.rst
@@ -52,8 +52,4 @@ Content Distribution Network Integration
Other
-----
-* `Domain Remap <https://github.com/notmyname/swift-domainremap>`_ - Translates subdomains on the Host header to path elements that are appropriate for swift.
* `Glance <https://github.com/openstack/glance>`_ - Provides services for discovering, registering, and retrieving virtual machine images (for OpenStack Compute [Nova], for example).
-* `Rate Limit <https://github.com/dpgoetz/swift-ratelimit>`_ - Enforces limits on the request rates to accounts and containers.
-* `StaticWeb <http://gholt.github.com/swift-staticweb/>`_ - Allows serving static websites from Swift containers using ACLs and other metadata on those containers.
-* `TempURL/FormPOST <http://gholt.github.com/swift-tempurl/>`_ - Temporary, Expiring URLs and Form POSTing middleware.
View
1 doc/source/index.rst
@@ -47,6 +47,7 @@ Overview and Concepts
overview_reaper
overview_auth
overview_replication
+ ratelimit
overview_large_objects
overview_object_versioning
overview_container_sync
View
42 doc/source/misc.rst
@@ -133,9 +133,51 @@ Manager
:members:
:show-inheritance:
+Ratelimit
+=========
+
+.. automodule:: swift.common.middleware.ratelimit
+ :members:
+ :show-inheritance:
+
Swift3
======
.. automodule:: swift.common.middleware.swift3
:members:
:show-inheritance:
+
+StaticWeb
+=========
+
+.. automodule:: swift.common.middleware.staticweb
+ :members:
+ :show-inheritance:
+
+TempURL
+=======
+
+.. automodule:: swift.common.middleware.tempurl
+ :members:
+ :show-inheritance:
+
+FormPost
+========
+
+.. automodule:: swift.common.middleware.formpost
+ :members:
+ :show-inheritance:
+
+Domain Remap
+============
+
+.. automodule:: swift.common.middleware.domain_remap
+ :members:
+ :show-inheritance:
+
+CNAME Lookup
+============
+
+.. automodule:: swift.common.middleware.cname_lookup
+ :members:
+ :show-inheritance:
View
72 doc/source/ratelimit.rst
@@ -0,0 +1,72 @@
+=============
+Rate Limiting
+=============
+
+Rate limiting in swift is implemented as a pluggable middleware. Rate
+limiting is performed on requests that result in database writes to the
+account and container sqlite dbs. It uses memcached and is dependent on
+the proxy servers having highly synchronized time. The rate limits are
+limited by the accuracy of the proxy server clocks.
+
+--------------
+Configuration
+--------------
+
+All configuration is optional. If no account or container limits are provided
+there will be no rate limiting. Configuration available:
+
+======================== ========= ===========================================
+Option Default Description
+------------------------ --------- -------------------------------------------
+clock_accuracy 1000 Represents how accurate the proxy servers'
+ system clocks are with each other. 1000
+ means that all the proxies' clocks are
+ accurate to each other within 1
+ millisecond. No ratelimit should be
+ higher than the clock accuracy.
+max_sleep_time_seconds 60 App will immediately return a 498 response
+ if the necessary sleep time ever exceeds
+ the given max_sleep_time_seconds.
+log_sleep_time_seconds 0 To allow visibility into rate limiting set
+ this value > 0 and all sleeps greater than
+ the number will be logged.
+rate_buffer_seconds 5 Number of seconds the rate counter can
+ drop and be allowed to catch up (at a
+ faster than listed rate). A larger number
+ will result in larger spikes in rate but
+ better average accuracy.
+account_ratelimit 0 If set, will limit PUT and DELETE requests
+ to /account_name/container_name.
+ Number is in requests per second.
+account_whitelist '' Comma separated lists of account names that
+ will not be rate limited.
+account_blacklist '' Comma separated lists of account names that
+ will not be allowed. Returns a 497 response.
+container_ratelimit_size '' When set with container_ratelimit_x = r:
+ for containers of size x, limit requests
+ per second to r. Will limit PUT, DELETE,
+ and POST requests to /a/c/o.
+======================== ========= ===========================================
+
+The container rate limits are linearly interpolated from the values given. A
+sample container rate limiting could be:
+
+container_ratelimit_100 = 100
+
+container_ratelimit_200 = 50
+
+container_ratelimit_500 = 20
+
+This would result in
+
+================ ============
+Container Size Rate Limit
+---------------- ------------
+0-99 No limiting
+100 100
+150 75
+500 20
+1000 20
+================ ============
+
+
View
89 etc/proxy-server.conf-sample
@@ -21,7 +21,7 @@
# log_statsd_metric_prefix =
[pipeline:main]
-pipeline = catch_errors healthcheck cache tempauth proxy-server
+pipeline = catch_errors healthcheck cache ratelimit tempauth proxy-server
[app:proxy-server]
use = egg:swift#proxy
@@ -136,6 +136,38 @@ use = egg:swift#memcache
# commas, as in: 10.1.2.3:11211,10.1.2.4:11211
# memcache_servers = 127.0.0.1:11211
+[filter:ratelimit]
+use = egg:swift#ratelimit
+# You can override the default log routing for this filter here:
+# set log_name = ratelimit
+# set log_facility = LOG_LOCAL0
+# set log_level = INFO
+# set log_headers = False
+# clock_accuracy should represent how accurate the proxy servers' system clocks
+# are with each other. 1000 means that all the proxies' clocks are accurate to
+# each other within 1 millisecond. No ratelimit should be higher than the
+# clock accuracy.
+# clock_accuracy = 1000
+# max_sleep_time_seconds = 60
+# log_sleep_time_seconds of 0 means disabled
+# log_sleep_time_seconds = 0
+# allows for slow rates (e.g. running up to 5 seconds behind) to catch up.
+# rate_buffer_seconds = 5
+# account_ratelimit of 0 means disabled
+# account_ratelimit = 0
+
+# these are comma separated lists of account names
+# account_whitelist = a,b
+# account_blacklist = c,d
+
+# with container_ratelimit_x = r
+# for containers of size x limit requests per second to r. The container
+# rate will be linearly interpolated from the values given. With the values
+# below, a container of size 5 will get a rate of 75.
+# container_ratelimit_0 = 100
+# container_ratelimit_10 = 50
+# container_ratelimit_50 = 20
+
[filter:catch_errors]
use = egg:swift#catch_errors
# You can override the default log routing for this filter here:
@@ -144,6 +176,61 @@ use = egg:swift#catch_errors
# set log_level = INFO
# set log_headers = False
+[filter:cname_lookup]
+# Note: this middleware requires python-dnspython
+use = egg:swift#cname_lookup
+# You can override the default log routing for this filter here:
+# set log_name = cname_lookup
+# set log_facility = LOG_LOCAL0
+# set log_level = INFO
+# set log_headers = False
+# storage_domain = example.com
+# lookup_depth = 1
+
+# Note: Put staticweb just after your auth filter(s) in the pipeline
+[filter:staticweb]
+use = egg:swift#staticweb
+# Seconds to cache container x-container-meta-web-* header values.
+# cache_timeout = 300
+# You can override the default log routing for this filter here:
+# set log_name = staticweb
+# set log_facility = LOG_LOCAL0
+# set log_level = INFO
+# set access_log_name = staticweb
+# set access_log_facility = LOG_LOCAL0
+# set access_log_level = INFO
+# set log_headers = False
+
+# Note: Put tempurl just before your auth filter(s) in the pipeline
+[filter:tempurl]
+use = egg:swift#tempurl
+#
+# The headers to remove from incoming requests. Simply a whitespace delimited
+# list of header names and names can optionally end with '*' to indicate a
+# prefix match. incoming_allow_headers is a list of exceptions to these
+# removals.
+# incoming_remove_headers = x-timestamp
+#
+# The headers allowed as exceptions to incoming_remove_headers. Simply a
+# whitespace delimited list of header names and names can optionally end with
+# '*' to indicate a prefix match.
+# incoming_allow_headers =
+#
+# The headers to remove from outgoing responses. Simply a whitespace delimited
+# list of header names and names can optionally end with '*' to indicate a
+# prefix match. outgoing_allow_headers is a list of exceptions to these
+# removals.
+# outgoing_remove_headers = x-object-meta-*
+#
+# The headers allowed as exceptions to outgoing_remove_headers. Simply a
+# whitespace delimited list of header names and names can optionally end with
+# '*' to indicate a prefix match.
+# outgoing_allow_headers = x-object-meta-public-*
+
+# Note: Put formpost just before your auth filter(s) in the pipeline
+[filter:formpost]
+use = egg:swift#formpost
+
# Note: Just needs to be placed before the proxy-server in the pipeline.
[filter:name_check]
use = egg:swift#name_check
View
28 locale/swift.pot
@@ -401,6 +401,34 @@ msgstr ""
msgid "Error: %s"
msgstr ""
+#: swift/common/middleware/cname_lookup.py:91
+#, python-format
+msgid "Mapped %(given_domain)s to %(found_domain)s"
+msgstr ""
+
+#: swift/common/middleware/cname_lookup.py:102
+#, python-format
+msgid "Following CNAME chain for %(given_domain)s to %(found_domain)s"
+msgstr ""
+
+#: swift/common/middleware/ratelimit.py:172
+msgid "Returning 497 because of blacklisting"
+msgstr ""
+
+#: swift/common/middleware/ratelimit.py:185
+#, python-format
+msgid "Ratelimit sleep log: %(sleep)s for %(account)s/%(container)s/%(object)s"
+msgstr ""
+
+#: swift/common/middleware/ratelimit.py:192
+#, python-format
+msgid "Returning 498 because of ops rate limiting (Max Sleep) %s"
+msgstr ""
+
+#: swift/common/middleware/ratelimit.py:212
+msgid "Warning: Cannot ratelimit without a memcached client"
+msgstr ""
+
#: swift/common/middleware/swauth.py:635
#, python-format
msgid ""
View
7 setup.py
@@ -56,6 +56,7 @@
'bin/swift-dispersion-populate',
'bin/swift-dispersion-report',
'bin/swift-drive-audit',
+ 'bin/swift-form-signature',
'bin/swift-get-nodes',
'bin/swift-init',
'bin/swift-object-auditor',
@@ -70,6 +71,7 @@
'bin/swift-recon',
'bin/swift-recon-cron',
'bin/swift-ring-builder',
+ 'bin/swift-temp-url',
],
entry_points={
'paste.app_factory': [
@@ -81,10 +83,15 @@
'paste.filter_factory': [
'healthcheck=swift.common.middleware.healthcheck:filter_factory',
'memcache=swift.common.middleware.memcache:filter_factory',
+ 'ratelimit=swift.common.middleware.ratelimit:filter_factory',
+ 'cname_lookup=swift.common.middleware.cname_lookup:filter_factory',
'catch_errors=swift.common.middleware.catch_errors:filter_factory',
'swift3=swift.common.middleware.swift3:filter_factory',
+ 'staticweb=swift.common.middleware.staticweb:filter_factory',
'tempauth=swift.common.middleware.tempauth:filter_factory',
'recon=swift.common.middleware.recon:filter_factory',
+ 'tempurl=swift.common.middleware.tempurl:filter_factory',
+ 'formpost=swift.common.middleware.formpost:filter_factory',
'name_check=swift.common.middleware.name_check:filter_factory',
],
},
View
543 swift/common/middleware/formpost.py
@@ -0,0 +1,543 @@
+# Copyright (c) 2011 OpenStack, LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+FormPost Middleware
+
+Translates a browser form post into a regular Swift object PUT.
+
+The format of the form is::
+
+ <form action="<swift-url>" method="POST"
+ enctype="multipart/form-data">
+ <input type="hidden" name="redirect" value="<redirect-url>" />
+ <input type="hidden" name="max_file_size" value="<bytes>" />
+ <input type="hidden" name="max_file_count" value="<count>" />
+ <input type="hidden" name="expires" value="<unix-timestamp>" />
+ <input type="hidden" name="signature" value="<hmac>" />
+ <input type="file" name="file1" /><br />
+ <input type="submit" />
+ </form>
+
+The <swift-url> is the URL to the Swift destination, such as::
+
+ https://swift-cluster.example.com/v1/AUTH_account/container/object_prefix
+
+The name of each file uploaded will be appended to the <swift-url>
+given. So, you can upload directly to the root of container with a
+url like::
+
+ https://swift-cluster.example.com/v1/AUTH_account/container/
+
+Optionally, you can include an object prefix to better separate
+different users' uploads, such as::
+
+ https://swift-cluster.example.com/v1/AUTH_account/container/object_prefix
+
+Note the form method must be POST and the enctype must be set as
+"multipart/form-data".
+
+The redirect attribute is the URL to redirect the browser to after
+the upload completes. The URL will have status and message query
+parameters added to it, indicating the HTTP status code for the
+upload (2xx is success) and a possible message for further
+information if there was an error (such as "max_file_size exceeded").
+
+The max_file_size attribute must be included and indicates the
+largest single file upload that can be done, in bytes.
+
+The max_file_count attribute must be included and indicates the
+maximum number of files that can be uploaded with the form. Include
+additional ``<input type="file" name="filexx" />`` attributes if
+desired.
+
+The expires attribute is the Unix timestamp by which the form must
+be submitted; after that time the form is no longer valid.
+
+The signature attribute is the HMAC-SHA1 signature of the form. Here is
+sample code for computing the signature::
+
+ import hmac
+ from hashlib import sha1
+ from time import time
+ path = '/v1/account/container/object_prefix'
+ redirect = 'https://myserver.com/some-page'
+ max_file_size = 104857600
+ max_file_count = 10
+ expires = int(time() + 600)
+ key = 'mykey'
+ hmac_body = '%s\\n%s\\n%s\\n%s\\n%s' % (path, redirect,
+ max_file_size, max_file_count, expires)
+ signature = hmac.new(key, hmac_body, sha1).hexdigest()
+
+The key is the value of the X-Account-Meta-Temp-URL-Key header on the
+account.
+
+Be certain to use the full path, from the /v1/ onward.
+
+The command line tool ``swift-form-signature`` may be used (mostly
+just when testing) to compute expires and signature.
+
+Also note that the file attributes must be after the other attributes
+in order to be processed correctly. If attributes come after the
+file, they won't be sent with the subrequest (there is no way to
+parse all the attributes on the server-side without reading the whole
+thing into memory -- to service many requests, some with large files,
+there just isn't enough memory on the server, so attributes following
+the file are simply ignored).
+"""
+
+__all__ = ['FormPost', 'filter_factory', 'READ_CHUNK_SIZE', 'MAX_VALUE_LENGTH']
+
+import hmac
+import re
+import rfc822
+from hashlib import sha1
+from StringIO import StringIO
+from time import gmtime, strftime, time
+from time import time
+from urllib import quote, unquote
+
+from swift.common.utils import get_logger, streq_const_time
+from swift.common.wsgi import make_pre_authed_env
+from swift.common.http import HTTP_BAD_REQUEST
+
+
+#: The size of data to read from the form at any given time.
+READ_CHUNK_SIZE = 4096
+
+#: The maximum size of any attribute's value. Any additional data will be
+#: truncated.
+MAX_VALUE_LENGTH = 4096
+
#: Regular expression to match form attributes.
ATTRIBUTES_RE = re.compile(r'(\w+)=(".*?"|[^";]+)(; ?|$)')


class FormInvalid(Exception):
    """Raised when the posted form cannot be processed as a form post."""
    pass


def _parse_attrs(header):
    """
    Given the value of a header like:
    Content-Disposition: form-data; name="somefile"; filename="test.html"

    Return data like
    ("form-data", {"name": "somefile", "filename": "test.html"})

    The separator after the leading value may be written with or without
    whitespace around the ';' (for example
    ``multipart/form-data;boundary=xyz``).

    :param header: Value of a header (the part after the ': ').
    :returns: (value name, dict) of the attribute data parsed (see above).
              Attributes that do not match ATTRIBUTES_RE end the parse;
              anything after them is ignored.
    """
    attributes = {}
    attrs = ''
    if ';' in header:
        # Split on the bare ';' and strip, rather than requiring the exact
        # two-character sequence '; ', so compact headers still yield their
        # attributes.
        header, attrs = [part.strip() for part in header.split(';', 1)]
    match = ATTRIBUTES_RE.match(attrs)
    while match:
        # Surrounding double quotes are not part of the value.
        attributes[match.group(1)] = match.group(2).strip('"')
        # Drop what was consumed (plus any extra padding) and try again on
        # the remainder.
        attrs = attrs[match.end():].lstrip()
        match = ATTRIBUTES_RE.match(attrs)
    return header, attributes
+
+
class _IterRequestsFileLikeObject(object):
    """
    File-like view of a single part of a multi-part body.

    Reads from ``wsgi_input`` through ``input_buffer`` and stops handing
    out data once ``boundary`` is found. ``no_more_files`` and
    ``input_buffer`` are read back by the code that creates these objects
    to decide whether, and with what leftover data, to create the next one.

    :param wsgi_input: The file-like object to read from.
    :param boundary: The delimiter string that ends this part.
    :param input_buffer: Data already read from wsgi_input but not yet
                         consumed.
    """

    def __init__(self, wsgi_input, boundary, input_buffer):
        self.wsgi_input = wsgi_input
        self.boundary = boundary
        self.input_buffer = input_buffer
        self.no_more_data_for_this_file = False
        self.no_more_files = False

    def read(self, length=None):
        """
        Return up to ``length`` bytes of this part (READ_CHUNK_SIZE when
        length is None or 0), or '' once the part's boundary was reached.
        """
        length = length or READ_CHUNK_SIZE
        if self.no_more_data_for_this_file:
            return ''

        # Buffer enough to tell whether a boundary begins within the next
        # ``length`` bytes: the boundary itself plus the two characters
        # that follow it.
        lookahead = length + len(self.boundary) + 2
        if len(self.input_buffer) < lookahead:
            remaining = lookahead
            while remaining > 0:
                piece = self.wsgi_input.read(remaining)
                remaining -= len(piece)
                self.input_buffer += piece
                if not piece:
                    # Underlying input is exhausted.
                    self.no_more_files = True
                    break

        cut = self.input_buffer.find(self.boundary)
        if 0 <= cut <= length:
            # Boundary starts inside the requested window: hand back only
            # the data in front of it and close this part.
            data = self.input_buffer[:cut]
            rest = self.input_buffer[cut + len(self.boundary):]
            # A boundary followed by '--' is the final one.
            self.no_more_files = rest.startswith('--')
            self.no_more_data_for_this_file = True
            # Skip the two characters following the boundary.
            self.input_buffer = rest[2:]
            return data

        # No boundary within reach; return a plain slice.
        data = self.input_buffer[:length]
        self.input_buffer = self.input_buffer[length:]
        return data

    def readline(self):
        """
        Return data up to and including the next '\\r\\n', or up to the
        boundary when no newline comes first; '' once the part is done.
        """
        if self.no_more_data_for_this_file:
            return ''
        # Always pulls at least one chunk, then keeps going until either a
        # newline or the boundary shows up in the buffer (or input ends).
        while True:
            piece = self.wsgi_input.read(READ_CHUNK_SIZE)
            self.input_buffer += piece
            eol = self.input_buffer.find('\r\n')
            cut = self.input_buffer.find(self.boundary)
            if not piece:
                self.no_more_files = True
                break
            if eol >= 0 or cut >= 0:
                break

        newline_first = eol >= 0 and (cut < 0 or eol < cut)
        if not newline_first:
            # no newlines, just return up to next boundary
            return self.read(len(self.input_buffer))

        # Go through self.read so its boundary bookkeeping still applies.
        pieces = []
        wanted = eol + 2
        while wanted > 0:
            piece = self.read(wanted)
            # Not expected, since the data is already buffered, but guards
            # against looping forever.
            if not piece:
                break
            wanted -= len(piece)
            pieces.append(piece)
        return ''.join(pieces)
+
+
def _iter_requests(wsgi_input, boundary):
    """
    Walk a multi-part mime encoded input and yield one file-like object
    per part.

    :param wsgi_input: The file-like object to read from.
    :param boundary: The mime boundary to separate new file-like
                     objects on.
    :returns: A generator of file-like objects for each part.
    :raises FormInvalid: If the first line of the input is not the
                         starting boundary.
    """
    delimiter = '--' + boundary
    first_line = wsgi_input.readline().strip()
    if first_line != delimiter:
        raise FormInvalid('invalid starting boundary')
    # Every later boundary is preceded by a line break.
    delimiter = '\r\n' + delimiter
    leftover = ''
    while True:
        part = _IterRequestsFileLikeObject(wsgi_input, delimiter, leftover)
        yield part
        if part.no_more_files:
            break
        # Carry over whatever the finished part buffered but did not use.
        leftover = part.input_buffer
+
class _CappedFileLikeObject(object):
    """
    Wraps a file-like object and keeps a running total of the data read
    through it, raising an EOFError as soon as that total goes above
    max_file_size.

    :param fp: The file-like object to wrap.
    :param max_file_size: The maximum bytes to read before raising an
                          EOFError.
    """

    def __init__(self, fp, max_file_size):
        self.amount_read = 0
        self.max_file_size = max_file_size
        self.fp = fp

    def _account(self, data):
        """Add data to the running total and enforce the cap."""
        self.amount_read += len(data)
        if self.amount_read > self.max_file_size:
            raise EOFError('max_file_size exceeded')
        return data

    def read(self, size=None):
        """Read from the wrapped object, counting what was returned."""
        return self._account(self.fp.read(size))

    def readline(self):
        """Read one line from the wrapped object, counting its length."""
        return self._account(self.fp.readline())
+
+
class FormPost(object):
    """
    FormPost Middleware

    See above for a full description.

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        #: The next WSGI application/filter in the paste.deploy pipeline.
        self.app = app
        #: The filter configuration dict.
        self.conf = conf
        #: The logger to use with this middleware.
        self.logger = get_logger(conf, log_route='formpost')
        #: The HTTP user agent to use with subrequests.
        self.agent = '%(orig)s FormPost'

    def __call__(self, env, start_response):
        """
        Main hook into the WSGI paste.deploy filter/app pipeline.

        POST requests with a multipart/form-data content type and a
        boundary are translated into subrequests; everything else is
        passed to the next app untouched.

        :param env: The WSGI environment dict.
        :param start_response: The WSGI start_response hook.
        :returns: Response as per WSGI.
        """
        if env['REQUEST_METHOD'] == 'POST':
            try:
                content_type, attrs = \
                    _parse_attrs(env.get('CONTENT_TYPE') or '')
                if content_type == 'multipart/form-data' and \
                        'boundary' in attrs:
                    resp_status = [0]

                    def _start_response(status, headers, exc_info=None):
                        # Remember the status so it can be logged once
                        # the form has been processed.
                        resp_status[0] = int(status.split(' ', 1)[0])
                        return start_response(status, headers, exc_info)

                    body = self._translate_form(env, _start_response,
                                                attrs['boundary'])
                    # Log after processing so the real status is
                    # recorded rather than the initial placeholder.
                    self._log_request(env, resp_status[0])
                    return body
            except (FormInvalid, EOFError) as err:
                self._log_request(env, HTTP_BAD_REQUEST)
                body = 'FormPost: %s' % err
                # WSGI requires the headers to be a list of tuples.
                start_response('400 Bad Request',
                               [('Content-Type', 'text/plain'),
                                ('Content-Length', str(len(body)))])
                return [body]
        return self.app(env, start_response)

    def _translate_form(self, env, start_response, boundary):
        """
        Translates the form data into subrequests and issues a
        response.

        :param env: The WSGI environment dict.
        :param start_response: The WSGI start_response hook.
        :param boundary: The multipart boundary taken from the
                         request's Content-Type.
        :returns: Response as per WSGI.
        :raises FormInvalid: If max_file_count (or a value checked by
                             _perform_subrequest) is not an integer.
        """
        key = self._get_key(env)
        status = message = ''
        attributes = {}
        file_count = 0
        for fp in _iter_requests(env['wsgi.input'], boundary):
            hdrs = rfc822.Message(fp, 0)
            disp, attrs = \
                _parse_attrs(hdrs.getheader('Content-Disposition', ''))
            if disp == 'form-data' and attrs.get('filename'):
                # A file part: upload it with the attributes seen so far.
                file_count += 1
                try:
                    if file_count > int(attributes.get('max_file_count') or 0):
                        status = '400 Bad Request'
                        message = 'max file count exceeded'
                        break
                except ValueError:
                    raise FormInvalid('max_file_count not an integer')
                attributes['filename'] = attrs['filename'] or 'filename'
                if 'content-type' not in attributes and 'content-type' in hdrs:
                    attributes['content-type'] = \
                        hdrs['Content-Type'] or 'application/octet-stream'
                status, message = self._perform_subrequest(env, start_response,
                                                           attributes, fp, key)
                if status[:1] != '2':
                    break
            else:
                # A plain form field: keep at most MAX_VALUE_LENGTH bytes
                # of its value and discard the remainder of the part.
                data = ''
                mxln = MAX_VALUE_LENGTH
                while mxln:
                    chunk = fp.read(mxln)
                    if not chunk:
                        break
                    mxln -= len(chunk)
                    data += chunk
                while fp.read(READ_CHUNK_SIZE):
                    pass
                if 'name' in attrs:
                    # NOTE(review): rstrip() removes any trailing CR, LF
                    # and '-' characters, so a value that legitimately
                    # ends in '-' loses it -- confirm this is intended.
                    attributes[attrs['name'].lower()] = data.rstrip('\r\n--')
        if not status:
            status = '400 Bad Request'
            message = 'no files to process'
        if not attributes.get('redirect'):
            body = status
            if message:
                body = status + '\r\nFormPost: ' + message.title()
            # WSGI header values must be strings, not ints.
            start_response(status, [('Content-Type', 'text/plain'),
                                    ('Content-Length', str(len(body)))])
            return [body]
        status = status.split(' ', 1)[0]
        redirect = attributes['redirect']
        # Append to an existing query string rather than starting a
        # second one when the redirect URL already carries parameters.
        if '?' in redirect:
            redirect += '&'
        else:
            redirect += '?'
        location = '%sstatus=%s&message=%s' % \
            (redirect, quote(status), quote(message))
        body = '<html><body><p><a href="%s">Click to ' \
               'continue...</a></p></body></html>' % location
        start_response('303 See Other',
                       [('Location', location),
                        ('Content-Length', str(len(body)))])
        return [body]

    def _perform_subrequest(self, env, start_response, attributes, fp, key):
        """
        Validates the form's expiry and signature and, when valid,
        performs the PUT subrequest that stores the uploaded file.

        :param env: The WSGI environment dict.
        :param start_response: The WSGI start_response hook (unused;
                               the subrequest gets its own hook).
        :param attributes: dict of the attributes of the form so far.
        :param fp: The file-like object containing the request body.
        :param key: The account key to validate the signature with.
        :returns: (status_line, message) tuple; message is empty when
                  the subrequest was issued.
        :raises FormInvalid: If max_file_size or expires is not an
                             integer.
        """
        if not key:
            return '401 Unauthorized', 'invalid signature'
        try:
            max_file_size = int(attributes.get('max_file_size') or 0)
        except ValueError:
            raise FormInvalid('max_file_size not an integer')
        subenv = make_pre_authed_env(env, 'PUT', agent=self.agent)
        subenv['HTTP_TRANSFER_ENCODING'] = 'chunked'
        subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size)
        if subenv['PATH_INFO'][-1] != '/' and \
                subenv['PATH_INFO'].count('/') < 4:
            subenv['PATH_INFO'] += '/'
        subenv['PATH_INFO'] += attributes['filename'] or 'filename'
        if 'content-type' in attributes:
            subenv['CONTENT_TYPE'] = \
                attributes['content-type'] or 'application/octet-stream'
        elif 'CONTENT_TYPE' in subenv:
            del subenv['CONTENT_TYPE']
        try:
            if int(attributes.get('expires') or 0) < time():
                return '401 Unauthorized', 'form expired'
        except ValueError:
            # The attribute being validated is "expires".
            raise FormInvalid('expires not an integer')
        hmac_body = '%s\n%s\n%s\n%s\n%s' % (
            env['PATH_INFO'],
            attributes.get('redirect') or '',
            attributes.get('max_file_size') or '0',
            attributes.get('max_file_count') or '0',
            attributes.get('expires') or '0'
        )
        sig = hmac.new(key, hmac_body, sha1).hexdigest()
        if not streq_const_time(sig, (attributes.get('signature') or
                                      'invalid')):
            return '401 Unauthorized', 'invalid signature'
        substatus = [None]

        def _start_response(status, headers, exc_info=None):
            substatus[0] = status

        self._pull_first_chunk(self.app(subenv, _start_response))
        return substatus[0], ''

    @staticmethod
    def _pull_first_chunk(app_iter):
        """
        Pulls the first item from a WSGI response iterable, so that the
        application has called its start_response hook, then closes the
        iterable if it provides close().

        :param app_iter: The iterable returned by a WSGI application.
        """
        try:
            try:
                next(iter(app_iter))
            except StopIteration:
                pass
        finally:
            close = getattr(app_iter, 'close', None)
            if close is not None:
                close()

    def _get_key(self, env):
        """
        Returns the X-Account-Meta-Temp-URL-Key header value for the
        account, or None if none is set.

        The value is looked up in memcache first (when available) and
        otherwise fetched with a HEAD subrequest on the account and
        cached for 60 seconds.

        :param env: The WSGI environment for the request.
        :returns: X-Account-Meta-Temp-URL-Key str value, or None.
        """
        parts = env['PATH_INFO'].split('/', 4)
        # Only /v1/<account>/<container>[/...] paths are handled.
        if len(parts) < 4 or parts[0] or parts[1] != 'v1' or not parts[2] or \
                not parts[3]:
            return None
        account = parts[2]
        key = None
        memcache = env.get('swift.cache')
        if memcache:
            key = memcache.get('temp-url-key/%s' % account)
        if not key:
            newenv = make_pre_authed_env(env, 'HEAD', '/v1/' + account,
                                         self.agent)
            newenv['CONTENT_LENGTH'] = '0'
            newenv['wsgi.input'] = StringIO('')
            key = [None]

            def _start_response(status, response_headers, exc_info=None):
                for h, v in response_headers:
                    if h.lower() == 'x-account-meta-temp-url-key':
                        key[0] = v

            self._pull_first_chunk(self.app(newenv, _start_response))
            key = key[0]
            if key and memcache:
                memcache.set('temp-url-key/%s' % account, key, timeout=60)
        return key

    def _log_request(self, env, response_status_int):
        """
        Used when a request might not be logged by the underlying
        WSGI application, but we'd still like to record what
        happened. An early 401 Unauthorized is a good example of
        this.

        :param env: The WSGI environment for the request.
        :param response_status_int: The HTTP status we'll be replying
                                    to the request with.
        """
        the_request = quote(unquote(env.get('PATH_INFO') or '/'))
        if env.get('QUERY_STRING'):
            the_request = the_request + '?' + env['QUERY_STRING']
        client = env.get('HTTP_X_CLUSTER_CLIENT_IP')
        if not client and 'HTTP_X_FORWARDED_FOR' in env:
            # remote host for other lbs
            client = env['HTTP_X_FORWARDED_FOR'].split(',')[0].strip()
        if not client:
            client = env.get('REMOTE_ADDR')
        # Fields that are not tracked here are logged as '-'.
        self.logger.info(' '.join(quote(str(x)) for x in (
            client or '-',
            env.get('REMOTE_ADDR') or '-',
            strftime('%d/%b/%Y/%H/%M/%S', gmtime()),
            env.get('REQUEST_METHOD') or 'GET',
            the_request,
            env.get('SERVER_PROTOCOL') or '1.0',
            response_status_int,
            env.get('HTTP_REFERER') or '-',
            (env.get('HTTP_USER_AGENT') or '-') + ' FormPOST',
            env.get('HTTP_X_AUTH_TOKEN') or '-',
            '-',
            '-',
            '-',
            env.get('swift.trans_id') or '-',
            '-',
            '-',
        )))
+
+
def filter_factory(global_conf, **local_conf):
    """
    Returns the WSGI filter for use with paste.deploy.

    The filter's configuration is a copy of global_conf with the
    local_conf values layered on top.
    """
    merged = global_conf.copy()
    merged.update(local_conf)

    def formpost_filter(app):
        return FormPost(app, merged)

    return formpost_filter
View
239 swift/common/middleware/ratelimit.py
@@ -0,0 +1,239 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+import eventlet
+from webob import Request, Response
+
+from swift.common.utils import split_path, cache_from_env, get_logger
+from swift.proxy.server import get_container_memcache_key
+from swift.common.memcached import MemcacheConnectionError
+
+
class MaxSleepTimeHitError(Exception):
    """
    Raised when honoring a rate limit would require sleeping for (about)
    the configured maximum sleep time or longer.
    """
+
+
class RateLimitMiddleware(object):
    """
    Rate limiting middleware

    Rate limits requests on both an Account and Container level. Limits are
    configurable.
    """

    # Seconds to stall a blacklisted account before answering it.
    BLACK_LIST_SLEEP = 1

    def __init__(self, app, conf, logger=None):
        """
        Reads the rate limit settings from conf.

        :param app: The next WSGI application/filter in the pipeline.
        :param conf: The filter configuration dict.
        :param logger: Optional logger; one is created with get_logger
                       when not given.
        """
        self.app = app
        self.logger = logger if logger else \
            get_logger(conf, log_route='ratelimit')
        self.account_ratelimit = float(conf.get('account_ratelimit', 0))
        self.max_sleep_time_seconds = \
            float(conf.get('max_sleep_time_seconds', 60))
        self.log_sleep_time_seconds = \
            float(conf.get('log_sleep_time_seconds', 0))
        self.clock_accuracy = int(conf.get('clock_accuracy', 1000))
        self.rate_buffer_seconds = int(conf.get('rate_buffer_seconds', 5))

        def account_list(option):
            # Comma separated account names, blanks dropped.
            names = (name.strip()
                     for name in conf.get(option, '').split(','))
            return [name for name in names if name]

        self.ratelimit_whitelist = account_list('account_whitelist')
        self.ratelimit_blacklist = account_list('account_blacklist')
        # Looked up lazily from the WSGI environment on the first request.
        self.memcache_client = None

        # container_ratelimit_<size> = <rate> options, ordered by size.
        prefix = 'container_ratelimit_'
        conf_limits = sorted(
            (int(conf_key[len(prefix):]), float(conf[conf_key]))
            for conf_key in conf.keys() if conf_key.startswith(prefix))

        def sloped(start_size, slope, start_rate):
            # Linear interpolation towards the next configured size.
            return lambda x: (x - start_size) * slope + start_rate

        def flat(rate):
            # The largest configured size keeps a constant rate.
            return lambda x: rate

        self.container_ratelimits = []
        last = len(conf_limits) - 1
        for pos, (cur_size, cur_rate) in enumerate(conf_limits):
            if pos < last:
                next_size, next_rate = conf_limits[pos + 1]
                slope = (float(next_rate) - float(cur_rate)) \
                    / (next_size - cur_size)
                line_func = sloped(cur_size, slope, cur_rate)
            else:
                line_func = flat(cur_rate)
            self.container_ratelimits.append((cur_size, cur_rate, line_func))

    def get_container_maxrate(self, container_size):
        """
        Returns number of requests allowed per second for given container size.

        :param container_size: The container size to look up; a falsy
                               value (None, 0, '') yields None.
        :returns: The allowed rate, or None when the size is falsy or
                  smaller than every configured size.
        """
        if not container_size:
            return None
        container_size = int(container_size)
        chosen = None
        # Entries are sorted by size; use the last one not above it.
        for size, _rate, func in self.container_ratelimits:
            if container_size < size:
                break
            chosen = func
        if chosen is None:
            return None
        return chosen(container_size)

    def get_ratelimitable_key_tuples(self, req_method, account_name,
                                     container_name=None, obj_name=None):
        """
        Returns a list of key (used in memcache), ratelimit tuples. Keys
        should be checked in order.

        :param req_method: HTTP method
        :param account_name: account name from path
        :param container_name: account name from path
        :param obj_name: object name from path
        """
        keys = []
        if not (account_name and container_name):
            return keys
        # COPYs are not limited
        if not obj_name:
            # Container PUT/DELETE counts against the account limit.
            if self.account_ratelimit and req_method in ('PUT', 'DELETE'):
                keys.append(("ratelimit/%s" % account_name,
                             self.account_ratelimit))
            return keys
        if req_method in ('PUT', 'DELETE', 'POST'):
            # Object writes are limited by the cached container size.
            container_size = None
            memcache_key = get_container_memcache_key(account_name,
                                                      container_name)
            container_info = self.memcache_client.get(memcache_key)
            if isinstance(container_info, dict):
                container_size = container_info.get('container_size', 0)
            container_rate = self.get_container_maxrate(container_size)
            if container_rate:
                keys.append(("ratelimit/%s/%s" % (account_name,
                                                  container_name),
                             container_rate))
        return keys

    def _get_sleep_time(self, key, max_rate):
        '''
        Returns the amount of time (a float in seconds) that the app
        should sleep.

        :param key: a memcache key
        :param max_rate: maximum rate allowed in requests per second
        :raises: MaxSleepTimeHitError if max sleep time is exceeded.
        '''
        try:
            accuracy = self.clock_accuracy
            # All *_m values are in units of 1/clock_accuracy seconds.
            now_m = int(round(time.time() * accuracy))
            per_request_m = int(round(accuracy / max_rate))
            running_m = self.memcache_client.incr(key, delta=per_request_m)
            if now_m - running_m > self.rate_buffer_seconds * accuracy:
                # The counter is further behind than the buffer allows;
                # restart it from the current time.
                self.memcache_client.set(
                    key, str(int(now_m + per_request_m)), serialize=False)
                sleep_m = 0
            else:
                sleep_m = max(running_m - now_m - per_request_m, 0)

            max_sleep_m = self.max_sleep_time_seconds * accuracy
            if max_sleep_m - sleep_m <= accuracy * 0.01:
                # treat as no-op decrement time
                self.memcache_client.decr(key, delta=per_request_m)
                raise MaxSleepTimeHitError(
                    "Max Sleep Time Exceeded: %.2f" %
                    (float(sleep_m) / accuracy))

            return float(sleep_m) / accuracy
        except MemcacheConnectionError:
            # Without memcache the request is simply not delayed.
            return 0

    def handle_ratelimit(self, req, account_name, container_name, obj_name):
        '''
        Performs rate limiting and account white/black listing.  Sleeps
        if necessary.

        :param req: the request being rate limited
        :param account_name: account name from path
        :param container_name: container name from path
        :param obj_name: object name from path
        :returns: an error Response when the request is rejected,
                  otherwise None
        '''
        if account_name in self.ratelimit_blacklist:
            self.logger.error(_('Returning 497 because of blacklisting: %s'),
                              account_name)
            eventlet.sleep(self.BLACK_LIST_SLEEP)
            return Response(status='497 Blacklisted',
                            body='Your account has been blacklisted',
                            request=req)
        if account_name in self.ratelimit_whitelist:
            return None
        key_tuples = self.get_ratelimitable_key_tuples(
            req.method, account_name, container_name=container_name,
            obj_name=obj_name)
        for key, max_rate in key_tuples:
            try:
                delay = self._get_sleep_time(key, max_rate)
                if self.log_sleep_time_seconds and \
                        delay > self.log_sleep_time_seconds:
                    self.logger.warning(
                        _("Ratelimit sleep log: %(sleep)s for "
                          "%(account)s/%(container)s/%(object)s"),
                        {'sleep': delay, 'account': account_name,
                         'container': container_name, 'object': obj_name})
                if delay > 0:
                    eventlet.sleep(delay)
            except MaxSleepTimeHitError as e:
                self.logger.error(
                    _('Returning 498 for %(meth)s to '
                      '%(acc)s/%(cont)s/%(obj)s . Ratelimit (Max Sleep) %(e)s'),
                    {'meth': req.method, 'acc': account_name,
                     'cont': container_name, 'obj': obj_name, 'e': str(e)})
                return Response(status='498 Rate Limited',
                                body='Slow down', request=req)
        return None

    def __call__(self, env, start_response):
        """
        WSGI entry point.
        Wraps env in webob.Request object and passes it down.

        :param env: WSGI environment dictionary
        :param start_response: WSGI callable
        """
        req = Request(env)
        if self.memcache_client is None:
            self.memcache_client = cache_from_env(env)
        if not self.memcache_client:
            self.logger.warning(
                _('Warning: Cannot ratelimit without a memcached client'))
            return self.app(env, start_response)
        try:
            version, account, container, obj = split_path(req.path, 1, 4, True)
        except ValueError:
            # Not a path this middleware understands; pass it through.
            return self.app(env, start_response)
        rejection = self.handle_ratelimit(req, account, container, obj)
        if rejection is not None:
            return rejection(env, start_response)
        return self.app(env, start_response)
+
+
def filter_factory(global_conf, **local_conf):
    """
    paste.deploy filter factory for the rate limiting middleware.

    The filter's configuration is a copy of global_conf with the
    local_conf values layered on top.
    """
    merged = global_conf.copy()
    merged.update(local_conf)
    return lambda app: RateLimitMiddleware(app, merged)
View
564 swift/common/middleware/staticweb.py
@@ -0,0 +1,564 @@
+# Copyright (c) 2010-2012 OpenStack, LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This StaticWeb WSGI middleware will serve container data as a static web site
+with index file and error file resolution and optional file listings. This mode
+is normally only active for anonymous requests. If you want to use it with
+authenticated requests, set the ``X-Web-Mode: true`` header on the request.
+
+The ``staticweb`` filter should be added to the pipeline in your
+``/etc/swift/proxy-server.conf`` file just after any auth middleware. Also, the
+configuration section for the ``staticweb`` middleware itself needs to be
+added. For example::
+
+ [DEFAULT]
+ ...
+
+ [pipeline:main]
+ pipeline = healthcheck cache tempauth staticweb proxy-server
+
+ ...
+
+ [filter:staticweb]
+ use = egg:swift#staticweb
+ # Seconds to cache container x-container-meta-web-* header values.
+ # cache_timeout = 300
+ # You can override the default log routing for this filter here:
+ # set log_name = staticweb
+ # set log_facility = LOG_LOCAL0
+ # set log_level = INFO
+ # set access_log_name = staticweb
+ # set access_log_facility = LOG_LOCAL0
+ # set access_log_level = INFO
+ # set log_headers = False
+
+Any publicly readable containers (for example, ``X-Container-Read: .r:*``, see
+`acls`_ for more information on this) will be checked for
+X-Container-Meta-Web-Index and X-Container-Meta-Web-Error header values::
+
+ X-Container-Meta-Web-Index <index.name>
+ X-Container-Meta-Web-Error <error.name.suffix>
+
+If X-Container-Meta-Web-Index is set, any <index.name> files will be served
+without having to specify the <index.name> part. For instance, setting
+``X-Container-Meta-Web-Index: index.html`` will be able to serve the object
+.../pseudo/path/index.html with just .../pseudo/path or .../pseudo/path/
+
+If X-Container-Meta-Web-Error is set, any errors (currently just 401
+Unauthorized and 404 Not Found) will instead serve the
+.../<status.code><error.name.suffix> object. For instance, setting
+``X-Container-Meta-Web-Error: error.html`` will serve .../404error.html for
+requests for paths not found.
+
+For pseudo paths that have no <index.name>, this middleware can serve HTML file
+listings if you set the ``X-Container-Meta-Web-Listings: true`` metadata item
+on the container.
+
+If listings are enabled, the listings can have a custom style sheet by setting
+the X-Container-Meta-Web-Listings-CSS header. For instance, setting
+``X-Container-Meta-Web-Listings-CSS: listing.css`` will make listings link to
+the .../listing.css style sheet. If you "view source" in your browser on a
+listing page, you will see the well defined document structure that can be
+styled.
+
+Example usage of this middleware via ``swift``:
+
+ Make the container publicly readable::
+
+ swift post -r '.r:*' container
+
+ You should be able to get objects directly, but no index.html resolution or
+ listings.
+
+ Set an index file directive::
+
+ swift post -m 'web-index:index.html' container
+
+ You should be able to hit paths that have an index.html without needing to
+ type the index.html part.
+
+ Turn on listings::
+
+ swift post -m 'web-listings: true' container
+
+ Now you should see object listings for paths and pseudo paths that have no
+ index.html.
+
+ Enable a custom listings style sheet::
+
+ swift post -m 'web-listings-css:listings.css' container
+
+ Set an error file::
+
+ swift post -m 'web-error:error.html' container
+
+ Now 401's should load 401error.html, 404's should load 404error.html, etc.
+"""
+
+
+try:
+ import simplejson as json
+except ImportError:
+ import json
+
+import cgi
+import time
+from urllib import unquote, quote as urllib_quote
+
+from webob import Response
+from webob.exc import HTTPMovedPermanently, HTTPNotFound
+
+from swift.common.utils import cache_from_env, get_logger, human_readable, \
+ split_path, TRUE_VALUES
+from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request, \
+ WSGIContext
+from swift.common.http import is_success, is_redirection, HTTP_NOT_FOUND
+
+
def quote(value, safe='/'):
    """
    Wrapper around urllib.quote that UTF-8 encodes unicode values before
    quoting them; other values are quoted as given.

    :param value: The value to quote.
    :param safe: Characters that should not be quoted.
    :returns: The quoted value.
    """
    encoded = value.encode('utf-8') if isinstance(value, unicode) else value
    return urllib_quote(encoded, safe)
+
+
class _StaticWebContext(WSGIContext):
    """
    The Static Web WSGI middleware filter; serves container data as a
    static web site. See `staticweb`_ for an overview.

    This _StaticWebContext is used by StaticWeb with each request
    that might need to be handled to make keeping contextual
    information about the request a bit simpler than storing it in
    the WSGI env.
    """

    def __init__(self, staticweb, version, account, container, obj):
        """
        :param staticweb: The StaticWeb filter this context works for;
                          its app, cache timeout and loggers are reused.
        :param version: The version segment of the request path.
        :param account: The account segment of the request path.
        :param container: The container segment of the request path.
        :param obj: The object segment of the request path, if any.
        """
        WSGIContext.__init__(self, staticweb.app)
        self.version = version
        self.account = account
        self.container = container
        self.obj = obj
        self.app = staticweb.app
        self.cache_timeout = staticweb.cache_timeout
        self.logger = staticweb.logger
        self.access_logger = staticweb.access_logger
        self.log_headers = staticweb.log_headers
        self.agent = '%(orig)s StaticWeb'
        # Results from the last call to self._get_container_info.
        self._index = self._error = self._listings = self._listings_css = None

    def _error_response(self, response, env, start_response):
        """
        Sends the error response to the remote client, possibly resolving a
        custom error response body based on x-container-meta-web-error.

        :param response: The error response we should default to sending.
        :param env: The original request WSGI environment.
        :param start_response: The WSGI start_response hook.
        """
        self._log_response(env, self._get_status_int())
        if not self._error:
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return response
        # The error page lookup below overwrites the captured response,
        # so keep the original around to fall back on.
        save_response_status = self._response_status
        save_response_headers = self._response_headers
        save_response_exc_info = self._response_exc_info
        resp = self._app_call(make_pre_authed_env(env, 'GET',
            '/%s/%s/%s/%s%s' % (self.version, self.account, self.container,
                                self._get_status_int(), self._error),
            self.agent))
        if is_success(self._get_status_int()):
            # Serve the custom error body with the original error status.
            start_response(save_response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        start_response(save_response_status, save_response_headers,
                       save_response_exc_info)
        return response

    def _get_container_info(self, env):
        """
        Retrieves x-container-meta-web-index, x-container-meta-web-error,
        x-container-meta-web-listings, and x-container-meta-web-listings-css
        from memcache or from the cluster and stores the result in memcache and
        in self._index, self._error, self._listings, and self._listings_css.

        :param env: The WSGI environment dict.
        """
        self._index = self._error = self._listings = self._listings_css = None
        memcache_client = cache_from_env(env)
        if memcache_client:
            memcache_key = '/staticweb/%s/%s/%s' % (self.version, self.account,
                                                    self.container)
            cached_data = memcache_client.get(memcache_key)
            if cached_data:
                (self._index, self._error, self._listings,
                 self._listings_css) = cached_data
                return
        resp = make_pre_authed_request(env, 'HEAD',
            '/%s/%s/%s' % (self.version, self.account, self.container),
            agent=self.agent).get_response(self.app)
        if is_success(resp.status_int):
            self._index = \
                resp.headers.get('x-container-meta-web-index', '').strip()
            self._error = \
                resp.headers.get('x-container-meta-web-error', '').strip()
            self._listings = \
                resp.headers.get('x-container-meta-web-listings', '').strip()
            self._listings_css = \
                resp.headers.get('x-container-meta-web-listings-css',
                                 '').strip()
            if memcache_client:
                memcache_client.set(memcache_key,
                    (self._index, self._error, self._listings,
                     self._listings_css),
                    timeout=self.cache_timeout)

    def _listing(self, env, start_response, prefix=None):
        """
        Sends an HTML object listing to the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing.
        """
        if self._listings.lower() not in TRUE_VALUES:
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        tmp_env = make_pre_authed_env(env, 'GET',
            '/%s/%s/%s' % (self.version, self.account, self.container),
            self.agent)
        tmp_env['QUERY_STRING'] = 'delimiter=/&format=json'
        if prefix:
            tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix)
        else:
            prefix = ''
        resp = self._app_call(tmp_env)
        if not is_success(self._get_status_int()):
            return self._error_response(resp, env, start_response)
        listing = None
        body = ''.join(resp)
        if body:
            listing = json.loads(body)
        if not listing:
            # An empty listing is reported as not found.
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        headers = {'Content-Type': 'text/html; charset=UTF-8'}
        body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
               'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
               '<html>\n' \
               ' <head>\n' \
               ' <title>Listing of %s</title>\n' % \
               cgi.escape(env['PATH_INFO'])
        if self._listings_css:
            body += ' <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % (self._build_css_path(prefix))
        else:
            body += ' <style type="text/css">\n' \
                    ' h1 {font-size: 1em; font-weight: bold;}\n' \
                    ' th {text-align: left; padding: 0px 1em 0px 1em;}\n' \
                    ' td {padding: 0px 1em 0px 1em;}\n' \
                    ' a {text-decoration: none;}\n' \
                    ' </style>\n'
        body += ' </head>\n' \
                ' <body>\n' \
                ' <h1 id="title">Listing of %s</h1>\n' \
                ' <table id="listing">\n' \
                ' <tr id="heading">\n' \
                ' <th class="colname">Name</th>\n' \
                ' <th class="colsize">Size</th>\n' \
                ' <th class="coldate">Date</th>\n' \
                ' </tr>\n' % \
                cgi.escape(env['PATH_INFO'])
        if prefix:
            body += ' <tr id="parent" class="item">\n' \
                    ' <td class="colname"><a href="../">../</a></td>\n' \
                    ' <td class="colsize">&nbsp;</td>\n' \
                    ' <td class="coldate">&nbsp;</td>\n' \
                    ' </tr>\n'
        # Pseudo-directories are listed first, then the objects.
        for item in listing:
            if 'subdir' in item:
                subdir = item['subdir']
                if prefix:
                    subdir = subdir[len(prefix):]
                body += ' <tr class="item subdir">\n' \
                        ' <td class="colname"><a href="%s">%s</a></td>\n' \
                        ' <td class="colsize">&nbsp;</td>\n' \
                        ' <td class="coldate">&nbsp;</td>\n' \
                        ' </tr>\n' % \
                        (quote(subdir), cgi.escape(subdir))
        for item in listing:
            if 'name' in item:
                name = item['name']
                if prefix:
                    name = name[len(prefix):]
                body += ' <tr class="item %s">\n' \
                        ' <td class="colname"><a href="%s">%s</a></td>\n' \
                        ' <td class="colsize">%s</td>\n' \
                        ' <td class="coldate">%s</td>\n' \
                        ' </tr>\n' % \
                        (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                                  for t in item['content_type'].split('/')),
                         quote(name), cgi.escape(name),
                         human_readable(item['bytes']),
                         cgi.escape(item['last_modified']).split('.')[0].
                            replace('T', ' '))
        body += ' </table>\n' \
                ' </body>\n' \
                '</html>\n'
        resp = Response(headers=headers, body=body)
        self._log_response(env, resp.status_int)
        return resp(env, start_response)

    def _build_css_path(self, prefix=''):
        """
        Constructs a relative path from a given prefix within the container.
        URLs and paths starting with '/' are not modified.

        :param prefix: The prefix for the container listing.
        """
        if self._listings_css.startswith(('/', 'http://', 'https://')):
            css_path = quote(self._listings_css, ':/')
        else:
            # Climb one level for every path segment in the prefix.
            css_path = '../' * prefix.count('/') + quote(self._listings_css)
        return css_path

    def handle_container(self, env, start_response):
        """
        Handles a possible static web request for a container.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        """
        self._get_container_info(env)
        if not self._listings and not self._index:
            if env.get('HTTP_X_WEB_MODE', 'f').lower() in TRUE_VALUES:
                return HTTPNotFound()(env, start_response)
            return self.app(env, start_response)
        if env['PATH_INFO'][-1] != '/':
            resp = HTTPMovedPermanently(
                location=(env['PATH_INFO'] + '/'))
            self._log_response(env, resp.status_int)
            return resp(env, start_response)
        if not self._index:
            return self._listing(env, start_response)
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['PATH_INFO'] += self._index
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if status_int == HTTP_NOT_FOUND:
            return self._listing(env, start_response)
        elif not is_success(status_int) and \
                not is_redirection(status_int):
            # Only real failures take the error path; the test must use
            # "and" because no status is both a success and a
            # redirection, so "or" would be true for every response.
            return self._error_response(resp, env, start_response)
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp

    def handle_object(self, env, start_response):
        """
        Handles a possible static web request for an object. This object could
        resolve into an index or listing request.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        """
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if is_success(status_int) or is_redirection(status_int):
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        if status_int != HTTP_NOT_FOUND:
            return self._error_response(resp, env, start_response)
        self._get_container_info(env)
        if not self._listings and not self._index:
            return self.app(env, start_response)
        status_int = HTTP_NOT_FOUND
        if self._index:
            # Treat the path as a pseudo-directory and try its index.
            tmp_env = dict(env)
            tmp_env['HTTP_USER_AGENT'] = \
                '%s StaticWeb' % env.get('HTTP_USER_AGENT')
            if tmp_env['PATH_INFO'][-1] != '/':
                tmp_env['PATH_INFO'] += '/'
            tmp_env['PATH_INFO'] += self._index
            resp = self._app_call(tmp_env)
            status_int = self._get_status_int()
            if is_success(status_int) or is_redirection(status_int):
                if env['PATH_INFO'][-1] != '/':
                    resp = HTTPMovedPermanently(
                        location=env['PATH_INFO'] + '/')
                    self._log_response(env, resp.status_int)
                    return resp(env, start_response)
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        if status_int == HTTP_NOT_FOUND:
            if env['PATH_INFO'][-1] != '/':
                # Redirect to the slash-terminated path only if something
                # actually exists under that pseudo-directory.
                tmp_env = make_pre_authed_env(env, 'GET',
                    '/%s/%s/%s' % (self.version, self.account,
                                   self.container),
                    self.agent)
                tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \
                    '=/&limit=1&prefix=%s' % quote(self.obj + '/')
                resp = self._app_call(tmp_env)
                body = ''.join(resp)
                if not is_success(self._get_status_int()) or not body or \
                        not json.loads(body):
                    resp = HTTPNotFound()(env, self._start_response)
                    return self._error_response(resp, env, start_response)
                resp = HTTPMovedPermanently(location=env['PATH_INFO'] +
                    '/')
                self._log_response(env, resp.status_int)
                return resp(env, start_response)
            return self._listing(env, start_response, self.obj)
        # The index lookup failed with something other than a 404; report
        # that failure instead of falling off the end and returning None,
        # which is not a valid WSGI response.
        return self._error_response(resp, env, start_response)

    @staticmethod
    def _request_headers(env):
        """
        Rebuilds the request's HTTP headers from the WSGI environment.

        :param env: The WSGI environment dict.
        :returns: List of (Header-Name, value) tuples, sorted by WSGI
                  key.
        """
        headers = []
        for key in sorted(env):
            if key.startswith('HTTP_'):
                name = key[len('HTTP_'):]
            elif key in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
                # These two headers carry no HTTP_ prefix in WSGI.
                name = key
            else:
                continue
            headers.append((name.replace('_', '-').title(), env[key]))
        return headers

    def _log_response(self, env, status_int):
        """
        Logs an access line for StaticWeb responses; use when the next app in
        the pipeline will not be handling the final response to the remote
        user.

        Assumes that the request and response bodies are 0 bytes or very near 0
        so no bytes transferred are tracked or logged.

        This does mean that the listings responses that actually do transfer
        content will not be logged with any bytes transferred, but in counter
        to that the full bytes for the underlying listing will be logged by the
        proxy even if the remote client disconnects early for the StaticWeb
        listing.

        I didn't think the extra complexity of getting the bytes transferred
        exactly correct for these requests was worth it, but perhaps someone
        else will think it is.

        To get things exact, this filter would need to use an
        eventlet.posthooks logger like the proxy does and any log processing
        systems would need to ignore some (but not all) proxy requests made by
        StaticWeb if they were just interested in the bytes transferred to the
        remote client.

        :param env: The original WSGI environment dict.
        :param status_int: The HTTP status being sent to the client.
        """
        trans_time = '%.4f' % (time.time() -
                               env.get('staticweb.start_time', time.time()))
        the_request = quote(unquote(env['PATH_INFO']))
        if env.get('QUERY_STRING'):
            the_request = the_request + '?' + env['QUERY_STRING']
        # remote user for zeus
        client = env.get('HTTP_X_CLUSTER_CLIENT_IP')
        if not client and 'HTTP_X_FORWARDED_FOR' in env:
            # remote user for other lbs
            client = env['HTTP_X_FORWARDED_FOR'].split(',')[0].strip()
        logged_headers = None
        if self.log_headers:
            # There is no request object in scope here, so the headers
            # are rebuilt from the WSGI environment.
            logged_headers = '\n'.join(
                '%s: %s' % (k, v) for k, v in self._request_headers(env))
        self.access_logger.info(' '.join(quote(str(x)) for x in (
            client or '-',
            env.get('REMOTE_ADDR', '-'),
            time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()),
            env['REQUEST_METHOD'],
            the_request,
            env['SERVER_PROTOCOL'],
            status_int,
            env.get('HTTP_REFERER', '-'),
            env.get('HTTP_USER_AGENT', '-'),
            env.get('HTTP_X_AUTH_TOKEN', '-'),
            '-',
            '-',
            env.get('HTTP_ETAG', '-'),
            env.get('swift.trans_id', '-'),
            logged_headers or '-',
            trans_time)))
+
+
class StaticWeb(object):
    """
    The Static Web WSGI middleware filter; serves container data as a static
    web site. See `staticweb`_ for an overview.

    :param app: The next WSGI application/filter in the paste.deploy pipeline.
    :param conf: The filter configuration dict.
    """

    def __init__(self, app, conf):
        #: The next WSGI application/filter in the paste.deploy pipeline.
        self.app = app
        #: The filter configuration dict.
        self.conf = conf
        #: The seconds to cache the x-container-meta-web-* headers.
        self.cache_timeout = int(conf.get('cache_timeout', 300))
        #: Logger for this filter.
        self.logger = get_logger(conf, log_route='staticweb')
        # Each access_* setting falls back to the plain log setting of
        # the same name; unset or empty values are left out.
        candidates = (
            (key, conf.get('access_' + key, conf.get(key, None)))
            for key in ('log_facility', 'log_name', 'log_level'))
        access_log_conf = dict(
            (key, value) for key, value in candidates if value)
        #: Web access logger for this filter.
        self.access_logger = get_logger(access_log_conf,
                                        log_route='staticweb-access')
        #: Indicates whether full HTTP headers should be logged or not.
        self.log_headers = conf.get('log_headers', 'f').lower() in TRUE_VALUES

    def __call__(self, env, start_response):
        """
        Main hook into the WSGI paste.deploy filter/app pipeline.

        :param env: The WSGI environment dict.
        :param start_response: The WSGI start_response hook.
        """
        env['staticweb.start_time'] = time.time()
        try:
            (version, account, container, obj) = \
                split_path(env['PATH_INFO'], 2, 4, True)
        except ValueError:
            return self.app(env, start_response)
        method = env['REQUEST_METHOD']
        if method in ('PUT', 'POST') and container and not obj:
            # The container's metadata may be changing, so drop any
            # cached x-container-meta-web-* values for it.
            memcache_client = cache_from_env(env)
            if memcache_client:
                memcache_client.delete(
                    '/staticweb/%s/%s/%s' % (version, account, container))
            return self.app(env, start_response)
        if method not in ('HEAD', 'GET'):
            return self.app(env, start_response)
        web_mode = env.get('HTTP_X_WEB_MODE', 'f').lower() in TRUE_VALUES
        if env.get('REMOTE_USER') and not web_mode:
            # Authenticated requests only get static web handling when
            # they ask for it with X-Web-Mode.
            return self.app(env, start_response)
        if not container:
            return self.app(env, start_response)
        context = _StaticWebContext(self, version, account, container, obj)
        handler = context.handle_object if obj else context.handle_container
        return handler(env, start_response)
+
+
def filter_factory(global_conf, **local_conf):
    """
    Returns a Static Web WSGI filter for use with paste.deploy.

    The filter's configuration is a copy of global_conf with the
    local_conf values layered on top.
    """
    merged = global_conf.copy()
    merged.update(local_conf)
    return lambda app: StaticWeb(app, merged)
View
490 swift/common/middleware/tempurl.py
@@ -0,0 +1,490 @@
+# Copyright (c) 2010-2012 OpenStack, LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+TempURL Middleware
+
+Allows the creation of URLs to provide temporary access to objects.
+
+For example, a website may wish to provide a link to download a large
+object in Swift, but the Swift account has no public access. The
+website can generate a URL that will provide GET access for a limited
+time to the resource. When the web browser user clicks on the link,
+the browser will download the object directly from Swift, obviating
+the need for the website to act as a proxy for the request.
+
+If the user were to share the link with all his friends, or
+accidentally post it on a forum, etc. the direct access would be
+limited to the expiration time set when the website created the link.
+
+To create such temporary URLs, first an X-Account-Meta-Temp-URL-Key
+header must be set on the Swift account. Then, an HMAC-SHA1 (RFC 2104)
+signature is generated using the HTTP method to allow (GET or PUT),
+the Unix timestamp the access should be allowed until, the full path
+to the object, and the key set on the account.
+
+For example, here is code generating the signature for a GET for 60
+seconds on /v1/AUTH_account/container/object::
+
+ import hmac
+ from hashlib import sha1
+ from time import time
+ method = 'GET'
+ expires = int(time() + 60)
+ path = '/v1/AUTH_account/container/object'
+ key = 'mykey'
+ hmac_body = '%s\\n%s\\n%s' % (method, expires, path)
+ sig = hmac.new(key, hmac_body, sha1).hexdigest()
+
+Be certain to use the full path, from the /v1/ onward.
+
+Let's say the sig ends up equaling
+da39a3ee5e6b4b0d3255bfef95601890afd80709 and expires ends up
+1323479485. Then, for example, the website could provide a link to::
+
+ https://swift-cluster.example.com/v1/AUTH_account/container/object?
+ temp_url_sig=da39a3ee5e6b4b0d3255bfef95601890afd80709&
+ temp_url_expires=1323479485
+
+Any alteration of the resource path or query arguments would result
+in 401 Unauthorized. Similarly, a PUT where GET was the allowed method
+would 401. HEAD is allowed if GET or PUT is allowed.
+
+Using this in combination with browser form post translation
+middleware could also allow direct-from-browser uploads to specific
+locations in Swift.
+
+Note that changing the X-Account-Meta-Temp-URL-Key will invalidate
+any previously generated temporary URLs within 60 seconds (the
+memcache time for the key).
+"""
+
# Names made available by a star-import of this module.
__all__ = [
    'TempURL',
    'filter_factory',
    'DEFAULT_INCOMING_REMOVE_HEADERS',
    'DEFAULT_INCOMING_ALLOW_HEADERS',
    'DEFAULT_OUTGOING_REMOVE_HEADERS',
    'DEFAULT_OUTGOING_ALLOW_HEADERS',
]
+
+
+import hmac
+from hashlib import sha1
+from os.path import basename
+from StringIO import StringIO
+from time import gmtime, strftime, time
+from urllib import quote, unquote
+from urlparse import parse_qs
+
+from swift.common.utils import get_logger
+from swift.common.wsgi import make_pre_authed_env
+from swift.common.http import HTTP_UNAUTHORIZED
+
+
#: Headers stripped from incoming requests by default. The value is a
#: whitespace delimited list of header names; a name ending in '*' is a
#: prefix match. Names listed in DEFAULT_INCOMING_ALLOW_HEADERS are
#: exempt from removal.
DEFAULT_INCOMING_REMOVE_HEADERS = 'x-timestamp'

#: Default exceptions to DEFAULT_INCOMING_REMOVE_HEADERS, in the same
#: whitespace delimited format (a trailing '*' is a prefix match).
DEFAULT_INCOMING_ALLOW_HEADERS = ''

#: Headers stripped from outgoing responses by default. The value is a
#: whitespace delimited list of header names; a name ending in '*' is a
#: prefix match. Names listed in DEFAULT_OUTGOING_ALLOW_HEADERS are
#: exempt from removal.
DEFAULT_OUTGOING_REMOVE_HEADERS = 'x-object-meta-*'

#: Default exceptions to DEFAULT_OUTGOING_REMOVE_HEADERS, in the same
#: whitespace delimited format (a trailing '*' is a prefix match).
DEFAULT_OUTGOING_ALLOW_HEADERS = 'x-object-meta-public-*'
+
+
+class TempURL(object):
+ """
+ WSGI Middleware to grant temporary URLs specific access to Swift
+ resources. See the overview for more information.
+
+ This middleware understands the following configuration settings::
+
+ incoming_remove_headers
+ The headers to remove from incoming requests. Simply a
+ whitespace delimited list of header names and names can
+ optionally end with '*' to indicate a prefix match.
+ incoming_allow_headers is a list of exceptions to these
+ removals.
+ Default: x-timestamp
+
+ incoming_allow_headers
+ The headers allowed as exceptions to
+ incoming_remove_headers. Simply a whitespace delimited
+ list of header names and names can optionally end with
+ '*' to indicate a prefix match.
+ Default: None
+
+ outgoing_remove_headers
+ The headers to remove from outgoing responses. Simply a
+ whitespace delimited list of header names and names can
+ optionally end with '*' to indicate a prefix match.
+ outgoing_allow_headers is a list of exceptions to these
+ removals.
+ Default: x-object-meta-*
+
+ outgoing_allow_headers
+ The headers allowed as exceptions to
+ outgoing_remove_headers. Simply a whitespace delimited
+ list of header names and names can optionally end with
+ '*' to indicate a prefix match.
+ Default: x-object-meta-public-*
+
+ :param app: The next WSGI filter or app in the paste.deploy
+ chain.
+ :param conf: The configuration dict for the middleware.
+ """
+
+ def __init__(self, app, conf):
+ #: The next WSGI application/filter in the paste.deploy pipeline.
+ self.app = app
+ #: The filter configuration dict.
+ self.conf = conf
+ #: The logger to use with this middleware.
+ self.logger = get_logger(conf, log_route='tempurl')
+
+ headers = DEFAULT_INCOMING_REMOVE_HEADERS
+ if 'incoming_remove_headers' in conf:
+ headers = conf['incoming_remove_headers']
+ headers = \
+ ['HTTP_' + h.upper().replace('-', '_') for h in headers.split()]
+ #: Headers to remove from incoming requests. Uppercase WSGI env style,
+ #: like `HTTP_X_PRIVATE`.
+ self.incoming_remove_headers = [h for h in headers if h[-1] != '*']
+ #: Header with match prefixes to remove from incoming requests.
+ #: Uppercase WSGI env style, like `HTTP_X_SENSITIVE_*`.
+ self.incoming_remove_headers_startswith = \
+ [h[:-1] for h in headers if h[-1] == '*']
+
+ headers = DEFAULT_INCOMING_ALLOW_HEADERS
+ if 'incoming_allow_headers' in conf:
+ headers = conf['incoming_allow_headers']
+ headers = \
+ ['HTTP_' + h.upper().replace('-', '_') for h in headers.split()]
+ #: Headers to allow in incoming requests. Uppercase WSGI env style,
+ #: like `HTTP_X_MATCHES_REMOVE_PREFIX_BUT_OKAY`.
+ self.incoming_allow_headers = [h for h in headers if h[-1] != '*']
+ #: Header with match prefixes to allow in incoming requests. Uppercase
+ #: WSGI env style, like `HTTP_X_MATCHES_REMOVE_PREFIX_BUT_OKAY_*`.
+ self.incoming_allow_headers_startswith = \
+ [h[:-1] for h in headers if h[-1] == '*']
+
+ headers = DEFAULT_OUTGOING_REMOVE_HEADERS
+ if 'outgoing_remove_headers' in conf:
+ headers = conf['outgoing_remove_headers']
+ headers = [h.lower() for h in headers.split()]
+ #: Headers to remove from outgoing responses. Lowercase, like
+ #: `x-account-meta-temp-url-key`.
+ self.outgoing_remove_headers = [h for h in headers if h[-1] != '*']
+ #: Header with match prefixes to remove from outgoing responses.
+ #: Lowercase, like `x-account-meta-private-*`.
+ self.outgoing_remove_headers_startswith = \
+ [h[:-1] for h in headers if h[-1] == '*']
+
+ headers = DEFAULT_OUTGOING_ALLOW_HEADERS
+ if 'outgoing_allow_headers' in conf:
+ headers = conf['outgoing_allow_headers']
+ headers = [h.lower() for h in headers.split()]
+ #: Headers to allow in outgoing responses. Lowercase, like
+ #: `x-matches-remove-prefix-but-okay`.
+ self.outgoing_allow_headers = [h for h in headers if h[-1] != '*']
+ #: Header with match prefixes to allow in outgoing responses.
+ #: Lowercase, like `x-matches-remove-prefix-but-okay-*`.
+ self.outgoing_allow_headers_startswith = \
+ [h[:-1] for h in headers if h[-1] == '*']
+ #: HTTP user agent to use for subrequests.
+ self.agent = '%(orig)s TempURL'
+
+ def __call__(self, env, start_response):
+ """
+ Main hook into the WSGI paste.deploy filter/app pipeline.
+
+ :param env: The WSGI environment dict.
+ :param start_response: The WSGI start_response hook.
+ :returns: Response as per WSGI.
+ """
+ temp_url_sig, temp_url_expires = self._get_temp_url_info(env)
+ if temp_url_sig is None and temp_url_expires is None:
+ return self.app(env, start_response)
+ if not temp_url_sig or not temp_url_expires:
+ return self._invalid(env, start_response)
+ account = self._get_account(env)
+ if not account:
+ return self._invalid(env, start_response)
+ key = self._get_key(env, account)
+ if not key:
+ return self._invalid(env, start_response)
+ if env['REQUEST_METHOD'] == 'HEAD':
+ hmac_val = self._get_hmac(env, temp_url_expires, key,
+ request_method='GET')
+ if temp_url_sig != hmac_val:
+ hmac_val = self._get_hmac(env, temp_url_expires, key,
+ request_method='PUT')
+ if temp_url_sig != hmac_val:
+ return self._invalid(env, start_response)
+ else:
+ hmac_val = self._get_hmac(env, temp_url_expires, key)
+ if temp_url_sig != hmac_val:
+ return self._invalid(env, start_response)
+ self._clean_incoming_headers(env)
+ env['swift.authorize'] = lambda req: None
+ env['swift.authorize_override'] = True
+ env['REMOTE_USER'] = '.wsgi.tempurl'
+
+ def _start_response(status, headers, exc_info=None):
+ headers = self._clean_outgoing_headers(headers)
+ if env['REQUEST_METHOD'] == 'GET':
+ already = False
+ for h, v in headers:
+ if h.lower() == 'content-disposition':
+ already = True
+ break
+ if not already:
+ headers.append(('Content-Disposition',
+ 'attachment; filename=%s' %
+ (quote(basename(env['PATH_INFO'])))))
+ return start_response(status, headers, exc_info)
+
+ return self.app(env, _start_response)
+
+ def _get_account(self, env):
+ """
+ Returns just the account for the request, if it's an object GET, PUT,
+ or HEAD request; otherwise, None is returned.
+
+ :param env: The WSGI environment for the request.
+ :returns: Account str or None.
+ """
+ account = None
+ if env['REQUEST_METHOD'] in ('GET', 'PUT', 'HEAD'):
+ parts = env['PATH_INFO'].split('/', 4)
+ # Must be five parts, ['', 'v1', 'a', 'c', 'o'], must be a v1
+ # request, have account, container, and object values, and the
+ # object value can't just have '/'s.
+ if len(parts) == 5 and not parts[0] and parts[1] == 'v1' and \
+ parts[2] and parts[3] and parts[4].strip('/'):
+ account = parts[2]
+ return account
+
+ def _get_temp_url_info(self, env):
+ """
+ Returns the provided temporary URL parameters (sig, expires),
+ if given and syntactically valid. Either sig or expires could
+ be None if not provided. If provided, expires is also
+ converted to an int if possible or 0 if not, and checked for
+ expiration (returns 0 if expired).
+