diff --git a/README b/README
deleted file mode 100644
index 79bc200..0000000
--- a/README
+++ /dev/null
@@ -1,58 +0,0 @@
-# apt-transport-s3
-
-### Table of Contents
-1. [License & Copyright](#license & copyright)
-2. [Requirements](#requirements)
-3. [Configuration](#configuration)
-4. [Usage](#usage)
-5. [Contribution](#contribution)
-
-## apt-transport-s3
-Allow to have a privately hosted apt repository on S3. Access keys are read from
-`/etc/apt/s3auth.conf` file or IAM role if machine is hosted on AWS or has
-access to AWS metadata server on 169.254.169.254.
-
-## License & Copyright
- # Copyright (C) 2014 Bashton Ltd.
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-## Requirements
-### Additional package dependencies (except installed by default in Debian)
-1. python-configobj
-
-## Configuration
-/etc/apt/s3auth.conf or IAM role
-can provide credentials required for using private apt repositories.
-
-Example of s3auth.conf file:
-```
-AccessKeyId = myaccesskey
-SecretAccessKey = mysecretaccesskey
-Token = ''
-```
-
-Token should be empty string.
-
-## Usage
-Install the .deb package from the releases page. The bucket repo should be
-specified using an s3:// prefix, for example:
-
-`deb s3://aptbucketname.s3.amazonaws.com/repo/ trusty main contrib non-free`
-
-## Contribution
-If you want to contribute a patch via PR please create it against development
-branch. Patches via email are welcome as well.
diff --git a/README.md b/README.md
deleted file mode 120000
index 100b938..0000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-README
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..84b1f68
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+# apt-transport-s3
+
+### Table of Contents
+1. [License & Copyright](#license--copyright)
+2. [Requirements](#requirements)
+3. [Configuration](#configuration)
+4. [Usage](#usage)
+5. [Contribution](#contribution)
+
+## apt-transport-s3
+Allows you to have a privately hosted apt repository on S3. Access keys are read
+from the `/etc/apt/s3auth.conf` file, from an IAM role if the machine is hosted
+on AWS or has access to the AWS metadata server on 169.254.169.254, or from the
+usual AWS environment variables.
+
+## License & Copyright
+ # Copyright (C) 2014 Bashton Ltd.
+ #
+ # This program is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+## Requirements
+### Additional package dependencies (beyond those installed by default in Debian)
+1. python-configobj
+
+## Configuration
+Credentials required for using private apt repositories are read from
+/etc/apt/s3auth.conf or from an IAM role.
+
+NOTE: Region MUST match the region the buckets are stored in; if not defined, it defaults to us-east-1.
+
+Example of s3auth.conf file:
+```
+AccessKeyId = myaccesskey
+SecretAccessKey = mysecretaccesskey
+Region = 'us-east-1'
+```
+
+## Usage
+Install the .deb package from the releases page. The bucket repo should be
+specified using an s3:// prefix, for example:
+
+`deb s3://aptbucketname/repo/ trusty main contrib non-free`
+
+if you need to use a proxy to connect to the internet you can specify this
+as an APT configuration directive (for example in
+/etc/apt/apt.conf.d/90apt-transport-s3)
+
+`Acquire::http::Proxy "http://myproxy:3128/";`
+
+## Testing
+The method can be run in interactive mode. It reads messages on `stdin` and writes responses on `stdout`. The messages it
+accepts on stdin are in the following format, [documented here](http://www.fifi.org/doc/libapt-pkg-doc/method.html/index.html#abstract).
+
+```
+600 URI Acquire
+URI:s3://my-s3-repository/project-a/dists/trusty/main/binary-amd64/Packages
+Filename:Packages.downloaded
+Fail-Ignore:true
+Index-File:true
+
+```
+
+This message triggers an S3 GET for the above bucket and key and saves the result to Filename. A blank line must follow the message to trigger processing by the s3 method.
+
+## Contribution
+If you want to contribute a patch via a PR, please create it against the
+development branch. Patches via email are welcome as well.
diff --git a/s3 b/s3
index d2e6649..da5f340 100755
--- a/s3
+++ b/s3
@@ -13,6 +13,8 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+#
+# https://github.com/BashtonLtd/apt-transport-s3
import urllib2
import urlparse
@@ -22,9 +24,17 @@ import hmac
import json
import sys
import os
+import datetime
+import xml.etree.ElementTree as ET
+import socket
+import ssl
from configobj import ConfigObj
-import syslog
+RETRIES = 5
+
+
+def wait_time(c):
+ return pow(2, c) - 1
class AWSCredentials(object):
@@ -32,7 +42,9 @@ class AWSCredentials(object):
Class for dealing with IAM role credentials from meta-data server and later
on to deal with boto/aws config provided keys
"""
- def __init__(self):
+
+ def __init__(self, config_file=None):
+ self.conf_file = config_file
host = 'http://169.254.169.254'
path = '/latest/meta-data/iam/security-credentials/'
self.meta_data_uri = urlparse.urljoin(host, path)
@@ -42,18 +54,30 @@ class AWSCredentials(object):
request = urllib2.Request(self.meta_data_uri)
response = None
- try:
- response = urllib2.urlopen(request, None, 5)
- self.iamrole = response.read()
- except urllib2.URLError as e:
- if hasattr(e, 'reason'):
- raise Exception("URL error reason: %s, probable cause is that\
- you don't have IAM role on this machine" % e.reason)
- elif hasattr(e, 'code'):
- raise Exception("Server error code: %s" % e.code)
- finally:
- if response:
- response.close()
+
+ for i in range(0, RETRIES):
+ try:
+ response = urllib2.urlopen(request, None, 10)
+ self.iamrole = response.read()
+ break
+ except ssl.SSLError as e:
+ if 'timed out' in e.message:
+ time.sleep(wait_time(i + 1))
+ else:
+ raise e
+ except socket.timeout:
+ time.sleep(wait_time(i + 1))
+ except urllib2.URLError as e:
+ if hasattr(e, 'reason'):
+ raise Exception("URL error reason: %s, probable cause is that\
+ you don't have IAM role on this machine" % e.reason)
+ elif hasattr(e, 'code'):
+ raise Exception("Server error code: %s" % e.code)
+ finally:
+ if response:
+ response.close()
+ else:
+ raise Exception("GetRole request timed out")
def __load_config(self):
"""
@@ -63,15 +87,13 @@ class AWSCredentials(object):
SecretAccessKey = mysecretkey
Token = '' # this can/have to be empty
"""
- _CONF_FILE = '/etc/apt/s3auth.conf'
# Checking if 'file' exists, if it does read it
- if os.path.isfile(os.path.expanduser(_CONF_FILE)):
- config = ConfigObj(os.path.expanduser(_CONF_FILE))
+ if os.path.isfile(os.path.expanduser(self.conf_file)):
+ config = ConfigObj(os.path.expanduser(self.conf_file))
return config
else:
- raise Exception("Config file: %s doesn't exist" % _CONF_FILE)
- syslog.syslog("Config file: %s doesn't exist" % _CONF_FILE)
+ raise Exception("Config file: %s doesn't exist" % self.conf_file)
def get_credentials(self):
"""
@@ -79,81 +101,95 @@ class AWSCredentials(object):
Note: This method should be explicitly called after constructing new
object, as in 'explicit is better than implicit'.
"""
- data = None
+ data = {}
try:
data = self.__load_config()
except:
pass
- if data is None:
+ self.region = data.get('Region', 'us-east-1')
+ if self.region is None or self.region == '':
+ raise Exception("Region required")
+ sys.stderr.write("Region: {}\n".format(self.region))
+
+ if data.get("AccessKeyId") is not None:
+ sys.stderr.write("Using config file for credentials\n")
+
+ if data.get("AccessKeyId") is None:
+ data['AccessKeyId'] = os.environ.get("AWS_ACCESS_KEY_ID", None)
+ data['SecretAccessKey'] = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
+ data['Token'] = os.environ.get("AWS_SESSION_TOKEN", None)
+ if data.get("AccessKeyId") is not None:
+ sys.stderr.write("Using environment variables for credentials\n")
+
+ if data.get("AccessKeyId") is None:
self.__get_role()
request = urllib2.Request(
urlparse.urljoin(self.meta_data_uri, self.iamrole)
- )
+ )
response = None
- try:
- response = urllib2.urlopen(request, None, 30)
- data = json.loads(response.read())
- except urllib2.URLError as e:
- if hasattr(e, 'reason'):
- raise Exception("URL error reason: %s" % e.reason)
- elif hasattr(e, 'code'):
- raise Exception("Server error code: %s" % e.code)
- finally:
- if response:
- response.close()
+ for i in range(0, RETRIES):
+ try:
+ response = urllib2.urlopen(request, None, 30)
+ data = json.loads(response.read())
+ break
+ except ssl.SSLError as e:
+ if 'timed out' in e.message:
+ time.sleep(wait_time(i + 1))
+ else:
+ raise e
+ except socket.timeout:
+ time.sleep(wait_time(i + 1))
+ except urllib2.URLError as e:
+ if hasattr(e, 'reason'):
+ raise Exception("URL error reason: %s" % e.reason)
+ elif hasattr(e, 'code'):
+ raise Exception("Server error code: %s" % e.code)
+ finally:
+ if response:
+ response.close()
+ else:
+ raise Exception("GetCredentials request timed out")
self.access_key = data['AccessKeyId']
+ if self.access_key is None or self.access_key == '':
+ raise Exception("AccessKeyId required")
+
self.secret_key = data['SecretAccessKey']
+ if self.secret_key is None or self.secret_key == '':
+ raise Exception("SecretAccessKey required")
+
self.token = data['Token']
- def sign(self, request, timeval=None):
- """
- Attach a valid S3 signature to request.
- request - instance of Request
- """
- date = time.strftime("%a, %d %b %Y %H:%M:%S GMT",
- timeval or time.gmtime())
- request.add_header('Date', date)
- host = request.get_host()
-
- # TODO: bucket name finding is ugly, I should find a way to support
- # both naming conventions: http://bucket.s3.amazonaws.com/ and
- # http://s3.amazonaws.com/bucket/
- try:
- pos = host.find(".s3")
- assert pos != -1
- bucket = host[:pos]
- except:
- raise Exception("Can't establish bucket name based on the hostname:\
- %s" % host)
-
- resource = "/%s%s" % (bucket, request.get_selector(), )
- amz_headers = 'x-amz-security-token:%s\n' % self.token
- sigstring = ("%(method)s\n\n\n%(date)s\n"
- "%(canon_amzn_headers)s%(canon_amzn_resource)s") % ({
- 'method': request.get_method(),
- 'date': request.headers.get('Date'),
- 'canon_amzn_headers': amz_headers,
- 'canon_amzn_resource': resource})
- digest = hmac.new(
- str(self.secret_key),
- str(sigstring),
- hashlib.sha1).digest()
- signature = digest.encode('base64').strip()
- return signature
-
- def urlopen(self, url, **kwargs):
- """urlopen(url) open the remote file and return a file object."""
+ def v4Sign(self, key, msg):
+ return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
+
+ def getSignatureKey(self, dateStamp, serviceName):
+ kDate = self.v4Sign(('AWS4' + self.secret_key).encode('utf-8'), dateStamp)
+ kRegion = self.v4Sign(kDate, self.region)
+ kService = self.v4Sign(kRegion, serviceName)
+ kSigning = self.v4Sign(kService, 'aws4_request')
+ return kSigning
+
+ def uriopen(self, uri):
+ """uriopen(uri) open the remote file and return a file object."""
try:
- return urllib2.urlopen(self._request(url), None, 30)
+ return urllib2.urlopen(self._request(uri), None, 30)
except urllib2.HTTPError as e:
# HTTPError is a "file like object" similar to what
# urllib2.urlopen returns, so return it and let caller
# deal with the error code
+ if e.code == 400:
+ # token errors are buried in 400 messages so expose
+ xmlResponse = ET.fromstring(e.read())
+ if xmlResponse is not None:
+ e.msg = "{} - {}".format(e, xmlResponse.find("Message").text)
+ if e.code == 301:
+ e.msg = "{} - Set s3auth.conf region to match bucket 'Region': bucket may not be in {}".format(e, self.region)
+
return e
# For other errors, throw an exception directly
except urllib2.URLError as e:
@@ -164,18 +200,118 @@ class AWSCredentials(object):
except urllib2.socket.timeout:
raise Exception("Socket timeout")
- def _request(self, url):
- request = urllib2.Request(url)
- request.add_header('x-amz-security-token', self.token)
- signature = self.sign(request)
- request.add_header(
- 'Authorization', "AWS {0}:{1}".format(
- self.access_key,
- signature
- ).rstrip()
+ def _request(self, uri):
+ uri_parsed = urlparse.urlparse(uri)
+ if '.' in uri_parsed.netloc:
+ raise Exception("uri should not include fully qualified domain name for bucket")
+
+ # quote path for +, ~, and spaces
+ # see bugs.launchpad.net #1003633 and #1086997
+ scheme = 'https'
+ host = 's3.{}.amazonaws.com'.format(self.region)
+ bucket = uri_parsed.netloc
+ path = '/{}{}'.format(bucket, self._quote(uri_parsed.path, '+~ '))
+
+ s3url = urlparse.urlunparse(
+ (
+ scheme,
+ host,
+ path,
+ '',
+ '',
+ ''
+ )
)
+
+ request = urllib2.Request(s3url)
+
+ request.add_header('x-amz-content-sha256', self._payload_hash(request))
+
+ # Create a date for headers and the credential string
+ amzdate = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+
+ request.add_header('x-amz-date', amzdate)
+
+ if self.token is not None and self.token != '':
+ request.add_header('x-amz-security-token', self.token)
+
+ canonical_request = self._canonical_request(request, host, amzdate)
+
+ authorization_header = self._authorization_header(canonical_request, amzdate)
+
+ request.add_header('Authorization', authorization_header)
+
return request
+ def _authorization_header(self, canonical_request, amzdate):
+ datestamp = amzdate.split('T')[0]
+
+ algorithm = 'AWS4-HMAC-SHA256'
+ credential_scope = datestamp + '/' + self.region + '/s3/aws4_request'
+
+ string_to_sign = algorithm + '\n' \
+ + amzdate + '\n' \
+ + credential_scope + '\n' \
+ + canonical_request
+
+ signing_key = self.getSignatureKey(datestamp, 's3')
+ signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
+
+ authorization_header = "{} Credential={}/{}, SignedHeaders={}, Signature={}".format(
+ algorithm,
+ self.access_key,
+ credential_scope,
+ self._signed_headers(),
+ signature
+ )
+
+ return authorization_header
+
+ def _canonical_request(self, request, host, amzdate):
+
+ canonical_uri = urlparse.unquote(request.get_selector())
+ canonical_uri = self._quote(canonical_uri, '+')
+ canonical_querystring = ''
+
+ canonical_headers = 'host:' + host + '\n' \
+ + 'x-amz-content-sha256:' + self._payload_hash(request) + '\n' \
+ + 'x-amz-date:' + amzdate + '\n'
+
+ if self.token is not None and self.token != '':
+ canonical_headers += 'x-amz-security-token:' + self.token + '\n'
+
+ canonical_request = request.get_method() + '\n' \
+ + canonical_uri + '\n' \
+ + canonical_querystring + '\n' \
+ + canonical_headers + '\n' \
+ + self._signed_headers() + '\n' \
+ + self._payload_hash(request)
+
+ return hashlib.sha256(canonical_request).hexdigest()
+
+ def _signed_headers(self):
+ signed_headers = 'host;x-amz-content-sha256;x-amz-date'
+ if self.token is not None and self.token != '':
+ signed_headers += ';x-amz-security-token'
+ return signed_headers
+
+ def _payload_hash(self, request):
+ payload = request.get_data()
+ if payload is None:
+ payload = ''
+
+ return hashlib.sha256(payload).hexdigest()
+
+ # We need to be able to quote specific characters to support S3
+ # lookups, something urllib and friends don't do easily
+ def _quote(self, s, unsafe):
+ res = list(s)
+ for i in range(len(res)):
+ c = res[i]
+ if c in unsafe:
+ res[i] = '%%%02X' % ord(c)
+ return ''.join(res)
+
class APTMessage(object):
MESSAGE_CODES = {
@@ -217,8 +353,8 @@ class APTMessage(object):
class S3_method(object):
__eof = False
- def __init__(self):
- self.iam = AWSCredentials()
+ def __init__(self, config_file='/etc/apt/s3auth.conf'):
+ self.iam = AWSCredentials(config_file)
self.iam.get_credentials()
self.send_capabilities()
@@ -250,15 +386,19 @@ class S3_method(object):
return result
if line == '\n':
return result
- s = line.split(":", 1)
- result[s[0]] = s[1].strip()
+ (item, value) = line.split(":", 1)
+ if not result.get(item):
+ result[item] = []
+ result[item].append(value.strip())
+ return result
def send(self, code, headers):
message = APTMessage(code, headers)
sys.stdout.write(message.encode())
def send_capabilities(self):
- self.send(100, {'Version': '1.0', 'Single-Instance': 'true'})
+ self.send(100, {
+ 'Version': '1.1', 'Single-Instance': 'true', 'Send-Config': 'true'})
def send_status(self, headers):
self.send(102, headers)
@@ -278,7 +418,12 @@ class S3_method(object):
message = self._read_message()
if message is None:
return 0
- if message['_number'] == 600:
+ if message['_number'] == 601:
+ try:
+ self.configure(message)
+ except Exception, e:
+ self.fail(e.__class__.__name__ + ": " + str(e))
+ elif message['_number'] == 600:
try:
self.fetch(message)
except Exception, e:
@@ -286,6 +431,15 @@ class S3_method(object):
else:
return 100
+ # configure
+ def configure(self, message):
+ for item in message['Config-Item']:
+ if item.startswith('Acquire::http::Proxy'):
+ (key, value) = item.split('=',1)
+ if key == 'Acquire::http::Proxy':
+ os.environ['http_proxy'] = value
+ os.environ['https_proxy'] = value
+
# We need to be able to quote specific characters to support S3
# lookups, something urllib and friends don't do easily
def quote(self, s, unsafe):
@@ -297,48 +451,81 @@ class S3_method(object):
return ''.join(res)
def fetch(self, msg):
- self.uri = msg['URI']
- self.uri_parsed = urlparse.urlparse(self.uri)
- # quote path for +, ~, and spaces
- # see bugs.launchpad.net #1003633 and #1086997
- self.uri_updated = 'https://' + self.uri_parsed.netloc +\
- self.quote(self.uri_parsed.path, '+~ ')
- self.filename = msg['Filename']
+ self.uri = msg['URI'][0]
+
+ self.filename = msg['Filename'][0]
- response = self.iam.urlopen(self.uri_updated)
self.send_status({'URI': self.uri, 'Message': 'Waiting for headers'})
+ for i in range(0, RETRIES):
+ try:
+ response = self.iam.uriopen(self.uri)
+ except ssl.SSLError as e:
+ if 'timed out' in e.message:
+ time.sleep(wait_time(i + 1))
+ continue
+ else:
+ raise e
+ except socket.timeout:
+ time.sleep(wait_time(i + 1))
+ continue
- if response.code != 200:
- self.send_uri_failure({
- 'URI': self.uri,
- 'Message': str(response.code) + ' ' + response.msg,
- 'FailReason': 'HttpError' + str(response.code)})
- while True:
- data = response.read(4096)
- if not len(data):
- break
- response.close()
- return
+ self.send_status({'URI': self.uri, 'Message': 'Waiting for headers'})
- self.send_uri_start({
- 'URI': self.uri,
- 'Size': response.headers.getheader('content-length'),
- 'Last-Modified': response.headers.getheader('last-modified')})
+ if response.code != 200:
+ self.send_uri_failure({
+ 'URI': self.uri,
+ 'Message': str(response.code) + ' ' + response.msg,
+ 'FailReason': 'HttpError' + str(response.code)})
+ try:
+ while True:
+ data = response.read(4096)
+ if not len(data):
+ break
+ except ssl.SSLError as e:
+ if 'timed out' in e.message:
+ pass
+ else:
+ raise e
+ except socket.timeout:
+ pass
+ finally:
+ response.close()
- f = open(self.filename, "w")
- hash_sha256 = hashlib.sha256()
- hash_sha512 = hashlib.sha512()
- hash_md5 = hashlib.md5()
- while True:
- data = response.read(4096)
- if not len(data):
+ return
+
+ self.send_uri_start({
+ 'URI': self.uri,
+ 'Size': response.headers.getheader('content-length'),
+ 'Last-Modified': response.headers.getheader('last-modified')})
+
+ f = open(self.filename, "w")
+ hash_sha256 = hashlib.sha256()
+ hash_sha512 = hashlib.sha512()
+ hash_md5 = hashlib.md5()
+ try:
+ while True:
+ data = response.read(4096)
+ if not len(data):
+ break
+ hash_sha256.update(data)
+ hash_sha512.update(data)
+ hash_md5.update(data)
+ f.write(data)
break
- hash_sha256.update(data)
- hash_sha512.update(data)
- hash_md5.update(data)
- f.write(data)
- response.close()
- f.close()
+ except ssl.SSLError as e:
+ if 'timed out' in e.message:
+ time.sleep(wait_time(i + 1))
+ else:
+ raise e
+ except socket.timeout:
+ time.sleep(wait_time(i + 1))
+
+ finally:
+ response.close()
+ f.close()
+
+ else:
+ raise Exception("Fetch request timed out")
self.send_uri_done({
'URI': self.uri,
@@ -352,7 +539,10 @@ class S3_method(object):
if __name__ == '__main__':
try:
- method = S3_method()
+ config = '/etc/apt/s3auth.conf'
+ if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
+ config = sys.argv[1]
+ method = S3_method(config)
ret = method.run()
sys.exit(ret)
except KeyboardInterrupt:
diff --git a/s3auth.conf b/s3auth.conf
new file mode 100644
index 0000000..21ec1ef
--- /dev/null
+++ b/s3auth.conf
@@ -0,0 +1 @@
+Region='eu-west-2'
\ No newline at end of file