diff --git a/README b/README
deleted file mode 100644
index 79bc200..0000000
--- a/README
+++ /dev/null
@@ -1,58 +0,0 @@
-# apt-transport-s3
-
-### Table of Contents
-1. [License & Copyright](#license & copyright)
-2. [Requirements](#requirements)
-3. [Configuration](#configuration)
-4. [Usage](#usage)
-5. [Contribution](#contribution)
-
-## apt-transport-s3
-Allow to have a privately hosted apt repository on S3. Access keys are read from
-`/etc/apt/s3auth.conf` file or IAM role if machine is hosted on AWS or has
-access to AWS metadata server on 169.254.169.254.
-
-## License & Copyright
-    # Copyright (C) 2014 Bashton Ltd.
-    #
-    # This program is free software; you can redistribute it and/or modify
-    # it under the terms of the GNU General Public License as published by
-    # the Free Software Foundation; either version 2 of the License, or
-    # (at your option) any later version.
-
-    # This program is distributed in the hope that it will be useful,
-    # but WITHOUT ANY WARRANTY; without even the implied warranty of
-    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    # GNU General Public License for more details.
-
-    # You should have received a copy of the GNU General Public License
-    # along with this program; if not, write to the Free Software
-    # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-## Requirements
-### Additional package dependencies (except installed by default in Debian)
-1. python-configobj
-
-## Configuration
-/etc/apt/s3auth.conf or IAM role
-can provide credentials required for using private apt repositories.
-
-Example of s3auth.conf file:
-```
-AccessKeyId = myaccesskey
-SecretAccessKey = mysecretaccesskey
-Token = ''
-```
-
-Token should be empty string.
-
-## Usage
-Install the .deb package from the releases page. The bucket repo should be
-specified using an s3:// prefix, for example:
-
-`deb s3://aptbucketname.s3.amazonaws.com/repo/ trusty main contrib non-free`
-
-## Contribution
-If you want to contribute a patch via PR please create it against development
-branch. Patches via email are welcome as well.
diff --git a/README.md b/README.md
deleted file mode 120000
index 100b938..0000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-README
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..84b1f68
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+# apt-transport-s3
+
+### Table of Contents
+1. [License & Copyright](#license--copyright)
+2. [Requirements](#requirements)
+3. [Configuration](#configuration)
+4. [Usage](#usage)
+5. [Contribution](#contribution)
+
+## apt-transport-s3
+Allows you to host a private apt repository on S3. Access keys are read from
+the `/etc/apt/s3auth.conf` file, or from an IAM role if the machine is hosted on AWS
+or has access to the AWS metadata server on 169.254.169.254. Credentials can also be
+taken from the usual environment variables.
+
+## License & Copyright
+    # Copyright (C) 2014 Bashton Ltd.
+    #
+    # This program is free software; you can redistribute it and/or modify
+    # it under the terms of the GNU General Public License as published by
+    # the Free Software Foundation; either version 2 of the License, or
+    # (at your option) any later version.
+
+    # This program is distributed in the hope that it will be useful,
+    # but WITHOUT ANY WARRANTY; without even the implied warranty of
+    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    # GNU General Public License for more details.
+
+    # You should have received a copy of the GNU General Public License
+    # along with this program; if not, write to the Free Software
+    # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+## Requirements
+### Additional package dependencies (beyond those installed by default in Debian)
+1. python-configobj
+
+## Configuration
+Either /etc/apt/s3auth.conf or an IAM role
+can provide the credentials required for using private apt repositories.
+
+NOTE: Region MUST match the region the buckets are stored in; if not defined it defaults to us-east-1.
+
+Example s3auth.conf file:
+```
+AccessKeyId = myaccesskey
+SecretAccessKey = mysecretaccesskey
+Region = 'us-east-1'
+```
+
+## Usage
+Install the .deb package from the releases page. The bucket repo should be
+specified using an s3:// prefix, for example:
+
+`deb s3://aptbucketname/repo/ trusty main contrib non-free`
+
+If you need to use a proxy to connect to the internet, you can specify this
+as an APT configuration directive (for example in
+/etc/apt/apt.conf.d/90apt-transport-s3):
+
+`Acquire::http::Proxy "http://myproxy:3128/";`
+
+## Testing
+The method runs in interactive mode: it accepts messages on `stdin` and replies on `stdout`. The messages it accepts
+are in the following format, [documented here](http://www.fifi.org/doc/libapt-pkg-doc/method.html/index.html#abstract).
+
+```
+600 URI Acquire
+URI:s3://my-s3-repository/project-a/dists/trusty/main/binary-amd64/Packages
+Filename:Packages.downloaded
+Fail-Ignore:true
+Index-File:true
+
+```
+
+This message triggers an S3 GET for the above bucket and key and saves the result to Filename. A blank line after the message is required to trigger processing by the s3 method.
+
+## Contribution
+If you want to contribute a patch via PR, please create it against the development
+branch. Patches via email are welcome as well.
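The Testing section above can be exercised without apt. The sketch below builds the same `600 URI Acquire` message and pipes it to the transport on stdin; the install path `/usr/lib/apt/methods/s3` and the bucket/key are assumptions taken from the README example, and real credentials plus network access are needed for the fetch itself to succeed.

```
# Python 2, matching the transport itself.
import subprocess

# The 600 URI Acquire message from the Testing section; the trailing
# blank line is what triggers processing by the method.
message = (
    "600 URI Acquire\n"
    "URI:s3://my-s3-repository/project-a/dists/trusty/main/binary-amd64/Packages\n"
    "Filename:Packages.downloaded\n"
    "Fail-Ignore:true\n"
    "Index-File:true\n"
    "\n"
)

# Assumed install location of the apt method shipped by the .deb package.
proc = subprocess.Popen(["/usr/lib/apt/methods/s3"],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
out, _ = proc.communicate(message)
print(out)  # 100 Capabilities first, then status/result messages for the URI
```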
diff --git a/s3 b/s3
index d2e6649..da5f340 100755
--- a/s3
+++ b/s3
@@ -13,6 +13,8 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# https://github.com/BashtonLtd/apt-transport-s3
 
 import urllib2
 import urlparse
@@ -22,9 +24,17 @@
 import hmac
 import json
 import sys
 import os
+import datetime
+import xml.etree.ElementTree as ET
+import socket
+import ssl
 from configobj import ConfigObj
-import syslog
+RETRIES = 5
+
+
+def wait_time(c):
+    return pow(2, c) - 1
 
 
 class AWSCredentials(object):
@@ -32,7 +42,9 @@
     Class for dealing with IAM role credentials from meta-data server and
     later on to deal with boto/aws config provided keys
     """
-    def __init__(self):
+
+    def __init__(self, config_file=None):
+        self.conf_file = config_file
         host = 'http://169.254.169.254'
         path = '/latest/meta-data/iam/security-credentials/'
         self.meta_data_uri = urlparse.urljoin(host, path)
@@ -42,18 +54,30 @@
         request = urllib2.Request(self.meta_data_uri)
         response = None
 
-        try:
-            response = urllib2.urlopen(request, None, 5)
-            self.iamrole = response.read()
-        except urllib2.URLError as e:
-            if hasattr(e, 'reason'):
-                raise Exception("URL error reason: %s, probable cause is that\
- you don't have IAM role on this machine" % e.reason)
-            elif hasattr(e, 'code'):
-                raise Exception("Server error code: %s" % e.code)
-        finally:
-            if response:
-                response.close()
+
+        for i in range(0, RETRIES):
+            try:
+                response = urllib2.urlopen(request, None, 10)
+                self.iamrole = response.read()
+                break
+            except ssl.SSLError as e:
+                if 'timed out' in e.message:
+                    time.sleep(wait_time(i + 1))
+                else:
+                    raise e
+            except socket.timeout:
+                time.sleep(wait_time(i + 1))
+            except urllib2.URLError as e:
+                if hasattr(e, 'reason'):
+                    raise Exception("URL error reason: %s, probable cause is that\
+ you don't have IAM role on this machine" % e.reason)
+                elif hasattr(e, 'code'):
+                    raise Exception("Server error code: %s" % e.code)
+            finally:
+                if response:
+                    response.close()
+        else:
+            raise Exception("GetRole request timed out")
 
     def __load_config(self):
         """
@@ -63,15 +87,13 @@
             SecretAccessKey = mysecretkey
             Token = ''    # this can/have to be empty
         """
-        _CONF_FILE = '/etc/apt/s3auth.conf'
 
         # Checking if 'file' exists, if it does read it
-        if os.path.isfile(os.path.expanduser(_CONF_FILE)):
-            config = ConfigObj(os.path.expanduser(_CONF_FILE))
+        if os.path.isfile(os.path.expanduser(self.conf_file)):
+            config = ConfigObj(os.path.expanduser(self.conf_file))
             return config
         else:
-            raise Exception("Config file: %s doesn't exist" % _CONF_FILE)
-            syslog.syslog("Config file: %s doesn't exist" % _CONF_FILE)
+            raise Exception("Config file: %s doesn't exist" % self.conf_file)
 
     def get_credentials(self):
         """
@@ -79,81 +101,95 @@
         Get credentials from IAM role or file
         Note: This method should be explicitly called after constructing new
         object, as in 'explicit is better than implicit'.
         """
-        data = None
+        data = {}
         try:
             data = self.__load_config()
         except:
             pass
 
-        if data is None:
+        self.region = data.get('Region', 'us-east-1')
+        if self.region is None or self.region == '':
+            raise Exception("Region required")
+        sys.stderr.write("Region: {}\n".format(self.region))
+
+        if data.get("AccessKeyId") is not None:
+            sys.stderr.write("Using config file for credentials\n")
+
+        if data.get("AccessKeyId") is None:
+            data['AccessKeyId'] = os.environ.get("AWS_ACCESS_KEY_ID", None)
+            data['SecretAccessKey'] = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
+            data['Token'] = os.environ.get("AWS_SESSION_TOKEN", None)
+            if data.get("AccessKeyId") is not None:
+                sys.stderr.write("Using environment variables for credentials\n")
+
+        if data.get("AccessKeyId") is None:
             self.__get_role()
             request = urllib2.Request(
                 urlparse.urljoin(self.meta_data_uri, self.iamrole)
-                )
+            )
             response = None
-            try:
-                response = urllib2.urlopen(request, None, 30)
-                data = json.loads(response.read())
-            except urllib2.URLError as e:
-                if hasattr(e, 'reason'):
-                    raise Exception("URL error reason: %s" % e.reason)
-                elif hasattr(e, 'code'):
-                    raise Exception("Server error code: %s" % e.code)
-            finally:
-                if response:
-                    response.close()
+            for i in range(0, RETRIES):
+                try:
+                    response = urllib2.urlopen(request, None, 30)
+                    data = json.loads(response.read())
+                    break
+                except ssl.SSLError as e:
+                    if 'timed out' in e.message:
+                        time.sleep(wait_time(i + 1))
+                    else:
+                        raise e
+                except socket.timeout:
+                    time.sleep(wait_time(i + 1))
+                except urllib2.URLError as e:
+                    if hasattr(e, 'reason'):
+                        raise Exception("URL error reason: %s" % e.reason)
+                    elif hasattr(e, 'code'):
+                        raise Exception("Server error code: %s" % e.code)
+                finally:
+                    if response:
+                        response.close()
+            else:
+                raise Exception("GetCredentials request timed out")
 
         self.access_key = data['AccessKeyId']
+        if self.access_key is None or self.access_key == '':
+            raise Exception("AccessKeyId required")
+
         self.secret_key = data['SecretAccessKey']
+        if self.secret_key is None or self.secret_key == '':
+            raise Exception("SecretAccessKey required")
+
         self.token = data['Token']
 
-    def sign(self, request, timeval=None):
-        """
-        Attach a valid S3 signature to request.
-        request - instance of Request
-        """
-        date = time.strftime("%a, %d %b %Y %H:%M:%S GMT",
-                             timeval or time.gmtime())
-        request.add_header('Date', date)
-        host = request.get_host()
-
-        # TODO: bucket name finding is ugly, I should find a way to support
-        # both naming conventions: http://bucket.s3.amazonaws.com/ and
-        # http://s3.amazonaws.com/bucket/
-        try:
-            pos = host.find(".s3")
-            assert pos != -1
-            bucket = host[:pos]
-        except:
-            raise Exception("Can't establish bucket name based on the hostname:\
- %s" % host)
-
-        resource = "/%s%s" % (bucket, request.get_selector(), )
-        amz_headers = 'x-amz-security-token:%s\n' % self.token
-        sigstring = ("%(method)s\n\n\n%(date)s\n"
-                     "%(canon_amzn_headers)s%(canon_amzn_resource)s") % ({
-                         'method': request.get_method(),
-                         'date': request.headers.get('Date'),
-                         'canon_amzn_headers': amz_headers,
-                         'canon_amzn_resource': resource})
-        digest = hmac.new(
-            str(self.secret_key),
-            str(sigstring),
-            hashlib.sha1).digest()
-        signature = digest.encode('base64').strip()
-        return signature
-
-    def urlopen(self, url, **kwargs):
-        """urlopen(url) open the remote file and return a file object."""
+    def v4Sign(self, key, msg):
+        return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
+
+    def getSignatureKey(self, dateStamp, serviceName):
+        kDate = self.v4Sign(('AWS4' + self.secret_key).encode('utf-8'), dateStamp)
+        kRegion = self.v4Sign(kDate, self.region)
+        kService = self.v4Sign(kRegion, serviceName)
+        kSigning = self.v4Sign(kService, 'aws4_request')
+        return kSigning
+
+    def uriopen(self, uri):
+        """uriopen(uri) open the remote file and return a file object."""
         try:
-            return urllib2.urlopen(self._request(url), None, 30)
+            return urllib2.urlopen(self._request(uri), None, 30)
         except urllib2.HTTPError as e:
             # HTTPError is a "file like object" similar to what
             # urllib2.urlopen returns, so return it and let caller
             # deal with the error code
+            if e.code == 400:
+                # token errors are buried in 400 messages so expose
+                xmlResponse = ET.fromstring(e.read())
+                if xmlResponse is not None:
+                    e.msg = "{} - {}".format(e, xmlResponse.find("Message").text)
+            if e.code == 301:
+                e.msg = "{} - Set s3auth.conf region to match bucket 'Region': bucket may not be in {}".format(e, self.region)
+
             return e
         # For other errors, throw an exception directly
         except urllib2.URLError as e:
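`v4Sign` and `getSignatureKey` above implement the standard AWS Signature Version 4 key-derivation chain (date, then region, then service, then `aws4_request`); `_authorization_header` further down signs the string to sign with the derived key. The standalone sketch below walks the same chain with placeholder values (the secret key is AWS's documented example key, the date and canonical request are made up):

```
import hashlib
import hmac


def hmac_sha256(key, msg):
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()


secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'   # placeholder only
datestamp = '20190101'                                     # YYYYMMDD part of x-amz-date
region = 'eu-west-2'

# Key derivation: date -> region -> service -> aws4_request
k_date = hmac_sha256(('AWS4' + secret_key).encode('utf-8'), datestamp)
k_region = hmac_sha256(k_date, region)
k_service = hmac_sha256(k_region, 's3')
k_signing = hmac_sha256(k_service, 'aws4_request')

# String to sign: algorithm, timestamp, credential scope, hashed canonical request
string_to_sign = 'AWS4-HMAC-SHA256\n' \
    + '20190101T000000Z\n' \
    + datestamp + '/' + region + '/s3/aws4_request\n' \
    + hashlib.sha256('GET\n/example\n\n...').hexdigest()

signature = hmac.new(k_signing, string_to_sign.encode('utf-8'),
                     hashlib.sha256).hexdigest()
```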
@@ -164,18 +200,118 @@
         except urllib2.socket.timeout:
             raise Exception("Socket timeout")
 
-    def _request(self, url):
-        request = urllib2.Request(url)
-        request.add_header('x-amz-security-token', self.token)
-        signature = self.sign(request)
-        request.add_header(
-            'Authorization', "AWS {0}:{1}".format(
-                self.access_key,
-                signature
-            ).rstrip()
+    def _request(self, uri):
+        uri_parsed = urlparse.urlparse(uri)
+        if '.' in uri_parsed.netloc:
+            raise Exception("uri should not include fully qualified domain name for bucket")
+
+        # quote path for +, ~, and spaces
+        # see bugs.launchpad.net #1003633 and #1086997
+        scheme = 'https'
+        host = 's3.{}.amazonaws.com'.format(self.region)
+        bucket = uri_parsed.netloc
+        path = '/{}{}'.format(bucket, self._quote(uri_parsed.path, '+~ '))
+
+        s3url = urlparse.urlunparse(
+            (
+                scheme,
+                host,
+                path,
+                '',
+                '',
+                ''
+            )
         )
+
+        request = urllib2.Request(s3url)
+
+        request.add_header('x-amz-content-sha256', self._payload_hash(request))
+
+        # Create a date for headers and the credential string
+        amzdate = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+
+        request.add_header('x-amz-date', amzdate)
+
+        if self.token is not None and self.token != '':
+            request.add_header('x-amz-security-token', self.token)
+
+        canonical_request = self._canonical_request(request, host, amzdate)
+
+        authorization_header = self._authorization_header(canonical_request, amzdate)
+
+        request.add_header('Authorization', authorization_header)
         return request
 
+    def _authorization_header(self, canonical_request, amzdate):
+        datestamp = amzdate.split('T')[0]
+
+        algorithm = 'AWS4-HMAC-SHA256'
+        credential_scope = datestamp + '/' + self.region + '/s3/aws4_request'
+
+        string_to_sign = algorithm + '\n' \
+            + amzdate + '\n' \
+            + credential_scope + '\n' \
+            + canonical_request
+
+        signing_key = self.getSignatureKey(datestamp, 's3')
+        signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
+
+        authorization_header = "{} Credential={}/{}, SignedHeaders={}, Signature={}".format(
+            algorithm,
+            self.access_key,
+            credential_scope,
+            self._signed_headers(),
+            signature
+        )
+
+        return authorization_header
+
+    def _canonical_request(self, request, host, amzdate):
+
+        canonical_uri = urlparse.unquote(request.get_selector())
+        canonical_uri = self._quote(canonical_uri, '+')
+        canonical_querystring = ''
+
+        canonical_headers = 'host:' + host + '\n' \
+            + 'x-amz-content-sha256:' + self._payload_hash(request) + '\n' \
+            + 'x-amz-date:' + amzdate + '\n'
+
+        if self.token is not None and self.token != '':
+            canonical_headers += 'x-amz-security-token:' + self.token + '\n'
+
+        canonical_request = request.get_method() + '\n' \
+            + canonical_uri + '\n' \
+            + canonical_querystring + '\n' \
+            + canonical_headers + '\n' \
+            + self._signed_headers() + '\n' \
+            + self._payload_hash(request)
+
+        return hashlib.sha256(canonical_request).hexdigest()
+
+    def _signed_headers(self):
+        signed_headers = 'host;x-amz-content-sha256;x-amz-date'
+        if self.token is not None and self.token != '':
+            signed_headers += ';x-amz-security-token'
+        return signed_headers
+
+    def _payload_hash(self, request):
+        payload = request.get_data()
+        if payload is None:
+            payload = ''
+
+        return hashlib.sha256(payload).hexdigest()
+
+    # We need to be able to quote specific characters to support S3
+    # lookups, something urllib and friends don't do easily
+    def _quote(self, s, unsafe):
+        res = list(s)
+        for i in range(len(res)):
+            c = res[i]
+            if c in unsafe:
+                res[i] = '%%%02X' % ord(c)
+        return ''.join(res)
+
 
 class APTMessage(object):
     MESSAGE_CODES = {
@@ -217,8 +353,8 @@
 class S3_method(object):
     __eof = False
 
-    def __init__(self):
-        self.iam = AWSCredentials()
+    def __init__(self, config_file='/etc/apt/s3auth.conf'):
+        self.iam = AWSCredentials(config_file)
         self.iam.get_credentials()
         self.send_capabilities()
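`_request` above maps the `sources.list`-style `s3://` URI onto a path-style HTTPS URL in the configured region before signing it, which is why the bucket must not be given as a fully qualified domain name. A small sketch of that mapping, assuming an `eu-west-2` bucket named `aptbucketname`:

```
import urlparse  # Python 2, as used by the transport

region = 'eu-west-2'                      # taken from s3auth.conf in the transport
uri = 's3://aptbucketname/repo/dists/trusty/Release'

uri_parsed = urlparse.urlparse(uri)
bucket = uri_parsed.netloc                # 'aptbucketname' - bare bucket name only
path = '/{}{}'.format(bucket, uri_parsed.path)

s3url = urlparse.urlunparse(
    ('https', 's3.{}.amazonaws.com'.format(region), path, '', '', ''))
# -> https://s3.eu-west-2.amazonaws.com/aptbucketname/repo/dists/trusty/Release
```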
@@ -250,15 +386,19 @@
                 return result
             if line == '\n':
                 return result
-            s = line.split(":", 1)
-            result[s[0]] = s[1].strip()
+            (item, value) = line.split(":", 1)
+            if not result.get(item):
+                result[item] = []
+            result[item].append(value.strip())
+
         return result
 
     def send(self, code, headers):
         message = APTMessage(code, headers)
         sys.stdout.write(message.encode())
 
     def send_capabilities(self):
-        self.send(100, {'Version': '1.0', 'Single-Instance': 'true'})
+        self.send(100, {
+            'Version': '1.1', 'Single-Instance': 'true', 'Send-Config': 'true'})
 
     def send_status(self, headers):
         self.send(102, headers)
@@ -278,7 +418,12 @@
             message = self._read_message()
             if message is None:
                 return 0
-            if message['_number'] == 600:
+            if message['_number'] == 601:
+                try:
+                    self.configure(message)
+                except Exception, e:
+                    self.fail(e.__class__.__name__ + ": " + str(e))
+            elif message['_number'] == 600:
                 try:
                     self.fetch(message)
                 except Exception, e:
@@ -286,6 +431,15 @@
             else:
                 return 100
 
+    # configure
+    def configure(self, message):
+        for item in message['Config-Item']:
+            if item.startswith('Acquire::http::Proxy'):
+                (key, value) = item.split('=',1)
+                if key == 'Acquire::http::Proxy':
+                    os.environ['http_proxy'] = value
+                    os.environ['https_proxy'] = value
+
     # We need to be able to quote specific characters to support S3
     # lookups, something urllib and friends don't do easily
     def quote(self, s, unsafe):
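APT announces its configuration in a single `601 Configuration` message, one `Config-Item` line per setting, which the reworked `_read_message` above now collects into lists; `configure()` then exports any `Acquire::http::Proxy` value so that urllib2 picks it up. Roughly what that looks like (the item values are illustrative, borrowed from the README's proxy example):

```
import os

# After _read_message, a 601 message is a dict whose header values are lists:
message = {
    '_number': 601,
    'Config-Item': [
        'Acquire::http::Proxy=http://myproxy:3128/',
        'APT::Architecture=amd64',
    ],
}

# configure() splits each item on the first '=' and exports the proxy:
for item in message['Config-Item']:
    if item.startswith('Acquire::http::Proxy'):
        key, value = item.split('=', 1)
        if key == 'Acquire::http::Proxy':
            os.environ['http_proxy'] = value
            os.environ['https_proxy'] = value
```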
@@ -297,48 +451,81 @@
         return ''.join(res)
 
     def fetch(self, msg):
-        self.uri = msg['URI']
-        self.uri_parsed = urlparse.urlparse(self.uri)
-        # quote path for +, ~, and spaces
-        # see bugs.launchpad.net #1003633 and #1086997
-        self.uri_updated = 'https://' + self.uri_parsed.netloc +\
-            self.quote(self.uri_parsed.path, '+~ ')
-        self.filename = msg['Filename']
+        self.uri = msg['URI'][0]
+
+        self.filename = msg['Filename'][0]
 
-        response = self.iam.urlopen(self.uri_updated)
         self.send_status({'URI': self.uri, 'Message': 'Waiting for headers'})
+        for i in range(0, RETRIES):
+            try:
+                response = self.iam.uriopen(self.uri)
+            except ssl.SSLError as e:
+                if 'timed out' in e.message:
+                    time.sleep(wait_time(i + 1))
+                    continue
+                else:
+                    raise e
+            except socket.timeout:
+                time.sleep(wait_time(i + 1))
+                continue
 
-        if response.code != 200:
-            self.send_uri_failure({
-                'URI': self.uri,
-                'Message': str(response.code) + ' ' + response.msg,
-                'FailReason': 'HttpError' + str(response.code)})
-            while True:
-                data = response.read(4096)
-                if not len(data):
-                    break
-            response.close()
-            return
+            self.send_status({'URI': self.uri, 'Message': 'Waiting for headers'})
 
-        self.send_uri_start({
-            'URI': self.uri,
-            'Size': response.headers.getheader('content-length'),
-            'Last-Modified': response.headers.getheader('last-modified')})
+            if response.code != 200:
+                self.send_uri_failure({
+                    'URI': self.uri,
+                    'Message': str(response.code) + ' ' + response.msg,
+                    'FailReason': 'HttpError' + str(response.code)})
+                try:
+                    while True:
+                        data = response.read(4096)
+                        if not len(data):
+                            break
+                except ssl.SSLError as e:
+                    if 'timed out' in e.message:
+                        pass
+                    else:
+                        raise e
+                except socket.timeout:
+                    pass
+                finally:
+                    response.close()
 
-        f = open(self.filename, "w")
-        hash_sha256 = hashlib.sha256()
-        hash_sha512 = hashlib.sha512()
-        hash_md5 = hashlib.md5()
-        while True:
-            data = response.read(4096)
-            if not len(data):
+                return
+
+            self.send_uri_start({
+                'URI': self.uri,
+                'Size': response.headers.getheader('content-length'),
+                'Last-Modified': response.headers.getheader('last-modified')})
+
+            f = open(self.filename, "w")
+            hash_sha256 = hashlib.sha256()
+            hash_sha512 = hashlib.sha512()
+            hash_md5 = hashlib.md5()
+            try:
+                while True:
+                    data = response.read(4096)
+                    if not len(data):
+                        break
+                    hash_sha256.update(data)
+                    hash_sha512.update(data)
+                    hash_md5.update(data)
+                    f.write(data)
                 break
-            hash_sha256.update(data)
-            hash_sha512.update(data)
-            hash_md5.update(data)
-            f.write(data)
-        response.close()
-        f.close()
+            except ssl.SSLError as e:
+                if 'timed out' in e.message:
+                    time.sleep(wait_time(i + 1))
+                else:
+                    raise e
+            except socket.timeout:
+                time.sleep(wait_time(i + 1))
+
+            finally:
+                response.close()
+                f.close()
+
+        else:
+            raise Exception("Fetch request timed out")
 
         self.send_uri_done({
             'URI': self.uri,
@@ -352,7 +539,10 @@
 
 if __name__ == '__main__':
     try:
-        method = S3_method()
+        config = '/etc/apt/s3auth.conf'
+        if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
+            config = sys.argv[1]
+        method = S3_method(config)
         ret = method.run()
         sys.exit(ret)
     except KeyboardInterrupt:
diff --git a/s3auth.conf b/s3auth.conf
new file mode 100644
index 0000000..21ec1ef
--- /dev/null
+++ b/s3auth.conf
@@ -0,0 +1 @@
+Region='eu-west-2'
\ No newline at end of file
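`fetch` above streams the object in 4096-byte chunks, updating MD5, SHA256 and SHA512 digests while writing the file, and those digests are what it goes on to report via `send_uri_done`. A minimal standalone version of that loop over an ordinary file-like object:

```
import hashlib


def hash_stream(source, dest_path, chunk_size=4096):
    """Copy `source` to dest_path in chunks and return its digests."""
    hash_sha256 = hashlib.sha256()
    hash_sha512 = hashlib.sha512()
    hash_md5 = hashlib.md5()
    with open(dest_path, 'wb') as dest:
        while True:
            data = source.read(chunk_size)
            if not len(data):
                break
            hash_sha256.update(data)
            hash_sha512.update(data)
            hash_md5.update(data)
            dest.write(data)
    return {'md5': hash_md5.hexdigest(),
            'sha256': hash_sha256.hexdigest(),
            'sha512': hash_sha512.hexdigest()}
```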