Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
1598 lines (1387 sloc) 50.8 KB
# Copyright (C) 2012-2016 The Python Software Foundation.
import codecs
from collections import deque
import contextlib
import csv
from glob import iglob as std_iglob
import io
import json
import logging
import os
import py_compile
import re
import shutil
import socket
import ssl
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import threading
except ImportError:
import dummy_threading as threading
import time
from . import DistlibException
from .compat import (string_types, text_type, shutil, raw_input, StringIO,
cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
splittype, HTTPHandler, HTTPSHandler as BaseHTTPSHandler,
BaseConfigurator, valid_ident, Container, configparser,
URLError, match_hostname, CertificateError, ZipFile,
logger = logging.getLogger(__name__)
# Requirement parsing code for name + optional constraints + optional extras
# e.g. 'foo >= 1.2, < 2.0 [bar, baz]'
# The regex can seem a bit hairy, so we build it up out of smaller pieces
# which are manageable.
COMMA = r'\s*,\s*'
COMMA_RE = re.compile(COMMA)
IDENT = r'(\w|[.-])+'
EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')'
VERSPEC = IDENT + r'\*?'
RELOP = '([<>=!~]=)|[<>]'
# The first relop is optional - if absent, will be taken as '~='
BARE_CONSTRAINTS = ('(' + RELOP + r')?\s*(' + VERSPEC + ')(' + COMMA + '(' +
RELOP + r')\s*(' + VERSPEC + '))*')
DIRECT_REF = '(from\s+(?P<diref>.*))'
# Either the bare constraints or the bare constraints in parentheses
r')\s*\)|(?P<c2>' + BARE_CONSTRAINTS + '\s*)')
EXTRAS = r'\[\s*(?P<ex>' + EXTRA_LIST + r')?\s*\]'
REQUIREMENT = ('(?P<dn>' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' +
# Used to scan through the constraints
RELOP_IDENT = '(?P<op>' + RELOP + r')\s*(?P<vn>' + VERSPEC + ')'
def parse_requirement(s):
def get_constraint(m):
d = m.groupdict()
return d['op'], d['vn']
result = None
m = REQUIREMENT_RE.match(s)
if m:
d = m.groupdict()
name = d['dn']
cons = d['c1'] or d['c2']
if not d['diref']:
url = None
# direct reference
cons = None
url = d['diref'].strip()
if not cons:
cons = None
constr = ''
rs = d['dn']
if cons[0] not in '<>!=':
cons = '~=' + cons
iterator = RELOP_IDENT_RE.finditer(cons)
cons = [get_constraint(m) for m in iterator]
rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons]))
if not d['ex']:
extras = None
extras = COMMA_RE.split(d['ex'])
result = Container(name=name, constraints=cons, extras=extras,
requirement=rs, source=s, url=url)
return result
def get_resources_dests(resources_root, rules):
"""Find destinations for resources files"""
def get_rel_path(base, path):
# normalizes and returns a lstripped-/-separated path
base = base.replace(os.path.sep, '/')
path = path.replace(os.path.sep, '/')
assert path.startswith(base)
return path[len(base):].lstrip('/')
destinations = {}
for base, suffix, dest in rules:
prefix = os.path.join(resources_root, base)
for abs_base in iglob(prefix):
abs_glob = os.path.join(abs_base, suffix)
for abs_path in iglob(abs_glob):
resource_file = get_rel_path(resources_root, abs_path)
if dest is None: # remove the entry if it was here
destinations.pop(resource_file, None)
rel_path = get_rel_path(abs_base, abs_path)
rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
destinations[resource_file] = rel_dest + '/' + rel_path
return destinations
def in_venv():
if hasattr(sys, 'real_prefix'):
# virtualenv venvs
result = True
# PEP 405 venvs
result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
return result
def get_executable():
# The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
# changes to the stub launcher mean that sys.executable always points
# to the stub on OS X
# if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
# in os.environ):
# result = os.environ['__PYVENV_LAUNCHER__']
# else:
# result = sys.executable
# return result
result = os.path.normcase(sys.executable)
if not isinstance(result, text_type):
result = fsdecode(result)
return result
def proceed(prompt, allowed_chars, error_prompt=None, default=None):
p = prompt
while True:
s = raw_input(p)
p = prompt
if not s and default:
s = default
if s:
c = s[0].lower()
if c in allowed_chars:
if error_prompt:
p = '%c: %s\n%s' % (c, error_prompt, prompt)
return c
def extract_by_key(d, keys):
if isinstance(keys, string_types):
keys = keys.split()
result = {}
for key in keys:
if key in d:
result[key] = d[key]
return result
def read_exports(stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getreader('utf-8')(stream)
# Try to load as JSON, falling back on legacy format
data =
stream = StringIO(data)
jdata = json.load(stream)
result = jdata['extensions']['python.exports']['exports']
for group, entries in result.items():
for k, v in entries.items():
s = '%s = %s' % (k, v)
entry = get_export_entry(s)
assert entry is not None
entries[k] = entry
return result
except Exception:, 0)
def read_stream(cp, stream):
if hasattr(cp, 'read_file'):
cp = configparser.ConfigParser()
read_stream(cp, stream)
except configparser.MissingSectionHeaderError:
data = textwrap.dedent(data)
stream = StringIO(data)
read_stream(cp, stream)
result = {}
for key in cp.sections():
result[key] = entries = {}
for name, value in cp.items(key):
s = '%s = %s' % (name, value)
entry = get_export_entry(s)
assert entry is not None
#entry.dist = self
entries[name] = entry
return result
def write_exports(exports, stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getwriter('utf-8')(stream)
cp = configparser.ConfigParser()
for k, v in exports.items():
# TODO check k, v for valid values
for entry in v.values():
if entry.suffix is None:
s = entry.prefix
s = '%s:%s' % (entry.prefix, entry.suffix)
if entry.flags:
s = '%s [%s]' % (s, ', '.join(entry.flags))
cp.set(k,, s)
def tempdir():
td = tempfile.mkdtemp()
yield td
def chdir(d):
cwd = os.getcwd()
def socket_timeout(seconds=15):
cto = socket.getdefaulttimeout()
class cached_property(object):
def __init__(self, func):
self.func = func
#for attr in ('__name__', '__module__', '__doc__'):
# setattr(self, attr, getattr(func, attr, None))
def __get__(self, obj, cls=None):
if obj is None:
return self
value = self.func(obj)
object.__setattr__(obj, self.func.__name__, value)
#obj.__dict__[self.func.__name__] = value = self.func(obj)
return value
def convert_path(pathname):
"""Return 'pathname' as a name that will work on the native filesystem.
The path is split on '/' and put back together again using the current
directory separator. Needed because filenames in the setup script are
always supplied in Unix style, and have to be converted to the local
convention before we can actually use them in the filesystem. Raises
ValueError on non-Unix-ish systems if 'pathname' either starts or
ends with a slash.
if os.sep == '/':
return pathname
if not pathname:
return pathname
if pathname[0] == '/':
raise ValueError("path '%s' cannot be absolute" % pathname)
if pathname[-1] == '/':
raise ValueError("path '%s' cannot end with '/'" % pathname)
paths = pathname.split('/')
while os.curdir in paths:
if not paths:
return os.curdir
return os.path.join(*paths)
class FileOperator(object):
def __init__(self, dry_run=False):
self.dry_run = dry_run
self.ensured = set()
def _init_record(self):
self.record = False
self.files_written = set()
self.dirs_created = set()
def record_as_written(self, path):
if self.record:
def newer(self, source, target):
"""Tell if the target is newer than the source.
Returns true if 'source' exists and is more recently modified than
'target', or if 'source' exists and 'target' doesn't.
Returns false if both exist and 'target' is the same age or younger
than 'source'. Raise PackagingFileError if 'source' does not exist.
Note that this test is not very accurate: files created in the same
second will have the same "age".
if not os.path.exists(source):
raise DistlibException("file '%r' does not exist" %
if not os.path.exists(target):
return True
return os.stat(source).st_mtime > os.stat(target).st_mtime
def copy_file(self, infile, outfile, check=True):
"""Copy a file respecting dry-run and force flags.
self.ensure_dir(os.path.dirname(outfile))'Copying %s to %s', infile, outfile)
if not self.dry_run:
msg = None
if check:
if os.path.islink(outfile):
msg = '%s is a symlink' % outfile
elif os.path.exists(outfile) and not os.path.isfile(outfile):
msg = '%s is a non-regular file' % outfile
if msg:
raise ValueError(msg + ' which would be overwritten')
shutil.copyfile(infile, outfile)
def copy_stream(self, instream, outfile, encoding=None):
assert not os.path.isdir(outfile)
self.ensure_dir(os.path.dirname(outfile))'Copying stream %s to %s', instream, outfile)
if not self.dry_run:
if encoding is None:
outstream = open(outfile, 'wb')
outstream =, 'w', encoding=encoding)
shutil.copyfileobj(instream, outstream)
def write_binary_file(self, path, data):
if not self.dry_run:
with open(path, 'wb') as f:
def write_text_file(self, path, data, encoding):
if not self.dry_run:
with open(path, 'wb') as f:
def set_mode(self, bits, mask, files):
if == 'posix' or ( == 'java' and os._name == 'posix'):
# Set the executable bits (owner, group, and world) on
# all the files specified.
for f in files:
if self.dry_run:"changing mode of %s", f)
mode = (os.stat(f).st_mode | bits) & mask"changing mode of %s to %o", f, mode)
os.chmod(f, mode)
set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
def ensure_dir(self, path):
path = os.path.abspath(path)
if path not in self.ensured and not os.path.exists(path):
d, f = os.path.split(path)
self.ensure_dir(d)'Creating %s' % path)
if not self.dry_run:
if self.record:
def byte_compile(self, path, optimize=False, force=False, prefix=None):
dpath = cache_from_source(path, not optimize)'Byte-compiling %s to %s', path, dpath)
if not self.dry_run:
if force or self.newer(path, dpath):
if not prefix:
diagpath = None
assert path.startswith(prefix)
diagpath = path[len(prefix):]
py_compile.compile(path, dpath, diagpath, True) # raise error
return dpath
def ensure_removed(self, path):
if os.path.exists(path):
if os.path.isdir(path) and not os.path.islink(path):
logger.debug('Removing directory tree at %s', path)
if not self.dry_run:
if self.record:
if path in self.dirs_created:
if os.path.islink(path):
s = 'link'
s = 'file'
logger.debug('Removing %s %s', s, path)
if not self.dry_run:
if self.record:
if path in self.files_written:
def is_writable(self, path):
result = False
while not result:
if os.path.exists(path):
result = os.access(path, os.W_OK)
parent = os.path.dirname(path)
if parent == path:
path = parent
return result
def commit(self):
Commit recorded changes, turn off recording, return
assert self.record
result = self.files_written, self.dirs_created
return result
def rollback(self):
if not self.dry_run:
for f in list(self.files_written):
if os.path.exists(f):
# dirs should all be empty now, except perhaps for
# __pycache__ subdirs
# reverse so that subdirs appear before their parents
dirs = sorted(self.dirs_created, reverse=True)
for d in dirs:
flist = os.listdir(d)
if flist:
assert flist == ['__pycache__']
sd = os.path.join(d, flist[0])
os.rmdir(d) # should fail if non-empty
def resolve(module_name, dotted_path):
if module_name in sys.modules:
mod = sys.modules[module_name]
mod = __import__(module_name)
if dotted_path is None:
result = mod
parts = dotted_path.split('.')
result = getattr(mod, parts.pop(0))
for p in parts:
result = getattr(result, p)
return result
class ExportEntry(object):
def __init__(self, name, prefix, suffix, flags): = name
self.prefix = prefix
self.suffix = suffix
self.flags = flags
def value(self):
return resolve(self.prefix, self.suffix)
def __repr__(self):
return '<ExportEntry %s = %s:%s %s>' % (, self.prefix,
self.suffix, self.flags)
def __eq__(self, other):
if not isinstance(other, ExportEntry):
result = False
result = ( == and
self.prefix == other.prefix and
self.suffix == other.suffix and
self.flags == other.flags)
return result
__hash__ = object.__hash__
ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+)
''', re.VERBOSE)
def get_export_entry(specification):
m =
if not m:
result = None
if '[' in specification or ']' in specification:
raise DistlibException('Invalid specification '
'%r' % specification)
d = m.groupdict()
name = d['name']
path = d['callable']
colons = path.count(':')
if colons == 0:
prefix, suffix = path, None
if colons != 1:
raise DistlibException('Invalid specification '
'%r' % specification)
prefix, suffix = path.split(':')
flags = d['flags']
if flags is None:
if '[' in specification or ']' in specification:
raise DistlibException('Invalid specification '
'%r' % specification)
flags = []
flags = [f.strip() for f in flags.split(',')]
result = ExportEntry(name, prefix, suffix, flags)
return result
def get_cache_base(suffix=None):
Return the default base location for distlib caches. If the directory does
not exist, it is created. Use the suffix provided for the base directory,
and default to '.distlib' if it isn't provided.
On Windows, if LOCALAPPDATA is defined in the environment, then it is
assumed to be a directory, and will be the parent directory of the result.
On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
directory - using os.expanduser('~') - will be the parent directory of
the result.
The result is just the directory '.distlib' in the parent directory as
determined above, or with the name specified with ``suffix``.
if suffix is None:
suffix = '.distlib'
if == 'nt' and 'LOCALAPPDATA' in os.environ:
result = os.path.expandvars('$localappdata')
# Assume posix, or old Windows
result = os.path.expanduser('~')
# we use 'isdir' instead of 'exists', because we want to
# fail if there's a file with that name
if os.path.isdir(result):
usable = os.access(result, os.W_OK)
if not usable:
logger.warning('Directory exists but is not writable: %s', result)
usable = True
except OSError:
logger.warning('Unable to create %s', result, exc_info=True)
usable = False
if not usable:
result = tempfile.mkdtemp()
logger.warning('Default location unusable, using %s', result)
return os.path.join(result, suffix)
def path_to_cache_dir(path):
Convert an absolute path to a directory name for use in a cache.
The algorithm used is:
#. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
#. Any occurrence of ``os.sep`` is replaced with ``'--'``.
#. ``'.cache'`` is appended.
d, p = os.path.splitdrive(os.path.abspath(path))
if d:
d = d.replace(':', '---')
p = p.replace(os.sep, '--')
return d + p + '.cache'
def ensure_slash(s):
if not s.endswith('/'):
return s + '/'
return s
def parse_credentials(netloc):
username = password = None
if '@' in netloc:
prefix, netloc = netloc.split('@', 1)
if ':' not in prefix:
username = prefix
username, password = prefix.split(':', 1)
return username, password, netloc
def get_process_umask():
result = os.umask(0o22)
return result
def is_string_sequence(seq):
result = True
i = None
for i, s in enumerate(seq):
if not isinstance(s, string_types):
result = False
assert i is not None
return result
PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
'([a-z0-9_.+-]+)', re.I)
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')
def split_filename(filename, project_name=None):
Extract name, version, python version from a filename (no extension)
Return name, version, pyver or None
result = None
pyver = None
m =
if m:
pyver =
filename = filename[:m.start()]
if project_name and len(filename) > len(project_name) + 1:
m = re.match(re.escape(project_name) + r'\b', filename)
if m:
n = m.end()
result = filename[:n], filename[n + 1:], pyver
if result is None:
m = PROJECT_NAME_AND_VERSION.match(filename)
if m:
result =,, pyver
return result
# Allow spaces in name because of legacy dists like "Twisted Core"
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*'
def parse_name_and_version(p):
A utility method used to get name and version from a string.
From e.g. a Provides-Dist value.
:param p: A value in a form 'foo (1.0)'
:return: The name and version as a tuple.
m = NAME_VERSION_RE.match(p)
if not m:
raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
d = m.groupdict()
return d['name'].strip().lower(), d['ver']
def get_extras(requested, available):
result = set()
requested = set(requested or [])
available = set(available or [])
if '*' in requested:
result |= available
for r in requested:
if r == '-':
elif r.startswith('-'):
unwanted = r[1:]
if unwanted not in available:
logger.warning('undeclared extra: %s' % unwanted)
if unwanted in result:
if r not in available:
logger.warning('undeclared extra: %s' % r)
return result
# Extended metadata functionality
def _get_external_data(url):
result = {}
# urlopen might fail if it runs into redirections,
# because of Python issue #13696. Fixed in locators
# using a custom redirect handler.
resp = urlopen(url)
headers =
ct = headers.get('Content-Type')
if not ct.startswith('application/json'):
logger.debug('Unexpected response for JSON request: %s', ct)
reader = codecs.getreader('utf-8')(resp)
#data ='utf-8')
#result = json.loads(data)
result = json.load(reader)
except Exception as e:
logger.exception('Failed to get external data for %s: %s', url, e)
return result
_external_data_base_url = ''
def get_project_data(name):
url = '%s/%s/project.json' % (name[0].upper(), name)
url = urljoin(_external_data_base_url, url)
result = _get_external_data(url)
return result
def get_package_data(name, version):
url = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
url = urljoin(_external_data_base_url, url)
return _get_external_data(url)
class Cache(object):
A class implementing a cache for resources that need to live in the file system
e.g. shared libraries. This class was moved from resources to here because it
could be used by other modules, e.g. the wheel module.
def __init__(self, base):
Initialise an instance.
:param base: The base directory where the cache should be located.
# we use 'isdir' instead of 'exists', because we want to
# fail if there's a file with that name
if not os.path.isdir(base):
if (os.stat(base).st_mode & 0o77) != 0:
logger.warning('Directory \'%s\' is not private', base)
self.base = os.path.abspath(os.path.normpath(base))
def prefix_to_dir(self, prefix):
Converts a resource prefix to a directory name in the cache.
return path_to_cache_dir(prefix)
def clear(self):
Clear the cache.
not_removed = []
for fn in os.listdir(self.base):
fn = os.path.join(self.base, fn)
if os.path.islink(fn) or os.path.isfile(fn):
elif os.path.isdir(fn):
except Exception:
return not_removed
class EventMixin(object):
A very simple publish/subscribe system.
def __init__(self):
self._subscribers = {}
def add(self, event, subscriber, append=True):
Add a subscriber for an event.
:param event: The name of an event.
:param subscriber: The subscriber to be added (and called when the
event is published).
:param append: Whether to append or prepend the subscriber to an
existing subscriber list for the event.
subs = self._subscribers
if event not in subs:
subs[event] = deque([subscriber])
sq = subs[event]
if append:
def remove(self, event, subscriber):
Remove a subscriber for an event.
:param event: The name of an event.
:param subscriber: The subscriber to be removed.
subs = self._subscribers
if event not in subs:
raise ValueError('No subscribers: %r' % event)
def get_subscribers(self, event):
Return an iterator for the subscribers for an event.
:param event: The event to return subscribers for.
return iter(self._subscribers.get(event, ()))
def publish(self, event, *args, **kwargs):
Publish a event and return a list of values returned by its
:param event: The event to publish.
:param args: The positional arguments to pass to the event's
:param kwargs: The keyword arguments to pass to the event's
result = []
for subscriber in self.get_subscribers(event):
value = subscriber(event, *args, **kwargs)
except Exception:
logger.exception('Exception during event publication')
value = None
logger.debug('publish %s: args = %s, kwargs = %s, result = %s',
event, args, kwargs, result)
return result
# Simple sequencing
class Sequencer(object):
def __init__(self):
self._preds = {}
self._succs = {}
self._nodes = set() # nodes with no preds/succs
def add_node(self, node):
def remove_node(self, node, edges=False):
if node in self._nodes:
if edges:
for p in set(self._preds.get(node, ())):
self.remove(p, node)
for s in set(self._succs.get(node, ())):
self.remove(node, s)
# Remove empties
for k, v in list(self._preds.items()):
if not v:
del self._preds[k]
for k, v in list(self._succs.items()):
if not v:
del self._succs[k]
def add(self, pred, succ):
assert pred != succ
self._preds.setdefault(succ, set()).add(pred)
self._succs.setdefault(pred, set()).add(succ)
def remove(self, pred, succ):
assert pred != succ
preds = self._preds[succ]
succs = self._succs[pred]
except KeyError:
raise ValueError('%r not a successor of anything' % succ)
except KeyError:
raise ValueError('%r not a successor of %r' % (succ, pred))
def is_step(self, step):
return (step in self._preds or step in self._succs or
step in self._nodes)
def get_steps(self, final):
if not self.is_step(final):
raise ValueError('Unknown: %r' % final)
result = []
todo = []
seen = set()
while todo:
step = todo.pop(0)
if step in seen:
# if a step was already seen,
# move it to the end (so it will appear earlier
# when reversed on return) ... but not for the
# final step, as that would be confusing for
# users
if step != final:
preds = self._preds.get(step, ())
return reversed(result)
def strong_connections(self):
index_counter = [0]
stack = []
lowlinks = {}
index = {}
result = []
graph = self._succs
def strongconnect(node):
# set the depth index for this node to the smallest unused index
index[node] = index_counter[0]
lowlinks[node] = index_counter[0]
index_counter[0] += 1
# Consider successors
successors = graph[node]
except Exception:
successors = []
for successor in successors:
if successor not in lowlinks:
# Successor has not yet been visited
lowlinks[node] = min(lowlinks[node],lowlinks[successor])
elif successor in stack:
# the successor is in the stack and hence in the current
# strongly connected component (SCC)
lowlinks[node] = min(lowlinks[node],index[successor])
# If `node` is a root node, pop the stack and generate an SCC
if lowlinks[node] == index[node]:
connected_component = []
while True:
successor = stack.pop()
if successor == node: break
component = tuple(connected_component)
# storing the result
for node in graph:
if node not in lowlinks:
return result
def dot(self):
result = ['digraph G {']
for succ in self._preds:
preds = self._preds[succ]
for pred in preds:
result.append(' %s -> %s;' % (pred, succ))
for node in self._nodes:
result.append(' %s;' % node)
return '\n'.join(result)
# Unarchiving functionality for zip, tar, tgz, tbz, whl
ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip',
'.tgz', '.tbz', '.whl')
def unarchive(archive_filename, dest_dir, format=None, check=True):
def check_path(path):
if not isinstance(path, text_type):
path = path.decode('utf-8')
p = os.path.abspath(os.path.join(dest_dir, path))
if not p.startswith(dest_dir) or p[plen] != os.sep:
raise ValueError('path outside destination: %r' % p)
dest_dir = os.path.abspath(dest_dir)
plen = len(dest_dir)
archive = None
if format is None:
if archive_filename.endswith(('.zip', '.whl')):
format = 'zip'
elif archive_filename.endswith(('.tar.gz', '.tgz')):
format = 'tgz'
mode = 'r:gz'
elif archive_filename.endswith(('.tar.bz2', '.tbz')):
format = 'tbz'
mode = 'r:bz2'
elif archive_filename.endswith('.tar'):
format = 'tar'
mode = 'r'
raise ValueError('Unknown format for %r' % archive_filename)
if format == 'zip':
archive = ZipFile(archive_filename, 'r')
if check:
names = archive.namelist()
for name in names:
archive =, mode)
if check:
names = archive.getnames()
for name in names:
if format != 'zip' and sys.version_info[0] < 3:
# See Python issue 17153. If the dest path contains Unicode,
# tarfile extraction fails on Python 2.x if a member path name
# contains non-ASCII characters - it leads to an implicit
# bytes -> unicode conversion using ASCII to decode.
for tarinfo in archive.getmembers():
if not isinstance(, text_type): ='utf-8')
if archive:
def zip_dir(directory):
"""zip a directory tree into a BytesIO object"""
result = io.BytesIO()
dlen = len(directory)
with ZipFile(result, "w") as zf:
for root, dirs, files in os.walk(directory):
for name in files:
full = os.path.join(root, name)
rel = root[dlen:]
dest = os.path.join(rel, name)
zf.write(full, dest)
return result
# Simple progress bar
UNITS = ('', 'K', 'M', 'G','T','P')
class Progress(object):
unknown = 'UNKNOWN'
def __init__(self, minval=0, maxval=100):
assert maxval is None or maxval >= minval
self.min = self.cur = minval
self.max = maxval
self.started = None
self.elapsed = 0
self.done = False
def update(self, curval):
assert self.min <= curval
assert self.max is None or curval <= self.max
self.cur = curval
now = time.time()
if self.started is None:
self.started = now
self.elapsed = now - self.started
def increment(self, incr):
assert incr >= 0
self.update(self.cur + incr)
def start(self):
return self
def stop(self):
if self.max is not None:
self.done = True
def maximum(self):
return self.unknown if self.max is None else self.max
def percentage(self):
if self.done:
result = '100 %'
elif self.max is None:
result = ' ?? %'
v = 100.0 * (self.cur - self.min) / (self.max - self.min)
result = '%3d %%' % v
return result
def format_duration(self, duration):
if (duration <= 0) and self.max is None or self.cur == self.min:
result = '??:??:??'
#elif duration < 1:
# result = '--:--:--'
result = time.strftime('%H:%M:%S', time.gmtime(duration))
return result
def ETA(self):
if self.done:
prefix = 'Done'
t = self.elapsed
#import pdb; pdb.set_trace()
prefix = 'ETA '
if self.max is None:
t = -1
elif self.elapsed == 0 or (self.cur == self.min):
t = 0
#import pdb; pdb.set_trace()
t = float(self.max - self.min)
t /= self.cur - self.min
t = (t - 1) * self.elapsed
return '%s: %s' % (prefix, self.format_duration(t))
def speed(self):
if self.elapsed == 0:
result = 0.0
result = (self.cur - self.min) / self.elapsed
for unit in UNITS:
if result < 1000:
result /= 1000.0
return '%d %sB/s' % (result, unit)
# Glob functionality
RICH_GLOB = re.compile(r'\{([^}]*)\}')
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')
def iglob(path_glob):
"""Extended globbing function that supports ** and {opt1,opt2,opt3}."""
msg = """invalid glob %r: recursive glob "**" must be used alone"""
raise ValueError(msg % path_glob)
msg = """invalid glob %r: mismatching set marker '{' or '}'"""
raise ValueError(msg % path_glob)
return _iglob(path_glob)
def _iglob(path_glob):
rich_path_glob = RICH_GLOB.split(path_glob, 1)
if len(rich_path_glob) > 1:
assert len(rich_path_glob) == 3, rich_path_glob
prefix, set, suffix = rich_path_glob
for item in set.split(','):
for path in _iglob(''.join((prefix, item, suffix))):
yield path
if '**' not in path_glob:
for item in std_iglob(path_glob):
yield item
prefix, radical = path_glob.split('**', 1)
if prefix == '':
prefix = '.'
if radical == '':
radical = '*'
# we support both
radical = radical.lstrip('/')
radical = radical.lstrip('\\')
for path, dir, files in os.walk(prefix):
path = os.path.normpath(path)
for fn in _iglob(os.path.join(path, radical)):
yield fn
# HTTPSConnection which verifies certificates/matches domains
class HTTPSConnection(httplib.HTTPSConnection):
ca_certs = None # set this to the path to the certs file (.pem)
check_domain = True # only used if ca_certs is not None
# noinspection PyPropertyAccess
def connect(self):
sock = socket.create_connection((, self.port), self.timeout)
if getattr(self, '_tunnel_host', False):
self.sock = sock
if not hasattr(ssl, 'SSLContext'):
# For 2.x
if self.ca_certs:
cert_reqs = ssl.CERT_REQUIRED
cert_reqs = ssl.CERT_NONE
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.options |= ssl.OP_NO_SSLv2
if self.cert_file:
context.load_cert_chain(self.cert_file, self.key_file)
kwargs = {}
if self.ca_certs:
context.verify_mode = ssl.CERT_REQUIRED
if getattr(ssl, 'HAS_SNI', False):
kwargs['server_hostname'] =
self.sock = context.wrap_socket(sock, **kwargs)
if self.ca_certs and self.check_domain:
logger.debug('Host verified: %s',
except CertificateError:
class HTTPSHandler(BaseHTTPSHandler):
def __init__(self, ca_certs, check_domain=True):
self.ca_certs = ca_certs
self.check_domain = check_domain
def _conn_maker(self, *args, **kwargs):
This is called to create a connection instance. Normally you'd
pass a connection class to do_open, but it doesn't actually check for
a class, and just expects a callable. As long as we behave just as a
constructor would have, we should be OK. If it ever changes so that
we *must* pass a class, we'll create an UnsafeHTTPSConnection class
which just sets check_domain to False in the class definition, and
choose which one to pass to do_open.
result = HTTPSConnection(*args, **kwargs)
if self.ca_certs:
result.ca_certs = self.ca_certs
result.check_domain = self.check_domain
return result
def https_open(self, req):
return self.do_open(self._conn_maker, req)
except URLError as e:
if 'certificate verify failed' in str(e.reason):
raise CertificateError('Unable to verify server certificate '
'for %s' %
# To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
# Middle proxy using HTTP listens on port 443, or an index mistakenly serves
# HTML containing a http://xyz link when it should be https://xyz),
# you can use the following handler class, which does not allow HTTP traffic.
# It works by inheriting from HTTPHandler - so build_opener won't add a
# handler for HTTP itself.
class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
def http_open(self, req):
raise URLError('Unexpected HTTP request on what should be a secure '
'connection: %s' % req)
# XML-RPC with timeouts
_ver_info = sys.version_info[:2]
if _ver_info == (2, 6):
class HTTP(httplib.HTTP):
def __init__(self, host='', port=None, **kwargs):
if port == 0: # 0 means use port 0, not the default port
port = None
self._setup(self._connection_class(host, port, **kwargs))
class HTTPS(httplib.HTTPS):
def __init__(self, host='', port=None, **kwargs):
if port == 0: # 0 means use port 0, not the default port
port = None
self._setup(self._connection_class(host, port, **kwargs))
class Transport(xmlrpclib.Transport):
def __init__(self, timeout, use_datetime=0):
self.timeout = timeout
xmlrpclib.Transport.__init__(self, use_datetime)
def make_connection(self, host):
h, eh, x509 = self.get_host_info(host)
if _ver_info == (2, 6):
result = HTTP(h, timeout=self.timeout)
if not self._connection or host != self._connection[0]:
self._extra_headers = eh
self._connection = host, httplib.HTTPConnection(h)
result = self._connection[1]
return result
class SafeTransport(xmlrpclib.SafeTransport):
def __init__(self, timeout, use_datetime=0):
self.timeout = timeout
xmlrpclib.SafeTransport.__init__(self, use_datetime)
def make_connection(self, host):
h, eh, kwargs = self.get_host_info(host)
if not kwargs:
kwargs = {}
kwargs['timeout'] = self.timeout
if _ver_info == (2, 6):
result = HTTPS(host, None, **kwargs)
if not self._connection or host != self._connection[0]:
self._extra_headers = eh
self._connection = host, httplib.HTTPSConnection(h, None,
result = self._connection[1]
return result
class ServerProxy(xmlrpclib.ServerProxy):
def __init__(self, uri, **kwargs):
self.timeout = timeout = kwargs.pop('timeout', None)
# The above classes only come into play if a timeout
# is specified
if timeout is not None:
scheme, _ = splittype(uri)
use_datetime = kwargs.get('use_datetime', 0)
if scheme == 'https':
tcls = SafeTransport
tcls = Transport
kwargs['transport'] = t = tcls(timeout, use_datetime=use_datetime)
self.transport = t
xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
# CSV functionality. This is provided because on 2.x, the csv module can't
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
def _csv_open(fn, mode, **kwargs):
if sys.version_info[0] < 3:
mode += 'b'
kwargs['newline'] = ''
return open(fn, mode, **kwargs)
class CSVBase(object):
defaults = {
'delimiter': str(','), # The strs are used because we need native
'quotechar': str('"'), # str in the csv API (2.x won't take
'lineterminator': str('\n') # Unicode)
def __enter__(self):
return self
def __exit__(self, *exc_info):
class CSVReader(CSVBase):
def __init__(self, **kwargs):
if 'stream' in kwargs:
stream = kwargs['stream']
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getreader('utf-8')(stream) = stream
else: = _csv_open(kwargs['path'], 'r')
self.reader = csv.reader(, **self.defaults)
def __iter__(self):
return self
def next(self):
result = next(self.reader)
if sys.version_info[0] < 3:
for i, item in enumerate(result):
if not isinstance(item, text_type):
result[i] = item.decode('utf-8')
return result
__next__ = next
class CSVWriter(CSVBase):
def __init__(self, fn, **kwargs): = _csv_open(fn, 'w')
self.writer = csv.writer(, **self.defaults)
def writerow(self, row):
if sys.version_info[0] < 3:
r = []
for item in row:
if isinstance(item, text_type):
item = item.encode('utf-8')
row = r
# Configurator functionality
class Configurator(BaseConfigurator):
value_converters = dict(BaseConfigurator.value_converters)
value_converters['inc'] = 'inc_convert'
def __init__(self, config, base=None):
super(Configurator, self).__init__(config)
self.base = base or os.getcwd()
def configure_custom(self, config):
def convert(o):
if isinstance(o, (list, tuple)):
result = type(o)([convert(i) for i in o])
elif isinstance(o, dict):
if '()' in o:
result = self.configure_custom(o)
result = {}
for k in o:
result[k] = convert(o[k])
result = self.convert(o)
return result
c = config.pop('()')
if not callable(c):
c = self.resolve(c)
props = config.pop('.', None)
# Check for valid identifiers
args = config.pop('[]', ())
if args:
args = tuple([convert(o) for o in args])
items = [(k, convert(config[k])) for k in config if valid_ident(k)]
kwargs = dict(items)
result = c(*args, **kwargs)
if props:
for n, v in props.items():
setattr(result, n, convert(v))
return result
def __getitem__(self, key):
result = self.config[key]
if isinstance(result, dict) and '()' in result:
self.config[key] = result = self.configure_custom(result)
return result
def inc_convert(self, value):
"""Default converter for the inc:// protocol."""
if not os.path.isabs(value):
value = os.path.join(self.base, value)
with, 'r', encoding='utf-8') as f:
result = json.load(f)
return result
# Mixin for running subprocesses and capturing their output
class SubprocessMixin(object):
def __init__(self, verbose=False, progress=None):
self.verbose = verbose
self.progress = progress
def reader(self, stream, context):
Read lines from a subprocess' output stream and either pass to a progress
callable (if specified) or write progress information to sys.stderr.
progress = self.progress
verbose = self.verbose
while True:
s = stream.readline()
if not s:
if progress is not None:
progress(s, context)
if not verbose:
def run_command(self, cmd, **kwargs):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, **kwargs)
t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout'))
t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr'))
if self.progress is not None:
self.progress('done.', 'main')
elif self.verbose:
return p
You can’t perform that action at this time.