""" path.py - An object representing a path to a file or directory.
Example:
from path import path
d = path('/home/guido/bin')
for f in d.files('*.py'):
f.chmod(0755)
This module requires Python 2.2 or later.
URL: http://www.jorendorff.com/articles/python/path
Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
Date: 9 Mar 2007
"""
# TODO
# - Tree-walking functions don't avoid symlink loops. Matt Harrison
# sent me a patch for this.
# - Bug in write_text(). It doesn't support Universal newline mode.
# - Better error message in listdir() when self isn't a
# directory. (On Windows, the error message really sucks.)
# - Make sure everything has a good docstring.
# - Add methods for regex find and replace.
# - guess_content_type() method?
# - Perhaps support arguments to touch().
from __future__ import generators
import sys, warnings, os, fnmatch, glob, shutil, codecs, md5, subprocess
__version__ = '2.2'
__all__ = ['path']
# Platform-specific support for path.owner
if os.name == 'nt':
try:
import win32security
except ImportError:
win32security = None
else:
try:
import pwd
except ImportError:
pwd = None
# Pre-2.3 support. Are unicode filenames supported?
_base = str
_getcwd = os.getcwd
try:
if os.path.supports_unicode_filenames:
_base = unicode
_getcwd = os.getcwdu
_filesystem_encoding = sys.getfilesystemencoding()
except AttributeError:
pass
# Pre-2.3 workaround for booleans
try:
True, False
except NameError:
True, False = 1, 0
# Pre-2.3 workaround for basestring.
try:
basestring
except NameError:
basestring = (str, unicode)
# Universal newline support
_textmode = 'r'
if hasattr(file, 'newlines'):
_textmode = 'U'
class TreeWalkWarning(Warning):
pass
class path(_base):
""" Represents a filesystem path.
For documentation on individual methods, consult their
counterparts in os.path.
"""
# --- Special Python methods.
def __repr__(self):
return 'path(%s)' % _base.__repr__(self)
# Adding a path and a string yields a path.
def __add__(self, more):
try:
resultStr = _base.__add__(self, more)
except TypeError: #Python bug
resultStr = NotImplemented
if resultStr is NotImplemented:
return resultStr
return self.__class__(resultStr)
def __radd__(self, other):
if isinstance(other, basestring):
return self.__class__(other.__add__(self))
else:
return NotImplemented
# The / operator joins paths.
def __div__(self, rel):
""" fp.__div__(rel) == fp / rel == fp.joinpath(rel)
Join two path components, adding a separator character if
needed.
"""
return self.__class__(os.path.join(self, rel))
# Make the / operator work even when true division is enabled.
__truediv__ = __div__
def getcwd(cls):
""" Return the current working directory as a path object. """
return cls(_getcwd())
getcwd = classmethod(getcwd)
# --- Operations on path strings.
isabs = os.path.isabs
def abspath(self): return self.__class__(os.path.abspath(self))
def normcase(self): return self.__class__(os.path.normcase(self))
def normpath(self): return self.__class__(os.path.normpath(self))
def realpath(self): return self.__class__(os.path.realpath(self))
def expanduser(self): return self.__class__(os.path.expanduser(self))
def expandvars(self): return self.__class__(os.path.expandvars(self))
def dirname(self): return self.__class__(os.path.dirname(self))
basename = os.path.basename
def expand(self):
""" Clean up a filename by calling expandvars(),
expanduser(), and normpath() on it.
This is commonly everything needed to clean up a filename
read from a configuration file, for example.
"""
return self.expandvars().expanduser().normpath()
def _get_namebase(self):
base, ext = os.path.splitext(self.name)
return base
def _get_ext(self):
f, ext = os.path.splitext(_base(self))
return ext
def _get_drive(self):
drive, r = os.path.splitdrive(self)
return self.__class__(drive)
parent = property(
dirname, None, None,
""" This path's parent directory, as a new path object.
For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
""")
name = property(
basename, None, None,
""" The name of this file or directory without the full path.
For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
""")
namebase = property(
_get_namebase, None, None,
""" The same as path.name, but with one file extension stripped off.
For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
but path('/home/guido/python.tar.gz').namebase == 'python.tar'
""")
ext = property(
_get_ext, None, None,
""" The file extension, for example '.py'. """)
drive = property(
_get_drive, None, None,
""" The drive specifier, for example 'C:'.
This is always empty on systems that don't use drive specifiers.
""")
def splitpath(self):
""" p.splitpath() -> Return (p.parent, p.name). """
parent, child = os.path.split(self)
return self.__class__(parent), child
def splitdrive(self):
""" p.splitdrive() -> Return (p.drive, <the rest of p>).
Split the drive specifier from this path. If there is
no drive specifier, p.drive is empty, so the return value
is simply (path(''), p). This is always the case on Unix.
"""
drive, rel = os.path.splitdrive(self)
return self.__class__(drive), rel
def splitext(self):
""" p.splitext() -> Return (p.stripext(), p.ext).
Split the filename extension from this path and return
the two parts. Either part may be empty.
The extension is everything from '.' to the end of the
last path segment. This has the property that if
(a, b) == p.splitext(), then a + b == p.
"""
filename, ext = os.path.splitext(self)
return self.__class__(filename), ext
def stripext(self):
""" p.stripext() -> Remove one file extension from the path.
For example, path('/home/guido/python.tar.gz').stripext()
returns path('/home/guido/python.tar').
"""
return self.splitext()[0]
if hasattr(os.path, 'splitunc'):
def splitunc(self):
unc, rest = os.path.splitunc(self)
return self.__class__(unc), rest
def _get_uncshare(self):
unc, r = os.path.splitunc(self)
return self.__class__(unc)
uncshare = property(
_get_uncshare, None, None,
""" The UNC mount point for this path.
This is empty for paths on local drives. """)
def joinpath(self, *args):
""" Join two or more path components, adding a separator
character (os.sep) if needed. Returns a new path
object.
"""
return self.__class__(os.path.join(self, *args))
def splitall(self):
r""" Return a list of the path components in this path.
The first item in the list will be a path. Its value will be
either os.curdir, os.pardir, empty, or the root directory of
this path (for example, '/' or 'C:\\'). The other items in
the list will be strings.
path.path.joinpath(*result) will yield the original path.
"""
parts = []
loc = self
while loc != os.curdir and loc != os.pardir:
prev = loc
loc, child = prev.splitpath()
if loc == prev:
break
parts.append(child)
parts.append(loc)
parts.reverse()
return parts
def relpath(self):
""" Return this path as a relative path,
based from the current working directory.
"""
cwd = self.__class__(os.getcwd())
return cwd.relpathto(self)
def relpathto(self, dest):
""" Return a relative path from self to dest.
If there is no relative path from self to dest, for example if
they reside on different drives in Windows, then this returns
dest.abspath().
"""
origin = self.abspath()
dest = self.__class__(dest).abspath()
orig_list = origin.normcase().splitall()
# Don't normcase dest! We want to preserve the case.
dest_list = dest.splitall()
if orig_list[0] != os.path.normcase(dest_list[0]):
# Can't get here from there.
return dest
# Find the location where the two paths start to differ.
i = 0
for start_seg, dest_seg in zip(orig_list, dest_list):
if start_seg != os.path.normcase(dest_seg):
break
i += 1
# Now i is the point where the two paths diverge.
# Need a certain number of "os.pardir"s to work up
# from the origin to the point of divergence.
segments = [os.pardir] * (len(orig_list) - i)
# Need to add the diverging part of dest_list.
segments += dest_list[i:]
if len(segments) == 0:
# If they happen to be identical, use os.curdir.
relpath = os.curdir
else:
relpath = os.path.join(*segments)
return self.__class__(relpath)
# --- Listing, searching, walking, and matching
def listdir(self, pattern=None):
""" D.listdir() -> List of items in this directory.
Use D.files() or D.dirs() instead if you want a listing
of just files or just subdirectories.
The elements of the list are path objects.
With the optional 'pattern' argument, this only lists
items whose names match the given pattern.
"""
names = os.listdir(self)
if pattern is not None:
names = fnmatch.filter(names, pattern)
return [self / child for child in names]
def dirs(self, pattern=None):
""" D.dirs() -> List of this directory's subdirectories.
The elements of the list are path objects.
This does not walk recursively into subdirectories
(but see path.walkdirs).
With the optional 'pattern' argument, this only lists
directories whose names match the given pattern. For
example, d.dirs('build-*').
"""
return [p for p in self.listdir(pattern) if p.isdir()]
def files(self, pattern=None):
""" D.files() -> List of the files in this directory.
The elements of the list are path objects.
This does not walk into subdirectories (see path.walkfiles).
With the optional 'pattern' argument, this only lists files
whose names match the given pattern. For example,
d.files('*.pyc').
"""
return [p for p in self.listdir(pattern) if p.isfile()]
def walk(self, pattern=None, errors='strict'):
""" D.walk() -> iterator over files and subdirs, recursively.
The iterator yields path objects naming each child item of
this directory and its descendants. This requires that
D.isdir().
This performs a depth-first traversal of the directory tree.
Each directory is returned just before all its children.
The errors= keyword argument controls behavior when an
error occurs. The default is 'strict', which causes an
exception. The other allowed values are 'warn', which
reports the error via warnings.warn(), and 'ignore'.
"""
if errors not in ('strict', 'warn', 'ignore'):
raise ValueError("invalid errors parameter")
try:
childList = self.listdir()
except Exception:
if errors == 'ignore':
return
elif errors == 'warn':
warnings.warn(
"Unable to list directory '%s': %s"
% (self, sys.exc_info()[1]),
TreeWalkWarning)
return
else:
raise
for child in childList:
if pattern is None or child.fnmatch(pattern):
yield child
try:
isdir = child.isdir()
except Exception:
if errors == 'ignore':
isdir = False
elif errors == 'warn':
warnings.warn(
"Unable to access '%s': %s"
% (child, sys.exc_info()[1]),
TreeWalkWarning)
isdir = False
else:
raise
if isdir:
for item in child.walk(pattern, errors):
yield item
def walkdirs(self, pattern=None, errors='strict'):
""" D.walkdirs() -> iterator over subdirs, recursively.
With the optional 'pattern' argument, this yields only
directories whose names match the given pattern. For
example, mydir.walkdirs('*test') yields only directories
with names ending in 'test'.
The errors= keyword argument controls behavior when an
error occurs. The default is 'strict', which causes an
exception. The other allowed values are 'warn', which
reports the error via warnings.warn(), and 'ignore'.
"""
if errors not in ('strict', 'warn', 'ignore'):
raise ValueError("invalid errors parameter")
try:
dirs = self.dirs()
except Exception:
if errors == 'ignore':
return
elif errors == 'warn':
warnings.warn(
"Unable to list directory '%s': %s"
% (self, sys.exc_info()[1]),
TreeWalkWarning)
return
else:
raise
for child in dirs:
if pattern is None or child.fnmatch(pattern):
yield child
for subsubdir in child.walkdirs(pattern, errors):
yield subsubdir
def walkfiles(self, pattern=None, errors='strict'):
""" D.walkfiles() -> iterator over files in D, recursively.
The optional argument, pattern, limits the results to files
with names that match the pattern. For example,
mydir.walkfiles('*.tmp') yields only files with the .tmp
extension.
"""
if errors not in ('strict', 'warn', 'ignore'):
raise ValueError("invalid errors parameter")
try:
childList = self.listdir()
except Exception:
if errors == 'ignore':
return
elif errors == 'warn':
warnings.warn(
"Unable to list directory '%s': %s"
% (self, sys.exc_info()[1]),
TreeWalkWarning)
return
else:
raise
for child in childList:
try:
isfile = child.isfile()
isdir = not isfile and child.isdir()
except:
if errors == 'ignore'