From e8bb2ea11e7540cb38c1e9a9a1b1ca415ad7a97d Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 9 Nov 2017 18:06:48 +0000 Subject: [PATCH 1/8] added max_depth --- CHANGELOG.md | 14 ++++++++++ fs/_version.py | 2 +- fs/path.py | 24 +++++++++++++++++ fs/walk.py | 65 ++++++++++++++++++++++++++++++++++++---------- tests/test_path.py | 8 ++++++ tests/test_walk.py | 35 ++++++++++++++++++++----- 6 files changed, 128 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a9d85db..e736e11e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## Unreleased + +### Added + +- fs.parts + +### Fixed + +- Walk now yields Step named tuples as advertised + +### Added + +- Added max_depth parameter to fs.walk + ## [2.0.15] - 2017-11-05 ### Changed diff --git a/fs/_version.py b/fs/_version.py index 343a0e1b..96cb0f95 100644 --- a/fs/_version.py +++ b/fs/_version.py @@ -1,3 +1,3 @@ """Version, used in module and setup.py. """ -__version__ = "2.0.15" +__version__ = "2.0.16a0" diff --git a/fs/path.py b/fs/path.py index c67c180b..875e536e 100644 --- a/fs/path.py +++ b/fs/path.py @@ -32,6 +32,7 @@ "iteratepath", "join", "normpath", + "parts", "recursepath", "relativefrom", "relpath", @@ -256,6 +257,29 @@ def combine(path1, path2): return "{}/{}".format(path1.rstrip('/'), path2.lstrip('/')) +def parts(path): + """Split a path in to its component parts. + + Arguments: + path (str): Path to split in to parts. + + Returns: + list: List of components + + Example: + >>> parts('/foo/bar/baz') + ['/', 'foo', 'bar', 'baz'] + + """ + _path = normpath(path) + components = _path.strip('/') + + _parts = ['/' if _path.startswith('/') else './'] + if components: + _parts += components.split('/') + return _parts + + def split(path): """Split a path into (head, tail) pair. diff --git a/fs/walk.py b/fs/walk.py index eba38faf..96481118 100644 --- a/fs/walk.py +++ b/fs/walk.py @@ -19,6 +19,7 @@ from .path import abspath from .path import join from .path import normpath +from .path import parts Step = namedtuple('Step', 'path, dirs, files') @@ -131,7 +132,8 @@ def __init__(self, on_error=None, search="breadth", filter=None, - exclude_dirs=None): + exclude_dirs=None, + max_depth=None): if search not in ('breadth', 'depth'): raise ValueError("search must be 'breadth' or 'depth'") self.ignore_errors = ignore_errors @@ -153,6 +155,7 @@ def __init__(self, self.search = search self.filter = filter self.exclude_dirs = exclude_dirs + self.max_depth = max_depth super(Walker, self).__init__() @classmethod @@ -165,6 +168,14 @@ def _raise_errors(cls, path, error): """Callback to re-raise dir scan errors.""" return False + @classmethod + def _calculate_depth(cls, path): + """Calculate the 'depth' of a directory path (number of + components). + """ + _path = path.strip('/') + return _path.count('/') + 1 if _path else 0 + @classmethod def bind(cls, fs): """Bind a `Walker` instance to a given filesystem. @@ -208,7 +219,8 @@ def __repr__(self): on_error=(self.on_error, None), search=(self.search, 'breadth'), filter=(self.filter, None), - exclude_dirs=(self.exclude_dirs, None) + exclude_dirs=(self.exclude_dirs, None), + max_depth=(self.max_depth, None) ) def filter_files(self, fs, infos): @@ -233,23 +245,44 @@ def filter_files(self, fs, infos): ] - def check_open_dir(self, fs, info): + def check_open_dir(self, fs, path, info): """Check if a directory should be opened. Override to exclude directories from the walk. Arguments: fs (FS): A filesystem instance. - info (Info): A resource info object. + path (str): Path to directory. + info (Info): A resource info object for the directory. Returns: bool: `True` if the directory should be opened. """ + if self.exclude_dirs is None: return True return not fs.match(self.exclude_dirs, info.name) + def check_scan_dir(self, fs, path, info, depth): + """Check if a directory should be scanned. + + Override to omit scanning of certain directories. If a directory + is omitted, it will appear in the walk but its files and + sub-directories will not. + + Arguments: + fs (FS): A filesystem instance. + path (str): Path to directory. + info (Info): A resource info object for the directory. + depth (int): Number of directories recursed. + + Returns: + bool: `True` if the directory should be scanned. + + """ + return self.max_depth is None or depth < self.max_depth + def check_file(self, fs, info): """Check if a filename should be included. @@ -329,6 +362,7 @@ def _walk_breadth(self, fs, path, namespaces=None): queue = deque([path]) push = queue.appendleft pop = queue.pop + depth = self._calculate_depth(path) while queue: dir_path = pop() @@ -336,12 +370,14 @@ def _walk_breadth(self, fs, path, namespaces=None): files = [] for info in self._scan(fs, dir_path, namespaces=namespaces): if info.is_dir: - if self.check_open_dir(fs, info): + _depth = self._calculate_depth(dir_path) - depth + 1 + if self.check_open_dir(fs, dir_path, info): dirs.append(info) - push(join(dir_path, info.name)) + if self.check_scan_dir(fs, dir_path, info, _depth): + push(join(dir_path, info.name)) else: files.append(info) - yield ( + yield Step( dir_path, dirs, self.filter_files(fs, files) @@ -355,6 +391,7 @@ def _walk_depth(self, fs, path, namespaces=None): def scan(path): return self._scan(fs, path, namespaces=namespaces) + depth = self._calculate_depth(path) stack = [( path, scan(path), [], [] )] @@ -365,7 +402,7 @@ def scan(path): try: info = next(iter_files) except StopIteration: - yield ( + yield Step( dir_path, dirs, self.filter_files(fs, files) @@ -373,12 +410,14 @@ def scan(path): del stack[-1] else: if info.is_dir: - if self.check_open_dir(fs, info): + _depth = self._calculate_depth(dir_path) - depth + 1 + if self.check_open_dir(fs, dir_path, info): dirs.append(info) - _path = join(dir_path, info.name) - push(( - _path, scan(_path), [], [] - )) + if self.check_scan_dir(fs, dir_path, info, _depth): + _path = join(dir_path, info.name) + push(( + _path, scan(_path), [], [] + )) else: files.append(info) diff --git a/tests/test_path.py b/tests/test_path.py index 9e7b1ded..af39541d 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -109,6 +109,14 @@ def test_combine(self): self.assertEqual(combine('', 'bar'), 'bar') self.assertEqual(combine('foo', 'bar'), 'foo/bar') + def test_parts(self): + self.assertEqual(parts('/'), ['/']) + self.assertEqual(parts(''), ['./']) + self.assertEqual(parts('/foo'), ['/', 'foo']) + self.assertEqual(parts('/foo/bar'), ['/', 'foo', 'bar']) + self.assertEqual(parts('/foo/bar/'), ['/', 'foo', 'bar']) + self.assertEqual(parts('./foo/bar/'), ['./', 'foo', 'bar']) + def test_pathsplit(self): tests = [ ("a/b", ("a", "b")), diff --git a/tests/test_walk.py b/tests/test_walk.py index ece95cae..2e0da1b7 100644 --- a/tests/test_walk.py +++ b/tests/test_walk.py @@ -53,15 +53,38 @@ def test_repr(self): repr(self.fs.walk) def test_walk(self): - walk = [] - for path, dirs, files in self.fs.walk(): - walk.append(( + _walk = [] + for step in self.fs.walk(): + self.assertIsInstance(step, walk.Step) + path, dirs, files = step + _walk.append(( path, - [info.name for info in dirs], - [info.name for info in files] + sorted(info.name for info in dirs), + sorted(info.name for info in files) )) expected = [(u'/', [u'foo1', u'foo2', u'foo3'], []), (u'/foo1', [u'bar1'], [u'top1.txt', u'top2.txt']), (u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo3', [], []), (u'/foo1/bar1', [], []), (u'/foo2/bar2', [u'bar3'], []), (u'/foo2/bar2/bar3', [], [u'test.txt'])] - self.assertEqual(walk, expected) + self.assertEqual(_walk, expected) + + def test_walk_levels_1(self): + results = list(self.fs.walk(max_depth=1)) + self.assertEqual(len(results), 1) + dirs = sorted(info.name for info in results[0].dirs) + self.assertEqual(dirs, ['foo1', 'foo2', 'foo3']) + files = sorted(info.name for info in results[0].files) + self.assertEqual(files, []) + + def test_walk_levels_2(self): + _walk = [] + for step in self.fs.walk(max_depth=2): + self.assertIsInstance(step, walk.Step) + path, dirs, files = step + _walk.append(( + path, + sorted(info.name for info in dirs), + sorted(info.name for info in files) + )) + expected = [(u'/', [u'foo1', u'foo2', u'foo3'], []), (u'/foo1', [u'bar1'], [u'top1.txt', u'top2.txt']), (u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo3', [], [])] + self.assertEqual(_walk, expected) def test_walk_files(self): files = list(self.fs.walk.files()) From 256a4261a7490b429055805cef6970c0e95c440b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 9 Nov 2017 18:15:12 +0000 Subject: [PATCH 2/8] Added test --- tests/test_walk.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_walk.py b/tests/test_walk.py index 2e0da1b7..24d33d52 100644 --- a/tests/test_walk.py +++ b/tests/test_walk.py @@ -65,6 +65,19 @@ def test_walk(self): expected = [(u'/', [u'foo1', u'foo2', u'foo3'], []), (u'/foo1', [u'bar1'], [u'top1.txt', u'top2.txt']), (u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo3', [], []), (u'/foo1/bar1', [], []), (u'/foo2/bar2', [u'bar3'], []), (u'/foo2/bar2/bar3', [], [u'test.txt'])] self.assertEqual(_walk, expected) + def test_walk_directory(self): + _walk = [] + for step in self.fs.walk('foo2'): + self.assertIsInstance(step, walk.Step) + path, dirs, files = step + _walk.append(( + path, + sorted(info.name for info in dirs), + sorted(info.name for info in files) + )) + expected = [(u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo2/bar2', [u'bar3'], []), (u'/foo2/bar2/bar3', [], [u'test.txt'])] + self.assertEqual(_walk, expected) + def test_walk_levels_1(self): results = list(self.fs.walk(max_depth=1)) self.assertEqual(len(results), 1) From 02accb56fa2420acd20186617f2410da4bc14f58 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 9 Nov 2017 18:16:57 +0000 Subject: [PATCH 3/8] unused import --- fs/walk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/walk.py b/fs/walk.py index 96481118..d4140e83 100644 --- a/fs/walk.py +++ b/fs/walk.py @@ -19,7 +19,6 @@ from .path import abspath from .path import join from .path import normpath -from .path import parts Step = namedtuple('Step', 'path, dirs, files') From be36d32a73e562c4e88cb54b96bd442f468d9dca Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 9 Nov 2017 18:19:26 +0000 Subject: [PATCH 4/8] docstring --- fs/walk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/walk.py b/fs/walk.py index d4140e83..11595474 100644 --- a/fs/walk.py +++ b/fs/walk.py @@ -388,6 +388,7 @@ def _walk_depth(self, fs, path, namespaces=None): # No recursion! def scan(path): + """Perform scan.""" return self._scan(fs, path, namespaces=namespaces) depth = self._calculate_depth(path) From 8decbf90bc0c0d42ee0abd42ba081fe164d68a1a Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 9 Nov 2017 18:27:03 +0000 Subject: [PATCH 5/8] docs --- fs/walk.py | 5 +++++ tests/test_walk.py | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/walk.py b/fs/walk.py index 11595474..0bea5994 100644 --- a/fs/walk.py +++ b/fs/walk.py @@ -123,6 +123,7 @@ class Walker(WalkerBase): be returned if the final component matches one of the patterns. exclude_dirs (list, optional): A list of patterns that will be used to filter out directories from the walk. e.g. ``['*.svn', '*.git']``. + max_depth (int, optional): Maximum directory depth to walk. """ @@ -487,6 +488,7 @@ def walk(self, exclude_dirs (list): A list of patterns that will be used to filter out directories from the walk, e.g. ``['*.svn', '*.git']``. + max_depth (int, optional): Maximum directory depth to walk. Returns: ~collections.Iterator: an iterator of ``(, , )`` @@ -534,6 +536,7 @@ def files(self, path='/', **kwargs): exclude_dirs (list): A list of patterns that will be used to filter out directories from the walk, e.g. ``['*.svn', '*.git']``. + max_depth (int, optional): Maximum directory depth to walk. Returns: ~collections.Iterable: An iterable of file paths (absolute @@ -564,6 +567,7 @@ def dirs(self, path='/', **kwargs): exclude_dirs (list): A list of patterns that will be used to filter out directories from the walk, e.g. ``['*.svn', '*.git']``. + max_depth (int, optional): Maximum directory depth to walk. Returns: ~collections.iterable: an iterable of directory paths @@ -601,6 +605,7 @@ def info(self, path='/', namespaces=None, **kwargs): exclude_dirs (list): A list of patterns that will be used to filter out directories from the walk, e.g. ``['*.svn', '*.git']``. + max_depth (int, optional): Maximum directory depth to walk. Returns: ~collections.Iterable: an iterable yielding tuples of diff --git a/tests/test_walk.py b/tests/test_walk.py index 24d33d52..f6ac58b9 100644 --- a/tests/test_walk.py +++ b/tests/test_walk.py @@ -59,8 +59,8 @@ def test_walk(self): path, dirs, files = step _walk.append(( path, - sorted(info.name for info in dirs), - sorted(info.name for info in files) + [info.name for info in dirs], + [info.name for info in files] )) expected = [(u'/', [u'foo1', u'foo2', u'foo3'], []), (u'/foo1', [u'bar1'], [u'top1.txt', u'top2.txt']), (u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo3', [], []), (u'/foo1/bar1', [], []), (u'/foo2/bar2', [u'bar3'], []), (u'/foo2/bar2/bar3', [], [u'test.txt'])] self.assertEqual(_walk, expected) @@ -72,8 +72,8 @@ def test_walk_directory(self): path, dirs, files = step _walk.append(( path, - sorted(info.name for info in dirs), - sorted(info.name for info in files) + [info.name for info in dirs], + [info.name for info in files] )) expected = [(u'/foo2', [u'bar2'], [u'top3.txt']), (u'/foo2/bar2', [u'bar3'], []), (u'/foo2/bar2/bar3', [], [u'test.txt'])] self.assertEqual(_walk, expected) From 5a4a0c0bb2d95b641e3bad4fbc3032a011bd03ac Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Nov 2017 15:52:26 +0000 Subject: [PATCH 6/8] factor out checks to make subclassing easier --- fs/walk.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/walk.py b/fs/walk.py index 0bea5994..ea5c754e 100644 --- a/fs/walk.py +++ b/fs/walk.py @@ -244,6 +244,13 @@ def filter_files(self, fs, infos): if _check_file(fs, info) ] + def _check_open_dir(self, fs, path, info): + """Check if a directory should be considered in the walk. + """ + if (self.exclude_dirs is not None and + fs.match(self.exclude_dirs, info.name)): + return False + return self.check_open_dir(fs, path, info) def check_open_dir(self, fs, path, info): """Check if a directory should be opened. @@ -259,12 +266,15 @@ def check_open_dir(self, fs, path, info): bool: `True` if the directory should be opened. """ + return True - if self.exclude_dirs is None: - return True - return not fs.match(self.exclude_dirs, info.name) + def _check_scan_dir(self, fs, path, info, depth): + """Check if a directory contents should be scanned.""" + if self.max_depth is not None and depth >= self.max_depth: + return False + return self.check_scan_dir(fs, path, info) - def check_scan_dir(self, fs, path, info, depth): + def check_scan_dir(self, fs, path, info): """Check if a directory should be scanned. Override to omit scanning of certain directories. If a directory @@ -275,13 +285,12 @@ def check_scan_dir(self, fs, path, info, depth): fs (FS): A filesystem instance. path (str): Path to directory. info (Info): A resource info object for the directory. - depth (int): Number of directories recursed. Returns: bool: `True` if the directory should be scanned. """ - return self.max_depth is None or depth < self.max_depth + return True def check_file(self, fs, info): """Check if a filename should be included. @@ -371,9 +380,9 @@ def _walk_breadth(self, fs, path, namespaces=None): for info in self._scan(fs, dir_path, namespaces=namespaces): if info.is_dir: _depth = self._calculate_depth(dir_path) - depth + 1 - if self.check_open_dir(fs, dir_path, info): + if self._check_open_dir(fs, dir_path, info): dirs.append(info) - if self.check_scan_dir(fs, dir_path, info, _depth): + if self._check_scan_dir(fs, dir_path, info, _depth): push(join(dir_path, info.name)) else: files.append(info) @@ -412,9 +421,9 @@ def scan(path): else: if info.is_dir: _depth = self._calculate_depth(dir_path) - depth + 1 - if self.check_open_dir(fs, dir_path, info): + if self._check_open_dir(fs, dir_path, info): dirs.append(info) - if self.check_scan_dir(fs, dir_path, info, _depth): + if self._check_scan_dir(fs, dir_path, info, _depth): _path = join(dir_path, info.name) push(( _path, scan(_path), [], [] From 323dacebef89a0ef16a7932fd68ccd562ff9a6fc Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Nov 2017 15:57:18 +0000 Subject: [PATCH 7/8] unused imports --- fs/zipfs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/zipfs.py b/fs/zipfs.py index b77e43f6..f439ed72 100644 --- a/fs/zipfs.py +++ b/fs/zipfs.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals import zipfile -import stat from datetime import datetime From c1c0997bc1e1a3cfb3c3b7ec55ce29ad506a802a Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Nov 2017 16:05:46 +0000 Subject: [PATCH 8/8] version bump --- CHANGELOG.md | 2 +- fs/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e736e11e..7ea7ba4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -## Unreleased +## [2.0.16] - 2017-11-11 ### Added diff --git a/fs/_version.py b/fs/_version.py index 96cb0f95..19a4c468 100644 --- a/fs/_version.py +++ b/fs/_version.py @@ -1,3 +1,3 @@ """Version, used in module and setup.py. """ -__version__ = "2.0.16a0" +__version__ = "2.0.16"