From bfad7a6c2da32c7879609b18f21ef7f1751d3977 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 14:51:42 +0100 Subject: [PATCH 01/13] implementation of glob --- fs/base.py | 7 +++ fs/glob.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/wildcard.py | 11 ++--- 3 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 fs/glob.py diff --git a/fs/base.py b/fs/base.py index ae35f1f0..06b7acdc 100644 --- a/fs/base.py +++ b/fs/base.py @@ -30,6 +30,7 @@ from . import tools from . import walk from . import wildcard +from .glob import Globber from .mode import validate_open_mode from .path import abspath from .path import join @@ -108,6 +109,12 @@ def __exit__( """ self.close() + @property + def glob(self): + """`~fs.glob.GLobber`: a globber object.. + """ + return Globber(self) + @property def walk(self): # type: (_F) -> BoundWalker[_F] diff --git a/fs/glob.py b/fs/glob.py new file mode 100644 index 00000000..58bab469 --- /dev/null +++ b/fs/glob.py @@ -0,0 +1,115 @@ +from __future__ import unicode_literals + +import re + +from ._repr import make_repr +from . import path +from . 
import wildcard + + +def _translate_glob(pattern, case_sensitive=True): + levels = 0 + recursive = False + re_patterns = [""] + for component in path.iteratepath(pattern): + if component == "**": + re_patterns.append(".*/?") + recursive = True + else: + re_patterns.append( + "/" + wildcard._translate(component, case_sensitive=case_sensitive) + ) + levels += 1 + re_glob = ( + "(?ms)" + "".join(re_patterns) + ("/\Z" if pattern.endswith("/") else "\Z") + ) + return ( + levels, + recursive, + re.compile(re_glob, re.IGNORECASE if case_sensitive else None), + ) + + +def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): + levels, recursive, _re_glob = _translate_glob( + pattern, case_sensitive=case_sensitive + ) + for path, info in fs.walk.info( + path=path, namespaces=namespaces, max_depth=None if recursive else levels + ): + if info.is_dir: + path += "/" + if _re_glob.match(path): + yield path, info + + +class GlobGenerator(object): + def __init__(self, fs, pattern, path='/', namespaces=None, case_sensitive=True): + self.fs = fs + self.pattern = pattern + self.path = path + self.namespaces = namespaces + self.case_sensitive = case_sensitive + + def __repr__(self): + return make_repr( + self.__class__.__name__, + self.fs, + self.pattern, + path=(self.path, '/'), + namespaces=(self.namespaces, None), + case_sensitive=(self.case_sensitive, True), + ) + + def __iter__(self): + for path, info in _glob( + self.fs, + self.pattern, + path=self.path, + namespaces=self.namespaces, + case_sensitive=self.case_sensitive, + ): + yield path, info + + def count(self): + size = 0 + for path, info in _glob( + self.fs, + self.pattern, + path=self.path, + namespaces=['details'], + case_sensitive=self.case_sensitive, + ): + size += info.size + return size + + +class Globber(object): + + __slots__ = ["fs"] + + def __init__(self, fs): + self.fs = fs + + def __repr__(self): + return make_repr(self.__class__.__name__, self.fs) + + def __call__(self, pattern, path="/", 
namespaces=None, case_sensitive=True): + return GlobGenerator(self.fs, pattern, path, namespaces, case_sensitive) + + +if __name__ == "__main__": + + from fs import open_fs + + m = open_fs("~/projects/moya") + + print(m.glob) + + print(m.glob('*.py')) + + for info, path in m.glob('*/*.py'): + print(info) + + print(m.glob('**/*.py').count()) + diff --git a/fs/wildcard.py b/fs/wildcard.py index 1ddcb350..43427f5a 100644 --- a/fs/wildcard.py +++ b/fs/wildcard.py @@ -2,13 +2,14 @@ """ # Adapted from https://hg.python.org/cpython/file/2.7/Lib/fnmatch.py -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import re import typing from functools import partial from .lrucache import LRUCache +from . import path if False: # typing.TYPE_CHECKING from typing import Callable, Iterable, MutableMapping, Text, Tuple, Pattern @@ -33,7 +34,7 @@ def match(pattern, name): try: re_pat = _PATTERN_CACHE[(pattern, True)] except KeyError: - res = _translate(pattern) + res = "(?ms)" + _translate(pattern) + '\Z' _PATTERN_CACHE[(pattern, True)] = re_pat = re.compile(res) return re_pat.match(name) is not None @@ -53,7 +54,7 @@ def imatch(pattern, name): try: re_pat = _PATTERN_CACHE[(pattern, False)] except KeyError: - res = _translate(pattern, case_sensitive=False) + res = "(?ms)" + _translate(pattern, case_sensitive=False) + '\Z' _PATTERN_CACHE[(pattern, False)] = re_pat = re.compile(res, re.IGNORECASE) return re_pat.match(name) is not None @@ -152,7 +153,7 @@ def _translate(pattern, case_sensitive=True): c = pattern[i] i = i + 1 if c == "*": - res = res + ".*" + res = res + "[^/]*" elif c == "?": res = res + "." 
elif c == "[": @@ -175,4 +176,4 @@ def _translate(pattern, case_sensitive=True): res = "%s[%s]" % (res, stuff) else: res = res + re.escape(c) - return res + "\Z(?ms)" + return res From b8620c85ae03e857d3563e4eda16d11d70c61971 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 15:23:12 +0100 Subject: [PATCH 02/13] typing, count and remove --- fs/base.py | 2 +- fs/glob.py | 55 ++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/fs/base.py b/fs/base.py index 06b7acdc..e2dbd844 100644 --- a/fs/base.py +++ b/fs/base.py @@ -111,7 +111,7 @@ def __exit__( @property def glob(self): - """`~fs.glob.GLobber`: a globber object.. + """`~fs.glob.Globber`: a globber object.. """ return Globber(self) diff --git a/fs/glob.py b/fs/glob.py index 58bab469..37123546 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from collections import namedtuple import re from ._repr import make_repr @@ -7,6 +8,15 @@ from . 
import wildcard +Counts = namedtuple("Counts", ["files", "directories", "data"]) + + +if False: # typing.TYPE_CHECKING + from typing import Iterator, List, Optional, Tuple + from .base import FS + from .info import Info + + def _translate_glob(pattern, case_sensitive=True): levels = 0 recursive = False @@ -35,7 +45,10 @@ def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): pattern, case_sensitive=case_sensitive ) for path, info in fs.walk.info( - path=path, namespaces=namespaces, max_depth=None if recursive else levels + path=path, + namespaces=namespaces, + max_depth=None if recursive else levels, + search="depth" if pattern.endswith("/") else "breadth", ): if info.is_dir: path += "/" @@ -44,7 +57,8 @@ def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): class GlobGenerator(object): - def __init__(self, fs, pattern, path='/', namespaces=None, case_sensitive=True): + def __init__(self, fs, pattern, path="/", namespaces=None, case_sensitive=True): + # type: (FS, str, str, Optional[List[str]], bool) -> None self.fs = fs self.pattern = pattern self.path = path @@ -56,12 +70,13 @@ def __repr__(self): self.__class__.__name__, self.fs, self.pattern, - path=(self.path, '/'), + path=(self.path, "/"), namespaces=(self.namespaces, None), case_sensitive=(self.case_sensitive, True), ) def __iter__(self): + # type: () -> Iterator[Tuple[str, Info]] for path, info in _glob( self.fs, self.pattern, @@ -72,16 +87,34 @@ def __iter__(self): yield path, info def count(self): - size = 0 + # type: () -> Tuple[int, int, int] + directories = 0 + files = 0 + data = 0 for path, info in _glob( self.fs, self.pattern, path=self.path, - namespaces=['details'], + namespaces=["details"], case_sensitive=self.case_sensitive, ): - size += info.size - return size + if info.is_dir: + directories += 1 + else: + files += 1 + data += info.size + return Counts(directories=directories, files=files, data=data) + + def remove(self): + # type: () -> int + removes = 0 + for 
path, info in self: + if info.is_dir: + self.fs.removedir(path) + else: + self.fs.remove(path) + removes += 1 + return removes class Globber(object): @@ -89,12 +122,14 @@ class Globber(object): __slots__ = ["fs"] def __init__(self, fs): + # type: (FS) -> None self.fs = fs def __repr__(self): return make_repr(self.__class__.__name__, self.fs) def __call__(self, pattern, path="/", namespaces=None, case_sensitive=True): + # type: (str, str, Optional[List[str]], bool) -> GlobGenerator return GlobGenerator(self.fs, pattern, path, namespaces, case_sensitive) @@ -106,10 +141,10 @@ def __call__(self, pattern, path="/", namespaces=None, case_sensitive=True): print(m.glob) - print(m.glob('*.py')) + print(m.glob("*.py")) - for info, path in m.glob('*/*.py'): + for info, path in m.glob("*/*.py"): print(info) - print(m.glob('**/*.py').count()) + print(m.glob("**/*.py").count()) From 9870590a55ecf1aff85a59e747c41d41511b9d83 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 15:31:21 +0100 Subject: [PATCH 03/13] count lines --- fs/glob.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/glob.py b/fs/glob.py index 37123546..39b5a91e 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -9,7 +9,7 @@ Counts = namedtuple("Counts", ["files", "directories", "data"]) - +LineCounts = namedtuple("LineCounts", ["lines", "non_blank"]) if False: # typing.TYPE_CHECKING from typing import Iterator, List, Optional, Tuple @@ -87,7 +87,7 @@ def __iter__(self): yield path, info def count(self): - # type: () -> Tuple[int, int, int] + # type: () -> Counts directories = 0 files = 0 data = 0 @@ -105,6 +105,18 @@ def count(self): data += info.size return Counts(directories=directories, files=files, data=data) + def count_lines(self): + # type: () -> LineCounts + lines = 0 + non_blank = 0 + for path, info in self: + if info.is_file: + for line in self.fs.open(path): + lines += 1 + if line.rstrip(): + non_blank += 1 + return LineCounts(lines=lines, 
non_blank=non_blank) + def remove(self): # type: () -> int removes = 0 @@ -148,3 +160,5 @@ def __call__(self, pattern, path="/", namespaces=None, case_sensitive=True): print(m.glob("**/*.py").count()) + print(m.glob("*/*.py").count_lines()) + From e720dc4f88a80b86cb176febac5bb0bdb48f3793 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 21:25:47 +0100 Subject: [PATCH 04/13] Add tests --- fs/glob.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/glob.py b/fs/glob.py index 39b5a91e..48ce8a54 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -36,7 +36,7 @@ def _translate_glob(pattern, case_sensitive=True): return ( levels, recursive, - re.compile(re_glob, re.IGNORECASE if case_sensitive else None), + re.compile(re_glob, 0 if case_sensitive else re.IGNORECASE), ) @@ -102,7 +102,7 @@ def count(self): directories += 1 else: files += 1 - data += info.size + data += info.size return Counts(directories=directories, files=files, data=data) def count_lines(self): @@ -122,7 +122,7 @@ def remove(self): removes = 0 for path, info in self: if info.is_dir: - self.fs.removedir(path) + self.fs.removetree(path) else: self.fs.remove(path) removes += 1 @@ -145,7 +145,7 @@ def __call__(self, pattern, path="/", namespaces=None, case_sensitive=True): return GlobGenerator(self.fs, pattern, path, namespaces, case_sensitive) -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover from fs import open_fs From 80f14e8148cedcc2a7215d6db85aaeb8330fdee7 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 22:01:02 +0100 Subject: [PATCH 05/13] tests --- CHANGELOG.md | 7 ++++ fs/_version.py | 2 +- fs/glob.py | 12 +++++- fs/test.py | 7 ++++ tests/test_glob.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 tests/test_glob.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c9f4f4e..1a10f741 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All 
notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [2.1.0] - Unreleased + +### Added + +- fs.glob support + ## [2.0.27] - 2018-08-05 ### Fixed @@ -159,6 +165,7 @@ No changes, pushed wrong branch to PyPi. ## [2.0.8] - 2017-08-13 ### Added + - Lstat info namespace - Link info namespace - FS.islink method diff --git a/fs/_version.py b/fs/_version.py index 48aae0cc..1b36bd0a 100644 --- a/fs/_version.py +++ b/fs/_version.py @@ -1,3 +1,3 @@ """Version, used in module and setup.py. """ -__version__ = "2.0.27" +__version__ = "2.1.0" diff --git a/fs/glob.py b/fs/glob.py index 48ce8a54..ca05732f 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -48,7 +48,7 @@ def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): path=path, namespaces=namespaces, max_depth=None if recursive else levels, - search="depth" if pattern.endswith("/") else "breadth", + search="depth", ): if info.is_dir: path += "/" @@ -86,6 +86,16 @@ def __iter__(self): ): yield path, info + def files(self): + for path, info in self: + if info.is_dir: + yield path + + def dirs(self): + for path, info in self: + if info.is_file: + yield path + def count(self): # type: () -> Counts directories = 0 diff --git a/fs/test.py b/fs/test.py index 6c4b413f..da69547c 100644 --- a/fs/test.py +++ b/fs/test.py @@ -22,6 +22,7 @@ from fs import ResourceType, Seek from fs import errors from fs import walk +from fs import Glob from fs.opener import open_fs from fs.subfs import ClosingSubFS, SubFS @@ -1796,3 +1797,9 @@ def test_case_sensitive(self): self.assert_isdir("foo") self.assert_isdir("Foo") self.assert_isfile("fOO") + + def test_glob(self): + self.assertIsInstance( + self.fs.glob, + glob.Globber + ) diff --git a/tests/test_glob.py b/tests/test_glob.py new file mode 100644 index 00000000..d14bb8bc --- /dev/null +++ b/tests/test_glob.py @@ -0,0 +1,97 @@ +from 
__future__ import unicode_literals + +import unittest + +from fs import glob +from fs import open_fs + + +class TestGlob(unittest.TestCase): + + def setUp(self): + fs = self.fs = open_fs('mem://') + fs.settext('foo.py', 'Hello, World') + fs.touch('bar.py') + fs.touch('baz.py') + fs.makedirs('egg') + fs.settext('egg/foo.py', 'from fs import open_fs') + fs.touch('egg/foo.pyc') + fs.makedirs('a/b/c/').settext('foo.py', 'import fs') + repr(fs.glob) + + def test_count_1dir(self): + globber = glob.Globber(self.fs) + counts = globber('*.py').count() + self.assertEqual( + counts, + glob.Counts(files=3, directories=0, data=12) + ) + repr(globber('*.py')) + + def test_count_2dir(self): + globber = glob.Globber(self.fs) + counts = globber('*/*.py').count() + self.assertEqual( + counts, + glob.Counts(files=1, directories=0, data=22) + ) + + def test_count_recurse_dir(self): + globber = glob.Globber(self.fs) + counts = globber('**/*.py').count() + self.assertEqual( + counts, + glob.Counts(files=5, directories=0, data=43) + ) + + def test_count_lines(self): + globber = glob.Globber(self.fs) + line_counts = globber('**/*.py').count_lines() + self.assertEqual( + line_counts, + glob.LineCounts(lines=3, non_blank=3) + ) + + def test_count_dirs(self): + globber = glob.Globber(self.fs) + counts = globber('**/?/').count() + self.assertEqual( + counts, + glob.Counts(files=0, directories=3, data=0) + ) + + def test_count_all(self): + globber = glob.Globber(self.fs) + counts = globber('**').count() + self.assertEqual( + counts, + glob.Counts(files=6, directories=4, data=43) + ) + counts = globber('**/').count() + self.assertEqual( + counts, + glob.Counts(files=0, directories=4, data=0) + ) + + def test_remove(self): + globber = glob.Globber(self.fs) + self.assertTrue(self.fs.exists('egg/foo.pyc')) + removed_count = globber('**/*.pyc').remove() + self.assertEqual(removed_count, 1) + self.assertFalse(self.fs.exists('egg/foo.pyc')) + + def test_remove_dir(self): + globber = 
glob.Globber(self.fs) + self.assertTrue(self.fs.exists('egg/foo.pyc')) + removed_count = globber('**/?/').remove() + self.assertEqual(removed_count, 3) + self.assertFalse(self.fs.exists('a')) + self.assertTrue(self.fs.exists('egg')) + + def test_remove_all(self): + globber = glob.Globber(self.fs) + globber('**').remove() + self.assertEqual( + sorted(self.fs.listdir('/')), + [] + ) From a7d1b96ae264894beb7bc616a0b35eea0ae800d2 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sun, 5 Aug 2018 22:16:20 +0100 Subject: [PATCH 06/13] Refactor iter --- fs/glob.py | 35 +++++++++++++++++------------------ fs/test.py | 2 +- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/glob.py b/fs/glob.py index ca05732f..402df483 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -40,7 +40,7 @@ def _translate_glob(pattern, case_sensitive=True): ) -def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): +def _glob(fs, pattern, search="breadth", path="/", namespaces=None, case_sensitive=True): levels, recursive, _re_glob = _translate_glob( pattern, case_sensitive=case_sensitive ) @@ -48,7 +48,7 @@ def _glob(fs, pattern, path="/", namespaces=None, case_sensitive=True): path=path, namespaces=namespaces, max_depth=None if recursive else levels, - search="depth", + search=search, ): if info.is_dir: path += "/" @@ -75,23 +75,29 @@ def __repr__(self): case_sensitive=(self.case_sensitive, True), ) - def __iter__(self): - # type: () -> Iterator[Tuple[str, Info]] - for path, info in _glob( + def _make_iter(self, search="breadth", namespaces=None): + # type: (str) -> Iterator[str, Tuple[str, Info]] + return _glob( self.fs, self.pattern, + search=search, path=self.path, - namespaces=self.namespaces, + namespaces=namespaces or self.namespaces, case_sensitive=self.case_sensitive, - ): - yield path, info + ) + + def __iter__(self): + # type: () -> Iterator[Tuple[str, Info]] + return self._make_iter() def files(self): + # type: () -> Iterator[str] for path, info in self: if 
info.is_dir: yield path def dirs(self): + # type: () -> Iterator[str] for path, info in self: if info.is_file: yield path @@ -101,13 +107,7 @@ def count(self): directories = 0 files = 0 data = 0 - for path, info in _glob( - self.fs, - self.pattern, - path=self.path, - namespaces=["details"], - case_sensitive=self.case_sensitive, - ): + for path, info in self._make_iter(namespaces=['details']): if info.is_dir: directories += 1 else: @@ -119,7 +119,7 @@ def count_lines(self): # type: () -> LineCounts lines = 0 non_blank = 0 - for path, info in self: + for path, info in self._make_iter(): if info.is_file: for line in self.fs.open(path): lines += 1 @@ -130,7 +130,7 @@ def count_lines(self): def remove(self): # type: () -> int removes = 0 - for path, info in self: + for path, info in self._make_iter(search='depth'): if info.is_dir: self.fs.removetree(path) else: @@ -140,7 +140,6 @@ def remove(self): class Globber(object): - __slots__ = ["fs"] def __init__(self, fs): diff --git a/fs/test.py b/fs/test.py index da69547c..d065dd0f 100644 --- a/fs/test.py +++ b/fs/test.py @@ -22,7 +22,7 @@ from fs import ResourceType, Seek from fs import errors from fs import walk -from fs import Glob +from fs import glob from fs.opener import open_fs from fs.subfs import ClosingSubFS, SubFS From 5d227e925076314f10e163e84ae8203052997a0d Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 11:08:46 +0100 Subject: [PATCH 07/13] docs --- docs/source/conf.py | 2 + docs/source/globbing.rst | 56 +++++++++++ docs/source/index.rst | 1 + docs/source/reference.rst | 1 + docs/source/reference/glob.rst | 5 + fs/glob.py | 176 ++++++++++++++++++++++----------- fs/wildcard.py | 5 +- requirements.txt | 2 +- tests/test_glob.py | 116 +++++++++++----------- 9 files changed, 244 insertions(+), 120 deletions(-) create mode 100644 docs/source/globbing.rst create mode 100644 docs/source/reference/glob.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index f3b85c68..97e58dac 100644 --- 
a/docs/source/conf.py +++ b/docs/source/conf.py @@ -302,3 +302,5 @@ # If true, do not generate a @detailmenu in the "Top" node's menu. #texinfo_no_detailmenu = False + +napoleon_include_special_with_doc = True \ No newline at end of file diff --git a/docs/source/globbing.rst b/docs/source/globbing.rst new file mode 100644 index 00000000..085e89a5 --- /dev/null +++ b/docs/source/globbing.rst @@ -0,0 +1,56 @@ +.. _globbing: + +Globbing +======== + +Globbinng is the process of matching paths according to the rules used +by the Unix shell. + +Generally speaking, you can think of a glob pattern as a path containing +one or more wildcard patterns. For instance ``"*.py"`` is a valid glob +pattern that will match all Python files in the current directory. + + +Matching Files and Directories +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``*`` + Matches all files in the current directory. +``*.py`` + Matches all .py file in the current directory. +``*.py?`` + Matches all .py files and .pyi, .pyc etc in the currenct directory. +``project/*.py`` + Matches all .py files in a directory called ``project``. +``*/*.py`` + Matches all .py files in any sub directory. +``**/*.py`` + Recursively matches all .py files. + + +Matching Directories +~~~~~~~~~~~~~~~~~~~~ + +You can specify that you only want to match a directory by appending +a forward slash to the pattern. + +``**/.git/`` + Recursively matches all the git directories. + + +Glob Interface +============== + +PyFilesystem supports globbing via the ``glob`` object on every FS instance. +Here's how you might use it to find all the Python files in your filesystem:: + + for path, info in my_fs.glob("**/*.py"): + print(path) + +If you call ``.glob`` with a pattern it will return an iterator of every +path and corresponding :class:`~fs.info.Info` object of any matching path. 
+ + +Glob Methods +~~~~~~~~~~~~ + diff --git a/docs/source/index.rst b/docs/source/index.rst index cd007d78..a393ff26 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -17,6 +17,7 @@ Contents: info.rst openers.rst walking.rst + globbing.rst builtin.rst implementers.rst extension.rst diff --git a/docs/source/reference.rst b/docs/source/reference.rst index 634263f7..0cc860d7 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -9,6 +9,7 @@ Reference reference/copy.rst reference/enums.rst reference/errors.rst + reference/glob.rst reference/info_objects.rst reference/filesize.rst reference/mirror.rst diff --git a/docs/source/reference/glob.rst b/docs/source/reference/glob.rst new file mode 100644 index 00000000..172f2ea8 --- /dev/null +++ b/docs/source/reference/glob.rst @@ -0,0 +1,5 @@ +fs.glob +======= + +.. automodule:: fs.glob + :members: diff --git a/fs/glob.py b/fs/glob.py index 402df483..1b717293 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -3,11 +3,17 @@ from collections import namedtuple import re +from .lrucache import LRUCache from ._repr import make_repr from . import path from . 
import wildcard +_PATTERN_CACHE = LRUCache( + 1000 +) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]] + + Counts = namedtuple("Counts", ["files", "directories", "data"]) LineCounts = namedtuple("LineCounts", ["lines", "non_blank"]) @@ -30,9 +36,7 @@ def _translate_glob(pattern, case_sensitive=True): "/" + wildcard._translate(component, case_sensitive=case_sensitive) ) levels += 1 - re_glob = ( - "(?ms)" + "".join(re_patterns) + ("/\Z" if pattern.endswith("/") else "\Z") - ) + re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$") return ( levels, recursive, @@ -40,30 +44,55 @@ def _translate_glob(pattern, case_sensitive=True): ) -def _glob(fs, pattern, search="breadth", path="/", namespaces=None, case_sensitive=True): - levels, recursive, _re_glob = _translate_glob( - pattern, case_sensitive=case_sensitive - ) - for path, info in fs.walk.info( - path=path, - namespaces=namespaces, - max_depth=None if recursive else levels, - search=search, - ): - if info.is_dir: - path += "/" - if _re_glob.match(path): - yield path, info +def match(pattern, path): + try: + levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, True)] + except KeyError: + levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True) + _PATTERN_CACHE[(pattern, True)] = (levels, recursive, re_pattern) + return bool(re_pattern.match(path)) -class GlobGenerator(object): - def __init__(self, fs, pattern, path="/", namespaces=None, case_sensitive=True): - # type: (FS, str, str, Optional[List[str]], bool) -> None +def imatch(pattern, path): + try: + levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, False)] + except KeyError: + levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True) + _PATTERN_CACHE[(pattern, False)] = (levels, recursive, re_pattern) + return bool(re_pattern.match(path)) + + +class Globber(object): + """A generator of glob results. 
+ + Arguments: + fs (~fs.base.FS): A filesystem object + pattern (str): A glob pattern, e.g. ``"**/*.py"` + namespaces (list): A list of additional info namespaces. + case_sensitive (bool): If ``True``, the path matching will be + case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will + be different, otherwise path matching will be case *insensitive*. + exclude_dirs (list): A list of patterns to exclude when searching, + e.g. ``["*.git"]``. + + """ + + def __init__( + self, + fs, + pattern, + path="/", + namespaces=None, + case_sensitive=True, + exclude_dirs=None, + ): + # type: (FS, str, str, Optional[List[str]], bool, Optional[List[str]]) -> None self.fs = fs self.pattern = pattern self.path = path self.namespaces = namespaces self.case_sensitive = case_sensitive + self.exclude_dirs = exclude_dirs def __repr__(self): return make_repr( @@ -73,41 +102,56 @@ def __repr__(self): path=(self.path, "/"), namespaces=(self.namespaces, None), case_sensitive=(self.case_sensitive, True), + exclude_dirs=(self.exclude_dirs, None), ) def _make_iter(self, search="breadth", namespaces=None): - # type: (str) -> Iterator[str, Tuple[str, Info]] - return _glob( - self.fs, - self.pattern, - search=search, + try: + levels, recursive, re_pattern = _PATTERN_CACHE[ + (self.pattern, self.case_sensitive) + ] + except KeyError: + levels, recursive, re_pattern = _translate_glob( + self.pattern, case_sensitive=self.case_sensitive + ) + + for path, info in self.fs.walk.info( path=self.path, namespaces=namespaces or self.namespaces, - case_sensitive=self.case_sensitive, - ) + max_depth=None if recursive else levels, + search=search, + exclude_dirs=self.exclude_dirs, + ): + if info.is_dir: + path += "/" + if re_pattern.match(path): + yield path, info def __iter__(self): # type: () -> Iterator[Tuple[str, Info]] return self._make_iter() - def files(self): - # type: () -> Iterator[str] - for path, info in self: - if info.is_dir: - yield path + def count(self): + """Count files / directories / data 
in matched paths. - def dirs(self): - # type: () -> Iterator[str] - for path, info in self: - if info.is_file: - yield path + Example:: - def count(self): + counts = Globber(my_fs, '*.py').counts() + print(f"files={counts.files}") + print(f"directories={counts.directories}") + print(f"data={count.data} bytes") + + Returns: + `~Counts`: A named tuple containing results. + + + + """ # type: () -> Counts directories = 0 files = 0 data = 0 - for path, info in self._make_iter(namespaces=['details']): + for path, info in self._make_iter(namespaces=["details"]): if info.is_dir: directories += 1 else: @@ -130,7 +174,7 @@ def count_lines(self): def remove(self): # type: () -> int removes = 0 - for path, info in self._make_iter(search='depth'): + for path, info in self._make_iter(search="depth"): if info.is_dir: self.fs.removetree(path) else: @@ -139,7 +183,14 @@ def remove(self): return removes -class Globber(object): +class BoundGlobber(object): + """An object which searches a filesystem for paths matching a *glob* + pattern. + + Arguments: + fs (FS): A filesystem object. + + """ __slots__ = ["fs"] def __init__(self, fs): @@ -149,25 +200,32 @@ def __init__(self, fs): def __repr__(self): return make_repr(self.__class__.__name__, self.fs) - def __call__(self, pattern, path="/", namespaces=None, case_sensitive=True): - # type: (str, str, Optional[List[str]], bool) -> GlobGenerator - return GlobGenerator(self.fs, pattern, path, namespaces, case_sensitive) - - -if __name__ == "__main__": # pragma: no cover - - from fs import open_fs - - m = open_fs("~/projects/moya") - - print(m.glob) - - print(m.glob("*.py")) + def __call__( + self, pattern, path="/", namespaces=None, case_sensitive=True, exclude_dirs=None + ): + """A generator of glob results. - for info, path in m.glob("*/*.py"): - print(info) + Arguments: + pattern (str): A glob pattern, e.g. ``"**/*.py"`` + namespaces (list): A list of additional info namespaces. 
+ case_sensitive (bool): If ``True``, the path matching will be + case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will + be different, otherwise path matching will be case **insensitive**. + exclude_dirs (list): A list of patterns to exclude when searching, + e.g. ``["*.git"]``. - print(m.glob("**/*.py").count()) + Returns: + ~fs.flob.Globber: An object that may be iterated over, + yielding tuples of ``(path, info)`` for matching paths. - print(m.glob("*/*.py").count_lines()) + """ + # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber + return GlobMatcher( + self.fs, + pattern, + path, + namespaces=namespaces, + case_sensitive=case_sensitive, + exclude_dirs=exclude_dirs, + ) diff --git a/fs/wildcard.py b/fs/wildcard.py index 43427f5a..a56be0b0 100644 --- a/fs/wildcard.py +++ b/fs/wildcard.py @@ -15,8 +15,7 @@ from typing import Callable, Iterable, MutableMapping, Text, Tuple, Pattern -_MAXCACHE = 1000 -_PATTERN_CACHE = LRUCache(_MAXCACHE) # type: LRUCache[Tuple[Text, bool], Pattern] +_PATTERN_CACHE = LRUCache(1000) # type: LRUCache[Tuple[Text, bool], Pattern] def match(pattern, name): @@ -106,7 +105,7 @@ def get_matcher(patterns, case_sensitive): Arguments: patterns (list): A list of wildcard pattern. e.g. ``["*.py", "*.pyc"]`` - case_sensitive (bool): If `True`, then the callable will be case + case_sensitive (bool): If ``True``, then the callable will be case sensitive, otherwise it will be case insensitive. 
Returns: diff --git a/requirements.txt b/requirements.txt index 668c6793..0b0438f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -appdirs==1.4.0 +appdirs~=1.4.3 backports.os==0.1.1; python_version == '2.7' enum34==1.1.6 ; python_version < '3.4' pytz diff --git a/tests/test_glob.py b/tests/test_glob.py index d14bb8bc..9104bfc8 100644 --- a/tests/test_glob.py +++ b/tests/test_glob.py @@ -7,91 +7,93 @@ class TestGlob(unittest.TestCase): - def setUp(self): - fs = self.fs = open_fs('mem://') - fs.settext('foo.py', 'Hello, World') - fs.touch('bar.py') - fs.touch('baz.py') - fs.makedirs('egg') - fs.settext('egg/foo.py', 'from fs import open_fs') - fs.touch('egg/foo.pyc') - fs.makedirs('a/b/c/').settext('foo.py', 'import fs') + fs = self.fs = open_fs("mem://") + fs.settext("foo.py", "Hello, World") + fs.touch("bar.py") + fs.touch("baz.py") + fs.makedirs("egg") + fs.settext("egg/foo.py", "from fs import open_fs") + fs.touch("egg/foo.pyc") + fs.makedirs("a/b/c/").settext("foo.py", "import fs") repr(fs.glob) + def test_match(self): + tests = [ + ("*.?y", "/test.py", True), + ("*.py", "/test.py", True), + ("*.py", "/test.pc", False), + ("*.py", "/foo/test.py", False), + ("foo/*.py", "/foo/test.py", True), + ("foo/*.py", "/bar/foo/test.py", False), + ("?oo/*.py", "/foo/test.py", True), + ("*/*.py", "/foo/test.py", True), + ("foo/*.py", "/bar/foo/test.py", False), + ("**/foo/*.py", "/bar/foo/test.py", True), + ("foo/**/bar/*.py", "/foo/bar/test.py", True), + ("foo/**/bar/*.py", "/foo/baz/egg/bar/test.py", True), + ("foo/**/bar/*.py", "/foo/baz/egg/bar/egg/test.py", False), + ("**", "/test.py", True), + ("**", "/test", True), + ("**", "/test/", True), + ("**/", "/test/", True), + ("**/", "/test.py", False), + ] + for pattern, path, expected in tests: + self.assertEqual(glob.match(pattern, path), expected) + # Run a second time to test cache + for pattern, path, expected in tests: + self.assertEqual(glob.match(pattern, path), expected) + def 
test_count_1dir(self): globber = glob.Globber(self.fs) - counts = globber('*.py').count() - self.assertEqual( - counts, - glob.Counts(files=3, directories=0, data=12) - ) - repr(globber('*.py')) + counts = globber("*.py").count() + self.assertEqual(counts, glob.Counts(files=3, directories=0, data=12)) + repr(globber("*.py")) def test_count_2dir(self): globber = glob.Globber(self.fs) - counts = globber('*/*.py').count() - self.assertEqual( - counts, - glob.Counts(files=1, directories=0, data=22) - ) + counts = globber("*/*.py").count() + self.assertEqual(counts, glob.Counts(files=1, directories=0, data=22)) def test_count_recurse_dir(self): globber = glob.Globber(self.fs) - counts = globber('**/*.py').count() - self.assertEqual( - counts, - glob.Counts(files=5, directories=0, data=43) - ) + counts = globber("**/*.py").count() + self.assertEqual(counts, glob.Counts(files=5, directories=0, data=43)) def test_count_lines(self): globber = glob.Globber(self.fs) - line_counts = globber('**/*.py').count_lines() - self.assertEqual( - line_counts, - glob.LineCounts(lines=3, non_blank=3) - ) + line_counts = globber("**/*.py").count_lines() + self.assertEqual(line_counts, glob.LineCounts(lines=3, non_blank=3)) def test_count_dirs(self): globber = glob.Globber(self.fs) - counts = globber('**/?/').count() - self.assertEqual( - counts, - glob.Counts(files=0, directories=3, data=0) - ) + counts = globber("**/?/").count() + self.assertEqual(counts, glob.Counts(files=0, directories=3, data=0)) def test_count_all(self): globber = glob.Globber(self.fs) - counts = globber('**').count() - self.assertEqual( - counts, - glob.Counts(files=6, directories=4, data=43) - ) - counts = globber('**/').count() - self.assertEqual( - counts, - glob.Counts(files=0, directories=4, data=0) - ) + counts = globber("**").count() + self.assertEqual(counts, glob.Counts(files=6, directories=4, data=43)) + counts = globber("**/").count() + self.assertEqual(counts, glob.Counts(files=0, directories=4, data=0)) 
def test_remove(self): globber = glob.Globber(self.fs) - self.assertTrue(self.fs.exists('egg/foo.pyc')) - removed_count = globber('**/*.pyc').remove() + self.assertTrue(self.fs.exists("egg/foo.pyc")) + removed_count = globber("**/*.pyc").remove() self.assertEqual(removed_count, 1) - self.assertFalse(self.fs.exists('egg/foo.pyc')) + self.assertFalse(self.fs.exists("egg/foo.pyc")) def test_remove_dir(self): globber = glob.Globber(self.fs) - self.assertTrue(self.fs.exists('egg/foo.pyc')) - removed_count = globber('**/?/').remove() + self.assertTrue(self.fs.exists("egg/foo.pyc")) + removed_count = globber("**/?/").remove() self.assertEqual(removed_count, 3) - self.assertFalse(self.fs.exists('a')) - self.assertTrue(self.fs.exists('egg')) + self.assertFalse(self.fs.exists("a")) + self.assertTrue(self.fs.exists("egg")) def test_remove_all(self): globber = glob.Globber(self.fs) - globber('**').remove() - self.assertEqual( - sorted(self.fs.listdir('/')), - [] - ) + globber("**").remove() + self.assertEqual(sorted(self.fs.listdir("/")), []) From cf7b20e33efd47f54dc4a717c79470aba5436b14 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 17:42:09 +0100 Subject: [PATCH 08/13] docs and refactor --- docs/source/globbing.rst | 46 +++++++++++++++-------- docs/source/guide.rst | 14 +++++++ fs/_bulk.py | 1 - fs/base.py | 28 ++++---------- fs/glob.py | 80 +++++++++++++++++++++++++++++++++------- fs/test.py | 2 +- tests/test_copy.py | 6 +++ tests/test_errors.py | 1 + tests/test_glob.py | 18 ++++----- 9 files changed, 135 insertions(+), 61 deletions(-) diff --git a/docs/source/globbing.rst b/docs/source/globbing.rst index 085e89a5..084383ac 100644 --- a/docs/source/globbing.rst +++ b/docs/source/globbing.rst @@ -3,17 +3,27 @@ Globbing ======== -Globbinng is the process of matching paths according to the rules used +Globbing is the process of matching paths according to the rules used by the Unix shell. 
Generally speaking, you can think of a glob pattern as a path containing -one or more wildcard patterns. For instance ``"*.py"`` is a valid glob -pattern that will match all Python files in the current directory. +one or more wildcard patterns, separated by forward slashes. Matching Files and Directories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a glob pattern, a ``*`` matches any text in a filename. A ``?`` +matches any single character. A ``**`` matches any number of subdirectories, +making the glob *recursive*. If the glob pattern ends in a ``/``, it will +only match directory paths; otherwise it will match files and directories. + +.. note:: + A recursive glob requires that PyFilesystem scan a lot of files, + and can potentially be slow for large (or network based) filesystems. + +Here's a summary of glob patterns: + ``*`` Matches all files in the current directory. ``*.py`` @@ -26,31 +36,35 @@ Matching Files and Directories Matches all .py files in any sub directory. ``**/*.py`` Recursively matches all .py files. - - -Matching Directories -~~~~~~~~~~~~~~~~~~~~ - -You can specify that you only want to match a directory by appending -a forward slash to the pattern. - ``**/.git/`` Recursively matches all the git directories. Glob Interface -============== +~~~~~~~~~~~~~~ -PyFilesystem supports globbing via the ``glob`` object on every FS instance. -Here's how you might use it to find all the Python files in your filesystem:: +PyFilesystem supports globbing via the ``glob`` attribute on every FS +instance, which is an instance of :class:`~fs.glob.BoundGlobber`. Here's +how you might use it to find all the Python files in your filesystem:: for path, info in my_fs.glob("**/*.py"): print(path) -If you call ``.glob`` with a pattern it will return an iterator of every -path and corresponding :class:`~fs.info.Info` object of any matching path.
+Calling ``.glob`` with a pattern will return an iterator of every +path and corresponding :class:`~fs.info.Info` for each matched file and +directory. Glob Methods ~~~~~~~~~~~~ +In addition to iterating over the results, you can also call methods on +the :class:`~fs.glob.Globber` which apply to every matched path. + +For instance, here is how you can use glob to remove all ``.pyc`` files +from a project directory:: + + >>> import fs + >>> fs.open_fs('~/projects/my_project').glob('**/*.pyc').remove() + 29 + diff --git a/docs/source/guide.rst b/docs/source/guide.rst index e6500a5c..6d35d962 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -196,6 +196,20 @@ The ``walk`` attribute on FS objects is instance of a :class:`~fs.walk.BoundWalk See :ref:`walking` for more information on walking directories. +Globbing +~~~~~~~~ + +*Globbing* is a slightly higher level way of scanning filesystem. Paths can be filtered by a *glob* pattern, which is similar to a wildcard (such as *.py), but can match multiple levels of a directory structure. + +Here's an example of globbing, which removes all the ``.pyc`` files in your project directory:: + + >>> from fs import open_fs + >>> open_fs('~/project').glob('**/*.pyc').remove() + 62 + +See :ref:`globbing` for more information. 
+ + Moving and Copying ~~~~~~~~~~~~~~~~~~ diff --git a/fs/_bulk.py b/fs/_bulk.py index 580908db..751f0f58 100644 --- a/fs/_bulk.py +++ b/fs/_bulk.py @@ -78,7 +78,6 @@ def __init__(self, num_workers=4): # type: (int) -> None if num_workers < 0: raise ValueError("num_workers must be >= 0") - self.num_workers = num_workers self.queue = None # type: Optional[Queue[_Task]] self.workers = [] # type: List[_Worker] self.errors = [] # type: List[Exception] diff --git a/fs/base.py b/fs/base.py index e2dbd844..ee5f4e17 100644 --- a/fs/base.py +++ b/fs/base.py @@ -6,35 +6,23 @@ """ -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, print_function, unicode_literals import abc +import itertools import os import threading import time import typing -from functools import partial - from contextlib import closing -import itertools +from functools import partial import six -from . import copy -from . import errors -from . import fsencode -from . import iotools -from . import move -from . import tools -from . import walk -from . import wildcard -from .glob import Globber +from . import copy, errors, fsencode, iotools, move, tools, walk, wildcard +from .glob import BoundGlobber from .mode import validate_open_mode -from .path import abspath -from .path import join -from .path import normpath +from .path import abspath, join, normpath from .time import datetime_to_epoch from .walk import Walker @@ -111,9 +99,9 @@ def __exit__( @property def glob(self): - """`~fs.glob.Globber`: a globber object.. + """`~fs.glob.BoundGlobber`: a globber object.. 
""" - return Globber(self) + return BoundGlobber(self) @property def walk(self): diff --git a/fs/glob.py b/fs/glob.py index 1b717293..cbada442 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -45,6 +45,23 @@ def _translate_glob(pattern, case_sensitive=True): def match(pattern, path): + # type: (str, str) -> bool + """Compare a glob pattern with a path (case sensitive). + + Arguments: + pattern (str): A glob pattern. + path (str): A path. + + Returns: + bool: ``True`` if the path matches the pattern. + + Example: + + >>> from fs.glob import match + >>> match("**/*.py", "/fs/glob.py") + True + + """ try: levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, True)] except KeyError: @@ -54,6 +71,17 @@ def match(pattern, path): def imatch(pattern, path): + # type: (str, str) -> bool + """Compare a glob pattern with a path (case insensitive). + + Arguments: + pattern (str): A glob pattern. + path (str): A path. + + Returns: + bool: ``True`` if the path matches the pattern. + + """ try: levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, False)] except KeyError: @@ -67,7 +95,8 @@ class Globber(object): Arguments: fs (~fs.base.FS): A filesystem object - pattern (str): A glob pattern, e.g. ``"**/*.py"` + pattern (str): A glob pattern, e.g. ``"**/*.py"`` + path (str): A path to a directory in the filesystem. namespaces (list): A list of additional info namespaces. case_sensitive (bool): If ``True``, the path matching will be case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will @@ -134,18 +163,14 @@ def __iter__(self): def count(self): """Count files / directories / data in matched paths. - Example:: - - counts = Globber(my_fs, '*.py').counts() - print(f"files={counts.files}") - print(f"directories={counts.directories}") - print(f"data={count.data} bytes") + Example: + >>> import fs + >>> fs.open_fs('~/projects').glob('**/*.py').count() + Counts(files=18519, directories=0, data=206690458) Returns: `~Counts`: A named tuple containing results. 
- - """ # type: () -> Counts directories = 0 @@ -160,18 +185,41 @@ def count(self): return Counts(directories=directories, files=files, data=data) def count_lines(self): + """Count the lines in the matched files. + + Returns: + `~LineCounts`: A named tuple containing line counts. + + Example: + >>> import fs + >>> fs.open_fs('~/projects').glob('**/*.py').count_lines() + LineCounts(lines=5767102, non_blank=4915110) + + """ + # type: () -> LineCounts lines = 0 non_blank = 0 for path, info in self._make_iter(): if info.is_file: - for line in self.fs.open(path): + for line in self.fs.open(path, "rb"): lines += 1 if line.rstrip(): non_blank += 1 return LineCounts(lines=lines, non_blank=non_blank) def remove(self): + """Removed all matched paths. + + Returns: + int: Number of file and directories removed. + + Example: + >>> import fs + >>> fs.open_fs('~/projects/my_project').glob('**/*.pyc').remove() + 29 + + """ # type: () -> int removes = 0 for path, info in self._make_iter(search="depth"): @@ -184,13 +232,16 @@ def remove(self): class BoundGlobber(object): - """An object which searches a filesystem for paths matching a *glob* - pattern. + """A :class:`~Globber` object bound to a filesystem. + + An instance of this object is available on every Filesystem object + as ``.glob``. Arguments: fs (FS): A filesystem object. """ + __slots__ = ["fs"] def __init__(self, fs): @@ -215,13 +266,14 @@ def __call__( e.g. ``["*.git"]``. Returns: - ~fs.flob.Globber: An object that may be iterated over, + `~Globber`: + An object that may be iterated over, yielding tuples of ``(path, info)`` for matching paths. 
""" # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber - return GlobMatcher( + return Globber( self.fs, pattern, path, diff --git a/fs/test.py b/fs/test.py index d065dd0f..04bbbb46 100644 --- a/fs/test.py +++ b/fs/test.py @@ -1801,5 +1801,5 @@ def test_case_sensitive(self): def test_glob(self): self.assertIsInstance( self.fs.glob, - glob.Globber + glob.BoundGlobber ) diff --git a/tests/test_copy.py b/tests/test_copy.py index f0958bcc..0bea13a2 100644 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -29,6 +29,12 @@ def test_copy_fs(self): self.assertTrue(dst_fs.isdir("foo/bar")) self.assertTrue(dst_fs.isfile("test.txt")) + def test_copy_value_error(self): + src_fs = open_fs("mem://") + dst_fs = open_fs("mem://") + with self.assertRaises(ValueError): + fs.copy.copy_fs(src_fs, dst_ds, workers=-1) + def test_copy_dir(self): src_fs = open_fs("mem://") src_fs.makedirs("foo/bar") diff --git a/tests/test_errors.py b/tests/test_errors.py index 8599fb85..0b78fd15 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -30,6 +30,7 @@ def test_raise_in_multiprocessing(self): [errors.NoURL, "some_path", "some_purpose"], [errors.Unsupported], [errors.IllegalBackReference, "path"], + [errors.MissingInfoNamespace, "path"] ] try: pool = multiprocessing.Pool(1) diff --git a/tests/test_glob.py b/tests/test_glob.py index 9104bfc8..54719b3d 100644 --- a/tests/test_glob.py +++ b/tests/test_glob.py @@ -46,47 +46,47 @@ def test_match(self): self.assertEqual(glob.match(pattern, path), expected) def test_count_1dir(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) counts = globber("*.py").count() self.assertEqual(counts, glob.Counts(files=3, directories=0, data=12)) repr(globber("*.py")) def test_count_2dir(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) counts = globber("*/*.py").count() self.assertEqual(counts, glob.Counts(files=1, directories=0, data=22)) def test_count_recurse_dir(self): 
- globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) counts = globber("**/*.py").count() self.assertEqual(counts, glob.Counts(files=5, directories=0, data=43)) def test_count_lines(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) line_counts = globber("**/*.py").count_lines() self.assertEqual(line_counts, glob.LineCounts(lines=3, non_blank=3)) def test_count_dirs(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) counts = globber("**/?/").count() self.assertEqual(counts, glob.Counts(files=0, directories=3, data=0)) def test_count_all(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) counts = globber("**").count() self.assertEqual(counts, glob.Counts(files=6, directories=4, data=43)) counts = globber("**/").count() self.assertEqual(counts, glob.Counts(files=0, directories=4, data=0)) def test_remove(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) self.assertTrue(self.fs.exists("egg/foo.pyc")) removed_count = globber("**/*.pyc").remove() self.assertEqual(removed_count, 1) self.assertFalse(self.fs.exists("egg/foo.pyc")) def test_remove_dir(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) self.assertTrue(self.fs.exists("egg/foo.pyc")) removed_count = globber("**/?/").remove() self.assertEqual(removed_count, 3) @@ -94,6 +94,6 @@ def test_remove_dir(self): self.assertTrue(self.fs.exists("egg")) def test_remove_all(self): - globber = glob.Globber(self.fs) + globber = glob.BoundGlobber(self.fs) globber("**").remove() self.assertEqual(sorted(self.fs.listdir("/")), []) From 93ddd8ca10a3f0baf0917b99f30541ac5fb4e0c9 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 17:48:27 +0100 Subject: [PATCH 09/13] polish --- fs/_bulk.py | 1 + fs/glob.py | 12 ++++++------ fs/wildcard.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/_bulk.py b/fs/_bulk.py index 
751f0f58..580908db 100644 --- a/fs/_bulk.py +++ b/fs/_bulk.py @@ -78,6 +78,7 @@ def __init__(self, num_workers=4): # type: (int) -> None if num_workers < 0: raise ValueError("num_workers must be >= 0") + self.num_workers = num_workers self.queue = None # type: Optional[Queue[_Task]] self.workers = [] # type: List[_Worker] self.errors = [] # type: List[Exception] diff --git a/fs/glob.py b/fs/glob.py index cbada442..702357fe 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -5,7 +5,7 @@ from .lrucache import LRUCache from ._repr import make_repr -from . import path +from .path import iteratepath from . import wildcard @@ -27,7 +27,7 @@ def _translate_glob(pattern, case_sensitive=True): levels = 0 recursive = False re_patterns = [""] - for component in path.iteratepath(pattern): + for component in iteratepath(pattern): if component == "**": re_patterns.append(".*/?") recursive = True @@ -161,6 +161,7 @@ def __iter__(self): return self._make_iter() def count(self): + # type: () -> Counts """Count files / directories / data in matched paths. Example: @@ -172,7 +173,6 @@ def count(self): `~Counts`: A named tuple containing results. """ - # type: () -> Counts directories = 0 files = 0 data = 0 @@ -185,6 +185,7 @@ def count(self): return Counts(directories=directories, files=files, data=data) def count_lines(self): + # type: () -> LineCounts """Count the lines in the matched files. Returns: @@ -197,7 +198,6 @@ def count_lines(self): """ - # type: () -> LineCounts lines = 0 non_blank = 0 for path, info in self._make_iter(): @@ -209,6 +209,7 @@ def count_lines(self): return LineCounts(lines=lines, non_blank=non_blank) def remove(self): + # type: () -> int """Removed all matched paths. 
Returns: @@ -220,7 +221,6 @@ def remove(self): 29 """ - # type: () -> int removes = 0 for path, info in self._make_iter(search="depth"): if info.is_dir: @@ -254,6 +254,7 @@ def __repr__(self): def __call__( self, pattern, path="/", namespaces=None, case_sensitive=True, exclude_dirs=None ): + # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber """A generator of glob results. Arguments: @@ -272,7 +273,6 @@ def __call__( """ - # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber return Globber( self.fs, pattern, diff --git a/fs/wildcard.py b/fs/wildcard.py index a56be0b0..b9f58591 100644 --- a/fs/wildcard.py +++ b/fs/wildcard.py @@ -33,7 +33,7 @@ def match(pattern, name): try: re_pat = _PATTERN_CACHE[(pattern, True)] except KeyError: - res = "(?ms)" + _translate(pattern) + '\Z' + res = "(?ms)" + _translate(pattern) + r'\Z' _PATTERN_CACHE[(pattern, True)] = re_pat = re.compile(res) return re_pat.match(name) is not None @@ -53,7 +53,7 @@ def imatch(pattern, name): try: re_pat = _PATTERN_CACHE[(pattern, False)] except KeyError: - res = "(?ms)" + _translate(pattern, case_sensitive=False) + '\Z' + res = "(?ms)" + _translate(pattern, case_sensitive=False) + r'\Z' _PATTERN_CACHE[(pattern, False)] = re_pat = re.compile(res, re.IGNORECASE) return re_pat.match(name) is not None From dfe87caa75bd0c84949c44810213f8afa756bef1 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 17:51:14 +0100 Subject: [PATCH 10/13] doc fix --- docs/source/guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guide.rst b/docs/source/guide.rst index 6d35d962..073b2dfa 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -199,7 +199,7 @@ See :ref:`walking` for more information on walking directories. Globbing ~~~~~~~~ -*Globbing* is a slightly higher level way of scanning filesystem. 
Paths can be filtered by a *glob* pattern, which is similar to a wildcard (such as *.py), but can match multiple levels of a directory structure. +Closely related to walking a filesystem is *Globbing*, which is a slightly higher level way of scanning filesystems. Paths can be filtered by a *glob* pattern, which is similar to a wildcard (such as ``*.py``), but can match multiple levels of a directory structure. Here's an example of globbing, which removes all the ``.pyc`` files in your project directory:: From 25b1900ee4181aa5881f8887338614fca937bdb6 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 17:51:59 +0100 Subject: [PATCH 11/13] doc --- docs/source/globbing.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/globbing.rst b/docs/source/globbing.rst index 084383ac..f6babe5b 100644 --- a/docs/source/globbing.rst +++ b/docs/source/globbing.rst @@ -40,8 +40,8 @@ Here's a summary of glob patterns: Recursively matches all the git directories. -Glob Interface -~~~~~~~~~~~~~~ +Interface +~~~~~~~~~ PyFilesystem supports globbing via the ``glob`` attribute on every FS instance, which is an instance of :class:`~fs.glob.BoundGlobber`. Here's @@ -55,8 +55,8 @@ path and corresponding :class:`~fs.info.Info` for each matched file and directory. -Glob Methods -~~~~~~~~~~~~ +Batch Methods +~~~~~~~~~~~~~ In addition to iterating over the results, you can also call methods on the :class:`~fs.glob.Globber` which apply to every matched path. From ce6acb7350f6258b2f292caf6419db29f55d550b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 17:53:59 +0100 Subject: [PATCH 12/13] doc fix --- docs/source/guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guide.rst b/docs/source/guide.rst index 073b2dfa..a7c81b21 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -199,7 +199,7 @@ See :ref:`walking` for more information on walking directories. 
Globbing ~~~~~~~~ -Closely related to walking a filesystem is *Globbing*, which is a slightly higher level way of scanning filesystems. Paths can be filtered by a *glob* pattern, which is similar to a wildcard (such as ``*.py``), but can match multiple levels of a directory structure. +Closely related to walking a filesystem is *globbing*, which is a slightly higher level way of scanning filesystems. Paths can be filtered by a *glob* pattern, which is similar to a wildcard (such as ``*.py``), but can match multiple levels of a directory structure. Here's an example of globbing, which removes all the ``.pyc`` files in your project directory:: From 4c05e770c10ed5655f585a953fd3dc157af9d7ce Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Sat, 11 Aug 2018 20:03:09 +0100 Subject: [PATCH 13/13] fix and docs --- docs/source/globbing.rst | 14 ++++++++------ fs/glob.py | 14 ++++++++------ tests/test_copy.py | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/source/globbing.rst b/docs/source/globbing.rst index f6babe5b..c72392fc 100644 --- a/docs/source/globbing.rst +++ b/docs/source/globbing.rst @@ -47,12 +47,14 @@ PyFilesystem supports globbing via the ``glob`` attribute on every FS instance, which is an instance of :class:`~fs.glob.BoundGlobber`. Here's how you might use it to find all the Python files in your filesystem:: - for path, info in my_fs.glob("**/*.py"): - print(path) - -Calling ``.glob`` with a pattern will return an iterator of every -path and corresponding :class:`~fs.info.Info` for each matched file and -directory. + for match in my_fs.glob("**/*.py"): + print(f"{match.path} is {match.info.size} bytes long") + +Calling ``.glob`` with a pattern will return an iterator of +:class:`~fs.glob.GlobMatch` named tuples for each matching file or +directory. A glob match contains two attributes: ``path``, which is the +full path in the filesystem, and ``info``, which is an +:class:`fs.info.Info` object for the matched resource.
Batch Methods diff --git a/fs/glob.py b/fs/glob.py index 702357fe..09927952 100644 --- a/fs/glob.py +++ b/fs/glob.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from collections import namedtuple +from typing import Iterator, List import re from .lrucache import LRUCache @@ -13,7 +14,7 @@ 1000 ) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]] - +GlobMatch = namedtuple('GlobMatch', ["path", "info"]) Counts = namedtuple("Counts", ["files", "directories", "data"]) LineCounts = namedtuple("LineCounts", ["lines", "non_blank"]) @@ -135,6 +136,7 @@ def __repr__(self): ) def _make_iter(self, search="breadth", namespaces=None): + # type: (str, List[str]) -> Iterator[GlobMatch] try: levels, recursive, re_pattern = _PATTERN_CACHE[ (self.pattern, self.case_sensitive) @@ -154,10 +156,11 @@ def _make_iter(self, search="breadth", namespaces=None): if info.is_dir: path += "/" if re_pattern.match(path): - yield path, info + yield GlobMatch(path, info) def __iter__(self): - # type: () -> Iterator[Tuple[str, Info]] + # type: () -> Iterator[GlobMatch] + """An iterator of :class:`fs.glob.GlobMatch` objects.""" return self._make_iter() def count(self): @@ -255,7 +258,7 @@ def __call__( self, pattern, path="/", namespaces=None, case_sensitive=True, exclude_dirs=None ): # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber - """A generator of glob results. + """Match resources on the bound filesystem against a glob pattern. Arguments: pattern (str): A glob pattern, e.g. ``"**/*.py"`` @@ -268,8 +271,7 @@ def __call__( Returns: `~Globber`: - An object that may be iterated over, - yielding tuples of ``(path, info)`` for matching paths. + An object that may be queried for the glob matches.
""" diff --git a/tests/test_copy.py b/tests/test_copy.py index 0bea13a2..17e6e0da 100644 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -33,7 +33,7 @@ def test_copy_value_error(self): src_fs = open_fs("mem://") dst_fs = open_fs("mem://") with self.assertRaises(ValueError): - fs.copy.copy_fs(src_fs, dst_ds, workers=-1) + fs.copy.copy_fs(src_fs, dst_fs, workers=-1) def test_copy_dir(self): src_fs = open_fs("mem://")