44import os
55import re
66import fnmatch
7+ import functools
78import itertools
9+ import operator
810import stat
911import sys
1012

# Public API of this module; translate() is exported alongside the globbing
# functions.
__all__ = ["glob", "iglob", "escape", "translate"]
1215
1316def glob (pathname , * , root_dir = None , dir_fd = None , recursive = False ,
1417 include_hidden = False ):
@@ -104,8 +107,8 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
104107
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the names listed for *dirname* that match *pattern*.

    The candidate names come from _listdir(dirname, dir_fd, dironly).  Names
    for which _ishidden() is true are dropped unless *include_hidden* is true
    or the pattern itself is hidden according to _ishidden().  The surviving
    names are matched against *pattern* with fnmatch.filter().
    """
    names = _listdir(dirname, dir_fd, dironly)
    # Only pay for the per-name hidden check when hidden names must actually
    # be excluded.
    if not (include_hidden or _ishidden(pattern)):
        names = (x for x in names if not _ishidden(x))
    return fnmatch.filter(names, pattern)
110113
111114def _glob0 (dirname , basename , dir_fd , dironly , include_hidden = False ):
@@ -119,12 +122,19 @@ def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
119122 return [basename ]
120123 return []
121124
# Message template passed to warnings._deprecated() by glob0() and glob1().
# The {name} and {remove} placeholders are presumably filled in by that
# helper -- confirm against the warnings module.
_deprecated_function_message = (
    "{name} is deprecated and will be removed in Python {remove}. Use "
    "glob.glob and pass a directory to its root_dir argument instead."
)
123129
def glob0(dirname, pattern):
    """Deprecated wrapper around _glob0().

    Emits a deprecation notice through warnings._deprecated() (removal
    version given as (3, 15)) and then returns
    _glob0(dirname, pattern, None, False).
    """
    # Imported lazily so that importing this module does not pull in warnings.
    import warnings
    warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15))
    return _glob0(dirname, pattern, None, False)
126134
def glob1(dirname, pattern):
    """Deprecated wrapper around _glob1().

    Emits a deprecation notice through warnings._deprecated() (removal
    version given as (3, 15)) and then returns
    _glob1(dirname, pattern, None, False).
    """
    # Imported lazily so that importing this module does not pull in warnings.
    import warnings
    warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15))
    return _glob1(dirname, pattern, None, False)
129139
130140# This helper function recursively yields relative pathnames inside a literal
@@ -249,4 +259,287 @@ def escape(pathname):
249259 return drive + pathname
250260
251261
# Pattern segments that _Globber.selector() routes to special_selector()
# instead of treating them as literals or wildcards.
_special_parts = ('', '.', '..')
# Flags for opening a directory read-only; O_DIRECTORY is OR-ed in only where
# the os module provides it.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Unique sentinel: when a globber's `recursive` attribute is this object,
# recursive_selector() does not follow symlinks while walking.
_no_recurse_symlinks = object()
265+
266+
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.

    If `include_hidden` is true, wildcards can match path segments beginning
    with a dot ('.').

    If a sequence of separator characters is given to `seps`, they will be
    used to split the pattern into segments and match path separators. If not
    given, os.path.sep and os.path.altsep (where available) are used.

    The result is returned as a string: the translated pattern wrapped in a
    ``(?s:...)`` group and anchored at the end of the input.
    """
    if not seps:
        if os.path.altsep:
            seps = (os.path.sep, os.path.altsep)
        else:
            seps = os.path.sep
    escaped_seps = ''.join(map(re.escape, seps))
    # A character class is only needed when there is more than one separator.
    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
    not_sep = f'[^{escaped_seps}]'
    if include_hidden:
        one_last_segment = f'{not_sep}+'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'
    else:
        # The first character of each wildcard-matched segment may be neither
        # a separator nor a dot, so hidden segments are never matched.
        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == '*':
            results.append(one_segment if idx < last_part_idx else one_last_segment)
        elif recursive and part == '**':
            if idx < last_part_idx:
                # Consecutive '**' segments collapse: only the last one of a
                # run contributes to the expression.
                if parts[idx + 1] != '**':
                    results.append(any_segments)
            else:
                results.append(any_last_segments)
        else:
            if part:
                if not include_hidden and part[0] in '*?':
                    # A leading wildcard must not match a leading dot.
                    results.append(r'(?!\.)')
                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
            if idx < last_part_idx:
                results.append(any_sep)
    res = ''.join(results)
    return fr'(?s:{res})\Z'
320+
321+
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Compile given glob pattern to a re.Pattern object (observing case
    sensitivity).

    The pattern is translated with translate(), always with
    include_hidden=True and with *sep* as the separator set.  The return
    value is the bound ``match`` method of the compiled pattern, not the
    pattern object itself.  Results are memoised by the lru_cache decorator
    (up to 512 entries).
    """
    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match
329+
330+
class _Globber:
    """Class providing shell-style pattern matching and globbing.

    A selector is built from a list of pattern parts by selector(); each
    selector is a callable ``select(path, exists=False)`` that returns an
    iterable of matching paths.  The low-level methods below define how
    paths are inspected and combined and are overridden by subclasses.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        # Separator used when joining consumed pattern parts, and passed on
        # to _compile_pattern().
        self.sep = sep
        self.case_sensitive = case_sensitive
        # When true, literal parts are handled by the wildcard selector
        # (directory scan) rather than the literal selector.
        self.case_pedantic = case_pedantic
        # Truthy to give '**' its recursive meaning; the _no_recurse_symlinks
        # sentinel additionally disables following symlinks.
        self.recursive = recursive

    # Low-level methods

    # Calls path.lstat().
    lstat = operator.methodcaller('lstat')
    # Calls path.joinpath('').
    add_slash = operator.methodcaller('joinpath', '')

    @staticmethod
    def scandir(path):
        """Emulates os.scandir(), which returns an object that can be used as
        a context manager. This method is called by walk() and glob().
        """
        return contextlib.nullcontext(path.iterdir())

    @staticmethod
    def concat_path(path, text):
        """Appends text to the given path.
        """
        return path.with_segments(path._raw_path + text)

    @staticmethod
    def parse_entry(entry):
        """Returns the path of an entry yielded from scandir().
        """
        return entry

    # High-level methods

    def compile(self, pat):
        """Returns a match function for the glob pattern *pat*, built by
        _compile_pattern() with this globber's separator, case sensitivity
        and recursive setting.
        """
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.

        Parts are consumed from the END of the list with pop(), and the list
        is mutated in place.
        """
        if not parts:
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations and calls to add_slash().
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            # The joined literal path has not been checked, so the incoming
            # *exists* hint is deliberately not propagated.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        # A bare '*' matches every entry name, so no regex is needed.
        match = None if part == '*' else self.compile(part)
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                # Best effort: a directory that cannot be scanned simply
                # yields no results.
                pass
            else:
                for entry in entries:
                    if match is None or match(entry.name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                continue
                        entry_path = self.parse_entry(entry)
                        if dir_only:
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        # A bare '**' needs no filtering of the walked paths.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # Matching starts after the starting path's own text, so only the
            # portion below it is tested against the pattern.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            # Explicit stack instead of recursion for the directory walk.
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            path = stack.pop()
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                # Best effort: unreadable directories are skipped.
                pass
            else:
                for entry in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        pass

                    if is_dir or not dir_only:
                        entry_path = self.parse_entry(entry)
                        if match is None or match(str(entry_path), match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        if is_dir:
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        else:
            try:
                self.lstat(path)
                yield path
            except OSError:
                pass
525+
526+
class _StringGlobber(_Globber):
    """Globber whose paths are plain strings.

    Overrides the low-level methods of _Globber with their os-module
    equivalents: os.lstat() and os.scandir() for filesystem access, the
    ``path`` attribute of each scandir entry as the result path, and string
    concatenation for joining.
    """
    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    parse_entry = operator.attrgetter('path')
    concat_path = operator.add

    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing backslash appended, unless
            the part after the drive and root is empty or already ends with
            a slash or backslash.
            """
            tail = os.path.splitroot(pathname)[2]
            if not tail or tail[-1] in '\\/':
                return pathname
            return f'{pathname}\\'
    else:
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing '/' appended, unless it is
            empty or already ends with '/'.
            """
            if not pathname or pathname[-1] == '/':
                return pathname
            return f'{pathname}/'
0 commit comments