From 241a6bf70449798a6a7c3b1b87a7e64d025ff144 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 9 Oct 2025 10:52:59 +0000 Subject: [PATCH 1/4] ruff --- pcre/__init__.py | 12 ++++++------ pcre/pcre.py | 12 +++++------- setup.py | 1 + tests/test_bench_string.py | 1 + tests/test_cache.py | 1 + tests/test_core.py | 4 ++-- tests/test_jit.py | 5 ++--- tests/test_module.py | 3 +-- tests/test_simd.py | 2 +- tests/test_transformers_regex.py | 6 ++++-- 10 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pcre/__init__.py b/pcre/__init__.py index 87047b3..35749a6 100644 --- a/pcre/__init__.py +++ b/pcre/__init__.py @@ -17,29 +17,29 @@ from typing import Any from . import cpcre2 -from .flags import PY_ONLY_FLAG_MEMBERS from .cache import get_cache_limit, set_cache_limit -from .threads import configure_threads +from .flags import PY_ONLY_FLAG_MEMBERS from .pcre import ( Match, Pattern, PcreError, clear_cache, - configure_thread_pool, - configure, compile, + configure, + configure_thread_pool, findall, finditer, - module_fullmatch, fullmatch, - parallel_map, match, + module_fullmatch, + parallel_map, search, shutdown_thread_pool, split, sub, subn, ) +from .threads import configure_threads __version__ = getattr(cpcre2, "__version__", "0.0") diff --git a/pcre/pcre.py b/pcre/pcre.py index 5372437..48380f4 100644 --- a/pcre/pcre.py +++ b/pcre/pcre.py @@ -25,13 +25,6 @@ THREADS, strip_py_only_flags, ) -from .threads import ( - configure_thread_pool, - ensure_thread_pool, - get_auto_threshold, - get_thread_default, - shutdown_thread_pool, -) from .re_compat import ( Match, TemplatePatternStub, @@ -48,6 +41,11 @@ render_template, resolve_endpos, ) +from .threads import ( + ensure_thread_pool, + get_auto_threshold, + get_thread_default, +) _CPattern = _pcre2.Pattern diff --git a/setup.py b/setup.py index 0fa0eb0..f7a0a1c 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ from setuptools import Extension, setup + try: from setuptools._distutils.ccompiler import 
CCompiler, new_compiler from setuptools._distutils.errors import CCompilerError, DistutilsExecError diff --git a/tests/test_bench_string.py b/tests/test_bench_string.py index 3986e9a..35dcc94 100644 --- a/tests/test_bench_string.py +++ b/tests/test_bench_string.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Dict + try: from pcre import cpcre2 except ImportError: diff --git a/tests/test_cache.py b/tests/test_cache.py index 7607b4c..202bf47 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -11,6 +11,7 @@ import pcre from pcre.cache import _PATTERN_CACHE + _PATTERN_SUBJECTS: List[Tuple[Any, Any]] = [ (r"(foo)(bar)", "foobar foo foobar"), (r"(?P\\w+)", "Hello world from Python"), diff --git a/tests/test_core.py b/tests/test_core.py index 47b4c4b..c946ddf 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2,8 +2,8 @@ from collections import OrderedDict import pytest -from pcre import cache as cache_mod from pcre import Flag +from pcre import cache as cache_mod from pcre import pcre as core from pcre.flags import strip_py_only_flags @@ -283,7 +283,7 @@ def test_pattern_match_handles_optional_end(): def test_configure_updates_default_jit(monkeypatch): - calls = [] + pass def test_pattern_search_and_fullmatch_delegate(): search_method = MethodRecorder(FakeMatch((2, 4), group0="search-result")) diff --git a/tests/test_jit.py b/tests/test_jit.py index caff5ff..0488886 100644 --- a/tests/test_jit.py +++ b/tests/test_jit.py @@ -3,9 +3,8 @@ import types -import pytest - import pcre +import pytest from pcre import Flag from pcre import pcre as core @@ -142,7 +141,7 @@ def fake_cached(pattern, flags, wrapper, *, jit): second = pcre.compile("expr", flags=Flag.NO_JIT) third = pcre.compile("expr") - assert [j for j in captured["jits"]] == [True, False, True] + assert list(captured["jits"]) == [True, False, True] assert first.jit is True assert second.jit is False assert third.jit is True diff --git a/tests/test_module.py b/tests/test_module.py 
index fa8318c..efbfabb 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -1,6 +1,5 @@ -import pytest - import pcre +import pytest from pcre import Flag diff --git a/tests/test_simd.py b/tests/test_simd.py index 86d84b5..65abbb3 100644 --- a/tests/test_simd.py +++ b/tests/test_simd.py @@ -1,7 +1,7 @@ import platform -import pytest import pcre +import pytest def test_ascii_vector_mode_exposed(): diff --git a/tests/test_transformers_regex.py b/tests/test_transformers_regex.py index b214ffe..2f6edb5 100644 --- a/tests/test_transformers_regex.py +++ b/tests/test_transformers_regex.py @@ -1,8 +1,10 @@ +import json import os import unittest -import regex -import json + import pcre +import regex + class TestTransformersRegex(unittest.TestCase): From 8b666ea55e1ecb92c12619a863a134258dec612c Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 9 Oct 2025 11:05:10 +0000 Subject: [PATCH 2/4] cleanup --- pcre/__init__.py | 4 +- setup.py | 827 +--------------------------------------------- setup_utils.py | 833 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 839 insertions(+), 825 deletions(-) create mode 100644 setup_utils.py diff --git a/pcre/__init__.py b/pcre/__init__.py index 35749a6..ae1e0b0 100644 --- a/pcre/__init__.py +++ b/pcre/__init__.py @@ -26,7 +26,6 @@ clear_cache, compile, configure, - configure_thread_pool, findall, finditer, fullmatch, @@ -34,11 +33,12 @@ module_fullmatch, parallel_map, search, - shutdown_thread_pool, split, sub, subn, ) + +from .threads import configure_thread_pool, shutdown_thread_pool from .threads import configure_threads diff --git a/setup.py b/setup.py index f7a0a1c..e66d2a6 100644 --- a/setup.py +++ b/setup.py @@ -5,841 +5,22 @@ from __future__ import annotations -import os -import platform -import shlex -import shutil -import subprocess import sys -import tempfile -from collections.abc import Callable from pathlib import Path from setuptools import Extension, setup - -try: - from 
setuptools._distutils.ccompiler import CCompiler, new_compiler - from setuptools._distutils.errors import CCompilerError, DistutilsExecError - from setuptools._distutils.sysconfig import customize_compiler -except ImportError: # pragma: no cover - fallback for older Python environments - from distutils.ccompiler import CCompiler, new_compiler # type: ignore - from distutils.errors import CCompilerError, DistutilsExecError # type: ignore - from distutils.sysconfig import customize_compiler # type: ignore - - ROOT_DIR = Path(__file__).resolve().parent -PCRE_EXT_DIR = ROOT_DIR / "pcre_ext" -PCRE2_REPO_URL = "https://github.com/PCRE2Project/pcre2.git" -PCRE2_TAG = "pcre2-10.46" - - -MODULE_SOURCES = [ - "pcre_ext/pcre2.c", - "pcre_ext/error.c", - "pcre_ext/cache.c", - "pcre_ext/flag.c", - "pcre_ext/util.c", - "pcre_ext/memory.c", -] - -LIB_EXTENSIONS = [ - ".so", - ".so.0", - ".so.1", - ".a", - ".dylib", - ".sl", -] - -LIBRARY_BASENAME = "libpcre2-8" - - -def _run_pkg_config(*args: str) -> list[str]: - try: - result = subprocess.run( - ["pkg-config", *args, "libpcre2-8"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - except (FileNotFoundError, subprocess.CalledProcessError): - return [] - return shlex.split(result.stdout.strip()) - - -def _run_pkg_config_var(argument: str) -> str | None: - try: - result = subprocess.run( - ["pkg-config", argument, "libpcre2-8"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - except (FileNotFoundError, subprocess.CalledProcessError): - return None - return result.stdout.strip() or None - - -def _run_command(command: list[str]) -> str | None: - try: - result = subprocess.run( - command, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - except (FileNotFoundError, subprocess.CalledProcessError): - return None - return result.stdout.strip() or None - - -_COMPILER_INITIALIZED = False -_COMPILER_INSTANCE: CCompiler | None = None 
-_COMPILER_FLAG_CACHE: dict[str, bool] = {} -_TRUTHY_VALUES = {"1", "true", "yes", "on"} - - -def _is_truthy_env(name: str) -> bool: - value = os.environ.get(name) - if value is None: - return False - return value.strip().lower() in _TRUTHY_VALUES - - -def _is_windows_platform() -> bool: - return sys.platform.startswith("win") or os.name == "nt" - - -def _is_wsl_environment() -> bool: - if not sys.platform.startswith("linux"): - return False - if os.environ.get("WSL_DISTRO_NAME"): - return True - try: - release = platform.release() - except Exception: - return False - return "microsoft" in release.lower() - - -def _prepare_pcre2_source() -> tuple[list[str], list[str], list[str]]: - if _is_windows_platform() and not _is_wsl_environment(): - os.environ["PCRE2_BUILD_FROM_SOURCE"] = "1" - - if not _is_truthy_env("PCRE2_BUILD_FROM_SOURCE"): - return ([], [], []) - - destination = PCRE_EXT_DIR / PCRE2_TAG - git_dir = destination / ".git" - - if destination.exists() and not git_dir.is_dir(): - raise RuntimeError( - f"Existing directory {destination} is not a git checkout; remove or rename it before building" - ) - - if not destination.exists(): - clone_command = [ - "git", - "clone", - "--depth", - "1", - "--branch", - PCRE2_TAG, - "--recurse-submodules", - "--shallow-submodules", - PCRE2_REPO_URL, - str(destination), - ] - try: - subprocess.run(clone_command, check=True) - except FileNotFoundError as exc: # pragma: no cover - git missing on build host - raise RuntimeError("git is required to fetch PCRE2 sources when PCRE2_BUILD_FROM_SOURCE=1") from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - "Failed to clone PCRE2 source from official repository; see the output above for details" - ) from exc - - try: - subprocess.run( - ["git", "submodule", "update", "--init", "--recursive"], - cwd=destination, - check=True, - ) - except FileNotFoundError as exc: # pragma: no cover - git missing on build host - raise RuntimeError("git with submodule 
support is required to fetch PCRE2 dependencies") from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - "Failed to update PCRE2 git submodules; see the output above for details" - ) from exc - - build_dir = destination / "build" - build_roots = [ - destination, - destination / ".libs", - destination / "src", - destination / "src" / ".libs", - build_dir, - build_dir / "lib", - build_dir / "bin", - build_dir / "Release", - build_dir / "Debug", - build_dir / "RelWithDebInfo", - build_dir / "MinSizeRel", - ] - - def _has_built_library() -> bool: - patterns = [ - "libpcre2-8.so", - "libpcre2-8.so.*", - "libpcre2-8.a", - "libpcre2-8.dylib", - "libpcre2-8.lib", - "pcre2-8.dll", - ] - for root in build_roots: - if not root.exists(): - continue - for pattern in patterns: - if any(root.glob(f"**/{pattern}")): - return True - return False - - if not _has_built_library(): - env = os.environ.copy() - build_succeeded = False - cmake_error: Exception | None = None - - if shutil.which("cmake"): - try: - cmake_args = [ - "cmake", - "-S", - str(destination), - "-B", - str(build_dir), - "-DPCRE2_SUPPORT_JIT=ON", - "-DPCRE2_BUILD_PCRE2_16=ON", - "-DPCRE2_BUILD_TESTS=OFF", - "-DBUILD_SHARED_LIBS=ON", - ] - if not _is_windows_platform(): - cmake_args.append("-DCMAKE_BUILD_TYPE=Release") - subprocess.run(cmake_args, cwd=destination, env=env, check=True) - - build_command = [ - "cmake", - "--build", - str(build_dir), - ] - if _is_windows_platform(): - build_command.extend(["--config", "Release"]) - build_command.extend(["--", "-j4"]) - subprocess.run(build_command, cwd=destination, env=env, check=True) - except (FileNotFoundError, subprocess.CalledProcessError) as exc: - cmake_error = exc - else: - build_succeeded = True - - if not build_succeeded: - autoconf_script = destination / "configure" - autoconf_ready = autoconf_script.exists() and not _is_windows_platform() - - if autoconf_ready: - build_dir.mkdir(parents=True, exist_ok=True) - try: - configure_command 
= [ - str(autoconf_script), - "--enable-jit", - "--enable-pcre2-8", - "--disable-tests", - ] - subprocess.run(configure_command, cwd=build_dir, env=env, check=True) - subprocess.run(["make", "-j4"], cwd=build_dir, env=env, check=True) - except FileNotFoundError as exc: - raise RuntimeError( - "Building PCRE2 from source via Autoconf requires the GNU build toolchain (configure/make) to be available on PATH" - ) from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - "Failed to build PCRE2 from source using Autoconf; see the output above for details" - ) from exc - else: - build_succeeded = True - elif cmake_error is not None and isinstance(cmake_error, subprocess.CalledProcessError): - raise RuntimeError( - "Failed to build PCRE2 from source; see the output above for details" - ) from cmake_error - - if not build_succeeded: - raise RuntimeError( - "PCRE2 build tooling was not found. Install CMake or Autoconf (configure/make) to build from source." - ) - - header_source = destination / "src" / "pcre2.h.generic" - header_target = destination / "src" / "pcre2.h" - if header_source.exists() and not header_target.exists(): - shutil.copy2(header_source, header_target) - - include_target = PCRE_EXT_DIR / "pcre2.h" - if header_target.exists(): - shutil.copy2(header_target, include_target) - - include_dirs: list[str] = [] - library_dirs: list[str] = [] - library_files: list[str] = [] - seen_includes: set[str] = set() - seen_lib_dirs: set[str] = set() - seen_lib_files: set[str] = set() - - def _add_include(path: Path) -> None: - path = path.resolve() - path_str = str(path) - if path.is_dir() and path_str not in seen_includes: - include_dirs.append(path_str) - seen_includes.add(path_str) - - def _add_library_file(path: Path) -> None: - path = path.resolve() - if not path.is_file(): - return - path_str = str(path) - if path_str not in seen_lib_files: - library_files.append(path_str) - seen_lib_files.add(path_str) - parent = str(path.parent.resolve()) - 
if parent not in seen_lib_dirs: - library_dirs.append(parent) - seen_lib_dirs.add(parent) - - include_dir = destination / "src" - _add_include(include_dir) - - search_roots = [ - destination, - destination / "src", - destination / ".libs", - destination / "src" / ".libs", - build_dir, - build_dir / "lib", - build_dir / "bin", - build_dir / "Release", - build_dir / "Debug", - build_dir / "RelWithDebInfo", - build_dir / "MinSizeRel", - ] - search_patterns = [ - f"**/{LIBRARY_BASENAME}.lib", - f"**/{LIBRARY_BASENAME}.a", - f"**/{LIBRARY_BASENAME}.so", - f"**/{LIBRARY_BASENAME}.so.*", - f"**/{LIBRARY_BASENAME}.dylib", - "**/pcre2-8.lib", - "**/pcre2-8.dll", - "**/pcre2-8-static.lib", - "**/pcre2-8-static.dll", - ] - - for root in search_roots: - if not root.exists(): - continue - for pattern in search_patterns: - for path in root.glob(pattern): - _add_library_file(path) - - if not library_files: - raise RuntimeError( - "PCRE2 build did not produce any libpcre2-8 artifacts; check the build output for errors" - ) - - return (include_dirs, library_dirs, library_files) - - -def _get_test_compiler() -> CCompiler | None: - global _COMPILER_INITIALIZED, _COMPILER_INSTANCE - if _COMPILER_INITIALIZED: - return _COMPILER_INSTANCE - _COMPILER_INITIALIZED = True - try: - compiler = new_compiler() - customize_compiler(compiler) - except Exception: - _COMPILER_INSTANCE = None - else: - _COMPILER_INSTANCE = compiler - return _COMPILER_INSTANCE - - -def _extract_macos_architectures(command: list[str] | tuple[str, ...] 
| None) -> list[str]: - if not isinstance(command, (list, tuple)): - return [] - arches: list[str] = [] - iterator = iter(command) - for token in iterator: - if token != "-arch": - continue - arch = next(iterator, "") - if arch: - arches.append(arch) - return arches - - -def _macos_compiler_architectures(compiler: CCompiler | None) -> set[str]: - arches: set[str] = set() - if compiler is not None: - for attr in ("compiler", "compiler_so", "compiler_cxx", "linker_so"): - arches.update(_extract_macos_architectures(getattr(compiler, attr, None))) - archflags = os.environ.get("ARCHFLAGS") - if archflags: - arches.update(_extract_macos_architectures(tuple(shlex.split(archflags)))) - for env_name in ("CFLAGS", "CPPFLAGS"): - value = os.environ.get(env_name) - if value: - arches.update(_extract_macos_architectures(tuple(shlex.split(value)))) - return {arch for arch in arches if arch} - - -def _is_x86_architecture(arch: str) -> bool: - normalized = arch.lower() - return normalized in {"x86_64", "x86_64h", "i386", "i486", "i586", "i686", "amd64"} - - -def _should_disable_native_flags_for_macos(compiler: CCompiler | None) -> bool: - if sys.platform != "darwin": - return False - arches = _macos_compiler_architectures(compiler) - if not arches: - machine = platform.machine() - if machine: - arches.add(machine) - if not arches: - return False - if len(arches) > 1: - return True - arch = next(iter(arches)) - return not _is_x86_architecture(arch) - - -def _compiler_supports_flag(flag: str) -> bool: - cached = _COMPILER_FLAG_CACHE.get(flag) - if cached is not None: - return cached - - compiler = _get_test_compiler() - if compiler is None: - _COMPILER_FLAG_CACHE[flag] = False - return False - - with tempfile.TemporaryDirectory() as tmpdir: - source = Path(tmpdir) / "flag_check.c" - source.write_text("int main(void) { return 0; }\n", encoding="utf-8") - try: - compiler.compile( - [str(source)], - output_dir=tmpdir, - extra_postargs=[flag], - ) - except (CCompilerError, 
DistutilsExecError, OSError): - _COMPILER_FLAG_CACHE[flag] = False - else: - _COMPILER_FLAG_CACHE[flag] = True - return _COMPILER_FLAG_CACHE[flag] - - -def _augment_compile_flags(flags: list[str]) -> None: - if _is_truthy_env("PCRE2_DISABLE_OPT_FLAGS"): - return - - disable_native = _is_truthy_env("PCRE2_DISABLE_NATIVE_FLAGS") - compiler = _get_test_compiler() - if not disable_native and _should_disable_native_flags_for_macos(compiler): - # Apple universal builds (arm64 + x86_64) and arm64-only builds reject x86 specific flags. - disable_native = True - candidate_flags: list[tuple[str, bool]] = [ - ("-O3", False), - ("-march=native", True), - ("-mtune=native", True), - ("-fomit-frame-pointer", False), - ("-funroll-loops", False), - #("-falign-loops=32", False), - ] +if str(ROOT_DIR) not in sys.path: + sys.path.insert(0, str(ROOT_DIR)) - seen = set(flags) - for flag, requires_native in candidate_flags: - if requires_native and disable_native: - continue - if flag in seen: - continue - if not _compiler_supports_flag(flag): - continue - flags.append(flag) - seen.add(flag) - - -def _homebrew_prefixes() -> list[Path]: - if sys.platform != "darwin": - return [] - - prefixes: list[Path] = [] - for args in (["brew", "--prefix", "pcre2"], ["brew", "--prefix"]): - output = _run_command(args) - if not output: - continue - path = Path(output) - if path.exists(): - prefixes.append(path) - return prefixes - - -def _linux_multiarch_dirs() -> list[str]: - arch = platform.machine() - mapping = { - "x86_64": ["x86_64-linux-gnu"], - "aarch64": ["aarch64-linux-gnu"], - "arm64": ["aarch64-linux-gnu"], - "armv7l": ["arm-linux-gnueabihf"], - "armv6l": ["arm-linux-gnueabihf"], - "armv8l": ["arm-linux-gnueabihf"], - "i686": ["i386-linux-gnu"], - "i386": ["i386-linux-gnu"], - "ppc64le": ["powerpc64le-linux-gnu"], - "s390x": ["s390x-linux-gnu"], - } - return mapping.get(arch, []) - - -def _platform_prefixes() -> list[Path]: - prefixes: list[Path] = [] - - env_root = 
os.environ.get("PCRE2_ROOT") - if env_root: - for value in env_root.split(os.pathsep): - path = Path(value) - if path.exists(): - prefixes.append(path) - - if sys.platform.startswith("linux"): - prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) - elif sys.platform == "darwin": - prefixes.extend(_homebrew_prefixes()) - prefixes.extend(Path(p) for p in ("/opt/homebrew", "/usr/local", "/usr")) - elif sys.platform.startswith("freebsd"): - prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) - elif sys.platform.startswith("sunos") or sys.platform.startswith("solaris"): - prefixes.extend(Path(p) for p in ("/usr", "/usr/local", "/opt/local")) - else: - prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) - - seen: set[Path] = set() - ordered: list[Path] = [] - for prefix in prefixes: - if prefix not in seen: - ordered.append(prefix) - seen.add(prefix) - return ordered - - -def _platform_library_subdirs() -> list[str]: - subdirs = ["lib", "lib64", "lib32", "lib/pcre2"] - - if sys.platform.startswith("linux"): - for multiarch in _linux_multiarch_dirs(): - subdirs.append(f"lib/{multiarch}") - subdirs.extend([ - "lib/x86_64-linux-gnu", - "lib/i386-linux-gnu", - "lib/aarch64-linux-gnu", - "lib/arm-linux-gnueabihf", - ]) - elif sys.platform.startswith("sunos") or sys.platform.startswith("solaris"): - subdirs.extend(["lib/64", "lib/amd64"]) - - seen: set[str] = set() - ordered: list[str] = [] - for subdir in subdirs: - if subdir not in seen: - ordered.append(subdir) - seen.add(subdir) - return ordered - - -def _extend_unique(target: list[str], value: str) -> None: - if value and value not in target: - target.append(value) - - -def _extend_with_existing( - target: list[str], - candidates: list[Path], - predicate: Callable[[Path], bool] | None = None, -) -> None: - for candidate in candidates: - if not candidate.is_dir(): - continue - if predicate is not None and not predicate(candidate): - continue - _extend_unique(target, str(candidate)) - - -def 
_header_exists(directory: Path) -> bool: - return (directory / "pcre2.h").exists() - - -def _library_exists(directory: Path) -> bool: - return _locate_library_file(directory) is not None - - -def _locate_library_file(directory: Path) -> Path | None: - if not directory.exists(): - return None - for extension in LIB_EXTENSIONS: - candidate = directory / f"{LIBRARY_BASENAME}{extension}" - if candidate.exists(): - return candidate - for candidate in directory.glob(f"{LIBRARY_BASENAME}.so.*"): - if candidate.exists(): - return candidate - fallback = directory / f"{LIBRARY_BASENAME}.dll" - if fallback.exists(): - return fallback - return None - - -def _find_library_with_pkg_config() -> list[str]: - library_files: list[str] = [] - libfile = _run_pkg_config_var("--variable=libfile") - if libfile: - path = Path(libfile) - if path.exists(): - library_files.append(str(path)) - if not library_files: - libdir = _run_pkg_config_var("--variable=libdir") - if libdir: - candidate = _locate_library_file(Path(libdir)) - if candidate is not None: - library_files.append(str(candidate)) - return library_files - - -def _find_library_with_ldconfig() -> list[str]: - if not sys.platform.startswith("linux"): - return [] - output = _run_command(["ldconfig", "-p"]) - if not output: - return [] - for line in output.splitlines(): - if "libpcre2-8.so" not in line: - continue - parts = line.strip().split(" => ") - if len(parts) != 2: - continue - path = Path(parts[1].strip()) - if path.exists(): - return [str(path)] - return [] - - -def _find_library_with_brew() -> list[str]: - if sys.platform != "darwin": - return [] - library_files: list[str] = [] - for prefix in _homebrew_prefixes(): - candidate = _locate_library_file(prefix / "lib") - if candidate is not None: - library_files.append(str(candidate)) - return library_files - - -def _discover_include_dirs() -> list[str]: - prefixes = _platform_prefixes() - candidates: list[Path] = [] - for prefix in prefixes: - candidates.extend( - [ - prefix / 
"include", - prefix / "include/pcre2", - ] - ) - include_dirs: list[str] = [] - _extend_with_existing(include_dirs, candidates, _header_exists) - return include_dirs - - -def _discover_library_dirs() -> list[str]: - prefixes = _platform_prefixes() - candidates: list[Path] = [] - subdirs = _platform_library_subdirs() - for prefix in prefixes: - for subdir in subdirs: - candidates.append(prefix / subdir) - library_dirs: list[str] = [] - _extend_with_existing(library_dirs, candidates, _library_exists) - return library_dirs - - -def _has_header(include_dirs: list[str]) -> bool: - for directory in include_dirs: - if _header_exists(Path(directory)): - return True - return False - - -def _has_library(library_dirs: list[str]) -> bool: - for directory in library_dirs: - if _library_exists(Path(directory)): - return True - return False - - -def _collect_build_config() -> dict[str, list[str] | list[tuple[str, str | None]]]: - include_dirs: list[str] = [] - library_dirs: list[str] = [] - libraries: list[str] = [] - extra_compile_args: list[str] = [] - extra_link_args: list[str] = [] - define_macros: list[tuple[str, str | None]] = [] - library_files: list[str] = [] - - source_include_dirs, source_library_dirs, source_library_files = _prepare_pcre2_source() - for directory in source_include_dirs: - _extend_unique(include_dirs, directory) - for directory in source_library_dirs: - _extend_unique(library_dirs, directory) - for path in source_library_files: - _extend_unique(library_files, path) - - cflags = _run_pkg_config("--cflags") - libs = _run_pkg_config("--libs") - - for flag in cflags: - if flag.startswith("-I") and len(flag) > 2: - _extend_unique(include_dirs, flag[2:]) - elif flag.startswith("-D") and len(flag) > 2: - name_value = flag[2:].split("=", 1) - define_macros.append((name_value[0], name_value[1] if len(name_value) > 1 else None)) - else: - extra_compile_args.append(flag) - - for flag in libs: - if flag.startswith("-L") and len(flag) > 2: - 
_extend_unique(library_dirs, flag[2:]) - elif flag.startswith("-l") and len(flag) > 2: - _extend_unique(libraries, flag[2:]) - else: - extra_link_args.append(flag) - - env_include = os.environ.get("PCRE2_INCLUDE_DIR") - if env_include: - for path in env_include.split(os.pathsep): - _extend_unique(include_dirs, path) - - env_lib = os.environ.get("PCRE2_LIBRARY_DIR") - if env_lib: - for path in env_lib.split(os.pathsep): - _extend_unique(library_dirs, path) - - env_lib_path = os.environ.get("PCRE2_LIBRARY_PATH") - if env_lib_path: - for raw_path in env_lib_path.split(os.pathsep): - candidate = raw_path.strip() - if not candidate: - continue - path = Path(candidate) - if path.is_file() or any(candidate.endswith(ext) for ext in LIB_EXTENSIONS): - _extend_unique(library_files, str(path)) - parent = str(path.parent) - if parent: - _extend_unique(library_dirs, parent) - else: - _extend_unique(library_dirs, candidate) - - env_libs = os.environ.get("PCRE2_LIBRARIES") - if env_libs: - for name in env_libs.split(os.pathsep): - _extend_unique(libraries, name) - - if not library_files: - for path in _find_library_with_pkg_config(): - _extend_unique(library_files, path) - - if not library_files: - directory_candidates = [Path(p) for p in library_dirs] - directory_candidates.extend(Path(p) for p in _discover_library_dirs()) - for directory in directory_candidates: - located = _locate_library_file(directory) - if located is not None: - _extend_unique(library_files, str(located)) - break - - if not library_files: - for path in _find_library_with_ldconfig(): - _extend_unique(library_files, path) - - if not library_files: - for path in _find_library_with_brew(): - _extend_unique(library_files, path) - - env_cflags = os.environ.get("PCRE2_CFLAGS") - if env_cflags: - extra_compile_args.extend(shlex.split(env_cflags)) - - env_ldflags = os.environ.get("PCRE2_LDFLAGS") - if env_ldflags: - extra_link_args.extend(shlex.split(env_ldflags)) - - if not any(flag.startswith("-std=") for flag in 
extra_compile_args): - extra_compile_args.append("-std=c99") - - if not _has_header(include_dirs): - include_dirs.extend(_discover_include_dirs()) - - if not _has_library(library_dirs): - library_dirs.extend(_discover_library_dirs()) - - if library_files: - linkable_files: list[str] = [] - for path in library_files: - suffix = Path(path).suffix.lower() - if suffix == ".dll": - continue - linkable_files.append(path) - - if linkable_files: - libraries = [lib for lib in libraries if lib != "pcre2-8"] - for path in linkable_files: - _extend_unique(extra_link_args, path) - parent = str(Path(path).parent) - if parent: - _extend_unique(library_dirs, parent) - elif "pcre2-8" not in libraries: - libraries.append("pcre2-8") - elif "pcre2-8" not in libraries: - libraries.append("pcre2-8") - - if sys.platform.startswith("linux") and "dl" not in libraries: - libraries.append("dl") - - _augment_compile_flags(extra_compile_args) - print(extra_compile_args) - - return { - "include_dirs": include_dirs, - "library_dirs": library_dirs, - "libraries": libraries, - "extra_compile_args": extra_compile_args, - "extra_link_args": extra_link_args, - "define_macros": define_macros, - } +from setup_utils import MODULE_SOURCES, collect_build_config EXTENSION = Extension( name="pcre.cpcre2", sources=MODULE_SOURCES, - **_collect_build_config(), + **collect_build_config(), ) - setup(ext_modules=[EXTENSION]) diff --git a/setup_utils.py b/setup_utils.py new file mode 100644 index 0000000..0b1451c --- /dev/null +++ b/setup_utils.py @@ -0,0 +1,833 @@ +# SPDX-FileCopyrightText: 2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from __future__ import annotations + +import os +import platform +import shlex +import shutil +import subprocess +import sys +import tempfile +from collections.abc import Callable +from pathlib import Path + +try: + from setuptools._distutils.ccompiler import 
CCompiler, new_compiler + from setuptools._distutils.errors import CCompilerError, DistutilsExecError + from setuptools._distutils.sysconfig import customize_compiler +except ImportError: # pragma: no cover - fallback for older Python environments + from distutils.ccompiler import CCompiler, new_compiler # type: ignore + from distutils.errors import CCompilerError, DistutilsExecError # type: ignore + from distutils.sysconfig import customize_compiler # type: ignore + + +ROOT_DIR = Path(__file__).resolve().parent +PCRE_EXT_DIR = ROOT_DIR / "pcre_ext" +PCRE2_REPO_URL = "https://github.com/PCRE2Project/pcre2.git" +PCRE2_TAG = "pcre2-10.46" + + +MODULE_SOURCES = [ + "pcre_ext/pcre2.c", + "pcre_ext/error.c", + "pcre_ext/cache.c", + "pcre_ext/flag.c", + "pcre_ext/util.c", + "pcre_ext/memory.c", +] + +LIB_EXTENSIONS = [ + ".so", + ".so.0", + ".so.1", + ".a", + ".dylib", + ".sl", +] + +LIBRARY_BASENAME = "libpcre2-8" + +__all__ = ["MODULE_SOURCES", "collect_build_config"] + + +def _run_pkg_config(*args: str) -> list[str]: + try: + result = subprocess.run( + ["pkg-config", *args, "libpcre2-8"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return [] + return shlex.split(result.stdout.strip()) + + +def _run_pkg_config_var(argument: str) -> str | None: + try: + result = subprocess.run( + ["pkg-config", argument, "libpcre2-8"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return None + return result.stdout.strip() or None + + +def _run_command(command: list[str]) -> str | None: + try: + result = subprocess.run( + command, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return None + return result.stdout.strip() or None + + +_COMPILER_INITIALIZED = False +_COMPILER_INSTANCE: CCompiler 
| None = None +_COMPILER_FLAG_CACHE: dict[str, bool] = {} +_TRUTHY_VALUES = {"1", "true", "yes", "on"} + + +def _is_truthy_env(name: str) -> bool: + value = os.environ.get(name) + if value is None: + return False + return value.strip().lower() in _TRUTHY_VALUES + + +def _is_windows_platform() -> bool: + return sys.platform.startswith("win") or os.name == "nt" + + +def _is_wsl_environment() -> bool: + if not sys.platform.startswith("linux"): + return False + if os.environ.get("WSL_DISTRO_NAME"): + return True + try: + release = platform.release() + except Exception: + return False + return "microsoft" in release.lower() + + +def _prepare_pcre2_source() -> tuple[list[str], list[str], list[str]]: + if _is_windows_platform() and not _is_wsl_environment(): + os.environ["PCRE2_BUILD_FROM_SOURCE"] = "1" + + if not _is_truthy_env("PCRE2_BUILD_FROM_SOURCE"): + return ([], [], []) + + destination = PCRE_EXT_DIR / PCRE2_TAG + git_dir = destination / ".git" + + if destination.exists() and not git_dir.is_dir(): + raise RuntimeError( + f"Existing directory {destination} is not a git checkout; remove or rename it before building" + ) + + if not destination.exists(): + clone_command = [ + "git", + "clone", + "--depth", + "1", + "--branch", + PCRE2_TAG, + "--recurse-submodules", + "--shallow-submodules", + PCRE2_REPO_URL, + str(destination), + ] + try: + subprocess.run(clone_command, check=True) + except FileNotFoundError as exc: # pragma: no cover - git missing on build host + raise RuntimeError("git is required to fetch PCRE2 sources when PCRE2_BUILD_FROM_SOURCE=1") from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + "Failed to clone PCRE2 source from official repository; see the output above for details" + ) from exc + + try: + subprocess.run( + ["git", "submodule", "update", "--init", "--recursive"], + cwd=destination, + check=True, + ) + except FileNotFoundError as exc: # pragma: no cover - git missing on build host + raise RuntimeError("git with 
submodule support is required to fetch PCRE2 dependencies") from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + "Failed to update PCRE2 git submodules; see the output above for details" + ) from exc + + build_dir = destination / "build" + build_roots = [ + destination, + destination / ".libs", + destination / "src", + destination / "src" / ".libs", + build_dir, + build_dir / "lib", + build_dir / "bin", + build_dir / "Release", + build_dir / "Debug", + build_dir / "RelWithDebInfo", + build_dir / "MinSizeRel", + ] + + def _has_built_library() -> bool: + patterns = [ + "libpcre2-8.so", + "libpcre2-8.so.*", + "libpcre2-8.a", + "libpcre2-8.dylib", + "libpcre2-8.lib", + "pcre2-8.dll", + ] + for root in build_roots: + if not root.exists(): + continue + for pattern in patterns: + if any(root.glob(f"**/{pattern}")): + return True + return False + + if not _has_built_library(): + env = os.environ.copy() + build_succeeded = False + cmake_error: Exception | None = None + + if shutil.which("cmake"): + try: + cmake_args = [ + "cmake", + "-S", + str(destination), + "-B", + str(build_dir), + "-DPCRE2_SUPPORT_JIT=ON", + "-DPCRE2_BUILD_PCRE2_16=ON", + "-DPCRE2_BUILD_TESTS=OFF", + "-DBUILD_SHARED_LIBS=ON", + ] + if not _is_windows_platform(): + cmake_args.append("-DCMAKE_BUILD_TYPE=Release") + subprocess.run(cmake_args, cwd=destination, env=env, check=True) + + build_command = [ + "cmake", + "--build", + str(build_dir), + ] + if _is_windows_platform(): + build_command.extend(["--config", "Release"]) + build_command.extend(["--", "-j4"]) + subprocess.run(build_command, cwd=destination, env=env, check=True) + except (FileNotFoundError, subprocess.CalledProcessError) as exc: + cmake_error = exc + else: + build_succeeded = True + + if not build_succeeded: + autoconf_script = destination / "configure" + autoconf_ready = autoconf_script.exists() and not _is_windows_platform() + + if autoconf_ready: + build_dir.mkdir(parents=True, exist_ok=True) + try: + 
configure_command = [ + str(autoconf_script), + "--enable-jit", + "--enable-pcre2-8", + "--disable-tests", + ] + subprocess.run(configure_command, cwd=build_dir, env=env, check=True) + subprocess.run(["make", "-j4"], cwd=build_dir, env=env, check=True) + except FileNotFoundError as exc: + raise RuntimeError( + "Building PCRE2 from source via Autoconf requires the GNU build toolchain (configure/make) to be available on PATH" + ) from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + "Failed to build PCRE2 from source using Autoconf; see the output above for details" + ) from exc + else: + build_succeeded = True + elif cmake_error is not None and isinstance(cmake_error, subprocess.CalledProcessError): + raise RuntimeError( + "Failed to build PCRE2 from source; see the output above for details" + ) from cmake_error + + if not build_succeeded: + raise RuntimeError( + "PCRE2 build tooling was not found. Install CMake or Autoconf (configure/make) to build from source." 
+ ) + + header_source = destination / "src" / "pcre2.h.generic" + header_target = destination / "src" / "pcre2.h" + if header_source.exists() and not header_target.exists(): + shutil.copy2(header_source, header_target) + + include_target = PCRE_EXT_DIR / "pcre2.h" + if header_target.exists(): + shutil.copy2(header_target, include_target) + + include_dirs: list[str] = [] + library_dirs: list[str] = [] + library_files: list[str] = [] + seen_includes: set[str] = set() + seen_lib_dirs: set[str] = set() + seen_lib_files: set[str] = set() + + def _add_include(path: Path) -> None: + path = path.resolve() + path_str = str(path) + if path.is_dir() and path_str not in seen_includes: + include_dirs.append(path_str) + seen_includes.add(path_str) + + def _add_library_file(path: Path) -> None: + path = path.resolve() + if not path.is_file(): + return + path_str = str(path) + if path_str not in seen_lib_files: + library_files.append(path_str) + seen_lib_files.add(path_str) + parent = str(path.parent.resolve()) + if parent not in seen_lib_dirs: + library_dirs.append(parent) + seen_lib_dirs.add(parent) + + include_dir = destination / "src" + _add_include(include_dir) + + search_roots = [ + destination, + destination / "src", + destination / ".libs", + destination / "src" / ".libs", + build_dir, + build_dir / "lib", + build_dir / "bin", + build_dir / "Release", + build_dir / "Debug", + build_dir / "RelWithDebInfo", + build_dir / "MinSizeRel", + ] + search_patterns = [ + f"**/{LIBRARY_BASENAME}.lib", + f"**/{LIBRARY_BASENAME}.a", + f"**/{LIBRARY_BASENAME}.so", + f"**/{LIBRARY_BASENAME}.so.*", + f"**/{LIBRARY_BASENAME}.dylib", + "**/pcre2-8.lib", + "**/pcre2-8.dll", + "**/pcre2-8-static.lib", + "**/pcre2-8-static.dll", + ] + + for root in search_roots: + if not root.exists(): + continue + for pattern in search_patterns: + for path in root.glob(pattern): + _add_library_file(path) + + if not library_files: + raise RuntimeError( + "PCRE2 build did not produce any libpcre2-8 artifacts; 
check the build output for errors" + ) + + return (include_dirs, library_dirs, library_files) + + +def _get_test_compiler() -> CCompiler | None: + global _COMPILER_INITIALIZED, _COMPILER_INSTANCE + if _COMPILER_INITIALIZED: + return _COMPILER_INSTANCE + _COMPILER_INITIALIZED = True + try: + compiler = new_compiler() + customize_compiler(compiler) + except Exception: + _COMPILER_INSTANCE = None + else: + _COMPILER_INSTANCE = compiler + return _COMPILER_INSTANCE + + +def _extract_macos_architectures(command: list[str] | tuple[str, ...] | None) -> list[str]: + if not isinstance(command, (list, tuple)): + return [] + arches: list[str] = [] + iterator = iter(command) + for token in iterator: + if token != "-arch": + continue + arch = next(iterator, "") + if arch: + arches.append(arch) + return arches + + +def _macos_compiler_architectures(compiler: CCompiler | None) -> set[str]: + arches: set[str] = set() + if compiler is not None: + for attr in ("compiler", "compiler_so", "compiler_cxx", "linker_so"): + arches.update(_extract_macos_architectures(getattr(compiler, attr, None))) + archflags = os.environ.get("ARCHFLAGS") + if archflags: + arches.update(_extract_macos_architectures(tuple(shlex.split(archflags)))) + for env_name in ("CFLAGS", "CPPFLAGS"): + value = os.environ.get(env_name) + if value: + arches.update(_extract_macos_architectures(tuple(shlex.split(value)))) + return {arch for arch in arches if arch} + + +def _is_x86_architecture(arch: str) -> bool: + normalized = arch.lower() + return normalized in {"x86_64", "x86_64h", "i386", "i486", "i586", "i686", "amd64"} + + +def _should_disable_native_flags_for_macos(compiler: CCompiler | None) -> bool: + if sys.platform != "darwin": + return False + arches = _macos_compiler_architectures(compiler) + if not arches: + machine = platform.machine() + if machine: + arches.add(machine) + if not arches: + return False + if len(arches) > 1: + return True + arch = next(iter(arches)) + return not _is_x86_architecture(arch) + + 
+def _compiler_supports_flag(flag: str) -> bool: + cached = _COMPILER_FLAG_CACHE.get(flag) + if cached is not None: + return cached + + compiler = _get_test_compiler() + if compiler is None: + _COMPILER_FLAG_CACHE[flag] = False + return False + + with tempfile.TemporaryDirectory() as tmpdir: + source = Path(tmpdir) / "flag_check.c" + source.write_text("int main(void) { return 0; }\n", encoding="utf-8") + try: + compiler.compile( + [str(source)], + output_dir=tmpdir, + extra_postargs=[flag], + ) + except (CCompilerError, DistutilsExecError, OSError): + _COMPILER_FLAG_CACHE[flag] = False + else: + _COMPILER_FLAG_CACHE[flag] = True + return _COMPILER_FLAG_CACHE[flag] + + +def _augment_compile_flags(flags: list[str]) -> None: + if _is_truthy_env("PCRE2_DISABLE_OPT_FLAGS"): + return + + disable_native = _is_truthy_env("PCRE2_DISABLE_NATIVE_FLAGS") + compiler = _get_test_compiler() + if not disable_native and _should_disable_native_flags_for_macos(compiler): + # Apple universal builds (arm64 + x86_64) and arm64-only builds reject x86 specific flags. 
+ disable_native = True + candidate_flags: list[tuple[str, bool]] = [ + ("-O3", False), + ("-march=native", True), + ("-mtune=native", True), + ("-fomit-frame-pointer", False), + ("-funroll-loops", False), + #("-falign-loops=32", False), + ] + + seen = set(flags) + for flag, requires_native in candidate_flags: + if requires_native and disable_native: + continue + if flag in seen: + continue + if not _compiler_supports_flag(flag): + continue + flags.append(flag) + seen.add(flag) + + +def _homebrew_prefixes() -> list[Path]: + if sys.platform != "darwin": + return [] + + prefixes: list[Path] = [] + for args in (["brew", "--prefix", "pcre2"], ["brew", "--prefix"]): + output = _run_command(args) + if not output: + continue + path = Path(output) + if path.exists(): + prefixes.append(path) + return prefixes + + +def _linux_multiarch_dirs() -> list[str]: + arch = platform.machine() + mapping = { + "x86_64": ["x86_64-linux-gnu"], + "aarch64": ["aarch64-linux-gnu"], + "arm64": ["aarch64-linux-gnu"], + "armv7l": ["arm-linux-gnueabihf"], + "armv6l": ["arm-linux-gnueabihf"], + "armv8l": ["arm-linux-gnueabihf"], + "i686": ["i386-linux-gnu"], + "i386": ["i386-linux-gnu"], + "ppc64le": ["powerpc64le-linux-gnu"], + "s390x": ["s390x-linux-gnu"], + } + return mapping.get(arch, []) + + +def _platform_prefixes() -> list[Path]: + prefixes: list[Path] = [] + + env_root = os.environ.get("PCRE2_ROOT") + if env_root: + for value in env_root.split(os.pathsep): + path = Path(value) + if path.exists(): + prefixes.append(path) + + if sys.platform.startswith("linux"): + prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) + elif sys.platform == "darwin": + prefixes.extend(_homebrew_prefixes()) + prefixes.extend(Path(p) for p in ("/opt/homebrew", "/usr/local", "/usr")) + elif sys.platform.startswith("freebsd"): + prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) + elif sys.platform.startswith("sunos") or sys.platform.startswith("solaris"): + prefixes.extend(Path(p) for p in ("/usr", 
"/usr/local", "/opt/local")) + else: + prefixes.extend(Path(p) for p in ("/usr/local", "/usr")) + + seen: set[Path] = set() + ordered: list[Path] = [] + for prefix in prefixes: + if prefix not in seen: + ordered.append(prefix) + seen.add(prefix) + return ordered + + +def _platform_library_subdirs() -> list[str]: + subdirs = ["lib", "lib64", "lib32", "lib/pcre2"] + + if sys.platform.startswith("linux"): + for multiarch in _linux_multiarch_dirs(): + subdirs.append(f"lib/{multiarch}") + subdirs.extend([ + "lib/x86_64-linux-gnu", + "lib/i386-linux-gnu", + "lib/aarch64-linux-gnu", + "lib/arm-linux-gnueabihf", + ]) + elif sys.platform.startswith("sunos") or sys.platform.startswith("solaris"): + subdirs.extend(["lib/64", "lib/amd64"]) + + seen: set[str] = set() + ordered: list[str] = [] + for subdir in subdirs: + if subdir not in seen: + ordered.append(subdir) + seen.add(subdir) + return ordered + + +def _extend_unique(target: list[str], value: str) -> None: + if value and value not in target: + target.append(value) + + +def _extend_with_existing( + target: list[str], + candidates: list[Path], + predicate: Callable[[Path], bool] | None = None, +) -> None: + for candidate in candidates: + if not candidate.is_dir(): + continue + if predicate is not None and not predicate(candidate): + continue + _extend_unique(target, str(candidate)) + + +def _extend_env_paths(target: list[str], env_var: str) -> None: + value = os.environ.get(env_var) + if not value: + return + for raw_path in value.split(os.pathsep): + candidate = raw_path.strip() + if candidate: + _extend_unique(target, candidate) + +def _header_exists(directory: Path) -> bool: + return (directory / "pcre2.h").exists() + + +def _library_exists(directory: Path) -> bool: + return _locate_library_file(directory) is not None + + +def _locate_library_file(directory: Path) -> Path | None: + if not directory.exists(): + return None + for extension in LIB_EXTENSIONS: + candidate = directory / f"{LIBRARY_BASENAME}{extension}" + if 
candidate.exists(): + return candidate + for candidate in directory.glob(f"{LIBRARY_BASENAME}.so.*"): + if candidate.exists(): + return candidate + fallback = directory / f"{LIBRARY_BASENAME}.dll" + if fallback.exists(): + return fallback + return None + + +def _find_library_with_pkg_config() -> list[str]: + library_files: list[str] = [] + libfile = _run_pkg_config_var("--variable=libfile") + if libfile: + path = Path(libfile) + if path.exists(): + library_files.append(str(path)) + if not library_files: + libdir = _run_pkg_config_var("--variable=libdir") + if libdir: + candidate = _locate_library_file(Path(libdir)) + if candidate is not None: + library_files.append(str(candidate)) + return library_files + + +def _find_library_with_ldconfig() -> list[str]: + if not sys.platform.startswith("linux"): + return [] + output = _run_command(["ldconfig", "-p"]) + if not output: + return [] + for line in output.splitlines(): + if "libpcre2-8.so" not in line: + continue + parts = line.strip().split(" => ") + if len(parts) != 2: + continue + path = Path(parts[1].strip()) + if path.exists(): + return [str(path)] + return [] + + +def _find_library_with_brew() -> list[str]: + if sys.platform != "darwin": + return [] + library_files: list[str] = [] + for prefix in _homebrew_prefixes(): + candidate = _locate_library_file(prefix / "lib") + if candidate is not None: + library_files.append(str(candidate)) + return library_files + + +def _discover_include_dirs() -> list[str]: + prefixes = _platform_prefixes() + candidates: list[Path] = [] + for prefix in prefixes: + candidates.extend( + [ + prefix / "include", + prefix / "include/pcre2", + ] + ) + include_dirs: list[str] = [] + _extend_with_existing(include_dirs, candidates, _header_exists) + return include_dirs + + +def _discover_library_dirs() -> list[str]: + prefixes = _platform_prefixes() + candidates: list[Path] = [] + subdirs = _platform_library_subdirs() + for prefix in prefixes: + for subdir in subdirs: + 
candidates.append(prefix / subdir) + library_dirs: list[str] = [] + _extend_with_existing(library_dirs, candidates, _library_exists) + return library_dirs + + +def _has_header(include_dirs: list[str]) -> bool: + for directory in include_dirs: + if _header_exists(Path(directory)): + return True + return False + + +def _has_library(library_dirs: list[str]) -> bool: + for directory in library_dirs: + if _library_exists(Path(directory)): + return True + return False + + +def collect_build_config() -> dict[str, list[str] | list[tuple[str, str | None]]]: + include_dirs: list[str] = [] + library_dirs: list[str] = [] + libraries: list[str] = [] + extra_compile_args: list[str] = [] + extra_link_args: list[str] = [] + define_macros: list[tuple[str, str | None]] = [] + library_files: list[str] = [] + + source_include_dirs, source_library_dirs, source_library_files = _prepare_pcre2_source() + for directory in source_include_dirs: + _extend_unique(include_dirs, directory) + for directory in source_library_dirs: + _extend_unique(library_dirs, directory) + for path in source_library_files: + _extend_unique(library_files, path) + + cflags = _run_pkg_config("--cflags") + libs = _run_pkg_config("--libs") + + for flag in cflags: + if flag.startswith("-I") and len(flag) > 2: + _extend_unique(include_dirs, flag[2:]) + elif flag.startswith("-D") and len(flag) > 2: + name_value = flag[2:].split("=", 1) + define_macros.append((name_value[0], name_value[1] if len(name_value) > 1 else None)) + else: + extra_compile_args.append(flag) + + for flag in libs: + if flag.startswith("-L") and len(flag) > 2: + _extend_unique(library_dirs, flag[2:]) + elif flag.startswith("-l") and len(flag) > 2: + _extend_unique(libraries, flag[2:]) + else: + extra_link_args.append(flag) + + _extend_env_paths(include_dirs, "PCRE2_INCLUDE_DIR") + + _extend_env_paths(library_dirs, "PCRE2_LIBRARY_DIR") + + env_lib_path = os.environ.get("PCRE2_LIBRARY_PATH") + if env_lib_path: + for raw_path in 
env_lib_path.split(os.pathsep): + candidate = raw_path.strip() + if not candidate: + continue + path = Path(candidate) + if path.is_file() or any(candidate.endswith(ext) for ext in LIB_EXTENSIONS): + _extend_unique(library_files, str(path)) + parent = str(path.parent) + if parent: + _extend_unique(library_dirs, parent) + else: + _extend_unique(library_dirs, candidate) + + _extend_env_paths(libraries, "PCRE2_LIBRARIES") + + if not library_files: + for path in _find_library_with_pkg_config(): + _extend_unique(library_files, path) + + if not library_files: + directory_candidates = [Path(p) for p in library_dirs] + directory_candidates.extend(Path(p) for p in _discover_library_dirs()) + for directory in directory_candidates: + located = _locate_library_file(directory) + if located is not None: + _extend_unique(library_files, str(located)) + break + + if not library_files: + for path in _find_library_with_ldconfig(): + _extend_unique(library_files, path) + + if not library_files: + for path in _find_library_with_brew(): + _extend_unique(library_files, path) + + env_cflags = os.environ.get("PCRE2_CFLAGS") + if env_cflags: + extra_compile_args.extend(shlex.split(env_cflags)) + + env_ldflags = os.environ.get("PCRE2_LDFLAGS") + if env_ldflags: + extra_link_args.extend(shlex.split(env_ldflags)) + + if not any(flag.startswith("-std=") for flag in extra_compile_args): + extra_compile_args.append("-std=c99") + + if not _has_header(include_dirs): + include_dirs.extend(_discover_include_dirs()) + + if not _has_library(library_dirs): + library_dirs.extend(_discover_library_dirs()) + + if library_files: + linkable_files: list[str] = [] + for path in library_files: + suffix = Path(path).suffix.lower() + if suffix == ".dll": + continue + linkable_files.append(path) + + if linkable_files: + libraries = [lib for lib in libraries if lib != "pcre2-8"] + for path in linkable_files: + _extend_unique(extra_link_args, path) + parent = str(Path(path).parent) + if parent: + 
_extend_unique(library_dirs, parent) + elif "pcre2-8" not in libraries: + libraries.append("pcre2-8") + elif "pcre2-8" not in libraries: + libraries.append("pcre2-8") + + if sys.platform.startswith("linux") and "dl" not in libraries: + libraries.append("dl") + + _augment_compile_flags(extra_compile_args) + + return { + "include_dirs": include_dirs, + "library_dirs": library_dirs, + "libraries": libraries, + "extra_compile_args": extra_compile_args, + "extra_link_args": extra_link_args, + "define_macros": define_macros, + } From a3d17ae3d74dc9055662fcef16aa3349339f78b2 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 9 Oct 2025 11:07:34 +0000 Subject: [PATCH 3/4] fix wrong module. transformers use regex, not re. it was an import alias from regex as re --- tests/transformers_regex_usages.jsonl | 970 +++++++++++++------------- 1 file changed, 485 insertions(+), 485 deletions(-) diff --git a/tests/transformers_regex_usages.jsonl b/tests/transformers_regex_usages.jsonl index 1b38530..8a0b31e 100644 --- a/tests/transformers_regex_usages.jsonl +++ b/tests/transformers_regex_usages.jsonl @@ -1,485 +1,485 @@ -{"file": "./setup.py", "line": 197, "col": 26, "module": "re", "call": "findall", "pattern": "^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", "test_string": "Example likes !!A."} -{"file": "./utils/check_inits.py", "line": 50, "col": 14, "module": "re", "call": "compile", "pattern": "is\\_([a-z_]*)_available()", "test_string": "Example likes is m alpha."} -{"file": "./utils/check_inits.py", "line": 52, "col": 29, "module": "re", "call": "compile", "pattern": "^_import_structure\\s+=\\s+\\{([^\\}]+)\\}", "test_string": "Example likes alpha alpha=x{a}."} -{"file": "./utils/check_inits.py", "line": 54, "col": 30, "module": "re", "call": "compile", "pattern": "\\s+\"\\S*\":\\s+\\[([^\\]]*)\\]", "test_string": "Example likes x\"x\":x[a]."} -{"file": "./utils/check_inits.py", "line": 56, "col": 19, "module": "re", "call": "compile", "pattern": 
"^\\s*if\\s+not\\s+is\\_[a-z_]*\\_available\\(\\)", "test_string": "Example likes alpha m alpha()."} -{"file": "./utils/check_inits.py", "line": 58, "col": 28, "module": "re", "call": "compile", "pattern": "^\\s*_import_structure\\[\"\\S*\"\\]\\.append\\(\"(\\S*)\"\\)", "test_string": "Example likes x alpha alpha[\"x\"].alpha(\"x\")."} -{"file": "./utils/check_inits.py", "line": 60, "col": 29, "module": "re", "call": "compile", "pattern": "^\\s*_import_structure\\[\\S*\\](?:\\.extend\\(|\\s*=\\s+)\\[([^\\]]*)\\]", "test_string": "Example likes x alpha alpha[x].alpha([a]."} -{"file": "./utils/check_inits.py", "line": 62, "col": 19, "module": "re", "call": "compile", "pattern": "^\\s+\"([^\"]+)\",", "test_string": "Example likes x\"a\",."} -{"file": "./utils/check_inits.py", "line": 64, "col": 23, "module": "re", "call": "compile", "pattern": "^\\s+\\[([^\\]]+)\\]", "test_string": "Example likes x[a]."} -{"file": "./utils/check_inits.py", "line": 66, "col": 13, "module": "re", "call": "compile", "pattern": "\\s+from\\s+\\S*\\s+import\\s+([^\\(\\s].*)\\n", "test_string": "Example likes alpha(A."} -{"file": "./utils/check_inits.py", "line": 68, "col": 10, "module": "re", "call": "compile", "pattern": "^\\s*try:", "test_string": "Example likes alpha:."} -{"file": "./utils/check_inits.py", "line": 70, "col": 11, "module": "re", "call": "compile", "pattern": "^\\s*else:", "test_string": "Example likes alpha:."} -{"file": "./utils/check_inits.py", "line": 124, "col": 22, "module": "re", "call": "findall", "pattern": "\\[([^\\]]+)\\]", "test_string": "Example likes [a]."} -{"file": "./utils/check_inits.py", "line": 335, "col": 37, "module": "re", "call": "findall", "pattern": "import_structure\\[\\\"([^\\\"]*)\\\"\\]", "test_string": "Example likes alpha alpha[\"a\"]."} -{"file": "./utils/check_dummies.py", "line": 48, "col": 14, "module": "re", "call": "compile", "pattern": "is\\_([a-z_]*)_available()", "test_string": "Example likes is m alpha."} -{"file": 
"./utils/check_dummies.py", "line": 50, "col": 25, "module": "re", "call": "compile", "pattern": "\\s+from\\s+\\S*\\s+import\\s+([^\\(\\s].*)\\n", "test_string": "Example likes alpha(A."} -{"file": "./utils/check_dummies.py", "line": 52, "col": 19, "module": "re", "call": "compile", "pattern": "^\\s+if\\s+not\\s+\\(?is\\_[a-z_]*\\_available\\(\\)", "test_string": "Example likes alpha(is m alpha()."} -{"file": "./utils/check_config_docstrings.py", "line": 34, "col": 17, "module": "re", "call": "compile", "pattern": "\\[(.+?)\\]\\((https://huggingface\\.co/.+?)\\)", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 65, "col": 16, "module": "re", "call": "findall", "pattern": "https://huggingface\\.co/papers/\\d+\\.\\d+", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 66, "col": 17, "module": "re", "call": "findall", "pattern": "https://arxiv\\.org/abs/\\d+\\.\\d+", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 67, "col": 17, "module": "re", "call": "findall", "pattern": "https://arxiv\\.org/pdf/\\d+\\.\\d+", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 152, "col": 18, "module": "re", "call": "findall", "pattern": "https://arxiv\\.org/abs/(\\d+\\.\\d+)", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 153, "col": 19, "module": "re", "call": "findall", "pattern": "https://arxiv\\.org/pdf/(\\d+\\.\\d+)", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 72, "col": 26, "module": "re", "call": "findall", "pattern": "https://[^\\s\\)]+", "test_string": "http://example.com/docs"} -{"file": "./utils/add_dates.py", "line": 204, "col": 23, "module": "re", "call": "finditer", "pattern": "-->", "test_string": "Example likes >."} -{"file": "./utils/add_dates.py", "line": 212, "col": 27, "module": "re", "call": "finditer", "pattern": "-->", "test_string": "Example 
likes >."} -{"file": "./utils/scan_skipped_tests.py", "line": 55, "col": 19, "module": "re", "call": "search", "pattern": "reason\\s*=\\s*[\"\\'](.*?)[\"\\']", "test_string": "Example likes alpha=x\"A\"."} -{"file": "./utils/scan_skipped_tests.py", "line": 58, "col": 19, "module": "re", "call": "search", "pattern": "\\((?:.*?,\\s*)?[\"\\'](.*?)[\"\\']\\)", "test_string": "Example likes (A,x\"A\")."} -{"file": "./utils/scan_skipped_tests.py", "line": 70, "col": 14, "module": "re", "call": "compile", "pattern": "((?:^\\s*@.*?\\n)*?)^\\s*def\\s+(test_[A-Za-z0-9_]+)\\b", "test_string": "alice@example.com"} -{"file": "./utils/scan_skipped_tests.py", "line": 40, "col": 25, "module": "re", "call": "findall", "pattern": "^\\s*def\\s+(test_[A-Za-z0-9_]+)", "test_string": "Example likes alpha M."} -{"file": "./utils/update_metadata.py", "line": 55, "col": 16, "module": "re", "call": "compile", "pattern": "(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration|ForRetrieval)", "test_string": "Example likes alpha."} -{"file": "./utils/modular_model_detector.py", "line": 579, "col": 14, "module": "re", "call": "compile", "pattern": "(?:^|[\\*_`\\s>])(?:this|the)\\s+model\\s+was\\s+released\\s+on\\s+(\\d{4}-\\d{2}-\\d{2})\\b", "test_string": "Example likes alpha xx xx."} -{"file": "./utils/modular_model_detector.py", "line": 171, "col": 11, "module": "re", "call": "sub", "pattern": "(\"\"\"|\\'\\'\\')(?:.|\\n)*?\\1", "test_string": "Example likes \"\"\"A\"\"\"."} -{"file": "./utils/modular_model_detector.py", "line": 172, "col": 11, "module": "re", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} -{"file": "./utils/modular_model_detector.py", "line": 158, "col": 11, "module": "re", "call": "sub", "pattern": "[^a-z0-9]+", "test_string": "Example likes m."} -{"file": "./utils/modular_model_detector.py", "line": 186, "col": 15, "module": "re", "call": "findall", "pattern": "\\b[a-zA-Z_][a-zA-Z0-9_]*\\b", "test_string": "Example likes mm."} -{"file": 
"./utils/modular_model_detector.py", "line": 199, "col": 12, "module": "re", "call": "match", "pattern": "^([A-Z][a-z0-9]+)", "test_string": "Example likes Mm."} -{"file": "./utils/modular_model_detector.py", "line": 199, "col": 52, "module": "re", "call": "match", "pattern": "^([A-Za-z0-9]+)", "test_string": "Example likes M."} -{"file": "./utils/modular_model_detector.py", "line": 220, "col": 21, "module": "re", "call": "sub", "pattern": "\\d+", "test_string": "Example likes x."} -{"file": "./utils/modular_model_detector.py", "line": 226, "col": 25, "module": "re", "call": "sub", "pattern": "\\d+", "test_string": "Example likes x."} -{"file": "./utils/modular_model_detector.py", "line": 173, "col": 63, "module": "re", "call": "match", "pattern": "\\s*(from|import)\\s+", "test_string": "Example likes alpha."} -{"file": "./utils/sort_auto_mappings.py", "line": 45, "col": 20, "module": "re", "call": "compile", "pattern": "[A-Z_]+_MAPPING(\\s+|_[A-Z_]+\\s+)=\\s+OrderedDict", "test_string": "Example likes M alpha Mx=alpha."} -{"file": "./utils/sort_auto_mappings.py", "line": 47, "col": 17, "module": "re", "call": "compile", "pattern": "\\s*\\(\\s*\"(\\S[^\"]+)\"", "test_string": "Example likes x(x\"xa\"."} -{"file": "./utils/sort_auto_mappings.py", "line": 71, "col": 25, "module": "re", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} -{"file": "./utils/modular_model_converter.py", "line": 1688, "col": 14, "module": "re", "call": "search", "pattern": "modular_(.*)(?=\\.py$)", "test_string": "Example likes alpha A.py."} -{"file": "./utils/modular_model_converter.py", "line": 130, "col": 11, "module": "re", "call": "findall", "pattern": "# Copied from", "test_string": "Example likes # alpha alpha."} -{"file": "./utils/modular_model_converter.py", "line": 993, "col": 15, "module": "re", "call": "match", "pattern": "\\ndef .*\\(.*\\):\\n raise.*Error\\(.*", "test_string": "Example likes alpha A(A): alpha(A."} -{"file": 
"./utils/modular_model_converter.py", "line": 98, "col": 44, "module": "re", "call": "findall", "pattern": "[A-Z][^A-Z]*", "test_string": "Example likes MM."} -{"file": "./utils/modular_model_converter.py", "line": 282, "col": 32, "module": "re", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} -{"file": "./utils/modular_model_converter.py", "line": 283, "col": 32, "module": "re", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} -{"file": "./utils/modular_model_converter.py", "line": 289, "col": 32, "module": "re", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} -{"file": "./utils/modular_model_converter.py", "line": 290, "col": 32, "module": "re", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} -{"file": "./utils/modular_model_converter.py", "line": 306, "col": 32, "module": "re", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} -{"file": "./utils/modular_model_converter.py", "line": 307, "col": 32, "module": "re", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} -{"file": "./utils/modular_model_converter.py", "line": 323, "col": 32, "module": "re", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} -{"file": "./utils/modular_model_converter.py", "line": 324, "col": 32, "module": "re", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} -{"file": "./utils/modular_model_converter.py", "line": 1702, "col": 32, "module": "re", "call": "search", "pattern": "(src/transformers/.*|examples/.*)", "test_string": "/home/user/readme.txt"} -{"file": "./utils/modular_model_converter.py", "line": 166, "col": 50, "module": "re", "call": "search", "pattern": "\\\"\\\"\\\"[\\s\\S]*\\\"\\\"\\\"", "test_string": "Example likes \"\"\"x\"\"\"."} -{"file": "./utils/get_pr_run_slow_jobs.py", "line": 21, "col": 11, "module": "re", "call": "compile", "pattern": "src/transformers/(models/.*)/modeling_.*\\.py", "test_string": 
"/home/user/readme.txt"} -{"file": "./utils/get_pr_run_slow_jobs.py", "line": 22, "col": 11, "module": "re", "call": "compile", "pattern": "src/transformers/(quantizers/quantizer_.*)\\.py", "test_string": "/home/user/readme.txt"} -{"file": "./utils/get_pr_run_slow_jobs.py", "line": 25, "col": 11, "module": "re", "call": "compile", "pattern": "tests/(models/.*)/test_.*\\.py", "test_string": "/home/user/readme.txt"} -{"file": "./utils/get_pr_run_slow_jobs.py", "line": 26, "col": 11, "module": "re", "call": "compile", "pattern": "tests/(quantization/.*)/test_.*\\.py", "test_string": "/home/user/readme.txt"} -{"file": "./utils/get_pr_run_slow_jobs.py", "line": 29, "col": 11, "module": "re", "call": "compile", "pattern": "src/transformers/(models/.*)/.*\\.py", "test_string": "/home/user/readme.txt"} -{"file": "./utils/notification_service_doc_tests.py", "line": 51, "col": 11, "module": "re", "call": "search", "pattern": "_ \\[doctest\\]", "test_string": "Example likes [alpha]."} -{"file": "./utils/release.py", "line": 60, "col": 17, "module": "re", "call": "compile", "pattern": "^check_min_version\\(\"[^\"]+\"\\)\\s*$", "test_string": "alpha alpha alpha(\"a\")x"} -{"file": "./utils/release.py", "line": 61, "col": 13, "module": "re", "call": "compile", "pattern": "^__version__\\s+=\\s+\"([^\"]+)\"\\s*$", "test_string": "alpha x=x\"a\"x"} -{"file": "./utils/release.py", "line": 62, "col": 14, "module": "re", "call": "compile", "pattern": "^(\\s*)version\\s*=\\s*\"[^\"]+\",", "test_string": "Example likes alpha=x\"a\",."} -{"file": "./utils/release.py", "line": 64, "col": 8, "module": "re", "call": "compile", "pattern": "^# \"transformers(\\[.+\\])?.*$", "test_string": "# \"alpha[A]A"} -{"file": "./utils/release.py", "line": 68, "col": 8, "module": "re", "call": "compile", "pattern": "^# \"transformers(\\[.+\\])?.*$", "test_string": "# \"alpha[A]A"} -{"file": "./utils/check_pipeline_typing.py", "line": 35, "col": 29, "module": "re", "call": "search", "pattern": "# (.*)# ", 
"test_string": "/home/user/readme.txt"} -{"file": "./utils/check_pipeline_typing.py", "line": 39, "col": 25, "module": "re", "call": "search", "pattern": "def pipeline(.*) -> Pipeline:", "test_string": "Example likes alpha alpha > alpha:."} -{"file": "./utils/check_docstrings.py", "line": 70, "col": 11, "module": "re", "call": "compile", "pattern": "^\\s*(Args?|Arguments?|Attributes?|Params?|Parameters?):\\s*$", "test_string": "alpha:x"} -{"file": "./utils/check_docstrings.py", "line": 72, "col": 16, "module": "re", "call": "compile", "pattern": "^(\\s*)(\\S+)\\s+\\((.+)\\)(?:\\:|$)", "test_string": "Example likes alpha(A):."} -{"file": "./utils/check_docstrings.py", "line": 74, "col": 24, "module": "re", "call": "compile", "pattern": "\\*optional\\*, defaults to (.*)$", "test_string": "Example likes *alpha*, alpha to A."} -{"file": "./utils/check_docstrings.py", "line": 492, "col": 13, "module": "re", "call": "search", "pattern": "^(\\s*)(?:\\S|$)", "test_string": "Example likes xx."} -{"file": "./utils/check_docstrings.py", "line": 998, "col": 24, "module": "re", "call": "findall", "pattern": "[,(]\\s*(\\w+)\\s*(?=:|=|,|\\))", "test_string": "Example likes ,alpha:."} -{"file": "./utils/check_docstrings.py", "line": 1007, "col": 24, "module": "re", "call": "findall", "pattern": "^ (\\w+)(?:\\s*:|\\s*=|\\s*$)", "test_string": "Example likes xx:."} -{"file": "./utils/check_docstrings.py", "line": 707, "col": 11, "module": "re", "call": "search", "pattern": "^\\s*#\\s*no-format\\s*$", "test_string": "x#alpha alpha"} -{"file": "./utils/check_docstrings.py", "line": 1252, "col": 16, "module": "re", "call": "findall", "pattern": "custom_args=(\\w+)", "test_string": "Example likes alpha alpha=x."} -{"file": "./utils/check_docstrings.py", "line": 711, "col": 13, "module": "re", "call": "search", "pattern": "^\\s*#\\s*ignore-order\\s*$", "test_string": "x#alpha alpha"} -{"file": "./utils/check_docstrings.py", "line": 596, "col": 13, "module": "re", "call": "search", 
"pattern": "defaults to `?None`?", "test_string": "Example likes alpha to `alpha`."} -{"file": "./utils/compare_test_runs.py", "line": 22, "col": 12, "module": "re", "call": "match", "pattern": "^(SKIPPED|XFAIL|XPASS|EXPECTEDFAIL)\\s+\\[?\\d*\\]?\\s*(\\S+:\\d+)", "test_string": "Example likes alpha[x]xx:x."} -{"file": "./utils/compare_test_runs.py", "line": 29, "col": 15, "module": "re", "call": "split", "pattern": "\\s+-\\s+", "test_string": "Example likes x x."} -{"file": "./utils/check_copies.py", "line": 531, "col": 19, "module": "re", "call": "compile", "pattern": "^(\\s*)#\\s*Copied from\\s+transformers\\.(\\S+\\.\\S+)\\s*($|\\S.*$)", "test_string": "Example likes x#alpha alpha.x.alpha."} -{"file": "./utils/check_copies.py", "line": 532, "col": 33, "module": "re", "call": "compile", "pattern": "^(\\s*)#\\s*Copied from\\s+tests\\.(\\S+\\.\\S+)\\s*($|\\S.*$)", "test_string": "Example likes x#alpha alpha.x.alpha."} -{"file": "./utils/check_copies.py", "line": 533, "col": 22, "module": "re", "call": "compile", "pattern": "^\\s*(\\S+)->(\\S+)(\\s+.*|$)", "test_string": "Example likes xx >alpha."} -{"file": "./utils/check_copies.py", "line": 534, "col": 19, "module": "re", "call": "compile", "pattern": "]*>", "test_string": "Example likes ."} -{"file": "./utils/check_copies.py", "line": 600, "col": 22, "module": "re", "call": "compile", "pattern": "class\\s+([^\\(:]+)(?:\\(|:)", "test_string": "Example likes alpha((."} -{"file": "./utils/check_copies.py", "line": 601, "col": 21, "module": "re", "call": "compile", "pattern": "def\\s+([^\\(]+)\\(", "test_string": "Example likes alpha(."} -{"file": "./utils/check_copies.py", "line": 931, "col": 23, "module": "re", "call": "compile", "pattern": "\\*\\*\\[([^\\]]*)\\]\\(([^\\)]*)\\)\\*\\* \\(from ([^)]*)\\)[^\\[]*([^\\)]*\\)).*?by (.*?[A-Za-z\\*]{2,}?)\\. (.*)$", "test_string": "Example likes **[a](a)** (alpha a)aa)alpha alpha. 
A."} -{"file": "./utils/check_copies.py", "line": 935, "col": 29, "module": "re", "call": "compile", "pattern": "\\*\\*\\[([^\\]]*)\\]\\(([^\\)]*)\\)\\*\\*", "test_string": "Example likes **[a](a)**."} -{"file": "./utils/check_copies.py", "line": 937, "col": 29, "module": "re", "call": "compile", "pattern": " \\[([^\\]]*)\\]\\(([^\\)]*)\\)", "test_string": "Example likes [a](a)."} -{"file": "./utils/check_copies.py", "line": 172, "col": 11, "module": "re", "call": "search", "pattern": "^\\s*\\)(\\s*->.*:|:)\\s*$", "test_string": "x)x >A:x"} -{"file": "./utils/check_copies.py", "line": 552, "col": 15, "module": "re", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} -{"file": "./utils/check_copies.py", "line": 950, "col": 18, "module": "re", "call": "search", "pattern": "\\*\\*\\[([^\\]]*)", "test_string": "Example likes **[a."} -{"file": "./utils/check_copies.py", "line": 944, "col": 16, "module": "re", "call": "search", "pattern": "\\*\\*\\[([^\\]]*)", "test_string": "Example likes **[a."} -{"file": "./utils/custom_init_isort.py", "line": 48, "col": 13, "module": "re", "call": "compile", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} -{"file": "./utils/custom_init_isort.py", "line": 50, "col": 17, "module": "re", "call": "compile", "pattern": "^\\s*\"([^\"]+)\":", "test_string": "Example likes x\"a\":."} -{"file": "./utils/custom_init_isort.py", "line": 52, "col": 19, "module": "re", "call": "compile", "pattern": "^\\s*_import_structure\\[\"([^\"]+)\"\\]", "test_string": "Example likes x alpha alpha[\"a\"]."} -{"file": "./utils/custom_init_isort.py", "line": 54, "col": 17, "module": "re", "call": "compile", "pattern": "^\\s*\"([^\"]+)\",\\s*$", "test_string": "x\"a\",x"} -{"file": "./utils/custom_init_isort.py", "line": 56, "col": 22, "module": "re", "call": "compile", "pattern": "\\[([^\\]]+)\\]", "test_string": "Example likes [a]."} -{"file": "./utils/add_pipeline_model_mapping_to_test.py", "line": 206, "col": 8, 
"module": "re", "call": "compile", "pattern": "\\s(is_\\S+?_available\\(\\))\\s", "test_string": "Example likes alpha x alpha()x."} -{"file": "./utils/tests_fetcher.py", "line": 541, "col": 35, "module": "re", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+(\\.+\\S+)\\s+import\\s+([^\\n]+)(?=\\n)", "test_string": "Example likes alpha.alpha."} -{"file": "./utils/tests_fetcher.py", "line": 545, "col": 34, "module": "re", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+(\\.+\\S+)\\s+import\\s+\\(([^\\)]+)\\)", "test_string": "Example likes alpha.alpha(a)."} -{"file": "./utils/tests_fetcher.py", "line": 551, "col": 33, "module": "re", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+transformers(\\S*)\\s+import\\s+([^\\n]+)(?=\\n)", "test_string": "Example likes alpha."} -{"file": "./utils/tests_fetcher.py", "line": 555, "col": 32, "module": "re", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+transformers(\\S*)\\s+import\\s+\\(([^\\)]+)\\)", "test_string": "Example likes alpha(a)."} -{"file": "./utils/tests_fetcher.py", "line": 1082, "col": 21, "module": "re", "call": "search", "pattern": "\\[([^\\]]*)\\]", "test_string": "Example likes [a]."} -{"file": "./utils/check_repo.py", "line": 869, "col": 16, "module": "re", "call": "compile", "pattern": "^\\s*@(\\S+)\\s+$", "test_string": "alice@example.com"} -{"file": "./utils/check_repo.py", "line": 618, "col": 17, "module": "re", "call": "findall", "pattern": "all_model_classes\\s+=\\s+\\(\\s*\\(([^\\)]*)\\)", "test_string": "Example likes alpha alpha alpha=x(x(a)."} -{"file": "./utils/check_repo.py", "line": 620, "col": 18, "module": "re", "call": "findall", "pattern": "all_model_classes\\s+=\\s+\\(([^\\)]*)\\)", "test_string": "Example likes alpha alpha alpha=x(a)."} -{"file": "./utils/check_repo.py", "line": 631, "col": 27, "module": "re", "call": "findall", "pattern": "base_model_class\\s+=.*", "test_string": "Example likes alpha alpha alpha=A."} -{"file": "./utils/check_repo.py", "line": 928, "col": 
23, "module": "re", "call": "findall", "pattern": "\\[\\[autodoc\\]\\]\\s+(\\S+)\\s+", "test_string": "Example likes [[alpha]]alpha."} -{"file": "./utils/check_repo.py", "line": 937, "col": 46, "module": "re", "call": "findall", "pattern": "(?<=-\\s).*", "test_string": "Example likes A."} -{"file": "./utils/notification_service.py", "line": 1070, "col": 19, "module": "re", "call": "compile", "pattern": "\\(#(\\d+)\\)$", "test_string": "Example likes (#x)."} -{"file": "./utils/pr_slow_ci_models.py", "line": 100, "col": 10, "module": "re", "call": "compile", "pattern": "src/transformers/models/(.*)/modeling_.*\\.py", "test_string": "/home/user/readme.txt"} -{"file": "./benchmark/benchmark.py", "line": 82, "col": 17, "module": "re", "call": "search", "pattern": "/commit=([^/]+)", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/dynamic_module_utils.py", "line": 139, "col": 23, "module": "re", "call": "findall", "pattern": "^\\s*import\\s+\\.(\\S+)\\s*$", "test_string": "alpha.xx"} -{"file": "./src/transformers/dynamic_module_utils.py", "line": 141, "col": 24, "module": "re", "call": "findall", "pattern": "^\\s*from\\s+\\.(\\S+)\\s+import", "test_string": "Example likes alpha.alpha."} -{"file": "./src/transformers/modeling_utils.py", "line": 882, "col": 21, "module": "re", "call": "findall", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/modeling_utils.py", "line": 890, "col": 15, "module": "re", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/modeling_utils.py", "line": 2624, "col": 11, "module": "re", "call": "search", "pattern": "class \\w+Attention\\(nn.Module\\)", "test_string": "Example likes alpha alpha(alpha)."} -{"file": "./src/transformers/modeling_utils.py", "line": 3970, "col": 18, "module": "re", "call": "compile", "pattern": "(.*?)-\\d{5}-of-\\d{5}", "test_string": "Example likes A alpha of alpha."} -{"file": 
"./src/transformers/modeling_utils.py", "line": 5789, "col": 27, "module": "re", "call": "sub", "pattern": "\\.\\d+\\.", "test_string": "Example likes .x.."} -{"file": "./src/transformers/modeling_utils.py", "line": 3844, "col": 34, "module": "re", "call": "sub", "pattern": "\\(.*\\)", "test_string": "Example likes (A)."} -{"file": "./src/transformers/modeling_utils.py", "line": 884, "col": 28, "module": "re", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/modeling_utils.py", "line": 885, "col": 25, "module": "re", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/modeling_utils.py", "line": 886, "col": 21, "module": "re", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/tokenization_utils_base.py", "line": 150, "col": 21, "module": "re", "call": "compile", "pattern": "tokenizer\\.(.*)\\.json", "test_string": "Example likes alpha.A.alpha."} -{"file": "./src/transformers/model_debugging_utils.py", "line": 52, "col": 23, "module": "re", "call": "compile", "pattern": "object at 0x[0-9A-Fa-f]+", "test_string": "Example likes alpha at 0xC."} -{"file": "./src/transformers/model_debugging_utils.py", "line": 188, "col": 18, "module": "re", "call": "compile", "pattern": "(.*)\\.(\\d+)$", "test_string": "Example likes A.x."} -{"file": "./src/transformers/testing_utils.py", "line": 1570, "col": 11, "module": "re", "call": "sub", "pattern": "^.*\\r", "test_string": "Example likes A."} -{"file": "./src/transformers/testing_utils.py", "line": 2435, "col": 13, "module": "re", "call": "sub", "pattern": "^gw", "test_string": "Example likes gw."} -{"file": "./src/transformers/testing_utils.py", "line": 2287, "col": 23, "module": "re", "call": "sub", "pattern": ".*_ _ _ (_ ){10,}_ _ ", "test_string": "Example likes A."} -{"file": "./src/transformers/testing_utils.py", "line": 2802, "col": 28, "module": "re", 
"call": "sub", "pattern": "(>>> .*load_dataset\\(.*)", "test_string": "Example likes >>> alpha alpha(A."} -{"file": "./src/transformers/testing_utils.py", "line": 2805, "col": 16, "module": "re", "call": "search", "pattern": "cuda|to\\(0\\)|device=0", "test_string": "Example likes alpha=0."} -{"file": "./src/transformers/modeling_gguf_pytorch_utils.py", "line": 338, "col": 22, "module": "re", "call": "sub", "pattern": "mlp.experts.\\d+.", "test_string": "Example likes alpha."} -{"file": "./src/transformers/trainer_utils.py", "line": 199, "col": 17, "module": "re", "call": "compile", "pattern": "^\" + PREFIX_CHECKPOINT_DIR + r\"\\-(\\d+)$", "test_string": "\" alpha alpha alpha r\" x"} -{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 589, "col": 10, "module": "re", "call": "compile", "pattern": "([^a-zA-Z0-9])\\1{3,}", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 596, "col": 9, "module": "re", "call": "compile", "pattern": "&(#?(x?))([^&;\\s]+);", "test_string": "Example likes &#x&;."} -{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 737, "col": 14, "module": "re", "call": "compile", "pattern": "(.)\\1{2,}", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 745, "col": 14, "module": "re", "call": "compile", "pattern": "(?.+?", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/models/kosmos2/processing_kosmos2.py", "line": 616, "col": 16, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/kosmos2/processing_kosmos2.py", "line": 617, "col": 16, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/roberta/tokenization_roberta.py", "line": 201, "col": 19, "module": "re", "call": "compile", "pattern": 
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 341, "col": 24, "module": "re", "call": "match", "pattern": "neck.extra_convs.(\\d+).gn.(weight|bias)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 346, "col": 24, "module": "re", "call": "match", "pattern": "neck.extra_convs.(\\d+).conv.(weight|bias)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 336, "col": 24, "module": "re", "call": "match", "pattern": "backbone.norm(\\d+).(weight|bias)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 364, "col": 20, "module": "re", "call": "match", "pattern": "bbox_head.(cls|reg)_branches.(\\d+).(.*)", "test_string": "Example likes alpha alpha alpha."} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 402, "col": 33, "module": "re", "call": "compile", "pattern": "(https?|ftp)(:\\/\\/[-_\\.!~*\\'()a-zA-Z0-9;\\/?:\\@&=\\+$,%#]+)", "test_string": "alice@example.com"} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 403, "col": 33, "module": "re", "call": "compile", "pattern": "[A-Za-z0-9\\._+]*@[\\-_0-9A-Za-z]+(\\.[A-Za-z]+)*", "test_string": "alice@example.com"} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 404, "col": 33, "module": "re", "call": "compile", "pattern": "[\\(]{0,1}[0-9]{2,4}[\\)\\-\\(]{0,1}[0-9]{2,4}[\\)\\-]{0,1}[0-9]{3,4}", "test_string": "Example-likes-(44-44-444."} -{"file": 
"./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 405, "col": 33, "module": "re", "call": "compile", "pattern": "([12]\\d{3}[/\\-年])*(0?[1-9]|1[0-2])[/\\-月]((0?[1-9]|[12][0-9]|3[01])日?)*(\\d{1,2}|:|\\d{1,2}時|\\d{1,2}分|\\(日\\)|\\(月\\)|\\(火\\)|\\(水\\)|\\(木\\)|\\(金\\)|\\(土\\)|㈰|㈪|㈫|㈬|㈭|㈮|㈯)*", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 408, "col": 33, "module": "re", "call": "compile", "pattern": "(明治|大正|昭和|平成|令和|㍾|㍽|㍼|㍻|\\u32ff)\\d{1,2}年(0?[1-9]|1[0-2])月(0?[1-9]|[12][0-9]|3[01])日(\\d{1,2}|:|\\d{1,2}時|\\d{1,2}分|\\(日\\)|\\(月\\)|\\(火\\)|\\(水\\)|\\(木\\)|\\(金\\)|\\(土\\)|㈰|㈪|㈫|㈬|㈭|㈮|㈯)*", "test_string": "Example likes 明治x年05月05日x時."} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 415, "col": 37, "module": "re", "call": "compile", "pattern": "(?:\\d,\\d{3}|[\\d億])*+\"\n r\"(?:\\d,\\d{3}|[\\d万])*+\"\n r\"(?:\\d,\\d{3}|[\\d千])*+\"\n r\"(?:千円|万円|千万円|円|千ドル|万ドル|千万ドル|ドル|千ユーロ|万ユーロ|千万ユーロ|ユーロ)+\"\n r\"(?:\\(税込\\)|\\(税抜\\)|\\+tax)*", "test_string": "Example likes a\" r\"a\" r\"a\" r\"千万ユーロ\" r\"+alpha."} -{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 423, "col": 37, "module": "re", "call": "compile", "pattern": "(?:\\d,\\d{3}|[\\d億万千])*\"\n r\"(?:千円|万円|千万円|円|千ドル|万ドル|千万ドル|ドル|千ユーロ|万ユーロ|千万ユーロ|ユーロ)+\"\n r\"(?:\\(税込\\)|\\(税抜\\)|\\+tax)*", "test_string": "Example likes x,alpha\" r\"千万ユーロ\" r\"+alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 100, "col": 31, "module": "re", "call": "compile", "pattern": "encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 101, "col": 30, "module": "re", "call": "compile", "pattern": 
"encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 104, "col": 32, "module": "re", "call": "compile", "pattern": "encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 106, "col": 32, "module": "re", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 107, "col": 30, "module": "re", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 110, "col": 31, "module": "re", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 112, "col": 29, "module": "re", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 113, "col": 27, "module": "re", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 116, "col": 28, "module": "re", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(bias|weight)", "test_string": "Example likes 
alpha alpha."} -{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 257, "col": 18, "module": "re", "call": "compile", "pattern": "_+", "test_string": "Example likes ."} -{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 214, "col": 32, "module": "re", "call": "compile", "pattern": "[^A-Za-z0-9.,:;!?\\-'\\\"()\\[\\] \\t\\n]+", "test_string": "Example likes M."} -{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 223, "col": 32, "module": "re", "call": "compile", "pattern": "[^A-Za-z0-9.,:;!?\\-+'\\\"()\\[\\] \\t\\n]+", "test_string": "Example likes M."} -{"file": "./src/transformers/models/deprecated/tapex/tokenization_tapex.py", "line": 293, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 53, "col": 33, "module": "re", "call": "search", "pattern": "\\d\\.\\d", "test_string": "Example likes x.x."} -{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 81, "col": 35, "module": "re", "call": "search", "pattern": ".\\d.", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 56, "col": 20, "module": "re", "call": "search", "pattern": "\\d\\.\\d\\d.", "test_string": "Example likes x.alpha."} -{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 58, "col": 20, "module": "re", "call": "search", "pattern": "\\d\\.\\d.", "test_string": "Example likes x.xA."} -{"file": 
"./src/transformers/models/mistral/convert_mistral_weights_to_hf.py", "line": 209, "col": 60, "module": "re", "call": "match", "pattern": "consolidated.\\d+.pth", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/siglip/tokenization_siglip.py", "line": 291, "col": 15, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 435, "col": 17, "module": "re", "call": "search", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 437, "col": 27, "module": "re", "call": "sub", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 438, "col": 25, "module": "re", "call": "sub", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/deepseek_vl/convert_deepseek_vl_weights_to_hf.py", "line": 141, "col": 18, "module": "re", "call": "search", "pattern": "(\\(.*?\\))", "test_string": "Example likes (A)."} -{"file": "./src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py", "line": 126, "col": 29, "module": "re", "call": "search", "pattern": "tuned4[^-]+", "test_string": "Example likes alpha4a."} -{"file": "./src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py", "line": 311, "col": 57, "module": "re", "call": "search", 
"pattern": "\\d\\d\\d\\d-\\d\\d?-\\d\\d?", "test_string": "Example likes alpha xx xx."} -{"file": "./src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py", "line": 196, "col": 11, "module": "re", "call": "match", "pattern": "(\\S+)\\.attention\\.self\\.(key|value|query)\\.(bias|weight)", "test_string": "Example likes x.alpha.alpha.alpha.alpha."} -{"file": "./src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py", "line": 196, "col": 91, "module": "re", "call": "match", "pattern": "(\\S+)\\.attention\\.output\\.dense\\.weight", "test_string": "Example likes x.alpha.alpha.alpha.alpha."} -{"file": "./src/transformers/models/ctrl/tokenization_ctrl.py", "line": 196, "col": 16, "module": "re", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} -{"file": "./src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py", "line": 153, "col": 15, "module": "re", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} -{"file": "./src/transformers/models/mllama/convert_mllama_weights_to_hf.py", "line": 369, "col": 22, "module": "re", "call": "sub", "pattern": "layers.(\\d+).", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py", "line": 60, "col": 15, "module": "re", "call": "sub", "pattern": "blocks\\.(\\d+)\\.att", "test_string": "Example likes alpha.x.alpha."} -{"file": "./src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py", "line": 62, "col": 15, "module": "re", "call": "sub", "pattern": "blocks\\.(\\d+)\\.ffn", "test_string": "Example likes alpha.x.alpha."} -{"file": "./src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py", "line": 254, "col": 20, "module": "re", "call": "match", "pattern": "mp_rank_(\\d{2})", "test_string": "Example likes mp alpha xx."} -{"file": 
"./src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py", "line": 103, "col": 15, "module": "re", "call": "sub", "pattern": "[\\(\\)\\[\\]\\<\\>\\\"]+", "test_string": "Example likes (."} -{"file": "./src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py", "line": 106, "col": 15, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/timesfm/convert_timesfm_orignal_to_hf.py", "line": 27, "col": 16, "module": "re", "call": "match", "pattern": "(.*)\\[(\\d+)\\]", "test_string": "Example likes A[x]."} -{"file": "./src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py", "line": 148, "col": 14, "module": "re", "call": "match", "pattern": "^mobilenet_v1_([^_]*)_([^_]*)$", "test_string": "alpha v1 a a"} -{"file": "./src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py", "line": 77, "col": 19, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py", "line": 78, "col": 34, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 71, "col": 11, "module": "re", "call": "sub", "pattern": "^\\(([\\d.]+[a-zA-Z]?)\\) \\\\\\[(.+?)\\\\\\]$", "test_string": "(.m) \\[A\\]"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 73, "col": 11, "module": "re", "call": "sub", "pattern": "^\\\\\\[(.+?)\\\\\\] \\(([\\d.]+[a-zA-Z]?)\\)$", "test_string": "\\[A\\] (.m)"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 75, "col": 11, "module": "re", "call": "sub", "pattern": "^\\\\\\[(.+?)\\\\\\] \\(([\\d.]+[a-zA-Z]?)\\) (\\\\\\[.+?\\\\\\])$", "test_string": "\\[A\\] (.m) \\[A\\]"} -{"file": 
"./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 85, "col": 11, "module": "re", "call": "sub", "pattern": "\\\\mbox{ ?\\\\boldmath\\$(.*?)\\$}", "test_string": "Example likes \\alpha{ \\alpha$A$}."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 87, "col": 11, "module": "re", "call": "sub", "pattern": "((?:http|ftp|https):\\/\\/(?:[\\w_-]+(?:(?:\\.[\\w_-]+)+))(?:[\\w.,@?^=%&:\\/~+#-]*[\\w@?^=%&\\/~+#-]))", "test_string": "alice@example.com"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 93, "col": 11, "module": "re", "call": "sub", "pattern": "```\\s*(.+?)\\s*```", "test_string": "Example likes ```alpha```."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 119, "col": 16, "module": "re", "call": "search", "pattern": ". ([-*]) ", "test_string": "Example likes A."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 446, "col": 15, "module": "re", "call": "sub", "pattern": "## References\\n+\\[MISSING_PAGE_POST(:\\d+)?\\]", "test_string": "Example likes ## alpha\n[alpha alpha alpha:x]."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 480, "col": 21, "module": "re", "call": "sub", "pattern": "(^.+)\\\\begin{tab", "test_string": "Example likes A\\alpha{alpha."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 500, "col": 21, "module": "re", "call": "sub", "pattern": "(?:\\n|^)#+ \\d*\\W? 
?(.{100,})", "test_string": "Example likes # xx alpha."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 508, "col": 21, "module": "re", "call": "sub", "pattern": "^#+ (?:[\\d+\\.]+|[ixv\\.]+)?\\s*(?:$|\\n\\s*)", "test_string": "Example likes # ix\nx."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 519, "col": 21, "module": "re", "call": "sub", "pattern": "^\\* \\[\\d+\\](\\s?[A-W]\\.+\\s?){10,}.*$", "test_string": "* [x]xL.alpha.alpha.alpha.alpha.xA"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 521, "col": 21, "module": "re", "call": "sub", "pattern": "^(\\* \\[\\d+\\])\\[\\](.*)$", "test_string": "* [x][]A"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 523, "col": 21, "module": "re", "call": "sub", "pattern": "(^\\w\\n\\n|\\n\\n\\w$)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 525, "col": 21, "module": "re", "call": "sub", "pattern": "([\\s.,()])_([a-zA-Z0-9])__([a-zA-Z0-9]){1,3}_([\\s.,:()])", "test_string": "Example likes . m m .."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 530, "col": 21, "module": "re", "call": "sub", "pattern": "([\\s.,\\d])_([a-zA-Z0-9])_([\\s.,\\d;])", "test_string": "Example likes . 
m .."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 532, "col": 21, "module": "re", "call": "sub", "pattern": "(\\nFootnote .*?:) (?:footnotetext|thanks):\\W*(.*(?:\\n\\n|$))", "test_string": "Example likes alpha A: alpha:xA."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 538, "col": 21, "module": "re", "call": "sub", "pattern": "\\[FOOTNOTE:.+?\\](.*?)\\[ENDFOOTNOTE\\]", "test_string": "Example likes [alpha:A]A[alpha]."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 544, "col": 11, "module": "re", "call": "match", "pattern": "[A-Z0-9,;:]$", "test_string": "Example likes M."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 565, "col": 21, "module": "re", "call": "sub", "pattern": "\\\\begin{tabular}{([clr ]){2,}}\\s*[& ]*\\s*(\\\\\\\\)? \\\\end{tabular}", "test_string": "Example likes \\alpha{alpha}{cc}x&x\\\\ \\alpha{alpha}."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 571, "col": 21, "module": "re", "call": "sub", "pattern": "(\\*\\*S\\. A\\. B\\.\\*\\*\\n+){2,}", "test_string": "Example likes **S. A. B.**\n**S. A. 
B.**."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 573, "col": 21, "module": "re", "call": "sub", "pattern": "^#+( [\\[\\d\\w])?$", "test_string": "# ["} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 575, "col": 21, "module": "re", "call": "sub", "pattern": "^\\.\\s*$", "test_string": ".x"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 577, "col": 21, "module": "re", "call": "sub", "pattern": "\\n{3,}", "test_string": "Example likes ."} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 134, "col": 15, "module": "re", "call": "match", "pattern": "^[\\dixv]+((?:\\.[\\dixv])?)+$", "test_string": "i.i"} -{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 237, "col": 15, "module": "re", "call": "sub", "pattern": "(?:[\\d_]|\\*\\*)", "test_string": "Example likes ."} -{"file": "./src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/t5/tokenization_t5_fast.py", "line": 226, "col": 38, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/t5/tokenization_t5.py", "line": 278, "col": 38, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/blenderbot/tokenization_blenderbot.py", "line": 207, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": 
"./src/transformers/models/speecht5/number_normalizer.py", "line": 186, "col": 15, "module": "re", "call": "sub", "pattern": "(\\d+,\\d+)", "test_string": "Example likes x,x."} -{"file": "./src/transformers/models/deepseek_vl_hybrid/convert_deepseek_vl_hybrid_weights_to_hf.py", "line": 168, "col": 18, "module": "re", "call": "search", "pattern": "(\\(.*?\\))", "test_string": "Example likes (A)."} -{"file": "./src/transformers/models/mvp/tokenization_mvp.py", "line": 193, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py", "line": 74, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} -{"file": "./src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py", "line": 75, "col": 30, "module": "re", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/auto/configuration_auto.py", "line": 1190, "col": 33, "module": "re", "call": "search", "pattern": "^(\\s*)List options\\s*$", "test_string": "alpha alpha"} -{"file": "./src/transformers/models/auto/configuration_auto.py", "line": 1193, "col": 21, "module": "re", "call": "search", "pattern": "^(\\s*)List options\\s*$", "test_string": "alpha alpha"} -{"file": "./src/transformers/models/codegen/tokenization_codegen.py", "line": 177, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py", "line": 113, "col": 16, "module": "re", "call": "sub", "pattern": "\\s{2,}", "test_string": "Example likes xx."} -{"file": 
"./src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py", "line": 172, "col": 16, "module": "re", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} -{"file": "./src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py", "line": 77, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py", "line": 78, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/myt5/convert_myt5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/myt5/convert_myt5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/flaubert/tokenization_flaubert.py", "line": 71, "col": 11, "module": "re", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} -{"file": "./src/transformers/models/flaubert/tokenization_flaubert.py", "line": 96, "col": 11, "module": "re", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} -{"file": "./src/transformers/models/big_bird/tokenization_big_bird.py", "line": 218, "col": 19, "module": "re", "call": "sub", "pattern": " (\\[(MASK|SEP)\\])", "test_string": "Example likes [alpha]."} -{"file": "./src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py", "line": 136, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py", "line": 137, "col": 30, "module": "re", "call": "split", 
"pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/led/tokenization_led.py", "line": 199, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py", "line": 85, "col": 26, "module": "re", "call": "sub", "pattern": "layers_(\\d+)", "test_string": "Example likes alpha x."} -{"file": "./src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py", "line": 90, "col": 26, "module": "re", "call": "sub", "pattern": "layers_(\\d+)", "test_string": "Example likes alpha x."} -{"file": "./src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py", "line": 93, "col": 16, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py", "line": 94, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 57, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.functional(?:_(\\d+))?\\.layers", "test_string": "Example likes alpha.alpha x.alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 69, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 70, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d_1\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d 1.."} -{"file": 
"./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 71, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d_2\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d 2.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 72, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.group_normalization\\.", "test_string": "Example likes alpha.alpha.alpha.alpha alpha.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 73, "col": 11, "module": "re", "call": "sub", "pattern": "mha_with_rope\\.key_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 74, "col": 11, "module": "re", "call": "sub", "pattern": "mha_with_rope\\.query_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 75, "col": 11, "module": "re", "call": "sub", "pattern": "mha_with_rope\\.value_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 76, "col": 11, "module": "re", "call": "sub", "pattern": "mha_with_rope\\.output_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 77, "col": 11, "module": "re", "call": "sub", "pattern": "mha_precomputed_kv\\.key_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 78, "col": 11, "module": "re", "call": "sub", "pattern": "mha_precomputed_kv\\.query_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} -{"file": 
"./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 79, "col": 11, "module": "re", "call": "sub", "pattern": "mha_precomputed_kv\\.value_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 80, "col": 11, "module": "re", "call": "sub", "pattern": "mha_precomputed_kv\\.output_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 81, "col": 11, "module": "re", "call": "sub", "pattern": "mha_causal_with_rope\\.key_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 82, "col": 11, "module": "re", "call": "sub", "pattern": "mha_causal_with_rope\\.query_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 83, "col": 11, "module": "re", "call": "sub", "pattern": "mha_causal_with_rope\\.value_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 84, "col": 11, "module": "re", "call": "sub", "pattern": "mha_causal_with_rope\\.output_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 85, "col": 11, "module": "re", "call": "sub", "pattern": "layer_normalization\\.", "test_string": "Example likes alpha alpha.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 86, "col": 11, "module": "re", "call": "sub", "pattern": "layer_normalization_1\\.", "test_string": "Example likes alpha alpha 1.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", 
"line": 87, "col": 11, "module": "re", "call": "sub", "pattern": "layer_normalization_2\\.", "test_string": "Example likes alpha alpha 2.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 88, "col": 11, "module": "re", "call": "sub", "pattern": "vars\\.0", "test_string": "Example likes alpha.0."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 89, "col": 11, "module": "re", "call": "sub", "pattern": "vars\\.1", "test_string": "Example likes alpha.1."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 90, "col": 11, "module": "re", "call": "sub", "pattern": "layers\\.reversible_embedding", "test_string": "Example likes alpha.alpha alpha."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 64, "col": 15, "module": "re", "call": "sub", "pattern": "functional\\.layers\\.dense\\.", "test_string": "Example likes alpha.alpha.alpha.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 65, "col": 15, "module": "re", "call": "sub", "pattern": "functional\\.layers\\.dense_1\\.", "test_string": "Example likes alpha.alpha.alpha 1.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 67, "col": 15, "module": "re", "call": "sub", "pattern": "functional\\.layers\\.sequential\\.layers\\.dense\\.", "test_string": "Example likes alpha.alpha.alpha.alpha.alpha.."} -{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 68, "col": 15, "module": "re", "call": "sub", "pattern": "functional\\.layers\\.sequential\\.layers\\.dense_1\\.", "test_string": "Example likes alpha.alpha.alpha.alpha.alpha 1.."} -{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 229, "col": 11, "module": "re", "call": "sub", "pattern": "(-+|~+|!+|\"+|;+|\\?+|\\++|,+|\\)+|\\(+|\\\\+|\\/+|\\*+|\\[+|\\]+|}+|{+|\\|+|_+)", "test_string": 
"/home/user/readme.txt"} -{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 230, "col": 11, "module": "re", "call": "sub", "pattern": "\\s*\\n\\s*", "test_string": "Example likes x\nx."} -{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 231, "col": 11, "module": "re", "call": "sub", "pattern": "[^\\S\\n]+", "test_string": "Example likes ."} -{"file": "./src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py", "line": 84, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} -{"file": "./src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py", "line": 85, "col": 30, "module": "re", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/zamba2/modular_zamba2.py", "line": 990, "col": 46, "module": "re", "call": "compile", "pattern": "^shared_transformer\\.feed_forward\\.gate_up_proj_adapter_list\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.alpha up alpha alpha alpha.\" alpha id r\".0.alpha"} -{"file": "./src/transformers/models/zamba2/modular_zamba2.py", "line": 1001, "col": 55, "module": "re", "call": "compile", "pattern": "^shared_transformer\\.self_attn\\.\"\n + r\"(?:linear_q_adapter_list|linear_k_adapter_list|linear_v_adapter_list)\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.\" r\"alpha q alpha alpha.\" alpha id r\".0.alpha"} -{"file": "./src/transformers/models/zamba2/modeling_zamba2.py", "line": 1430, "col": 46, "module": "re", "call": "compile", "pattern": "^shared_transformer\\.feed_forward\\.gate_up_proj_adapter_list\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.alpha up alpha alpha alpha.\" alpha id r\".0.alpha"} -{"file": "./src/transformers/models/zamba2/modeling_zamba2.py", "line": 1441, 
"col": 55, "module": "re", "call": "compile", "pattern": "^shared_transformer\\.self_attn\\.\"\n + r\"(?:linear_q_adapter_list|linear_k_adapter_list|linear_v_adapter_list)\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.\" r\"alpha q alpha alpha.\" alpha id r\".0.alpha"} -{"file": "./src/transformers/models/deberta/tokenization_deberta.py", "line": 181, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py", "line": 88, "col": 66, "module": "re", "call": "fullmatch", "pattern": "layer_\\d+", "test_string": "Example likes alpha x."} -{"file": "./src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py", "line": 89, "col": 34, "module": "re", "call": "search", "pattern": "layer_(\\d+)", "test_string": "Example likes alpha x."} -{"file": "./src/transformers/models/longformer/tokenization_longformer.py", "line": 201, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/xlm/tokenization_xlm.py", "line": 71, "col": 11, "module": "re", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} -{"file": "./src/transformers/models/xlm/tokenization_xlm.py", "line": 96, "col": 11, "module": "re", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} -{"file": "./src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py", "line": 67, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py", "line": 68, 
"col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py", "line": 151, "col": 15, "module": "re", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z0-9_]+)", "test_string": "Example likes alpha.x.m.m."} -{"file": "./src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py", "line": 406, "col": 15, "module": "re", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} -{"file": "./src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py", "line": 781, "col": 15, "module": "re", "call": "compile", "pattern": "transformer.h\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} -{"file": "./src/transformers/models/whisper/tokenization_whisper.py", "line": 311, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/whisper/tokenization_whisper.py", "line": 312, "col": 29, "module": "re", "call": "compile", "pattern": "<\\|(\\d+\\.\\d+)\\|>", "test_string": "Example likes <|x.x|>."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 84, "col": 12, "module": "re", "call": "sub", "pattern": "[<\\[][^>\\]]*[>\\]]", "test_string": "Example likes <>>."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 85, "col": 12, "module": "re", "call": "sub", "pattern": "\\(([^)]+?)\\)", "test_string": "Example likes (a)."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 91, "col": 12, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 440, 
"col": 19, "module": "re", "call": "split", "pattern": "\\band\\s+a\\s+half\\b", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 457, "col": 12, "module": "re", "call": "sub", "pattern": "([a-z])([0-9])", "test_string": "Example likes m4."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 458, "col": 12, "module": "re", "call": "sub", "pattern": "([0-9])([a-z])", "test_string": "Example likes 4m."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 461, "col": 12, "module": "re", "call": "sub", "pattern": "([0-9])\\s+(st|nd|rd|th|s)\\b", "test_string": "Example likes 4alpha."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 482, "col": 12, "module": "re", "call": "sub", "pattern": "([€£$])([0-9]+) (?:and )?¢([0-9]{1,2})\\b", "test_string": "Example likes €4 alpha ¢4."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 483, "col": 12, "module": "re", "call": "sub", "pattern": "[€£$]0.([0-9]{1,2})\\b", "test_string": "Example likes €0A4."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 486, "col": 12, "module": "re", "call": "sub", "pattern": "\\b1(s?)\\b", "test_string": "Example likes 1s."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 576, "col": 12, "module": "re", "call": "sub", "pattern": "[<\\[][^>\\]]*[>\\]]", "test_string": "Example likes <>>."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 577, "col": 12, "module": "re", "call": "sub", "pattern": "\\(([^)]+?)\\)", "test_string": "Example likes (a)."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 579, "col": 12, "module": "re", "call": "sub", "pattern": "\\s+'", "test_string": "Example likes x'."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 584, "col": 12, "module": "re", "call": 
"sub", "pattern": "(\\d),(\\d)", "test_string": "Example likes x,x."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 585, "col": 12, "module": "re", "call": "sub", "pattern": "\\.([^0-9]|$)", "test_string": "Example likes .4."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 592, "col": 12, "module": "re", "call": "sub", "pattern": "[.$¢€£]([^0-9])", "test_string": "Example likes .4."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 593, "col": 12, "module": "re", "call": "sub", "pattern": "([^0-9])%", "test_string": "Example likes 4%."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 595, "col": 12, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 246, "col": 15, "module": "re", "call": "match", "pattern": "^\\d+(\\.\\d+)?$", "test_string": "x.x"} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 89, "col": 25, "module": "re", "call": "findall", "pattern": "\\X", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 243, "col": 51, "module": "re", "call": "match", "pattern": "^\\d+(\\.\\d+)?$", "test_string": "x.x"} -{"file": "./src/transformers/models/whisper/tokenization_whisper_fast.py", "line": 137, "col": 29, "module": "re", "call": "compile", "pattern": "<\\|(\\d+\\.\\d+)\\|>", "test_string": "Example likes <|x.x|>."} -{"file": "./src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py", "line": 59, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} -{"file": "./src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py", "line": 60, "col": 30, "module": "re", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} 
-{"file": "./src/transformers/models/gpt2/tokenization_gpt2.py", "line": 167, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/clip/tokenization_clip.py", "line": 77, "col": 11, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/clip/tokenization_clip.py", "line": 318, "col": 19, "module": "re", "call": "compile", "pattern": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/udop/tokenization_udop.py", "line": 340, "col": 38, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": "./src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/bart/tokenization_bart.py", "line": 194, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/vits/tokenization_vits.py", "line": 39, "col": 24, "module": "re", "call": "compile", "pattern": "[^\\x00-\\x7F]", "test_string": "Example likes ?."} -{"file": "./src/transformers/models/vits/tokenization_vits.py", "line": 200, "col": 28, "module": "re", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": 
"./src/transformers/models/idefics3/processing_idefics3.py", "line": 145, "col": 53, "module": "re", "call": "compile", "pattern": "(\\n?\\n?|\\n?)+", "test_string": "Example likes ."} -{"file": "./src/transformers/models/clvp/tokenization_clvp.py", "line": 177, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 209, "col": 15, "module": "re", "call": "sub", "pattern": "([0-9][0-9,]+[0-9])", "test_string": "Example likes 444."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 210, "col": 15, "module": "re", "call": "sub", "pattern": "£([0-9,]*[0-9])", "test_string": "Example likes £44."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 211, "col": 15, "module": "re", "call": "sub", "pattern": "\\$([0-9.,]*[0-9])", "test_string": "Example likes $44."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 212, "col": 15, "module": "re", "call": "sub", "pattern": "([0-9]++\\.[0-9]+)", "test_string": "Example likes a.4."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 213, "col": 15, "module": "re", "call": "sub", "pattern": "[0-9]++(st|nd|rd|th)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 214, "col": 15, "module": "re", "call": "sub", "pattern": "[0-9]+", "test_string": "Example likes 4."} -{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 229, "col": 22, "module": "re", "call": "compile", "pattern": "\\s+", "test_string": "Example likes x."} -{"file": "./src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py", "line": 69, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} -{"file": 
"./src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py", "line": 70, "col": 30, "module": "re", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/florence2/processing_florence2.py", "line": 681, "col": 32, "module": "re", "call": "sub", "pattern": "^", "test_string": "Example likes ."} -{"file": "./src/transformers/models/florence2/processing_florence2.py", "line": 702, "col": 58, "module": "re", "call": "finditer", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/florence2/modular_florence2.py", "line": 874, "col": 32, "module": "re", "call": "sub", "pattern": "^", "test_string": "Example likes ."} -{"file": "./src/transformers/models/florence2/modular_florence2.py", "line": 895, "col": 58, "module": "re", "call": "finditer", "pattern": "", "test_string": "Example likes ."} -{"file": "./src/transformers/models/herbert/tokenization_herbert.py", "line": 53, "col": 11, "module": "re", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} -{"file": "./src/transformers/models/herbert/tokenization_herbert.py", "line": 78, "col": 11, "module": "re", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 58, "col": 14, "module": "re", "call": "sub", "pattern": "blocks", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 60, "col": 14, "module": "re", "call": "sub", "pattern": "attn", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 62, "col": 14, "module": "re", "call": "sub", "pattern": "norm1", "test_string": "Example likes alpha1."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 64, "col": 14, "module": "re", 
"call": "sub", "pattern": "norm2", "test_string": "Example likes alpha2."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 66, "col": 14, "module": "re", "call": "sub", "pattern": "encoder.norm", "test_string": "Example likes alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 68, "col": 14, "module": "re", "call": "sub", "pattern": "encoder.patch_embed.proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 71, "col": 14, "module": "re", "call": "sub", "pattern": "encoder.pos_embed", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 73, "col": 14, "module": "re", "call": "sub", "pattern": "encoder.cls_token", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 76, "col": 14, "module": "re", "call": "sub", "pattern": "self_attn.proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py", "line": 150, "col": 15, "module": "re", "call": "sub", "pattern": "@@$", "test_string": "alice@example.com"} -{"file": "./src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py", "line": 150, "col": 67, "module": "re", "call": "sub", "pattern": "$", "test_string": "Example likes ."} -{"file": "./src/transformers/models/phobert/tokenization_phobert.py", "line": 279, "col": 16, "module": "re", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} -{"file": "./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 202, "col": 20, "module": "re", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} -{"file": 
"./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 203, "col": 20, "module": "re", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 204, "col": 20, "module": "re", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py", "line": 78, "col": 15, "module": "re", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} -{"file": "./src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py", "line": 79, "col": 30, "module": "re", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} -{"file": "./src/transformers/models/luke/tokenization_luke.py", "line": 322, "col": 19, "module": "re", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} -{"file": "./src/transformers/models/donut/processing_donut.py", "line": 112, "col": 30, "module": "re", "call": "search", "pattern": "", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/commands/add_new_model_like.py", "line": 179, "col": 11, "module": "re", "call": "search", "pattern": "(?:tokenization)|(?:image_processing)_auto_fast.py", "test_string": "Example likes alpha alpha alpha alpha."} -{"file": "./src/transformers/commands/add_new_model_like.py", "line": 231, "col": 29, "module": "re", "call": "sub", "pattern": "# ?", "test_string": "Example likes #."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 256, "col": 34, "module": "re", "call": "search", "pattern": " image_processing_class = .*", "test_string": "Example likes alpha alpha alpha = A."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 280, "col": 21, "module": 
"re", "call": "search", "pattern": "^# coding=utf-8\\n(#[^\\n]*\\n)*", "test_string": "Example likes # alpha=alpha 8\n#a."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 302, "col": 21, "module": "re", "call": "sub", "pattern": "# Copyright (\\d+)\\s", "test_string": "Example likes # alpha xx."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 304, "col": 12, "module": "re", "call": "search", "pattern": "^\"\"\"Image processor.*$", "test_string": "\"\"\"alpha alpha"} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 365, "col": 12, "module": "re", "call": "search", "pattern": "def __init__.*?def ", "test_string": "Example likes alpha alpha alpha."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 379, "col": 19, "module": "re", "call": "findall", "pattern": "= (.*?)(?:,|\\))", "test_string": "Example likes = A,."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 470, "col": 27, "module": "re", "call": "findall", "pattern": "class (\\w*ImageProcessor)", "test_string": "Example likes alpha alpha."} -{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 48, "col": 22, "module": "re", "call": "compile", "pattern": "if TYPE_CHECKING:\\n(?P.*?)(?=\\s*else:)", "test_string": "Example likes if alpha alpha:\nalpha:."} -{"file": "./src/transformers/commands/serving.py", "line": 1097, "col": 44, "module": "re", "call": "search", "pattern": "\\\"name\\\": \\\"(.*?)\\\"", "test_string": "Example likes \"alpha\": \"A\"."} -{"file": "./src/transformers/pipelines/document_question_answering.py", "line": 498, "col": 17, "module": "re", "call": "search", "pattern": "(.*)", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/pipelines/document_question_answering.py", "line": 493, "col": 19, "module": "re", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} -{"file": 
"./src/transformers/utils/chat_template_utils.py", "line": 59, "col": 17, "module": "re", "call": "compile", "pattern": "^(.*?)[\\n\\s]*(Args:|Returns:|Raises:|\\Z)", "test_string": "Example likes A\nalpha:."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 61, "col": 10, "module": "re", "call": "compile", "pattern": "\\n\\s*Args:\\n\\s*(.*?)[\\n\\s]*(Returns:|Raises:|\\Z)", "test_string": "Example likes alpha:\nxA\nalpha:."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 63, "col": 16, "module": "re", "call": "compile", "pattern": "\n(?:^|\\n) # Match the start of the args block, or a newline\n\\s*(\\w+):\\s* # Capture the argument name and strip spacing\n(.*?)\\s* # Capture the argument description, which can span multiple lines, and strip trailing spacing\n(?=\\n\\s*\\w+:|\\Z) # Stop when you hit the next argument or the end of the block\n", "test_string": "Example likes # alpha alpha alpha of alpha alpha alpha, or a alpha\nxx:x # alpha alpha alpha alpha alpha alpha alpha\nAx # alpha alpha alpha alpha, alpha alpha alpha alpha alpha, alpha alpha alpha alpha xx: # alpha alpha alpha alpha alpha alpha alpha or alpha alpha of alpha alpha."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 73, "col": 13, "module": "re", "call": "compile", "pattern": "\\n\\s*Returns:\\n\\s*(.*?)[\\n\\s]*(Raises:|\\Z)", "test_string": "Example likes alpha:\nxA\nalpha:."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 362, "col": 23, "module": "re", "call": "search", "pattern": "\\(choices:\\s*(.*?)\\)\\s*$", "test_string": "Example likes (alpha:xA)x."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 230, "col": 31, "module": "re", "call": "sub", "pattern": "\\s*\\n+\\s*", "test_string": "Example likes x\nx."} -{"file": "./src/transformers/utils/chat_template_utils.py", "line": 476, "col": 44, "module": "re", "call": "search", "pattern": "\\{\\%-?\\s*generation\\s*-?\\%\\}", 
"test_string": "Example likes {% alpha %}."} -{"file": "./src/transformers/utils/notebook.py", "line": 357, "col": 40, "module": "re", "call": "sub", "pattern": "\\_loss$", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/versions.py", "line": 69, "col": 7, "module": "re", "call": "match", "pattern": "^[\\w_\\-\\d]+$", "test_string": ""} -{"file": "./src/transformers/utils/versions.py", "line": 72, "col": 16, "module": "re", "call": "findall", "pattern": "^([^!=<>\\s]+)([\\s!=<>]{1,2}.+)", "test_string": "Example likes !!A."} -{"file": "./src/transformers/utils/versions.py", "line": 82, "col": 20, "module": "re", "call": "findall", "pattern": "^([\\s!=<>]{1,2})(.+)", "test_string": "Example likes !A."} -{"file": "./src/transformers/utils/hub.py", "line": 252, "col": 13, "module": "re", "call": "search", "pattern": "snapshots/([^/]+)/", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 71, "col": 17, "module": "re", "call": "compile", "pattern": "\\[(.+?)\\]\\((https://huggingface\\.co/.+?)\\)", "test_string": "http://example.com/docs"} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 994, "col": 20, "module": "re", "call": "compile", "pattern": "(of shape\\s*(?:`.*?`|\\(.*?\\)))", "test_string": "Example likes of alpha`A`."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1002, "col": 22, "module": "re", "call": "compile", "pattern": "(defaults to \\s*[^)]*)", "test_string": "Example likes alpha to xa."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1023, "col": 12, "module": "re", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example|Return)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1029, "col": 19, "module": "re", "call": "compile", "pattern": "(?:Args:)(\\n.*)?(\\n)?$", "test_string": "Example likes alpha:\nA."} -{"file": "./src/transformers/utils/auto_docstring.py", 
"line": 1159, "col": 23, "module": "re", "call": "findall", "pattern": "{(.*?)}", "test_string": "Example likes {A}."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1601, "col": 20, "module": "re", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1065, "col": 32, "module": "re", "call": "sub", "pattern": "^", "test_string": "Example likes ."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1297, "col": 25, "module": "re", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1299, "col": 25, "module": "re", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": "Example likes alpha[A]."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1642, "col": 52, "module": "re", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1599, "col": 28, "module": "re", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Return)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1499, "col": 33, "module": "re", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1501, "col": 33, "module": "re", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": "Example likes alpha[A]."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1842, "col": 33, "module": "re", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} -{"file": "./src/transformers/utils/auto_docstring.py", "line": 1844, "col": 33, "module": "re", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": 
"Example likes alpha[A]."} -{"file": "./src/transformers/utils/doc.py", "line": 100, "col": 13, "module": "re", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} -{"file": "./src/transformers/utils/doc.py", "line": 124, "col": 20, "module": "re", "call": "sub", "pattern": "^(\\s+)(\\S+)(\\s+)", "test_string": "Example likes alpha."} -{"file": "./src/transformers/utils/doc.py", "line": 125, "col": 20, "module": "re", "call": "sub", "pattern": ":\\s*\\n\\s*(\\S)", "test_string": "Example likes :x\nxx."} -{"file": "./src/transformers/utils/doc.py", "line": 1085, "col": 15, "module": "re", "call": "match", "pattern": "^refs/pr/\\\\d+", "test_string": "/home/user/readme.txt"} -{"file": "./src/transformers/utils/doc.py", "line": 140, "col": 33, "module": "re", "call": "search", "pattern": "^\\s*(Args|Parameters):\\s*$", "test_string": "alpha:x"} -{"file": "./src/transformers/utils/doc.py", "line": 1105, "col": 33, "module": "re", "call": "search", "pattern": "^\\s*Returns?:\\s*$", "test_string": "alpha:x"} -{"file": "./src/transformers/data/metrics/squad_metrics.py", "line": 40, "col": 16, "module": "re", "call": "compile", "pattern": "\\b(a|an|the)\\b", "test_string": "Example likes alpha."} -{"file": "./tests/test_modeling_common.py", "line": 905, "col": 27, "module": "re", "call": "search", "pattern": "^# Copyright (\\d{4})", "test_string": "Example likes # alpha alpha."} -{"file": "./tests/test_modeling_common.py", "line": 953, "col": 19, "module": "re", "call": "search", "pattern": "\\.parametrizations\\..*?\\.original[01]", "test_string": "Example likes .alpha.A.alpha0."} -{"file": "./tests/test_tokenization_common.py", "line": 270, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/test_pipeline_mixin.py", "line": 918, "col": 17, "module": "re", "call": "match", "pattern": "(\\w+)\\W", "test_string": "Example likes xx."} -{"file": "./tests/models/janus/test_modeling_janus.py", 
"line": 237, "col": 23, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} -{"file": "./tests/models/janus/test_modeling_janus.py", "line": 242, "col": 23, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} -{"file": "./tests/models/whisper/test_modeling_whisper.py", "line": 2533, "col": 31, "module": "re", "call": "split", "pattern": "<\\|[\\d\\.]+\\|>", "test_string": "Example likes <|.|>."} -{"file": "./tests/models/whisper/test_modeling_whisper.py", "line": 2537, "col": 28, "module": "re", "call": "findall", "pattern": "<\\|[\\d\\.]+\\|>", "test_string": "Example likes <|.|>."} -{"file": "./tests/models/deepseek_vl_hybrid/test_modeling_deepseek_vl_hybrid.py", "line": 258, "col": 24, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} -{"file": "./tests/models/deepseek_vl_hybrid/test_modeling_deepseek_vl_hybrid.py", "line": 267, "col": 24, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} -{"file": "./tests/models/byt5/test_tokenization_byt5.py", "line": 62, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/models/markuplm/test_tokenization_markuplm.py", "line": 1553, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/models/layoutlmv2/test_tokenization_layoutlmv2.py", "line": 1768, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/models/deepseek_vl/test_modeling_deepseek_vl.py", "line": 222, "col": 23, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} -{"file": "./tests/models/deepseek_vl/test_modeling_deepseek_vl.py", "line": 227, "col": 23, "module": "re", "call": "finditer", "pattern": "Attention(?!Pool)", 
"test_string": "Example likes alpha."} -{"file": "./tests/models/perceiver/test_tokenization_perceiver.py", "line": 63, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/models/t5/test_tokenization_t5.py", "line": 374, "col": 25, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./tests/models/t5/test_tokenization_t5.py", "line": 385, "col": 25, "module": "re", "call": "search", "pattern": "", "test_string": "Example likes ."} -{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1302, "col": 19, "module": "re", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} -{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1365, "col": 19, "module": "re", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} -{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1427, "col": 19, "module": "re", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} -{"file": "./tests/models/layoutlmv3/test_tokenization_layoutlmv3.py", "line": 1658, "col": 37, "module": "re", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} -{"file": "./tests/utils/test_attention_visualizer.py", "line": 24, "col": 10, "module": "re", "call": "compile", "pattern": "\\x1b\\[[0-9;]*m", "test_string": "Example likes \u001b[4m."} +{"file": "./setup.py", "line": 197, "col": 26, "module": "regex", "call": "findall", "pattern": "^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", "test_string": "Example likes !!A."} +{"file": "./utils/check_inits.py", "line": 50, "col": 14, "module": "regex", "call": "compile", "pattern": "is\\_([a-z_]*)_available()", "test_string": "Example likes is m alpha."} +{"file": "./utils/check_inits.py", "line": 52, "col": 29, "module": "regex", "call": "compile", "pattern": 
"^_import_structure\\s+=\\s+\\{([^\\}]+)\\}", "test_string": "Example likes alpha alpha=x{a}."} +{"file": "./utils/check_inits.py", "line": 54, "col": 30, "module": "regex", "call": "compile", "pattern": "\\s+\"\\S*\":\\s+\\[([^\\]]*)\\]", "test_string": "Example likes x\"x\":x[a]."} +{"file": "./utils/check_inits.py", "line": 56, "col": 19, "module": "regex", "call": "compile", "pattern": "^\\s*if\\s+not\\s+is\\_[a-z_]*\\_available\\(\\)", "test_string": "Example likes alpha m alpha()."} +{"file": "./utils/check_inits.py", "line": 58, "col": 28, "module": "regex", "call": "compile", "pattern": "^\\s*_import_structure\\[\"\\S*\"\\]\\.append\\(\"(\\S*)\"\\)", "test_string": "Example likes x alpha alpha[\"x\"].alpha(\"x\")."} +{"file": "./utils/check_inits.py", "line": 60, "col": 29, "module": "regex", "call": "compile", "pattern": "^\\s*_import_structure\\[\\S*\\](?:\\.extend\\(|\\s*=\\s+)\\[([^\\]]*)\\]", "test_string": "Example likes x alpha alpha[x].alpha([a]."} +{"file": "./utils/check_inits.py", "line": 62, "col": 19, "module": "regex", "call": "compile", "pattern": "^\\s+\"([^\"]+)\",", "test_string": "Example likes x\"a\",."} +{"file": "./utils/check_inits.py", "line": 64, "col": 23, "module": "regex", "call": "compile", "pattern": "^\\s+\\[([^\\]]+)\\]", "test_string": "Example likes x[a]."} +{"file": "./utils/check_inits.py", "line": 66, "col": 13, "module": "regex", "call": "compile", "pattern": "\\s+from\\s+\\S*\\s+import\\s+([^\\(\\s].*)\\n", "test_string": "Example likes alpha(A."} +{"file": "./utils/check_inits.py", "line": 68, "col": 10, "module": "regex", "call": "compile", "pattern": "^\\s*try:", "test_string": "Example likes alpha:."} +{"file": "./utils/check_inits.py", "line": 70, "col": 11, "module": "regex", "call": "compile", "pattern": "^\\s*else:", "test_string": "Example likes alpha:."} +{"file": "./utils/check_inits.py", "line": 124, "col": 22, "module": "regex", "call": "findall", "pattern": "\\[([^\\]]+)\\]", "test_string": "Example likes 
[a]."} +{"file": "./utils/check_inits.py", "line": 335, "col": 37, "module": "regex", "call": "findall", "pattern": "import_structure\\[\\\"([^\\\"]*)\\\"\\]", "test_string": "Example likes alpha alpha[\"a\"]."} +{"file": "./utils/check_dummies.py", "line": 48, "col": 14, "module": "regex", "call": "compile", "pattern": "is\\_([a-z_]*)_available()", "test_string": "Example likes is m alpha."} +{"file": "./utils/check_dummies.py", "line": 50, "col": 25, "module": "regex", "call": "compile", "pattern": "\\s+from\\s+\\S*\\s+import\\s+([^\\(\\s].*)\\n", "test_string": "Example likes alpha(A."} +{"file": "./utils/check_dummies.py", "line": 52, "col": 19, "module": "regex", "call": "compile", "pattern": "^\\s+if\\s+not\\s+\\(?is\\_[a-z_]*\\_available\\(\\)", "test_string": "Example likes alpha(is m alpha()."} +{"file": "./utils/check_config_docstrings.py", "line": 34, "col": 17, "module": "regex", "call": "compile", "pattern": "\\[(.+?)\\]\\((https://huggingface\\.co/.+?)\\)", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 65, "col": 16, "module": "regex", "call": "findall", "pattern": "https://huggingface\\.co/papers/\\d+\\.\\d+", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 66, "col": 17, "module": "regex", "call": "findall", "pattern": "https://arxiv\\.org/abs/\\d+\\.\\d+", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 67, "col": 17, "module": "regex", "call": "findall", "pattern": "https://arxiv\\.org/pdf/\\d+\\.\\d+", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 152, "col": 18, "module": "regex", "call": "findall", "pattern": "https://arxiv\\.org/abs/(\\d+\\.\\d+)", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 153, "col": 19, "module": "regex", "call": "findall", "pattern": "https://arxiv\\.org/pdf/(\\d+\\.\\d+)", "test_string": "http://example.com/docs"} +{"file": 
"./utils/add_dates.py", "line": 72, "col": 26, "module": "regex", "call": "findall", "pattern": "https://[^\\s\\)]+", "test_string": "http://example.com/docs"} +{"file": "./utils/add_dates.py", "line": 204, "col": 23, "module": "regex", "call": "finditer", "pattern": "-->", "test_string": "Example likes >."} +{"file": "./utils/add_dates.py", "line": 212, "col": 27, "module": "regex", "call": "finditer", "pattern": "-->", "test_string": "Example likes >."} +{"file": "./utils/scan_skipped_tests.py", "line": 55, "col": 19, "module": "regex", "call": "search", "pattern": "reason\\s*=\\s*[\"\\'](.*?)[\"\\']", "test_string": "Example likes alpha=x\"A\"."} +{"file": "./utils/scan_skipped_tests.py", "line": 58, "col": 19, "module": "regex", "call": "search", "pattern": "\\((?:.*?,\\s*)?[\"\\'](.*?)[\"\\']\\)", "test_string": "Example likes (A,x\"A\")."} +{"file": "./utils/scan_skipped_tests.py", "line": 70, "col": 14, "module": "regex", "call": "compile", "pattern": "((?:^\\s*@.*?\\n)*?)^\\s*def\\s+(test_[A-Za-z0-9_]+)\\b", "test_string": "alice@example.com"} +{"file": "./utils/scan_skipped_tests.py", "line": 40, "col": 25, "module": "regex", "call": "findall", "pattern": "^\\s*def\\s+(test_[A-Za-z0-9_]+)", "test_string": "Example likes alpha M."} +{"file": "./utils/update_metadata.py", "line": 55, "col": 16, "module": "regex", "call": "compile", "pattern": "(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration|ForRetrieval)", "test_string": "Example likes alpha."} +{"file": "./utils/modular_model_detector.py", "line": 579, "col": 14, "module": "regex", "call": "compile", "pattern": "(?:^|[\\*_`\\s>])(?:this|the)\\s+model\\s+was\\s+released\\s+on\\s+(\\d{4}-\\d{2}-\\d{2})\\b", "test_string": "Example likes alpha xx xx."} +{"file": "./utils/modular_model_detector.py", "line": 171, "col": 11, "module": "regex", "call": "sub", "pattern": "(\"\"\"|\\'\\'\\')(?:.|\\n)*?\\1", "test_string": "Example likes \"\"\"A\"\"\"."} +{"file": "./utils/modular_model_detector.py", "line": 
172, "col": 11, "module": "regex", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} +{"file": "./utils/modular_model_detector.py", "line": 158, "col": 11, "module": "regex", "call": "sub", "pattern": "[^a-z0-9]+", "test_string": "Example likes m."} +{"file": "./utils/modular_model_detector.py", "line": 186, "col": 15, "module": "regex", "call": "findall", "pattern": "\\b[a-zA-Z_][a-zA-Z0-9_]*\\b", "test_string": "Example likes mm."} +{"file": "./utils/modular_model_detector.py", "line": 199, "col": 12, "module": "regex", "call": "match", "pattern": "^([A-Z][a-z0-9]+)", "test_string": "Example likes Mm."} +{"file": "./utils/modular_model_detector.py", "line": 199, "col": 52, "module": "regex", "call": "match", "pattern": "^([A-Za-z0-9]+)", "test_string": "Example likes M."} +{"file": "./utils/modular_model_detector.py", "line": 220, "col": 21, "module": "regex", "call": "sub", "pattern": "\\d+", "test_string": "Example likes x."} +{"file": "./utils/modular_model_detector.py", "line": 226, "col": 25, "module": "regex", "call": "sub", "pattern": "\\d+", "test_string": "Example likes x."} +{"file": "./utils/modular_model_detector.py", "line": 173, "col": 63, "module": "regex", "call": "match", "pattern": "\\s*(from|import)\\s+", "test_string": "Example likes alpha."} +{"file": "./utils/sort_auto_mappings.py", "line": 45, "col": 20, "module": "regex", "call": "compile", "pattern": "[A-Z_]+_MAPPING(\\s+|_[A-Z_]+\\s+)=\\s+OrderedDict", "test_string": "Example likes M alpha Mx=alpha."} +{"file": "./utils/sort_auto_mappings.py", "line": 47, "col": 17, "module": "regex", "call": "compile", "pattern": "\\s*\\(\\s*\"(\\S[^\"]+)\"", "test_string": "Example likes x(x\"xa\"."} +{"file": "./utils/sort_auto_mappings.py", "line": 71, "col": 25, "module": "regex", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} +{"file": "./utils/modular_model_converter.py", "line": 1688, "col": 14, "module": "regex", "call": "search", "pattern": 
"modular_(.*)(?=\\.py$)", "test_string": "Example likes alpha A.py."} +{"file": "./utils/modular_model_converter.py", "line": 130, "col": 11, "module": "regex", "call": "findall", "pattern": "# Copied from", "test_string": "Example likes # alpha alpha."} +{"file": "./utils/modular_model_converter.py", "line": 993, "col": 15, "module": "regex", "call": "match", "pattern": "\\ndef .*\\(.*\\):\\n raise.*Error\\(.*", "test_string": "Example likes alpha A(A): alpha(A."} +{"file": "./utils/modular_model_converter.py", "line": 98, "col": 44, "module": "regex", "call": "findall", "pattern": "[A-Z][^A-Z]*", "test_string": "Example likes MM."} +{"file": "./utils/modular_model_converter.py", "line": 282, "col": 32, "module": "regex", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} +{"file": "./utils/modular_model_converter.py", "line": 283, "col": 32, "module": "regex", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} +{"file": "./utils/modular_model_converter.py", "line": 289, "col": 32, "module": "regex", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} +{"file": "./utils/modular_model_converter.py", "line": 290, "col": 32, "module": "regex", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} +{"file": "./utils/modular_model_converter.py", "line": 306, "col": 32, "module": "regex", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} +{"file": "./utils/modular_model_converter.py", "line": 307, "col": 32, "module": "regex", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} +{"file": "./utils/modular_model_converter.py", "line": 323, "col": 32, "module": "regex", "call": "sub", "pattern": "#.*", "test_string": "Example likes #A."} +{"file": "./utils/modular_model_converter.py", "line": 324, "col": 32, "module": "regex", "call": "sub", "pattern": "\\ *\\n", "test_string": "Example likes ."} +{"file": "./utils/modular_model_converter.py", "line": 1702, "col": 
32, "module": "regex", "call": "search", "pattern": "(src/transformers/.*|examples/.*)", "test_string": "/home/user/readme.txt"} +{"file": "./utils/modular_model_converter.py", "line": 166, "col": 50, "module": "regex", "call": "search", "pattern": "\\\"\\\"\\\"[\\s\\S]*\\\"\\\"\\\"", "test_string": "Example likes \"\"\"x\"\"\"."} +{"file": "./utils/get_pr_run_slow_jobs.py", "line": 21, "col": 11, "module": "regex", "call": "compile", "pattern": "src/transformers/(models/.*)/modeling_.*\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./utils/get_pr_run_slow_jobs.py", "line": 22, "col": 11, "module": "regex", "call": "compile", "pattern": "src/transformers/(quantizers/quantizer_.*)\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./utils/get_pr_run_slow_jobs.py", "line": 25, "col": 11, "module": "regex", "call": "compile", "pattern": "tests/(models/.*)/test_.*\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./utils/get_pr_run_slow_jobs.py", "line": 26, "col": 11, "module": "regex", "call": "compile", "pattern": "tests/(quantization/.*)/test_.*\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./utils/get_pr_run_slow_jobs.py", "line": 29, "col": 11, "module": "regex", "call": "compile", "pattern": "src/transformers/(models/.*)/.*\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./utils/notification_service_doc_tests.py", "line": 51, "col": 11, "module": "regex", "call": "search", "pattern": "_ \\[doctest\\]", "test_string": "Example likes [alpha]."} +{"file": "./utils/release.py", "line": 60, "col": 17, "module": "regex", "call": "compile", "pattern": "^check_min_version\\(\"[^\"]+\"\\)\\s*$", "test_string": "alpha alpha alpha(\"a\")x"} +{"file": "./utils/release.py", "line": 61, "col": 13, "module": "regex", "call": "compile", "pattern": "^__version__\\s+=\\s+\"([^\"]+)\"\\s*$", "test_string": "alpha x=x\"a\"x"} +{"file": "./utils/release.py", "line": 62, "col": 14, "module": "regex", "call": "compile", "pattern": 
"^(\\s*)version\\s*=\\s*\"[^\"]+\",", "test_string": "Example likes alpha=x\"a\",."} +{"file": "./utils/release.py", "line": 64, "col": 8, "module": "regex", "call": "compile", "pattern": "^# \"transformers(\\[.+\\])?.*$", "test_string": "# \"alpha[A]A"} +{"file": "./utils/release.py", "line": 68, "col": 8, "module": "regex", "call": "compile", "pattern": "^# \"transformers(\\[.+\\])?.*$", "test_string": "# \"alpha[A]A"} +{"file": "./utils/check_pipeline_typing.py", "line": 35, "col": 29, "module": "regex", "call": "search", "pattern": "# (.*)# ", "test_string": "/home/user/readme.txt"} +{"file": "./utils/check_pipeline_typing.py", "line": 39, "col": 25, "module": "regex", "call": "search", "pattern": "def pipeline(.*) -> Pipeline:", "test_string": "Example likes alpha alpha > alpha:."} +{"file": "./utils/check_docstrings.py", "line": 70, "col": 11, "module": "regex", "call": "compile", "pattern": "^\\s*(Args?|Arguments?|Attributes?|Params?|Parameters?):\\s*$", "test_string": "alpha:x"} +{"file": "./utils/check_docstrings.py", "line": 72, "col": 16, "module": "regex", "call": "compile", "pattern": "^(\\s*)(\\S+)\\s+\\((.+)\\)(?:\\:|$)", "test_string": "Example likes alpha(A):."} +{"file": "./utils/check_docstrings.py", "line": 74, "col": 24, "module": "regex", "call": "compile", "pattern": "\\*optional\\*, defaults to (.*)$", "test_string": "Example likes *alpha*, alpha to A."} +{"file": "./utils/check_docstrings.py", "line": 492, "col": 13, "module": "regex", "call": "search", "pattern": "^(\\s*)(?:\\S|$)", "test_string": "Example likes xx."} +{"file": "./utils/check_docstrings.py", "line": 998, "col": 24, "module": "regex", "call": "findall", "pattern": "[,(]\\s*(\\w+)\\s*(?=:|=|,|\\))", "test_string": "Example likes ,alpha:."} +{"file": "./utils/check_docstrings.py", "line": 1007, "col": 24, "module": "regex", "call": "findall", "pattern": "^ (\\w+)(?:\\s*:|\\s*=|\\s*$)", "test_string": "Example likes xx:."} +{"file": "./utils/check_docstrings.py", "line": 707, 
"col": 11, "module": "regex", "call": "search", "pattern": "^\\s*#\\s*no-format\\s*$", "test_string": "x#alpha alpha"} +{"file": "./utils/check_docstrings.py", "line": 1252, "col": 16, "module": "regex", "call": "findall", "pattern": "custom_args=(\\w+)", "test_string": "Example likes alpha alpha=x."} +{"file": "./utils/check_docstrings.py", "line": 711, "col": 13, "module": "regex", "call": "search", "pattern": "^\\s*#\\s*ignore-order\\s*$", "test_string": "x#alpha alpha"} +{"file": "./utils/check_docstrings.py", "line": 596, "col": 13, "module": "regex", "call": "search", "pattern": "defaults to `?None`?", "test_string": "Example likes alpha to `alpha`."} +{"file": "./utils/compare_test_runs.py", "line": 22, "col": 12, "module": "regex", "call": "match", "pattern": "^(SKIPPED|XFAIL|XPASS|EXPECTEDFAIL)\\s+\\[?\\d*\\]?\\s*(\\S+:\\d+)", "test_string": "Example likes alpha[x]xx:x."} +{"file": "./utils/compare_test_runs.py", "line": 29, "col": 15, "module": "regex", "call": "split", "pattern": "\\s+-\\s+", "test_string": "Example likes x x."} +{"file": "./utils/check_copies.py", "line": 531, "col": 19, "module": "regex", "call": "compile", "pattern": "^(\\s*)#\\s*Copied from\\s+transformers\\.(\\S+\\.\\S+)\\s*($|\\S.*$)", "test_string": "Example likes x#alpha alpha.x.alpha."} +{"file": "./utils/check_copies.py", "line": 532, "col": 33, "module": "regex", "call": "compile", "pattern": "^(\\s*)#\\s*Copied from\\s+tests\\.(\\S+\\.\\S+)\\s*($|\\S.*$)", "test_string": "Example likes x#alpha alpha.x.alpha."} +{"file": "./utils/check_copies.py", "line": 533, "col": 22, "module": "regex", "call": "compile", "pattern": "^\\s*(\\S+)->(\\S+)(\\s+.*|$)", "test_string": "Example likes xx >alpha."} +{"file": "./utils/check_copies.py", "line": 534, "col": 19, "module": "regex", "call": "compile", "pattern": "]*>", "test_string": "Example likes ."} +{"file": "./utils/check_copies.py", "line": 600, "col": 22, "module": "regex", "call": "compile", "pattern": 
"class\\s+([^\\(:]+)(?:\\(|:)", "test_string": "Example likes alpha((."} +{"file": "./utils/check_copies.py", "line": 601, "col": 21, "module": "regex", "call": "compile", "pattern": "def\\s+([^\\(]+)\\(", "test_string": "Example likes alpha(."} +{"file": "./utils/check_copies.py", "line": 931, "col": 23, "module": "regex", "call": "compile", "pattern": "\\*\\*\\[([^\\]]*)\\]\\(([^\\)]*)\\)\\*\\* \\(from ([^)]*)\\)[^\\[]*([^\\)]*\\)).*?by (.*?[A-Za-z\\*]{2,}?)\\. (.*)$", "test_string": "Example likes **[a](a)** (alpha a)aa)alpha alpha. A."} +{"file": "./utils/check_copies.py", "line": 935, "col": 29, "module": "regex", "call": "compile", "pattern": "\\*\\*\\[([^\\]]*)\\]\\(([^\\)]*)\\)\\*\\*", "test_string": "Example likes **[a](a)**."} +{"file": "./utils/check_copies.py", "line": 937, "col": 29, "module": "regex", "call": "compile", "pattern": " \\[([^\\]]*)\\]\\(([^\\)]*)\\)", "test_string": "Example likes [a](a)."} +{"file": "./utils/check_copies.py", "line": 172, "col": 11, "module": "regex", "call": "search", "pattern": "^\\s*\\)(\\s*->.*:|:)\\s*$", "test_string": "x)x >A:x"} +{"file": "./utils/check_copies.py", "line": 552, "col": 15, "module": "regex", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} +{"file": "./utils/check_copies.py", "line": 950, "col": 18, "module": "regex", "call": "search", "pattern": "\\*\\*\\[([^\\]]*)", "test_string": "Example likes **[a."} +{"file": "./utils/check_copies.py", "line": 944, "col": 16, "module": "regex", "call": "search", "pattern": "\\*\\*\\[([^\\]]*)", "test_string": "Example likes **[a."} +{"file": "./utils/custom_init_isort.py", "line": 48, "col": 13, "module": "regex", "call": "compile", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} +{"file": "./utils/custom_init_isort.py", "line": 50, "col": 17, "module": "regex", "call": "compile", "pattern": "^\\s*\"([^\"]+)\":", "test_string": "Example likes x\"a\":."} +{"file": "./utils/custom_init_isort.py", "line": 52, "col": 
19, "module": "regex", "call": "compile", "pattern": "^\\s*_import_structure\\[\"([^\"]+)\"\\]", "test_string": "Example likes x alpha alpha[\"a\"]."} +{"file": "./utils/custom_init_isort.py", "line": 54, "col": 17, "module": "regex", "call": "compile", "pattern": "^\\s*\"([^\"]+)\",\\s*$", "test_string": "x\"a\",x"} +{"file": "./utils/custom_init_isort.py", "line": 56, "col": 22, "module": "regex", "call": "compile", "pattern": "\\[([^\\]]+)\\]", "test_string": "Example likes [a]."} +{"file": "./utils/add_pipeline_model_mapping_to_test.py", "line": 206, "col": 8, "module": "regex", "call": "compile", "pattern": "\\s(is_\\S+?_available\\(\\))\\s", "test_string": "Example likes alpha x alpha()x."} +{"file": "./utils/tests_fetcher.py", "line": 541, "col": 35, "module": "regex", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+(\\.+\\S+)\\s+import\\s+([^\\n]+)(?=\\n)", "test_string": "Example likes alpha.alpha."} +{"file": "./utils/tests_fetcher.py", "line": 545, "col": 34, "module": "regex", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+(\\.+\\S+)\\s+import\\s+\\(([^\\)]+)\\)", "test_string": "Example likes alpha.alpha(a)."} +{"file": "./utils/tests_fetcher.py", "line": 551, "col": 33, "module": "regex", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+transformers(\\S*)\\s+import\\s+([^\\n]+)(?=\\n)", "test_string": "Example likes alpha."} +{"file": "./utils/tests_fetcher.py", "line": 555, "col": 32, "module": "regex", "call": "compile", "pattern": "(?:^|\\n)\\s*from\\s+transformers(\\S*)\\s+import\\s+\\(([^\\)]+)\\)", "test_string": "Example likes alpha(a)."} +{"file": "./utils/tests_fetcher.py", "line": 1082, "col": 21, "module": "regex", "call": "search", "pattern": "\\[([^\\]]*)\\]", "test_string": "Example likes [a]."} +{"file": "./utils/check_repo.py", "line": 869, "col": 16, "module": "regex", "call": "compile", "pattern": "^\\s*@(\\S+)\\s+$", "test_string": "alice@example.com"} +{"file": "./utils/check_repo.py", "line": 618, "col": 17, "module": 
"regex", "call": "findall", "pattern": "all_model_classes\\s+=\\s+\\(\\s*\\(([^\\)]*)\\)", "test_string": "Example likes alpha alpha alpha=x(x(a)."} +{"file": "./utils/check_repo.py", "line": 620, "col": 18, "module": "regex", "call": "findall", "pattern": "all_model_classes\\s+=\\s+\\(([^\\)]*)\\)", "test_string": "Example likes alpha alpha alpha=x(a)."} +{"file": "./utils/check_repo.py", "line": 631, "col": 27, "module": "regex", "call": "findall", "pattern": "base_model_class\\s+=.*", "test_string": "Example likes alpha alpha alpha=A."} +{"file": "./utils/check_repo.py", "line": 928, "col": 23, "module": "regex", "call": "findall", "pattern": "\\[\\[autodoc\\]\\]\\s+(\\S+)\\s+", "test_string": "Example likes [[alpha]]alpha."} +{"file": "./utils/check_repo.py", "line": 937, "col": 46, "module": "regex", "call": "findall", "pattern": "(?<=-\\s).*", "test_string": "Example likes A."} +{"file": "./utils/notification_service.py", "line": 1070, "col": 19, "module": "regex", "call": "compile", "pattern": "\\(#(\\d+)\\)$", "test_string": "Example likes (#x)."} +{"file": "./utils/pr_slow_ci_models.py", "line": 100, "col": 10, "module": "regex", "call": "compile", "pattern": "src/transformers/models/(.*)/modeling_.*\\.py", "test_string": "/home/user/readme.txt"} +{"file": "./benchmark/benchmark.py", "line": 82, "col": 17, "module": "regex", "call": "search", "pattern": "/commit=([^/]+)", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/dynamic_module_utils.py", "line": 139, "col": 23, "module": "regex", "call": "findall", "pattern": "^\\s*import\\s+\\.(\\S+)\\s*$", "test_string": "alpha.xx"} +{"file": "./src/transformers/dynamic_module_utils.py", "line": 141, "col": 24, "module": "regex", "call": "findall", "pattern": "^\\s*from\\s+\\.(\\S+)\\s+import", "test_string": "Example likes alpha.alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 882, "col": 21, "module": "regex", "call": "findall", "pattern": ".(\\d+).", "test_string": 
"Example likes alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 890, "col": 15, "module": "regex", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 2624, "col": 11, "module": "regex", "call": "search", "pattern": "class \\w+Attention\\(nn.Module\\)", "test_string": "Example likes alpha alpha(alpha)."} +{"file": "./src/transformers/modeling_utils.py", "line": 3970, "col": 18, "module": "regex", "call": "compile", "pattern": "(.*?)-\\d{5}-of-\\d{5}", "test_string": "Example likes A alpha of alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 5789, "col": 27, "module": "regex", "call": "sub", "pattern": "\\.\\d+\\.", "test_string": "Example likes .x.."} +{"file": "./src/transformers/modeling_utils.py", "line": 3844, "col": 34, "module": "regex", "call": "sub", "pattern": "\\(.*\\)", "test_string": "Example likes (A)."} +{"file": "./src/transformers/modeling_utils.py", "line": 884, "col": 28, "module": "regex", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 885, "col": 25, "module": "regex", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} +{"file": "./src/transformers/modeling_utils.py", "line": 886, "col": 21, "module": "regex", "call": "sub", "pattern": ".(\\d+).", "test_string": "Example likes alpha."} +{"file": "./src/transformers/tokenization_utils_base.py", "line": 150, "col": 21, "module": "regex", "call": "compile", "pattern": "tokenizer\\.(.*)\\.json", "test_string": "Example likes alpha.A.alpha."} +{"file": "./src/transformers/model_debugging_utils.py", "line": 52, "col": 23, "module": "regex", "call": "compile", "pattern": "object at 0x[0-9A-Fa-f]+", "test_string": "Example likes alpha at 0xC."} +{"file": "./src/transformers/model_debugging_utils.py", "line": 188, "col": 18, "module": "regex", "call": "compile", "pattern": 
"(.*)\\.(\\d+)$", "test_string": "Example likes A.x."} +{"file": "./src/transformers/testing_utils.py", "line": 1570, "col": 11, "module": "regex", "call": "sub", "pattern": "^.*\\r", "test_string": "Example likes A."} +{"file": "./src/transformers/testing_utils.py", "line": 2435, "col": 13, "module": "regex", "call": "sub", "pattern": "^gw", "test_string": "Example likes gw."} +{"file": "./src/transformers/testing_utils.py", "line": 2287, "col": 23, "module": "regex", "call": "sub", "pattern": ".*_ _ _ (_ ){10,}_ _ ", "test_string": "Example likes A."} +{"file": "./src/transformers/testing_utils.py", "line": 2802, "col": 28, "module": "regex", "call": "sub", "pattern": "(>>> .*load_dataset\\(.*)", "test_string": "Example likes >>> alpha alpha(A."} +{"file": "./src/transformers/testing_utils.py", "line": 2805, "col": 16, "module": "regex", "call": "search", "pattern": "cuda|to\\(0\\)|device=0", "test_string": "Example likes alpha=0."} +{"file": "./src/transformers/modeling_gguf_pytorch_utils.py", "line": 338, "col": 22, "module": "regex", "call": "sub", "pattern": "mlp.experts.\\d+.", "test_string": "Example likes alpha."} +{"file": "./src/transformers/trainer_utils.py", "line": 199, "col": 17, "module": "regex", "call": "compile", "pattern": "^\" + PREFIX_CHECKPOINT_DIR + r\"\\-(\\d+)$", "test_string": "\" alpha alpha alpha r\" x"} +{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 589, "col": 10, "module": "regex", "call": "compile", "pattern": "([^a-zA-Z0-9])\\1{3,}", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 596, "col": 9, "module": "regex", "call": "compile", "pattern": "&(#?(x?))([^&;\\s]+);", "test_string": "Example likes &#x&;."} +{"file": "./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 737, "col": 14, "module": "regex", "call": "compile", "pattern": "(.)\\1{2,}", "test_string": "Example likes alpha."} +{"file": 
"./src/transformers/models/bertweet/tokenization_bertweet.py", "line": 745, "col": 14, "module": "regex", "call": "compile", "pattern": "(?.+?", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/models/kosmos2/processing_kosmos2.py", "line": 616, "col": 16, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/kosmos2/processing_kosmos2.py", "line": 617, "col": 16, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/roberta/tokenization_roberta.py", "line": 201, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 341, "col": 24, "module": "regex", "call": "match", "pattern": "neck.extra_convs.(\\d+).gn.(weight|bias)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 346, "col": 24, "module": "regex", "call": "match", "pattern": "neck.extra_convs.(\\d+).conv.(weight|bias)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 336, "col": 24, "module": "regex", "call": "match", "pattern": "backbone.norm(\\d+).(weight|bias)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/mm_grounding_dino/convert_mm_grounding_dino_to_hf.py", "line": 364, "col": 20, "module": "regex", "call": "match", "pattern": "bbox_head.(cls|reg)_branches.(\\d+).(.*)", "test_string": "Example likes alpha alpha alpha."} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 402, "col": 33, "module": "regex", "call": "compile", "pattern": 
"(https?|ftp)(:\\/\\/[-_\\.!~*\\'()a-zA-Z0-9;\\/?:\\@&=\\+$,%#]+)", "test_string": "alice@example.com"} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 403, "col": 33, "module": "regex", "call": "compile", "pattern": "[A-Za-z0-9\\._+]*@[\\-_0-9A-Za-z]+(\\.[A-Za-z]+)*", "test_string": "alice@example.com"} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 404, "col": 33, "module": "regex", "call": "compile", "pattern": "[\\(]{0,1}[0-9]{2,4}[\\)\\-\\(]{0,1}[0-9]{2,4}[\\)\\-]{0,1}[0-9]{3,4}", "test_string": "Example-likes-(44-44-444."} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 405, "col": 33, "module": "regex", "call": "compile", "pattern": "([12]\\d{3}[/\\-年])*(0?[1-9]|1[0-2])[/\\-月]((0?[1-9]|[12][0-9]|3[01])日?)*(\\d{1,2}|:|\\d{1,2}時|\\d{1,2}分|\\(日\\)|\\(月\\)|\\(火\\)|\\(水\\)|\\(木\\)|\\(金\\)|\\(土\\)|㈰|㈪|㈫|㈬|㈭|㈮|㈯)*", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 408, "col": 33, "module": "regex", "call": "compile", "pattern": "(明治|大正|昭和|平成|令和|㍾|㍽|㍼|㍻|\\u32ff)\\d{1,2}年(0?[1-9]|1[0-2])月(0?[1-9]|[12][0-9]|3[01])日(\\d{1,2}|:|\\d{1,2}時|\\d{1,2}分|\\(日\\)|\\(月\\)|\\(火\\)|\\(水\\)|\\(木\\)|\\(金\\)|\\(土\\)|㈰|㈪|㈫|㈬|㈭|㈮|㈯)*", "test_string": "Example likes 明治x年05月05日x時."} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 415, "col": 37, "module": "regex", "call": "compile", "pattern": "(?:\\d,\\d{3}|[\\d億])*+\"\n r\"(?:\\d,\\d{3}|[\\d万])*+\"\n r\"(?:\\d,\\d{3}|[\\d千])*+\"\n r\"(?:千円|万円|千万円|円|千ドル|万ドル|千万ドル|ドル|千ユーロ|万ユーロ|千万ユーロ|ユーロ)+\"\n r\"(?:\\(税込\\)|\\(税抜\\)|\\+tax)*", "test_string": "Example likes a\" r\"a\" r\"a\" r\"千万ユーロ\" r\"+alpha."} +{"file": "./src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py", "line": 423, "col": 
37, "module": "regex", "call": "compile", "pattern": "(?:\\d,\\d{3}|[\\d億万千])*\"\n r\"(?:千円|万円|千万円|円|千ドル|万ドル|千万ドル|ドル|千ユーロ|万ユーロ|千万ユーロ|ユーロ)+\"\n r\"(?:\\(税込\\)|\\(税抜\\)|\\+tax)*", "test_string": "Example likes x,alpha\" r\"千万ユーロ\" r\"+alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 100, "col": 31, "module": "regex", "call": "compile", "pattern": "encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 101, "col": 30, "module": "regex", "call": "compile", "pattern": "encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 104, "col": 32, "module": "regex", "call": "compile", "pattern": "encoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 106, "col": 32, "module": "regex", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 107, "col": 30, "module": "regex", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 110, "col": 31, "module": "regex", "call": "compile", "pattern": "decoders.(\\d*).level_blocks.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 112, "col": 29, "module": 
"regex", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(\\d).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 113, "col": 27, "module": "regex", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(\\d).model.(\\d*).model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/convert_jukebox.py", "line": 116, "col": 28, "module": "regex", "call": "compile", "pattern": "conditioner_blocks.(\\d*).cond.model.(\\d*).(bias|weight)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 257, "col": 18, "module": "regex", "call": "compile", "pattern": "_+", "test_string": "Example likes ."} +{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 214, "col": 32, "module": "regex", "call": "compile", "pattern": "[^A-Za-z0-9.,:;!?\\-'\\\"()\\[\\] \\t\\n]+", "test_string": "Example likes M."} +{"file": "./src/transformers/models/deprecated/jukebox/tokenization_jukebox.py", "line": 223, "col": 32, "module": "regex", "call": "compile", "pattern": "[^A-Za-z0-9.,:;!?\\-+'\\\"()\\[\\] \\t\\n]+", "test_string": "Example likes M."} +{"file": "./src/transformers/models/deprecated/tapex/tokenization_tapex.py", "line": 293, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 53, "col": 33, "module": "regex", "call": "search", "pattern": "\\d\\.\\d", "test_string": "Example likes x.x."} +{"file": 
"./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 81, "col": 35, "module": "regex", "call": "search", "pattern": ".\\d.", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 56, "col": 20, "module": "regex", "call": "search", "pattern": "\\d\\.\\d\\d.", "test_string": "Example likes x.alpha."} +{"file": "./src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py", "line": 58, "col": 20, "module": "regex", "call": "search", "pattern": "\\d\\.\\d.", "test_string": "Example likes x.xA."} +{"file": "./src/transformers/models/mistral/convert_mistral_weights_to_hf.py", "line": 209, "col": 60, "module": "regex", "call": "match", "pattern": "consolidated.\\d+.pth", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/siglip/tokenization_siglip.py", "line": 291, "col": 15, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 435, "col": 17, "module": "regex", "call": "search", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 437, "col": 27, "module": "regex", "call": "sub", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/llama4/convert_llama4_weights_to_hf.py", "line": 438, "col": 25, "module": "regex", "call": "sub", "pattern": "(gate|up)_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/deepseek_vl/convert_deepseek_vl_weights_to_hf.py", "line": 141, "col": 18, "module": "regex", "call": "search", "pattern": "(\\(.*?\\))", "test_string": "Example likes (A)."} 
+{"file": "./src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py", "line": 126, "col": 29, "module": "regex", "call": "search", "pattern": "tuned4[^-]+", "test_string": "Example likes alpha4a."} +{"file": "./src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py", "line": 311, "col": 57, "module": "regex", "call": "search", "pattern": "\\d\\d\\d\\d-\\d\\d?-\\d\\d?", "test_string": "Example likes alpha xx xx."} +{"file": "./src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py", "line": 196, "col": 11, "module": "regex", "call": "match", "pattern": "(\\S+)\\.attention\\.self\\.(key|value|query)\\.(bias|weight)", "test_string": "Example likes x.alpha.alpha.alpha.alpha."} +{"file": "./src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py", "line": 196, "col": 91, "module": "regex", "call": "match", "pattern": "(\\S+)\\.attention\\.output\\.dense\\.weight", "test_string": "Example likes x.alpha.alpha.alpha.alpha."} +{"file": "./src/transformers/models/ctrl/tokenization_ctrl.py", "line": 196, "col": 16, "module": "regex", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} +{"file": "./src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py", "line": 153, "col": 15, "module": "regex", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} +{"file": "./src/transformers/models/mllama/convert_mllama_weights_to_hf.py", "line": 369, "col": 22, "module": 
"regex", "call": "sub", "pattern": "layers.(\\d+).", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py", "line": 60, "col": 15, "module": "regex", "call": "sub", "pattern": "blocks\\.(\\d+)\\.att", "test_string": "Example likes alpha.x.alpha."} +{"file": "./src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py", "line": 62, "col": 15, "module": "regex", "call": "sub", "pattern": "blocks\\.(\\d+)\\.ffn", "test_string": "Example likes alpha.x.alpha."} +{"file": "./src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py", "line": 254, "col": 20, "module": "regex", "call": "match", "pattern": "mp_rank_(\\d{2})", "test_string": "Example likes mp alpha xx."} +{"file": "./src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py", "line": 103, "col": 15, "module": "regex", "call": "sub", "pattern": "[\\(\\)\\[\\]\\<\\>\\\"]+", "test_string": "Example likes (."} +{"file": "./src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py", "line": 106, "col": 15, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/timesfm/convert_timesfm_orignal_to_hf.py", "line": 27, "col": 16, "module": "regex", "call": "match", "pattern": "(.*)\\[(\\d+)\\]", "test_string": "Example likes A[x]."} +{"file": "./src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py", "line": 148, "col": 14, "module": "regex", "call": "match", "pattern": "^mobilenet_v1_([^_]*)_([^_]*)$", "test_string": "alpha v1 a a"} +{"file": "./src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py", "line": 77, "col": 19, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py", "line": 78, "col": 34, "module": "regex", 
"call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 71, "col": 11, "module": "regex", "call": "sub", "pattern": "^\\(([\\d.]+[a-zA-Z]?)\\) \\\\\\[(.+?)\\\\\\]$", "test_string": "(.m) \\[A\\]"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 73, "col": 11, "module": "regex", "call": "sub", "pattern": "^\\\\\\[(.+?)\\\\\\] \\(([\\d.]+[a-zA-Z]?)\\)$", "test_string": "\\[A\\] (.m)"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 75, "col": 11, "module": "regex", "call": "sub", "pattern": "^\\\\\\[(.+?)\\\\\\] \\(([\\d.]+[a-zA-Z]?)\\) (\\\\\\[.+?\\\\\\])$", "test_string": "\\[A\\] (.m) \\[A\\]"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 85, "col": 11, "module": "regex", "call": "sub", "pattern": "\\\\mbox{ ?\\\\boldmath\\$(.*?)\\$}", "test_string": "Example likes \\alpha{ \\alpha$A$}."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 87, "col": 11, "module": "regex", "call": "sub", "pattern": "((?:http|ftp|https):\\/\\/(?:[\\w_-]+(?:(?:\\.[\\w_-]+)+))(?:[\\w.,@?^=%&:\\/~+#-]*[\\w@?^=%&\\/~+#-]))", "test_string": "alice@example.com"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 93, "col": 11, "module": "regex", "call": "sub", "pattern": "```\\s*(.+?)\\s*```", "test_string": "Example likes ```alpha```."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 119, "col": 16, "module": "regex", "call": "search", "pattern": ". 
([-*]) ", "test_string": "Example likes A."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 446, "col": 15, "module": "regex", "call": "sub", "pattern": "## References\\n+\\[MISSING_PAGE_POST(:\\d+)?\\]", "test_string": "Example likes ## alpha\n[alpha alpha alpha:x]."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 480, "col": 21, "module": "regex", "call": "sub", "pattern": "(^.+)\\\\begin{tab", "test_string": "Example likes A\\alpha{alpha."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 500, "col": 21, "module": "regex", "call": "sub", "pattern": "(?:\\n|^)#+ \\d*\\W? ?(.{100,})", "test_string": "Example likes # xx alpha."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 508, "col": 21, "module": "regex", "call": "sub", "pattern": "^#+ (?:[\\d+\\.]+|[ixv\\.]+)?\\s*(?:$|\\n\\s*)", "test_string": "Example likes # ix\nx."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 519, "col": 21, "module": "regex", "call": "sub", "pattern": "^\\* \\[\\d+\\](\\s?[A-W]\\.+\\s?){10,}.*$", "test_string": "* [x]xL.alpha.alpha.alpha.alpha.xA"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 521, "col": 21, "module": "regex", "call": "sub", "pattern": "^(\\* \\[\\d+\\])\\[\\](.*)$", "test_string": "* [x][]A"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 523, "col": 21, "module": "regex", "call": "sub", "pattern": "(^\\w\\n\\n|\\n\\n\\w$)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 525, "col": 21, "module": "regex", "call": "sub", "pattern": "([\\s.,()])_([a-zA-Z0-9])__([a-zA-Z0-9]){1,3}_([\\s.,:()])", "test_string": "Example likes . 
m m .."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 530, "col": 21, "module": "regex", "call": "sub", "pattern": "([\\s.,\\d])_([a-zA-Z0-9])_([\\s.,\\d;])", "test_string": "Example likes . m .."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 532, "col": 21, "module": "regex", "call": "sub", "pattern": "(\\nFootnote .*?:) (?:footnotetext|thanks):\\W*(.*(?:\\n\\n|$))", "test_string": "Example likes alpha A: alpha:xA."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 538, "col": 21, "module": "regex", "call": "sub", "pattern": "\\[FOOTNOTE:.+?\\](.*?)\\[ENDFOOTNOTE\\]", "test_string": "Example likes [alpha:A]A[alpha]."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 544, "col": 11, "module": "regex", "call": "match", "pattern": "[A-Z0-9,;:]$", "test_string": "Example likes M."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 565, "col": 21, "module": "regex", "call": "sub", "pattern": "\\\\begin{tabular}{([clr ]){2,}}\\s*[& ]*\\s*(\\\\\\\\)? \\\\end{tabular}", "test_string": "Example likes \\alpha{alpha}{cc}x&x\\\\ \\alpha{alpha}."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 571, "col": 21, "module": "regex", "call": "sub", "pattern": "(\\*\\*S\\. A\\. B\\.\\*\\*\\n+){2,}", "test_string": "Example likes **S. A. B.**\n**S. A. 
B.**."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 573, "col": 21, "module": "regex", "call": "sub", "pattern": "^#+( [\\[\\d\\w])?$", "test_string": "# ["} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 575, "col": 21, "module": "regex", "call": "sub", "pattern": "^\\.\\s*$", "test_string": ".x"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 577, "col": 21, "module": "regex", "call": "sub", "pattern": "\\n{3,}", "test_string": "Example likes ."} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 134, "col": 15, "module": "regex", "call": "match", "pattern": "^[\\dixv]+((?:\\.[\\dixv])?)+$", "test_string": "i.i"} +{"file": "./src/transformers/models/nougat/tokenization_nougat_fast.py", "line": 237, "col": 15, "module": "regex", "call": "sub", "pattern": "(?:[\\d_]|\\*\\*)", "test_string": "Example likes ."} +{"file": "./src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/t5/tokenization_t5_fast.py", "line": 226, "col": 38, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/t5/tokenization_t5.py", "line": 278, "col": 38, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/blenderbot/tokenization_blenderbot.py", "line": 207, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple 
example phrase"} +{"file": "./src/transformers/models/speecht5/number_normalizer.py", "line": 186, "col": 15, "module": "regex", "call": "sub", "pattern": "(\\d+,\\d+)", "test_string": "Example likes x,x."} +{"file": "./src/transformers/models/deepseek_vl_hybrid/convert_deepseek_vl_hybrid_weights_to_hf.py", "line": 168, "col": 18, "module": "regex", "call": "search", "pattern": "(\\(.*?\\))", "test_string": "Example likes (A)."} +{"file": "./src/transformers/models/mvp/tokenization_mvp.py", "line": 193, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py", "line": 74, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} +{"file": "./src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py", "line": 75, "col": 30, "module": "regex", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/auto/configuration_auto.py", "line": 1190, "col": 33, "module": "regex", "call": "search", "pattern": "^(\\s*)List options\\s*$", "test_string": "alpha alpha"} +{"file": "./src/transformers/models/auto/configuration_auto.py", "line": 1193, "col": 21, "module": "regex", "call": "search", "pattern": "^(\\s*)List options\\s*$", "test_string": "alpha alpha"} +{"file": "./src/transformers/models/codegen/tokenization_codegen.py", "line": 177, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py", "line": 113, "col": 16, "module": "regex", "call": "sub", "pattern": "\\s{2,}", "test_string": "Example likes xx."} +{"file": 
"./src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py", "line": 172, "col": 16, "module": "regex", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} +{"file": "./src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py", "line": 77, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py", "line": 78, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/myt5/convert_myt5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/myt5/convert_myt5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/flaubert/tokenization_flaubert.py", "line": 71, "col": 11, "module": "regex", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} +{"file": "./src/transformers/models/flaubert/tokenization_flaubert.py", "line": 96, "col": 11, "module": "regex", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} +{"file": "./src/transformers/models/big_bird/tokenization_big_bird.py", "line": 218, "col": 19, "module": "regex", "call": "sub", "pattern": " (\\[(MASK|SEP)\\])", "test_string": "Example likes [alpha]."} +{"file": "./src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py", "line": 136, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py", "line": 137, "col": 30, 
"module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/led/tokenization_led.py", "line": 199, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py", "line": 85, "col": 26, "module": "regex", "call": "sub", "pattern": "layers_(\\d+)", "test_string": "Example likes alpha x."} +{"file": "./src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py", "line": 90, "col": 26, "module": "regex", "call": "sub", "pattern": "layers_(\\d+)", "test_string": "Example likes alpha x."} +{"file": "./src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py", "line": 93, "col": 16, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py", "line": 94, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 57, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.functional(?:_(\\d+))?\\.layers", "test_string": "Example likes alpha.alpha x.alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 69, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 70, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d_1\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d 1.."} 
+{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 71, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.conv1d_2\\.", "test_string": "Example likes alpha.alpha.alpha.alpha1d 2.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 72, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.sequential\\.layers\\.group_normalization\\.", "test_string": "Example likes alpha.alpha.alpha.alpha alpha.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 73, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_with_rope\\.key_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 74, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_with_rope\\.query_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 75, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_with_rope\\.value_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 76, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_with_rope\\.output_dense", "test_string": "Example likes alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 77, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_precomputed_kv\\.key_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 78, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_precomputed_kv\\.query_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} +{"file": 
"./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 79, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_precomputed_kv\\.value_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 80, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_precomputed_kv\\.output_dense", "test_string": "Example likes alpha alpha kv.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 81, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_causal_with_rope\\.key_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 82, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_causal_with_rope\\.query_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 83, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_causal_with_rope\\.value_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 84, "col": 11, "module": "regex", "call": "sub", "pattern": "mha_causal_with_rope\\.output_dense", "test_string": "Example likes alpha alpha alpha alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 85, "col": 11, "module": "regex", "call": "sub", "pattern": "layer_normalization\\.", "test_string": "Example likes alpha alpha.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 86, "col": 11, "module": "regex", "call": "sub", "pattern": "layer_normalization_1\\.", "test_string": "Example likes alpha alpha 1.."} +{"file": 
"./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 87, "col": 11, "module": "regex", "call": "sub", "pattern": "layer_normalization_2\\.", "test_string": "Example likes alpha alpha 2.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 88, "col": 11, "module": "regex", "call": "sub", "pattern": "vars\\.0", "test_string": "Example likes alpha.0."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 89, "col": 11, "module": "regex", "call": "sub", "pattern": "vars\\.1", "test_string": "Example likes alpha.1."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 90, "col": 11, "module": "regex", "call": "sub", "pattern": "layers\\.reversible_embedding", "test_string": "Example likes alpha.alpha alpha."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 64, "col": 15, "module": "regex", "call": "sub", "pattern": "functional\\.layers\\.dense\\.", "test_string": "Example likes alpha.alpha.alpha.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 65, "col": 15, "module": "regex", "call": "sub", "pattern": "functional\\.layers\\.dense_1\\.", "test_string": "Example likes alpha.alpha.alpha 1.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 67, "col": 15, "module": "regex", "call": "sub", "pattern": "functional\\.layers\\.sequential\\.layers\\.dense\\.", "test_string": "Example likes alpha.alpha.alpha.alpha.alpha.."} +{"file": "./src/transformers/models/moonshine/convert_usefulsensors_to_hf.py", "line": 68, "col": 15, "module": "regex", "call": "sub", "pattern": "functional\\.layers\\.sequential\\.layers\\.dense_1\\.", "test_string": "Example likes alpha.alpha.alpha.alpha.alpha 1.."} +{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 229, "col": 11, "module": "regex", "call": "sub", "pattern": 
"(-+|~+|!+|\"+|;+|\\?+|\\++|,+|\\)+|\\(+|\\\\+|\\/+|\\*+|\\[+|\\]+|}+|{+|\\|+|_+)", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 230, "col": 11, "module": "regex", "call": "sub", "pattern": "\\s*\\n\\s*", "test_string": "Example likes x\nx."} +{"file": "./src/transformers/models/openai/tokenization_openai.py", "line": 231, "col": 11, "module": "regex", "call": "sub", "pattern": "[^\\S\\n]+", "test_string": "Example likes ."} +{"file": "./src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py", "line": 84, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} +{"file": "./src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py", "line": 85, "col": 30, "module": "regex", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/zamba2/modular_zamba2.py", "line": 990, "col": 46, "module": "regex", "call": "compile", "pattern": "^shared_transformer\\.feed_forward\\.gate_up_proj_adapter_list\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.alpha up alpha alpha alpha.\" alpha id r\".0.alpha"} +{"file": "./src/transformers/models/zamba2/modular_zamba2.py", "line": 1001, "col": 55, "module": "regex", "call": "compile", "pattern": "^shared_transformer\\.self_attn\\.\"\n + r\"(?:linear_q_adapter_list|linear_k_adapter_list|linear_v_adapter_list)\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.\" r\"alpha q alpha alpha.\" alpha id r\".0.alpha"} +{"file": "./src/transformers/models/zamba2/modeling_zamba2.py", "line": 1430, "col": 46, "module": "regex", "call": "compile", "pattern": "^shared_transformer\\.feed_forward\\.gate_up_proj_adapter_list\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.alpha up 
alpha alpha alpha.\" alpha id r\".0.alpha"} +{"file": "./src/transformers/models/zamba2/modeling_zamba2.py", "line": 1441, "col": 55, "module": "regex", "call": "compile", "pattern": "^shared_transformer\\.self_attn\\.\"\n + r\"(?:linear_q_adapter_list|linear_k_adapter_list|linear_v_adapter_list)\\.\"\n + str(adapter_id)\n + r\"\\.(?:0|1)\\.weight$", "test_string": "alpha alpha.alpha alpha.\" r\"alpha q alpha alpha.\" alpha id r\".0.alpha"} +{"file": "./src/transformers/models/deberta/tokenization_deberta.py", "line": 181, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py", "line": 88, "col": 66, "module": "regex", "call": "fullmatch", "pattern": "layer_\\d+", "test_string": "Example likes alpha x."} +{"file": "./src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py", "line": 89, "col": 34, "module": "regex", "call": "search", "pattern": "layer_(\\d+)", "test_string": "Example likes alpha x."} +{"file": "./src/transformers/models/longformer/tokenization_longformer.py", "line": 201, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/xlm/tokenization_xlm.py", "line": 71, "col": 11, "module": "regex", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} +{"file": "./src/transformers/models/xlm/tokenization_xlm.py", "line": 96, "col": 11, "module": "regex", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} +{"file": "./src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py", "line": 67, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", 
"test_string": "Example likes M x."} +{"file": "./src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py", "line": 68, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py", "line": 151, "col": 15, "module": "regex", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z0-9_]+)", "test_string": "Example likes alpha.x.m.m."} +{"file": "./src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py", "line": 406, "col": 15, "module": "regex", "call": "compile", "pattern": "layers\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} +{"file": "./src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py", "line": 781, "col": 15, "module": "regex", "call": "compile", "pattern": "transformer.h\\.(\\d+)\\.([a-z0-9_.]+)\\.([a-z]+)", "test_string": "Example likes alpha.x.m.m."} +{"file": "./src/transformers/models/whisper/tokenization_whisper.py", "line": 311, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/whisper/tokenization_whisper.py", "line": 312, "col": 29, "module": "regex", "call": "compile", "pattern": "<\\|(\\d+\\.\\d+)\\|>", "test_string": "Example likes <|x.x|>."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 84, "col": 12, "module": "regex", "call": "sub", "pattern": "[<\\[][^>\\]]*[>\\]]", "test_string": "Example likes <>>."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 85, "col": 12, "module": "regex", "call": "sub", "pattern": "\\(([^)]+?)\\)", "test_string": "Example likes (a)."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 91, 
"col": 12, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 440, "col": 19, "module": "regex", "call": "split", "pattern": "\\band\\s+a\\s+half\\b", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 457, "col": 12, "module": "regex", "call": "sub", "pattern": "([a-z])([0-9])", "test_string": "Example likes m4."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 458, "col": 12, "module": "regex", "call": "sub", "pattern": "([0-9])([a-z])", "test_string": "Example likes 4m."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 461, "col": 12, "module": "regex", "call": "sub", "pattern": "([0-9])\\s+(st|nd|rd|th|s)\\b", "test_string": "Example likes 4alpha."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 482, "col": 12, "module": "regex", "call": "sub", "pattern": "([€£$])([0-9]+) (?:and )?¢([0-9]{1,2})\\b", "test_string": "Example likes €4 alpha ¢4."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 483, "col": 12, "module": "regex", "call": "sub", "pattern": "[€£$]0.([0-9]{1,2})\\b", "test_string": "Example likes €0A4."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 486, "col": 12, "module": "regex", "call": "sub", "pattern": "\\b1(s?)\\b", "test_string": "Example likes 1s."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 576, "col": 12, "module": "regex", "call": "sub", "pattern": "[<\\[][^>\\]]*[>\\]]", "test_string": "Example likes <>>."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 577, "col": 12, "module": "regex", "call": "sub", "pattern": "\\(([^)]+?)\\)", "test_string": "Example likes (a)."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 579, "col": 
12, "module": "regex", "call": "sub", "pattern": "\\s+'", "test_string": "Example likes x'."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 584, "col": 12, "module": "regex", "call": "sub", "pattern": "(\\d),(\\d)", "test_string": "Example likes x,x."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 585, "col": 12, "module": "regex", "call": "sub", "pattern": "\\.([^0-9]|$)", "test_string": "Example likes .4."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 592, "col": 12, "module": "regex", "call": "sub", "pattern": "[.$¢€£]([^0-9])", "test_string": "Example likes .4."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 593, "col": 12, "module": "regex", "call": "sub", "pattern": "([^0-9])%", "test_string": "Example likes 4%."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 595, "col": 12, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 246, "col": 15, "module": "regex", "call": "match", "pattern": "^\\d+(\\.\\d+)?$", "test_string": "x.x"} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 89, "col": 25, "module": "regex", "call": "findall", "pattern": "\\X", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/whisper/english_normalizer.py", "line": 243, "col": 51, "module": "regex", "call": "match", "pattern": "^\\d+(\\.\\d+)?$", "test_string": "x.x"} +{"file": "./src/transformers/models/whisper/tokenization_whisper_fast.py", "line": 137, "col": 29, "module": "regex", "call": "compile", "pattern": "<\\|(\\d+\\.\\d+)\\|>", "test_string": "Example likes <|x.x|>."} +{"file": "./src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py", "line": 59, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", 
"test_string": "Example likes Mx."} +{"file": "./src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py", "line": 60, "col": 30, "module": "regex", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/gpt2/tokenization_gpt2.py", "line": 167, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/clip/tokenization_clip.py", "line": 77, "col": 11, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/clip/tokenization_clip.py", "line": 318, "col": 19, "module": "regex", "call": "compile", "pattern": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/udop/tokenization_udop.py", "line": 340, "col": 38, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py", "line": 74, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py", "line": 75, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/bart/tokenization_bart.py", "line": 194, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/vits/tokenization_vits.py", "line": 39, "col": 24, "module": "regex", "call": "compile", "pattern": "[^\\x00-\\x7F]", 
"test_string": "Example likes ?."} +{"file": "./src/transformers/models/vits/tokenization_vits.py", "line": 200, "col": 28, "module": "regex", "call": "sub", "pattern": "\\s+", "test_string": "Example likes x."} +{"file": "./src/transformers/models/idefics3/processing_idefics3.py", "line": 145, "col": 53, "module": "regex", "call": "compile", "pattern": "(\\n?\\n?|\\n?)+", "test_string": "Example likes ."} +{"file": "./src/transformers/models/clvp/tokenization_clvp.py", "line": 177, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 209, "col": 15, "module": "regex", "call": "sub", "pattern": "([0-9][0-9,]+[0-9])", "test_string": "Example likes 444."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 210, "col": 15, "module": "regex", "call": "sub", "pattern": "£([0-9,]*[0-9])", "test_string": "Example likes £44."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 211, "col": 15, "module": "regex", "call": "sub", "pattern": "\\$([0-9.,]*[0-9])", "test_string": "Example likes $44."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 212, "col": 15, "module": "regex", "call": "sub", "pattern": "([0-9]++\\.[0-9]+)", "test_string": "Example likes a.4."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 213, "col": 15, "module": "regex", "call": "sub", "pattern": "[0-9]++(st|nd|rd|th)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 214, "col": 15, "module": "regex", "call": "sub", "pattern": "[0-9]+", "test_string": "Example likes 4."} +{"file": "./src/transformers/models/clvp/number_normalizer.py", "line": 229, "col": 22, "module": "regex", "call": "compile", "pattern": "\\s+", "test_string": "Example likes 
x."} +{"file": "./src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py", "line": 69, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+_\\d+", "test_string": "Example likes M x."} +{"file": "./src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py", "line": 70, "col": 30, "module": "regex", "call": "split", "pattern": "_(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/florence2/processing_florence2.py", "line": 681, "col": 32, "module": "regex", "call": "sub", "pattern": "^", "test_string": "Example likes ."} +{"file": "./src/transformers/models/florence2/processing_florence2.py", "line": 702, "col": 58, "module": "regex", "call": "finditer", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/florence2/modular_florence2.py", "line": 874, "col": 32, "module": "regex", "call": "sub", "pattern": "^", "test_string": "Example likes ."} +{"file": "./src/transformers/models/florence2/modular_florence2.py", "line": 895, "col": 58, "module": "regex", "call": "finditer", "pattern": "", "test_string": "Example likes ."} +{"file": "./src/transformers/models/herbert/tokenization_herbert.py", "line": 53, "col": 11, "module": "regex", "call": "sub", "pattern": "。\\s*", "test_string": "Example likes 。x."} +{"file": "./src/transformers/models/herbert/tokenization_herbert.py", "line": 78, "col": 11, "module": "regex", "call": "sub", "pattern": ".\\s*", "test_string": "Example likes .x."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 58, "col": 14, "module": "regex", "call": "sub", "pattern": "blocks", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 60, "col": 14, "module": "regex", "call": "sub", "pattern": "attn", "test_string": "Example likes alpha."} +{"file": 
"./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 62, "col": 14, "module": "regex", "call": "sub", "pattern": "norm1", "test_string": "Example likes alpha1."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 64, "col": 14, "module": "regex", "call": "sub", "pattern": "norm2", "test_string": "Example likes alpha2."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 66, "col": 14, "module": "regex", "call": "sub", "pattern": "encoder.norm", "test_string": "Example likes alpha."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 68, "col": 14, "module": "regex", "call": "sub", "pattern": "encoder.patch_embed.proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 71, "col": 14, "module": "regex", "call": "sub", "pattern": "encoder.pos_embed", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 73, "col": 14, "module": "regex", "call": "sub", "pattern": "encoder.cls_token", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py", "line": 76, "col": 14, "module": "regex", "call": "sub", "pattern": "self_attn.proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py", "line": 150, "col": 15, "module": "regex", "call": "sub", "pattern": "@@$", "test_string": "alice@example.com"} +{"file": "./src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py", "line": 150, "col": 67, "module": "regex", "call": "sub", "pattern": "$", "test_string": "Example likes ."} +{"file": "./src/transformers/models/phobert/tokenization_phobert.py", "line": 279, "col": 16, "module": 
"regex", "call": "findall", "pattern": "\\S+\\n?", "test_string": "Example likes x."} +{"file": "./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 202, "col": 20, "module": "regex", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 203, "col": 20, "module": "regex", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/gpt_oss/convert_gpt_oss_weights_to_hf.py", "line": 204, "col": 20, "module": "regex", "call": "sub", "pattern": "qkv_proj", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py", "line": 78, "col": 15, "module": "regex", "call": "fullmatch", "pattern": "[A-Za-z]+\\d+", "test_string": "Example likes Mx."} +{"file": "./src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py", "line": 79, "col": 30, "module": "regex", "call": "split", "pattern": "(\\d+)", "test_string": "Example likes x."} +{"file": "./src/transformers/models/luke/tokenization_luke.py", "line": 322, "col": 19, "module": "regex", "call": "compile", "pattern": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "test_string": "a simple example phrase"} +{"file": "./src/transformers/models/donut/processing_donut.py", "line": 112, "col": 30, "module": "regex", "call": "search", "pattern": "", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/commands/add_new_model_like.py", "line": 179, "col": 11, "module": "regex", "call": "search", "pattern": "(?:tokenization)|(?:image_processing)_auto_fast.py", "test_string": "Example likes alpha alpha alpha alpha."} +{"file": "./src/transformers/commands/add_new_model_like.py", "line": 231, "col": 29, "module": "regex", "call": "sub", "pattern": "# ?", "test_string": "Example likes #."} 
+{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 256, "col": 34, "module": "regex", "call": "search", "pattern": " image_processing_class = .*", "test_string": "Example likes alpha alpha alpha = A."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 280, "col": 21, "module": "regex", "call": "search", "pattern": "^# coding=utf-8\\n(#[^\\n]*\\n)*", "test_string": "Example likes # alpha=alpha 8\n#a."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 302, "col": 21, "module": "regex", "call": "sub", "pattern": "# Copyright (\\d+)\\s", "test_string": "Example likes # alpha xx."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 304, "col": 12, "module": "regex", "call": "search", "pattern": "^\"\"\"Image processor.*$", "test_string": "\"\"\"alpha alpha"} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 365, "col": 12, "module": "regex", "call": "search", "pattern": "def __init__.*?def ", "test_string": "Example likes alpha alpha alpha."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 379, "col": 19, "module": "regex", "call": "findall", "pattern": "= (.*?)(?:,|\\))", "test_string": "Example likes = A,."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 470, "col": 27, "module": "regex", "call": "findall", "pattern": "class (\\w*ImageProcessor)", "test_string": "Example likes alpha alpha."} +{"file": "./src/transformers/commands/add_fast_image_processor.py", "line": 48, "col": 22, "module": "regex", "call": "compile", "pattern": "if TYPE_CHECKING:\\n(?P.*?)(?=\\s*else:)", "test_string": "Example likes if alpha alpha:\nalpha:."} +{"file": "./src/transformers/commands/serving.py", "line": 1097, "col": 44, "module": "regex", "call": "search", "pattern": "\\\"name\\\": \\\"(.*?)\\\"", "test_string": "Example likes \"alpha\": \"A\"."} +{"file": 
"./src/transformers/pipelines/document_question_answering.py", "line": 498, "col": 17, "module": "regex", "call": "search", "pattern": "(.*)", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/pipelines/document_question_answering.py", "line": 493, "col": 19, "module": "regex", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 59, "col": 17, "module": "regex", "call": "compile", "pattern": "^(.*?)[\\n\\s]*(Args:|Returns:|Raises:|\\Z)", "test_string": "Example likes A\nalpha:."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 61, "col": 10, "module": "regex", "call": "compile", "pattern": "\\n\\s*Args:\\n\\s*(.*?)[\\n\\s]*(Returns:|Raises:|\\Z)", "test_string": "Example likes alpha:\nxA\nalpha:."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 63, "col": 16, "module": "regex", "call": "compile", "pattern": "\n(?:^|\\n) # Match the start of the args block, or a newline\n\\s*(\\w+):\\s* # Capture the argument name and strip spacing\n(.*?)\\s* # Capture the argument description, which can span multiple lines, and strip trailing spacing\n(?=\\n\\s*\\w+:|\\Z) # Stop when you hit the next argument or the end of the block\n", "test_string": "Example likes # alpha alpha alpha of alpha alpha alpha, or a alpha\nxx:x # alpha alpha alpha alpha alpha alpha alpha\nAx # alpha alpha alpha alpha, alpha alpha alpha alpha alpha, alpha alpha alpha alpha xx: # alpha alpha alpha alpha alpha alpha alpha or alpha alpha of alpha alpha."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 73, "col": 13, "module": "regex", "call": "compile", "pattern": "\\n\\s*Returns:\\n\\s*(.*?)[\\n\\s]*(Raises:|\\Z)", "test_string": "Example likes alpha:\nxA\nalpha:."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 362, "col": 23, "module": "regex", "call": "search", "pattern": "\\(choices:\\s*(.*?)\\)\\s*$", 
"test_string": "Example likes (alpha:xA)x."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 230, "col": 31, "module": "regex", "call": "sub", "pattern": "\\s*\\n+\\s*", "test_string": "Example likes x\nx."} +{"file": "./src/transformers/utils/chat_template_utils.py", "line": 476, "col": 44, "module": "regex", "call": "search", "pattern": "\\{\\%-?\\s*generation\\s*-?\\%\\}", "test_string": "Example likes {% alpha %}."} +{"file": "./src/transformers/utils/notebook.py", "line": 357, "col": 40, "module": "regex", "call": "sub", "pattern": "\\_loss$", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/versions.py", "line": 69, "col": 7, "module": "regex", "call": "match", "pattern": "^[\\w_\\-\\d]+$", "test_string": ""} +{"file": "./src/transformers/utils/versions.py", "line": 72, "col": 16, "module": "regex", "call": "findall", "pattern": "^([^!=<>\\s]+)([\\s!=<>]{1,2}.+)", "test_string": "Example likes !!A."} +{"file": "./src/transformers/utils/versions.py", "line": 82, "col": 20, "module": "regex", "call": "findall", "pattern": "^([\\s!=<>]{1,2})(.+)", "test_string": "Example likes !A."} +{"file": "./src/transformers/utils/hub.py", "line": 252, "col": 13, "module": "regex", "call": "search", "pattern": "snapshots/([^/]+)/", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 71, "col": 17, "module": "regex", "call": "compile", "pattern": "\\[(.+?)\\]\\((https://huggingface\\.co/.+?)\\)", "test_string": "http://example.com/docs"} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 994, "col": 20, "module": "regex", "call": "compile", "pattern": "(of shape\\s*(?:`.*?`|\\(.*?\\)))", "test_string": "Example likes of alpha`A`."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1002, "col": 22, "module": "regex", "call": "compile", "pattern": "(defaults to \\s*[^)]*)", "test_string": "Example likes alpha to xa."} +{"file": 
"./src/transformers/utils/auto_docstring.py", "line": 1023, "col": 12, "module": "regex", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example|Return)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1029, "col": 19, "module": "regex", "call": "compile", "pattern": "(?:Args:)(\\n.*)?(\\n)?$", "test_string": "Example likes alpha:\nA."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1159, "col": 23, "module": "regex", "call": "findall", "pattern": "{(.*?)}", "test_string": "Example likes {A}."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1601, "col": 20, "module": "regex", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1065, "col": 32, "module": "regex", "call": "sub", "pattern": "^", "test_string": "Example likes ."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1297, "col": 25, "module": "regex", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1299, "col": 25, "module": "regex", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": "Example likes alpha[A]."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1642, "col": 52, "module": "regex", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Example)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1599, "col": 28, "module": "regex", "call": "search", "pattern": "(?m)^([ \\t]*)(?=Return)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1499, "col": 33, "module": "regex", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1501, "col": 
33, "module": "regex", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": "Example likes alpha[A]."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1842, "col": 33, "module": "regex", "call": "sub", "pattern": "ForwardRef\\('([\\w.]+)'\\)", "test_string": "Example likes alpha('.')."} +{"file": "./src/transformers/utils/auto_docstring.py", "line": 1844, "col": 33, "module": "regex", "call": "sub", "pattern": "Optional\\[(.*?)\\]", "test_string": "Example likes alpha[A]."} +{"file": "./src/transformers/utils/doc.py", "line": 100, "col": 13, "module": "regex", "call": "search", "pattern": "^(\\s*)\\S", "test_string": "Example likes xx."} +{"file": "./src/transformers/utils/doc.py", "line": 124, "col": 20, "module": "regex", "call": "sub", "pattern": "^(\\s+)(\\S+)(\\s+)", "test_string": "Example likes alpha."} +{"file": "./src/transformers/utils/doc.py", "line": 125, "col": 20, "module": "regex", "call": "sub", "pattern": ":\\s*\\n\\s*(\\S)", "test_string": "Example likes :x\nxx."} +{"file": "./src/transformers/utils/doc.py", "line": 1085, "col": 15, "module": "regex", "call": "match", "pattern": "^refs/pr/\\\\d+", "test_string": "/home/user/readme.txt"} +{"file": "./src/transformers/utils/doc.py", "line": 140, "col": 33, "module": "regex", "call": "search", "pattern": "^\\s*(Args|Parameters):\\s*$", "test_string": "alpha:x"} +{"file": "./src/transformers/utils/doc.py", "line": 1105, "col": 33, "module": "regex", "call": "search", "pattern": "^\\s*Returns?:\\s*$", "test_string": "alpha:x"} +{"file": "./src/transformers/data/metrics/squad_metrics.py", "line": 40, "col": 16, "module": "regex", "call": "compile", "pattern": "\\b(a|an|the)\\b", "test_string": "Example likes alpha."} +{"file": "./tests/test_modeling_common.py", "line": 905, "col": 27, "module": "regex", "call": "search", "pattern": "^# Copyright (\\d{4})", "test_string": "Example likes # alpha alpha."} +{"file": "./tests/test_modeling_common.py", "line": 953, "col": 19, 
"module": "regex", "call": "search", "pattern": "\\.parametrizations\\..*?\\.original[01]", "test_string": "Example likes .alpha.A.alpha0."} +{"file": "./tests/test_tokenization_common.py", "line": 270, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} +{"file": "./tests/test_pipeline_mixin.py", "line": 918, "col": 17, "module": "regex", "call": "match", "pattern": "(\\w+)\\W", "test_string": "Example likes xx."} +{"file": "./tests/models/janus/test_modeling_janus.py", "line": 237, "col": 23, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/janus/test_modeling_janus.py", "line": 242, "col": 23, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/whisper/test_modeling_whisper.py", "line": 2533, "col": 31, "module": "regex", "call": "split", "pattern": "<\\|[\\d\\.]+\\|>", "test_string": "Example likes <|.|>."} +{"file": "./tests/models/whisper/test_modeling_whisper.py", "line": 2537, "col": 28, "module": "regex", "call": "findall", "pattern": "<\\|[\\d\\.]+\\|>", "test_string": "Example likes <|.|>."} +{"file": "./tests/models/deepseek_vl_hybrid/test_modeling_deepseek_vl_hybrid.py", "line": 258, "col": 24, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/deepseek_vl_hybrid/test_modeling_deepseek_vl_hybrid.py", "line": 267, "col": 24, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/byt5/test_tokenization_byt5.py", "line": 62, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} +{"file": "./tests/models/markuplm/test_tokenization_markuplm.py", "line": 1553, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": 
"m"} +{"file": "./tests/models/layoutlmv2/test_tokenization_layoutlmv2.py", "line": 1768, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} +{"file": "./tests/models/deepseek_vl/test_modeling_deepseek_vl.py", "line": 222, "col": 23, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/deepseek_vl/test_modeling_deepseek_vl.py", "line": 227, "col": 23, "module": "regex", "call": "finditer", "pattern": "Attention(?!Pool)", "test_string": "Example likes alpha."} +{"file": "./tests/models/perceiver/test_tokenization_perceiver.py", "line": 63, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} +{"file": "./tests/models/t5/test_tokenization_t5.py", "line": 374, "col": 25, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./tests/models/t5/test_tokenization_t5.py", "line": 385, "col": 25, "module": "regex", "call": "search", "pattern": "", "test_string": "Example likes ."} +{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1302, "col": 19, "module": "regex", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} +{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1365, "col": 19, "module": "regex", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} +{"file": "./tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py", "line": 1427, "col": 19, "module": "regex", "call": "sub", "pattern": "<.*?>", "test_string": "Example likes ."} +{"file": "./tests/models/layoutlmv3/test_tokenization_layoutlmv3.py", "line": 1658, "col": 37, "module": "regex", "call": "match", "pattern": "^[ a-zA-Z]+$", "test_string": "m"} +{"file": "./tests/utils/test_attention_visualizer.py", "line": 24, "col": 10, "module": "regex", "call": 
"compile", "pattern": "\\x1b\\[[0-9;]*m", "test_string": "Example likes \u001b[4m."} From 53dc06af8519da6d832f493012addee3ae40384e Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 9 Oct 2025 11:33:25 +0000 Subject: [PATCH 4/4] fix inaccurate \u[4] and \U[8] conversion to pcre \x{[4,8]} format --- README.md | 13 +++--- pcre/flags.py | 6 +-- pcre/pcre.py | 79 +++----------------------------- pcre_ext/pcre2.c | 111 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_core.py | 37 +++++++++++++-- 5 files changed, 161 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index 239abed..a108aa1 100644 --- a/README.md +++ b/README.md @@ -59,17 +59,16 @@ exposing PCRE2’s extended flag set through the Pythonic `Flag` enum ### `regex` package compatibility -The [`regex`](https://pypi.org/project/regex/) package interprets both -`\uXXXX`/`\u{...}` and `\UXXXXXXXX` escapes as UTF-8 code points, while -PCRE2 expects hexadecimal escapes to use the `\x{...}` form. Enable -`Flag.COMPAT_REGEX` to translate those escapes automatically when compiling -patterns: +The [`regex`](https://pypi.org/project/regex/) package interprets +`\uXXXX` and `\UXXXXXXXX` escapes as UTF-8 code points, while PCRE2 expects +hexadecimal escapes to use the `\x{...}` form. 
Enable `Flag.COMPAT_UNICODE_ESCAPE` to +translate those escapes automatically when compiling patterns: ```python from pcre import compile, Flag -pattern = compile(r"\\u{1F600}", flags=Flag.COMPAT_REGEX) -assert pattern.pattern == r"\\x{1F600}" +pattern = compile(r"\U0001F600", flags=Flag.COMPAT_UNICODE_ESCAPE) +assert pattern.pattern == r"\x{0001F600}" ``` Set the default behaviour globally with `pcre.configure(compat_regex=True)` diff --git a/pcre/flags.py b/pcre/flags.py index eeb0dd7..e36fd50 100644 --- a/pcre/flags.py +++ b/pcre/flags.py @@ -38,7 +38,7 @@ def _next_power_of_two(value: int) -> int: NO_JIT: int = _EXTRA_BASE << 3 THREADS: int = _EXTRA_BASE << 4 NO_THREADS: int = _EXTRA_BASE << 5 -COMPAT_REGEX: int = _EXTRA_BASE << 6 +COMPAT_UNICODE_ESCAPE: int = _EXTRA_BASE << 6 PY_ONLY_FLAG_MEMBERS: Dict[str, int] = { "NO_UTF": NO_UTF, @@ -47,11 +47,11 @@ def _next_power_of_two(value: int) -> int: "NO_JIT": NO_JIT, "THREADS": THREADS, "NO_THREADS": NO_THREADS, - "COMPAT_REGEX": COMPAT_REGEX, + "COMPAT_UNICODE_ESCAPE": COMPAT_UNICODE_ESCAPE, } PY_ONLY_FLAG_MASK: int = ( - NO_UTF | NO_UCP | JIT | NO_JIT | THREADS | NO_THREADS | COMPAT_REGEX + NO_UTF | NO_UCP | JIT | NO_JIT | THREADS | NO_THREADS | COMPAT_UNICODE_ESCAPE ) diff --git a/pcre/pcre.py b/pcre/pcre.py index 48380f4..937efc7 100644 --- a/pcre/pcre.py +++ b/pcre/pcre.py @@ -16,7 +16,7 @@ from .cache import cached_compile from .cache import clear_cache as _clear_cache from .flags import ( - COMPAT_REGEX, + COMPAT_UNICODE_ESCAPE, JIT, NO_JIT, NO_THREADS, @@ -96,73 +96,8 @@ def _extract_jit_override(flags: int) -> bool | None: _STD_RE_FLAG_MASK |= int(_flag) -_HEX_DIGITS = frozenset("0123456789abcdefABCDEF") - - -def _is_hex_string(value: str) -> bool: - return bool(value) and all(char in _HEX_DIGITS for char in value) - - def _convert_regex_compat(pattern: str) -> str: - length = len(pattern) - if length < 2: - return pattern - - pieces: list[str] = [] - index = 0 - modified = False - - while index <
length: - char = pattern[index] - if char == "\\" and index + 1 < length: - marker = pattern[index + 1] - - if marker == "u": - brace_pos = index + 2 - if brace_pos < length and pattern[brace_pos] == "{": - cursor = brace_pos + 1 - while cursor < length and pattern[cursor] != "}": - cursor += 1 - if cursor < length: - payload = pattern[brace_pos + 1 : cursor] - if _is_hex_string(payload): - pieces.append("\\x{") - pieces.append(payload) - pieces.append("}") - index = cursor + 1 - modified = True - continue - else: - payload = pattern[index + 2 : index + 6] - if len(payload) == 4 and _is_hex_string(payload): - pieces.append("\\x{") - pieces.append(payload) - pieces.append("}") - index += 6 - modified = True - continue - - if marker == "U": - payload = pattern[index + 2 : index + 10] - if len(payload) == 8 and _is_hex_string(payload): - pieces.append("\\x{") - pieces.append(payload.lstrip("0") or "0") - pieces.append("}") - index += 10 - modified = True - continue - - pieces.append(char) - pieces.append(marker) - index += 2 - continue - - pieces.append(char) - index += 1 - - if not modified: - return pattern - return "".join(pieces) + return _pcre2.translate_unicode_escapes(pattern) def _apply_regex_compat(pattern: Any, enabled: bool) -> Any: @@ -534,11 +469,11 @@ def compile(pattern: Any, flags: FlagInput = 0) -> Pattern: resolved_flags = _normalise_flags(flags) threads_requested = bool(resolved_flags & THREADS) no_threads_requested = bool(resolved_flags & NO_THREADS) - compat_requested = bool(resolved_flags & COMPAT_REGEX) + compat_requested = bool(resolved_flags & COMPAT_UNICODE_ESCAPE) if threads_requested and no_threads_requested: raise ValueError("Flag.THREADS and Flag.NO_THREADS cannot be combined") - resolved_flags_no_thread_markers = resolved_flags & ~(THREADS | NO_THREADS | COMPAT_REGEX) + resolved_flags_no_thread_markers = resolved_flags & ~(THREADS | NO_THREADS | COMPAT_UNICODE_ESCAPE) jit_override = _extract_jit_override(resolved_flags_no_thread_markers) 
resolved_jit = _resolve_jit_setting(jit_override) compat_enabled = bool(_DEFAULT_COMPAT_REGEX or compat_requested) @@ -555,7 +490,7 @@ def compile(pattern: Any, flags: FlagInput = 0) -> Pattern: raise ValueError("Cannot supply flags when using a Pattern instance.") if compat_requested: raise ValueError( - "Cannot supply Flag.COMPAT_REGEX when using a Pattern instance." + "Cannot supply Flag.COMPAT_UNICODE_ESCAPE when using a Pattern instance." ) if threads_requested: pattern.enable_threads() @@ -572,7 +507,7 @@ def compile(pattern: Any, flags: FlagInput = 0) -> Pattern: raise ValueError("Cannot supply jit when using a compiled pattern instance.") if compat_requested: raise ValueError( - "Cannot supply Flag.COMPAT_REGEX when using a compiled pattern instance." + "Cannot supply Flag.COMPAT_UNICODE_ESCAPE when using a compiled pattern instance." ) wrapper = Pattern(pattern) if threads_requested: @@ -735,7 +670,7 @@ def configure(*, jit: bool | None = None, compat_regex: bool | None = None) -> b """Adjust global defaults for the high-level wrapper. Returns the effective default JIT setting after applying any updates. Supply - ``compat_regex`` to change the default behaviour for :data:`Flag.COMPAT_REGEX`. + ``compat_regex`` to change the default behaviour for :data:`Flag.COMPAT_UNICODE_ESCAPE`. 
""" global _DEFAULT_JIT, _DEFAULT_COMPAT_REGEX diff --git a/pcre_ext/pcre2.c b/pcre_ext/pcre2.c index b8c5df3..0e84be3 100644 --- a/pcre_ext/pcre2.c +++ b/pcre_ext/pcre2.c @@ -10,6 +10,26 @@ #include #endif +static inline int +is_hex_digit(unsigned char value) +{ + return (value >= '0' && value <= '9') || + (value >= 'a' && value <= 'f') || + (value >= 'A' && value <= 'F'); +} + +static inline unsigned int +hex_value(unsigned char value) +{ + if (value >= '0' && value <= '9') { + return (unsigned int)(value - '0'); + } + if (value >= 'a' && value <= 'f') { + return (unsigned int)(value - 'a' + 10); + } + return (unsigned int)(value - 'A' + 10); +} + #define STRINGIFY_DETAIL(value) #value #define STRINGIFY(value) STRINGIFY_DETAIL(value) @@ -1896,6 +1916,96 @@ static PyObject *module_memory_allocator(PyObject *Py_UNUSED(module), PyObject * static PyObject *module_get_pcre2_version(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)); static void initialize_pcre2_version(void); +static PyObject * +module_translate_unicode_escapes(PyObject *Py_UNUSED(module), PyObject *arg) +{ + if (!PyUnicode_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "pattern must be str"); + return NULL; + } + + Py_ssize_t byte_length = 0; + const char *src = PyUnicode_AsUTF8AndSize(arg, &byte_length); + if (src == NULL) { + return NULL; + } + + if (byte_length < 2) { + return Py_NewRef(arg); + } + + if (byte_length > (PY_SSIZE_T_MAX - 1) / 2) { + PyErr_SetString(PyExc_OverflowError, "pattern too large to translate"); + return NULL; + } + + Py_ssize_t capacity = (byte_length * 2) + 1; + char *buffer = PyMem_Malloc((size_t)capacity); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + + const char *p = src; + const char *end = src + byte_length; + char *out = buffer; + int modified = 0; + + while (p < end) { + if (p + 1 < end && p[0] == '\\' && (p[1] == 'u' || p[1] == 'U')) { + int is_upper = (p[1] == 'U'); + int hex_len = is_upper ? 
8 : 4; + if (p + 2 + hex_len <= end) { + unsigned int codepoint = 0; + int valid = 1; + for (int offset = 0; offset < hex_len; ++offset) { + unsigned char digit = (unsigned char)p[2 + offset]; + if (!is_hex_digit(digit)) { + valid = 0; + break; + } + codepoint = (codepoint << 4) | hex_value(digit); + } + if (valid) { + if (codepoint > 0x10FFFFu) { + PyMem_Free(buffer); + PyErr_Format( + PcreError, + "Unicode escape \\%c%.*s exceeds 0x10FFFF", + p[1], + hex_len, + p + 2 + ); + return NULL; + } + + *out++ = '\\'; + *out++ = 'x'; + *out++ = '{'; + memcpy(out, p + 2, (size_t)hex_len); + out += hex_len; + *out++ = '}'; + p += 2 + hex_len; + modified = 1; + continue; + } + } + } + + *out++ = *p++; + } + + if (!modified) { + PyMem_Free(buffer); + return Py_NewRef(arg); + } + + Py_ssize_t result_length = out - buffer; + PyObject *result = PyUnicode_DecodeUTF8(buffer, result_length, "strict"); + PyMem_Free(buffer); + return result; +} + static PyMethodDef module_methods[] = { {"compile", (PyCFunction)module_compile, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Compile a pattern into a PCRE2 Pattern object." )}, {"match", (PyCFunction)module_match, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Match a pattern against the beginning of a string." )}, @@ -1915,6 +2025,7 @@ static PyMethodDef module_methods[] = { {"get_library_version", (PyCFunction)module_get_pcre2_version, METH_NOARGS, PyDoc_STR("Return the PCRE2 library version string." )}, {"get_allocator", (PyCFunction)module_memory_allocator, METH_NOARGS, PyDoc_STR("Return the name of the active heap allocator (tcmalloc/jemalloc/malloc)." )}, {"_cpu_ascii_vector_mode", (PyCFunction)module_cpu_ascii_vector_mode, METH_NOARGS, PyDoc_STR("Return the active ASCII vector width (0=scalar,1=SSE2,2=AVX2,3=AVX512)." )}, + {"translate_unicode_escapes", (PyCFunction)module_translate_unicode_escapes, METH_O, PyDoc_STR("Translate literal \\uXXXX/\\UXXXXXXXX escapes to PCRE2-compatible \\x{...} sequences." 
)}, {NULL, NULL, 0, NULL}, }; diff --git a/tests/test_core.py b/tests/test_core.py index c946ddf..d4b33c9 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -175,8 +175,7 @@ def fake_cached(pattern, flags, wrapper, *, jit): ("pattern", "expected"), [ ("\\u0041", "\\x{0041}"), - ("\\u{1F600}", "\\x{1F600}"), - ("\\U0001F600", "\\x{1F600}"), + ("\\U0001F600", "\\x{0001F600}"), ], ) def test_compile_converts_regex_compat_sequences(pattern, expected, monkeypatch): @@ -197,12 +196,44 @@ def fake_cached(pattern_value, flags, wrapper, *, jit): monkeypatch.setattr(core, "cached_compile", fake_cached) - compiled = core.compile(pattern, flags=Flag.COMPAT_REGEX) + compiled = core.compile(pattern, flags=Flag.COMPAT_UNICODE_ESCAPE) assert captured["pattern"] == expected assert compiled.pattern == expected +def test_compile_leaves_brace_style_unicode_escape(monkeypatch): + captured = {} + + def fake_cached(pattern_value, flags, wrapper, *, jit): + captured["pattern"] = pattern_value + fake_cpattern = types.SimpleNamespace( + pattern=pattern_value, + groupindex={}, + flags=flags, + match=MethodRecorder("match"), + search=MethodRecorder("search"), + fullmatch=MethodRecorder("fullmatch"), + jit=jit, + ) + return wrapper(fake_cpattern) + + monkeypatch.setattr(core, "cached_compile", fake_cached) + + compiled = core.compile("\\u{1F600}", flags=Flag.COMPAT_UNICODE_ESCAPE) + + assert captured["pattern"] == "\\u{1F600}" + assert compiled.pattern == "\\u{1F600}" + + +def test_compile_rejects_out_of_range_unicode_escape(): + with pytest.raises(core.PcreError) as excinfo: + core.compile("\\U00110000", flags=Flag.COMPAT_UNICODE_ESCAPE) + + message = str(excinfo.value) + assert "exceeds 0x10FFFF" in message + + def test_compile_uses_global_regex_compat(monkeypatch): captured = {}