diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8d88a0d6..73c49988 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,6 +37,9 @@ jobs: test-current-versions: name: Tests runs-on: ubuntu-latest + strategy: + matrix: + ext-parallel: [ '', 'parallel' ] steps: - name: Checkout code uses: actions/checkout@v4 @@ -45,10 +48,13 @@ jobs: - name: Setup PHP uses: shivammathur/setup-php@v2 + id: setup-php with: php-version: 8.3 coverage: xdebug - extensions: ast + extensions: ast, ${{ matrix.ext-parallel }} + env: + phpts: zts - name: Build project run: make build --no-print-directory @@ -56,9 +62,6 @@ jobs: - name: πŸ§ͺ PHPUnit Tests run: make test --no-print-directory - - name: πŸ‘ Code Quality - run: make codestyle --no-print-directory - - name: Uploading coverage to coveralls continue-on-error: true env: @@ -67,10 +70,14 @@ jobs: - name: SonarCloud Scan uses: SonarSource/sonarcloud-github-action@master + continue-on-error: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + - name: πŸ‘ Code Quality + run: make codestyle --no-print-directory + - name: Upload Artifacts uses: actions/upload-artifact@v4 continue-on-error: true @@ -266,11 +273,25 @@ jobs: run: | ! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST - - name: πŸ”₯ Quick Benchmark πŸ”₯ + - name: πŸ‘ Valid CSV files (Parallel) + run: $BLUEPRINT_DOCKER $CMD_VALIDATE $VALID_TEST --parallel + + - name: πŸ‘Ž Invalid CSV files (Parallel) + run: | + ! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST --parallel + + - name: Prepare Benchmark + run: | + make build --no-print-directory + make bench-create-csv --no-print-directory + + - name: πŸ”₯ Benchmark (single thread) + run: | + ! make bench-docker-quick --no-print-directory + + - name: πŸ”₯ Benchmark (multi threads) run: | - make build > /dev/null 2>&1 - make bench-create-csv > /dev/null - make bench-docker-quick --no-print-directory + ! 
make bench-docker-quick-parallel --no-print-directory - name: Push Docker Image (master) uses: docker/build-push-action@v5 diff --git a/.gitignore b/.gitignore index 068ef66f..236d48a4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ vendor phpunit.xml /docker/preload.php /docker/random_data.csv +/docker/included_files.php *.cache *.phar .version diff --git a/.phan.php b/.phan.php index 5fc6b5f2..7d8c295f 100644 --- a/.phan.php +++ b/.phan.php @@ -23,15 +23,21 @@ 'directory_list' => [ 'src', - 'vendor/jbzoo/ci-report-converter/src', - 'vendor/jbzoo/cli/src', 'vendor/jbzoo/data/src', + 'vendor/jbzoo/cli/src', 'vendor/jbzoo/utils/src', - 'vendor/league/csv/src', + 'vendor/jbzoo/ci-report-converter/src', + 'vendor/symfony/console', 'vendor/symfony/finder', 'vendor/symfony/yaml', + + 'vendor/league/csv/src', 'vendor/markrogoyski/math-php/src', 'vendor/respect/validation', + 'vendor/fidry/cpu-core-counter', + ], + 'autoload_internal_extension_signatures' => [ + 'parallel' => 'tests/stubs/parallel.stub_php', ], ]); diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php index f79aa7e6..71e595b2 100644 --- a/.php-cs-fixer.php +++ b/.php-cs-fixer.php @@ -16,7 +16,20 @@ namespace JBZoo\Codestyle\PhpCsFixer; -return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig(null, [ +use Symfony\Component\Finder\Finder; + +$finder = (new Finder()) + ->files() + ->followLinks() + ->ignoreVCS(true) + ->ignoreDotFiles(false) + ->in(__DIR__) + ->exclude('vendor') + ->exclude('docker') // preload.php! + ->exclude('build') + ->name('/\.php$/'); + +return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig($finder, [ 'binary_operator_spaces' => [ 'operators' => [ '=' => 'single_space', diff --git a/Dockerfile b/Dockerfile index 30f8666c..b4aed754 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,37 +17,42 @@ COPY . 
/tmp RUN make build-version ######################################################################################## -FROM php:8.3-cli-alpine +FROM php:8.3-zts-alpine # Install PHP extensions ADD --chmod=0755 https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions /usr/local/bin/ -RUN install-php-extensions opcache @composer +RUN install-php-extensions opcache parallel @composer # Install application -# run `make build-version` before! WORKDIR /app ENV COMPOSER_ALLOW_SUPERUSER=1 COPY . /app COPY --from=preparatory /tmp/.version /app/.version -RUN composer install --no-dev \ - --classmap-authoritative \ - --no-progress \ - --no-suggest \ - --optimize-autoloader \ - && rm -rf ./.git \ - && composer clear-cache \ - && chmod +x ./csv-blueprint +RUN composer install --no-dev --classmap-authoritative --no-progress \ + && rm -rf ./.git \ + && composer clear-cache \ + && chmod +x ./csv-blueprint \ + && chmod +x ./docker/entrypoint.sh RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini" COPY ./docker/php.ini /usr/local/etc/php/conf.d/docker-z99-php.ini -# Quick test -RUN time ./csv-blueprint --version --ansi \ - && time ./csv-blueprint validate:csv --help --ansi - # Warmup caches -#RUN php ./docker/build-preloader.php \ -# && php ./docker/preload.php \ +RUN php ./docker/random-csv.php \ + && JBZOO_BUILD_PRELOADER=1 \ + ./csv-blueprint validate:csv \ + --schema=/app/schema-examples/full.yml \ + --csv=/app/docker/random_data.csv \ + --apply-all=yes \ + --report=text --mute-errors | grep issues \ + && rm ./docker/random_data.csv \ + && php ./docker/build-preloader.php \ + && php ./docker/preload.php \ + && du -sh /app/docker # && echo "opcache.preload=/app/docker/preload.php" >> /usr/local/etc/php/conf.d/docker-z99-php.ini -ENTRYPOINT ["/app/csv-blueprint"] +# Quick test +RUN time ./csv-blueprint -V + +ENTRYPOINT ["/app/docker/entrypoint.sh"] +#ENTRYPOINT ["/app/csv-blueprint"] diff --git a/Makefile b/Makefile index 
b5f8db92..6a8a5525 100644 --- a/Makefile +++ b/Makefile @@ -111,9 +111,15 @@ bench-create-csv: ##@Benchmarks Create CSV file @time bash ./tests/Benchmarks/create-csv.sh @echo "::endgroup::" -bench-docker-quick: ##@Benchmarks Run CSV file with Docker (Quick) +bench-docker-quick: + $(call title,"Single thread mode") @docker run --rm $(DOCKER_IMAGE) --ansi --version - -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) + $(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) + +bench-docker-quick-parallel: + $(call title,"Multi-thread mode") + @docker run --rm $(DOCKER_IMAGE) --ansi --version + $(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) --parallel bench-docker: ##@Benchmarks Run CSV file with Docker @docker run --rm $(DOCKER_IMAGE) --ansi --version diff --git a/README.md b/README.md index e88d5fbc..9fa5964e 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ specifications, making it invaluable in scenarios where data quality and consist - [Usage](#usage) - [Schema definition](#schema-definition) - [Presets and reusable schemas](#presets-and-reusable-schemas) +- [Parallel processing](#parallel-processing) - [Complete CLI help message](#complete-cli-help-message) - [Report examples](#report-examples) - [Benchmarks](#benchmarks) @@ -160,14 +161,15 @@ You can find launch examples in the [workflow demo](https://github.com/JBZoo/Csv # Extra options for the CSV Blueprint. Only for debbuging and profiling. # Available options: - # ANSI output. You can disable ANSI colors if you want with `--no-ansi`. - # Verbosity level: Available options: `-v`, `-vv`, `-vvv`. - # Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. - # Add flag `--debug` if you want to see more really deep details. + # Add flag `--parallel` if you want to validate CSV files in parallel. # Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance. 
- # Default value: 'options: --ansi -v' + # Add flag `--debug` if you want to see more really deep details. + # Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. + # Verbosity level: Available options: `-v`, `-vv`, `-vvv` + # ANSI output. You can disable ANSI colors if you want with `--no-ansi`. + # Default value: 'options: --ansi' # You can skip it. - extra: 'options: --ansi -v' + extra: 'options: --ansi' ``` @@ -1412,6 +1414,30 @@ columns: These are intended solely for demonstration and to illustrate potential configurations and features. +## Parallel processing + +The `--parallel` option is available for speeding up the validation of CSV files by utilizing more CPU resources +effectively. + +### Key Points + +- **Experimental Feature:** This feature is currently experimental and requires further debugging and testing. Although + it performs well in synthetic autotests and benchmarks, more practical use cases are needed to validate its stability. +- **Use Case:** This option is beneficial if you are processing dozens of CSV files, with each file taking 1 second or + more to process. +- **Default Behavior:** If you use `--parallel` without specifying a value, it defaults to using the maximum number of + available CPU cores. +- **Thread Pool Size:** You can set a specific number of threads for the pool. For example, `--parallel=10` will set the + thread pool size to 10. It doesn't make much sense to specify more than the number of logical cores in your CPU. +- **Disabling Parallelism:** Using `--parallel=1` disables parallel processing, which is the default setting if the + option is not specified. +- **Implementation:** The feature relies on the `ext-parallel` PHP extension, which enables the creation of lightweight + threads rather than processes. This extension is already included in our Docker image. Ensure that you have + the `ext-parallel` extension installed if you are not using our Docker image. 
This extension is crucial for the + operation of the parallel processing feature. The application always runs in single-threaded mode if the extension is + not installed. + + ## Complete CLI help message This section outlines all available options and commands provided by the tool, leveraging the JBZoo/Cli package for its @@ -1466,6 +1492,10 @@ Options: --debug Intended solely for debugging and advanced profiling purposes. Activating this option provides detailed process insights, useful for troubleshooting and performance analysis. + --parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel. + You can specify the number of threads. + If you do not specify a value, the number of threads will be equal to the number of CPU cores. + By default, the process is launched in a single-threaded mode. [default: "1"] --no-progress Disable progress bar animation for logs. It will be used only for text output format. --mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible). It has major priority then --non-zero-on-error. It's on your own risk! @@ -1522,6 +1552,10 @@ Options: --debug Intended solely for debugging and advanced profiling purposes. Activating this option provides detailed process insights, useful for troubleshooting and performance analysis. + --parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel. + You can specify the number of threads. + If you do not specify a value, the number of threads will be equal to the number of CPU cores. + By default, the process is launched in a single-threaded mode. [default: "1"] --no-progress Disable progress bar animation for logs. It will be used only for text output format. --mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible). It has major priority then --non-zero-on-error. It's on your own risk! 
diff --git a/action.yml b/action.yml index b5cfb985..9f971197 100644 --- a/action.yml +++ b/action.yml @@ -58,12 +58,13 @@ inputs: description: | Extra options for the CSV Blueprint. Only for debbuging and profiling. Available options: - ANSI output. You can disable ANSI colors if you want with `--no-ansi`. - Verbosity level: Available options: `-v`, `-vv`, `-vvv`. - Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. - Add flag `--debug` if you want to see more really deep details. + Add flag `--parallel` if you want to validate CSV files in parallel. Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance. - default: 'options: --ansi -v' + Add flag `--debug` if you want to see more really deep details. + Add flag `--profile` if you want to see profiling info. Add details with `-vvv`. + Verbosity level: Available options: `-v`, `-vv`, `-vvv` + ANSI output. You can disable ANSI colors if you want with `--no-ansi`. + default: 'options: --ansi' runs: using: 'docker' diff --git a/composer.json b/composer.json index 1e304018..57020b12 100644 --- a/composer.json +++ b/composer.json @@ -43,7 +43,8 @@ "respect/validation" : "^2.3.6", "giggsey/libphonenumber-for-php-lite" : "^8.13.34", "giggsey/locale" : "^2.5", - "symfony/polyfill-mbstring" : "^1.29.0" + "symfony/polyfill-mbstring" : "^1.29.0", + "fidry/cpu-core-counter" : "^1.1.0" }, "require-dev" : { diff --git a/composer.lock b/composer.lock index 792694dd..96d04770 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "69288ac6734744736672881ea230a32d", + "content-hash": "3d13c88792c56b5b98e15821829dbf0e", "packages": [ { "name": "bluepsyduck/symfony-process-manager", @@ -63,6 +63,67 @@ }, "time": "2021-12-03T21:30:28+00:00" }, + { + "name": "fidry/cpu-core-counter", + "version": "1.1.0", 
+ "source": { + "type": "git", + "url": "https://github.com/theofidry/cpu-core-counter.git", + "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/theofidry/cpu-core-counter/zipball/f92996c4d5c1a696a6a970e20f7c4216200fcc42", + "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42", + "shasum": "" + }, + "require": { + "php": "^7.2 || ^8.0" + }, + "require-dev": { + "fidry/makefile": "^0.2.0", + "fidry/php-cs-fixer-config": "^1.1.2", + "phpstan/extension-installer": "^1.2.0", + "phpstan/phpstan": "^1.9.2", + "phpstan/phpstan-deprecation-rules": "^1.0.0", + "phpstan/phpstan-phpunit": "^1.2.2", + "phpstan/phpstan-strict-rules": "^1.4.4", + "phpunit/phpunit": "^8.5.31 || ^9.5.26", + "webmozarts/strict-phpunit": "^7.5" + }, + "type": "library", + "autoload": { + "psr-4": { + "Fidry\\CpuCoreCounter\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Théo FIDRY", + "email": "theo.fidry@gmail.com" + } + ], + "description": "Tiny utility to get the number of CPU cores.", + "keywords": [ + "CPU", + "core" + ], + "support": { + "issues": "https://github.com/theofidry/cpu-core-counter/issues", + "source": "https://github.com/theofidry/cpu-core-counter/tree/1.1.0" + }, + "funding": [ + { + "url": "https://github.com/theofidry", + "type": "github" + } + ], + "time": "2024-02-07T09:43:46+00:00" + }, { "name": "giggsey/libphonenumber-for-php-lite", "version": "8.13.34", @@ -2769,67 +2830,6 @@ }, "time": "2022-03-02T22:36:06+00:00" }, - { - "name": "fidry/cpu-core-counter", - "version": "1.1.0", - "source": { - "type": "git", - "url": "https://github.com/theofidry/cpu-core-counter.git", - "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/theofidry/cpu-core-counter/zipball/f92996c4d5c1a696a6a970e20f7c4216200fcc42", - "reference": 
"f92996c4d5c1a696a6a970e20f7c4216200fcc42", - "shasum": "" - }, - "require": { - "php": "^7.2 || ^8.0" - }, - "require-dev": { - "fidry/makefile": "^0.2.0", - "fidry/php-cs-fixer-config": "^1.1.2", - "phpstan/extension-installer": "^1.2.0", - "phpstan/phpstan": "^1.9.2", - "phpstan/phpstan-deprecation-rules": "^1.0.0", - "phpstan/phpstan-phpunit": "^1.2.2", - "phpstan/phpstan-strict-rules": "^1.4.4", - "phpunit/phpunit": "^8.5.31 || ^9.5.26", - "webmozarts/strict-phpunit": "^7.5" - }, - "type": "library", - "autoload": { - "psr-4": { - "Fidry\\CpuCoreCounter\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "ThΓ©o FIDRY", - "email": "theo.fidry@gmail.com" - } - ], - "description": "Tiny utility to get the number of CPU cores.", - "keywords": [ - "CPU", - "core" - ], - "support": { - "issues": "https://github.com/theofidry/cpu-core-counter/issues", - "source": "https://github.com/theofidry/cpu-core-counter/tree/1.1.0" - }, - "funding": [ - { - "url": "https://github.com/theofidry", - "type": "github" - } - ], - "time": "2024-02-07T09:43:46+00:00" - }, { "name": "friendsofphp/php-cs-fixer", "version": "v3.53.0", @@ -5462,12 +5462,12 @@ "source": { "type": "git", "url": "https://github.com/Roave/SecurityAdvisories.git", - "reference": "00077527dfa5415280a28a044eca385eb3feb7ee" + "reference": "31f373849a62ccfe23cba594e91b488e3ec2270b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/00077527dfa5415280a28a044eca385eb3feb7ee", - "reference": "00077527dfa5415280a28a044eca385eb3feb7ee", + "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/31f373849a62ccfe23cba594e91b488e3ec2270b", + "reference": "31f373849a62ccfe23cba594e91b488e3ec2270b", "shasum": "" }, "conflict": { @@ -5554,9 +5554,10 @@ "concrete5/concrete5": "<9.2.8", "concrete5/core": "<8.5.8|>=9,<9.1", "contao-components/mediaelement": ">=2.14.2,<2.21.1", + 
"contao/comments-bundle": ">=2,<4.13.40|>=5.0.0.0-RC1-dev,<5.3.4", "contao/contao": ">=4,<4.4.56|>=4.5,<4.9.40|>=4.10,<4.11.7|>=4.13,<4.13.21|>=5.1,<5.1.4", "contao/core": ">=2,<3.5.39", - "contao/core-bundle": ">=3,<3.5.35|>=4,<4.9.42|>=4.10,<4.13.28|>=5,<5.1.10", + "contao/core-bundle": "<4.13.40|>=5,<5.3.4", "contao/listing-bundle": ">=4,<4.4.8", "contao/managed-edition": "<=1.5", "corveda/phpsandbox": "<1.3.5", @@ -6199,7 +6200,7 @@ "type": "tidelift" } ], - "time": "2024-04-08T16:06:11+00:00" + "time": "2024-04-09T19:04:27+00:00" }, { "name": "sabre/event", diff --git a/csv-blueprint.php b/csv-blueprint.php index 315221d1..6962ed5c 100644 --- a/csv-blueprint.php +++ b/csv-blueprint.php @@ -16,26 +16,26 @@ namespace JBZoo\CsvBlueprint; +use JBZoo\CsvBlueprint\Workers\WorkerPool; + \define('PATH_ROOT', __DIR__); -require_once __DIR__ . '/vendor/autoload.php'; +require_once PATH_ROOT . '/vendor/autoload.php'; if ('cli' !== \PHP_SAPI) { throw new Exception('This script must be run from the command line.'); } +WorkerPool::setBootstrap( + \file_exists(PATH_ROOT . '/docker/preload.php') + ? PATH_ROOT . '/docker/preload.php' + : PATH_ROOT . '/vendor/autoload.php', +); + // Fix for GitHub actions. See action.yml $_SERVER['argv'] = Utils::fixArgv($_SERVER['argv'] ?? []); $_SERVER['argc'] = \count($_SERVER['argv']); -// Set default timezone -\date_default_timezone_set('UTC'); - -// Convert all errors to exceptions. Looks like we have critical case, and we need to stop or handle it. -// We have to do it becase tool uses 3rd-party libraries, and we can't trust them. -// So, we need to catch all errors and handle them. -\set_error_handler(static function ($severity, $message, $file, $line): void { - throw new Exception($message, 0, $severity, $file, $line); -}); +Utils::init(); (new CliApplication('CSV Blueprint', Utils::getVersion(true))) ->registerCommandsByPath(PATH_ROOT . 
'/src/Commands', __NAMESPACE__) diff --git a/docker/build-preloader.php b/docker/build-preloader.php index 6e57dc7c..562749db 100644 --- a/docker/build-preloader.php +++ b/docker/build-preloader.php @@ -14,54 +14,38 @@ declare(strict_types=1); -$classes = include_once __DIR__ . '/../vendor/composer/autoload_classmap.php'; +$files = include_once __DIR__ . '/included_files.php'; $header = <<<'TEXT' filePath, 'w'); + + \fputcsv($fileHandle, $this->columns); + + for ($i = 0; $i < $this->rows; $i++) { + $rowData = []; + + foreach (\array_keys($this->columns) as $columnIndex) { + $rowData[$columnIndex] = \random_int(1, 10000); + } + + \fputcsv($fileHandle, $rowData); + } + + \fclose($fileHandle); + + echo "CSV file created: {$this->filePath}.\n"; + } +} + +(new CsvGenerator( + 1000, + __DIR__ . '/random_data.csv', + ['Column Name (header)', 'another_column', 'inherited_column_login', 'inherited_column_full_name'], +))->generateCsv(); diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 5048883a..65ec38a2 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -31,13 +31,8 @@ - + - diff --git a/psalm.xml b/psalm.xml index 2fecab37..83498d58 100644 --- a/psalm.xml +++ b/psalm.xml @@ -35,4 +35,7 @@ + + + diff --git a/src/CliApplication.php b/src/CliApplication.php index 9aef3a6f..9236e640 100644 --- a/src/CliApplication.php +++ b/src/CliApplication.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint; -class CliApplication extends \JBZoo\Cli\CliApplication +final class CliApplication extends \JBZoo\Cli\CliApplication { private array $appLogo = [ ' __________ __ ___ __ _ __ ', diff --git a/src/Commands/AbstractValidate.php b/src/Commands/AbstractValidate.php index 8f95a9c7..d093cb09 100644 --- a/src/Commands/AbstractValidate.php +++ b/src/Commands/AbstractValidate.php @@ -20,9 +20,12 @@ use JBZoo\CsvBlueprint\Schema; use JBZoo\CsvBlueprint\Utils; use JBZoo\CsvBlueprint\Validators\ErrorSuite; +use JBZoo\CsvBlueprint\Workers\WorkerPool; +use JBZoo\Utils\Env; use 
Symfony\Component\Console\Input\InputOption; use Symfony\Component\Finder\SplFileInfo; +use function JBZoo\Data\phpArray; use function JBZoo\Utils\bool; /** @@ -75,6 +78,18 @@ protected function configure(): void 'Activating this option provides detailed process insights,', 'useful for troubleshooting and performance analysis.', ]), + ) + ->addOption( + 'parallel', + null, + InputOption::VALUE_OPTIONAL, + \implode("\n", [ + 'EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.', + 'You can specify the number of threads.', + 'If you do not specify a value, the number of threads will be equal to the number of CPU cores.', + 'By default, the process is launched in a single-threaded mode.', + ]), + '1', ); parent::configure(); @@ -86,9 +101,16 @@ protected function preparation(): void $this->_('CSV Blueprint: ' . Utils::getVersion(true)); } - if ($this->getOptBool('debug')) { - \define('DEBUG_MODE', true); + $threads = $this->getNumberOfThreads(); + if ($threads !== 1) { + $this->_( + $threads > 0 + ? "Parallel mode: {$threads} threads" + : 'Parallel mode: ' . WorkerPool::getCpuCount() . 
' threads (auto)', + ); } + + Utils::setDebugMode($this->getOptBool('debug')); } protected function isHumanReadableMode(): bool @@ -109,6 +131,16 @@ protected function isQuickMode(): bool return $value === '' || bool($value); } + protected function getNumberOfThreads(): int + { + $threads = \trim($this->getOptString('parallel')); + if ($threads === '') { + return 0; // auto + } + + return $this->getOptInt('parallel'); // custom threads number + } + /** * @return SplFileInfo[] */ @@ -166,22 +198,37 @@ protected function renderIssues(string $prefix, int $number, string $filepath, i $this->out("{$prefix}{$number} {$issues} in {$filepath}", $indent); } - protected function printDumpOfSchema(?Schema $schema): void + protected function printDumpOfSchema(?string $schemaFilename): void { - if ($schema === null) { + if ($schemaFilename === null) { return; } - $dump = $schema->dumpAsYamlString(); - $dump = \preg_replace('/^([ \t]*)([^:\n]+:)/m', '$1$2', $dump); if ($this->getOptBool('dump-schema')) { + $filename = Utils::cutPath($schemaFilename); + + try { + $schema = new Schema($schemaFilename); + $dump = $schema->dumpAsYamlString(); + $dump = \preg_replace('/^([ \t]*)([^:\n]+:)/m', '$1$2', $dump); + } catch (\Throwable $e) { + $dump = 'Unable to parse schema file: ' . $e->getMessage(); + } + $this->_('```yaml'); - $this->_("# File: {$schema->getFilename()}"); + $this->_("# File: {$filename}"); $this->_($dump); $this->_('```'); } } + protected static function dumpPreloader(): void + { + if (Env::bool('JBZOO_BUILD_PRELOADER')) { + \file_put_contents(__DIR__ . 
'/../../docker/included_files.php', (string)phpArray(\get_included_files())); + } + } + protected static function renderPrefix(int $index, int $totalFiles): string { if ($totalFiles <= 1) { diff --git a/src/Commands/Exception.php b/src/Commands/Exception.php index f4b84a02..1d45ab73 100644 --- a/src/Commands/Exception.php +++ b/src/Commands/Exception.php @@ -16,6 +16,6 @@ namespace JBZoo\CsvBlueprint\Commands; -class Exception extends \JBZoo\CsvBlueprint\Exception +final class Exception extends \JBZoo\CsvBlueprint\Exception { } diff --git a/src/Commands/ValidateCsv.php b/src/Commands/ValidateCsv.php index 9cdbcea8..f6e91343 100644 --- a/src/Commands/ValidateCsv.php +++ b/src/Commands/ValidateCsv.php @@ -16,10 +16,11 @@ namespace JBZoo\CsvBlueprint\Commands; -use JBZoo\CsvBlueprint\Csv\CsvFile; use JBZoo\CsvBlueprint\Schema; use JBZoo\CsvBlueprint\Utils; use JBZoo\CsvBlueprint\Validators\ErrorSuite; +use JBZoo\CsvBlueprint\Workers\Tasks\ValidationCsvTask; +use JBZoo\CsvBlueprint\Workers\WorkerPool; use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Finder\SplFileInfo; @@ -117,7 +118,7 @@ protected function executeAction(): int [$invalidFiles, $errorInCsvCounter] = $this->validateCsvFiles($matchedFiles); - return $this->printSummary( + $exitCode = $this->printSummary( \count($csvFilenames), \count($schemaFilenames), $invalidFiles, @@ -125,6 +126,10 @@ protected function executeAction(): int $errorInSchemaCounter, $matchedFiles, ); + + self::dumpPreloader(); // Experimental feature + + return $exitCode; } /** @@ -151,8 +156,6 @@ private function validateSchemas(array $schemaFilenames): int continue; } - $schema = null; - try { $schema = new Schema($schemaFilename->getPathname()); $schemaErrors = $schema->validate($quickCheck); @@ -170,7 +173,7 @@ private function validateSchemas(array $schemaFilenames): int "{$prefix}Exception: {$e->getMessage()}", ], 2); } - $this->printDumpOfSchema($schema); + 
$this->printDumpOfSchema($schemaFilename->getPathname()); } $this->out(''); @@ -184,44 +187,67 @@ private function validateCsvFiles(array $matchedFiles): array $totalFiles = $matchedFiles['count_pairs']; $invalidFiles = 0; $errorCounter = 0; - $errorSuite = null; $quickCheck = $this->isQuickMode(); + $workerPool = new WorkerPool($this->getNumberOfThreads()); + foreach ($matchedFiles['found_pairs'] as $schema => $csvs) { + foreach ($csvs as $csv) { + $workerPool->addTask("{$csv} => {$schema}", ValidationCsvTask::class, [$csv, $schema, $quickCheck]); + } + } + $this->out("CSV file validation: {$totalFiles}"); $index = 0; - $isFirst = true; - foreach ($matchedFiles['found_pairs'] as $schema => $csvs) { - if ($isFirst) { - $isFirst = false; + $currentSchemaFilename = null; + + $exectionCallback = function ( + string $pair, + ErrorSuite $errorSuite, + ) use ( + &$index, + &$currentSchemaFilename, + &$invalidFiles, + &$errorCounter, + $totalFiles, + $quickCheck + ): void { + $index++; + $filesAsKey = \explode(' => ', $pair, 2); + if (\count($filesAsKey) > 1) { + [$csvFilename, $schemaFilename] = $filesAsKey; } else { - $this->out(''); // Add empty line between schema files + throw new Exception("Invalid pair: {$pair}"); } - $this->out('Schema: ' . Utils::printFile($schema)); - foreach ($csvs as $csv) { - $index++; - $prefix = AbstractValidate::renderPrefix($index, $totalFiles); - - $currentCsvTitle = Utils::printFile($csv, 'blue') . '; Size: ' . Utils::getFileSize($csv); - if ($quickCheck && $errorSuite !== null && $errorSuite->count() > 0) { - $this->out("Skipped (Quick mode) {$currentCsvTitle}", 2); - continue; + if ($currentSchemaFilename !== $schemaFilename) { + $currentSchemaFilename = $schemaFilename; + if ($index !== 1) { // Add empty line between schema files + $this->out(''); } + $this->out('Schema: ' . 
Utils::printFile($schemaFilename)); + } + + $prefix = AbstractValidate::renderPrefix($index, $totalFiles); + $currentCsvTitle = Utils::printFile($csvFilename, 'blue') . '; Size: ' . Utils::getFileSize($csvFilename); - $errorSuite = (new CsvFile($csv, $schema))->validate($quickCheck); + if ($quickCheck && $errorSuite->count() > 0) { + $this->out("Skipped (Quick mode) {$currentCsvTitle}", 2); + return; + } - if ($errorSuite->count() > 0) { - $invalidFiles++; - $errorCounter += $errorSuite->count(); + if ($errorSuite->count() > 0) { + $invalidFiles++; + $errorCounter += $errorSuite->count(); - $this->renderIssues($prefix, $errorSuite->count(), $currentCsvTitle, 2); - $this->outReport($errorSuite, 4); - } else { - $this->out("{$prefix}OK {$currentCsvTitle}", 2); - } + $this->renderIssues($prefix, $errorSuite->count(), $currentCsvTitle, 2); + $this->outReport($errorSuite, 4); + } else { + $this->out("{$prefix}OK {$currentCsvTitle}", 2); } - } + }; + + $workerPool->run($exectionCallback); return [$invalidFiles, $errorCounter]; } @@ -261,7 +287,7 @@ private function printSummary( if ($errorInSchemaCounter > 0) { $this->out("Found {$errorInSchemaCounter} issues in {$totalSchemaFiles} schemas.", $indent); - } else { + } elseif (!$this->isQuickMode()) { $this->out("No issues in {$totalSchemaFiles} schemas.", $indent); } @@ -271,7 +297,7 @@ private function printSummary( "out of {$totalCsvFiles} CSV files.", $indent, ); - } else { + } elseif (!$this->isQuickMode()) { $this->out("No issues in {$totalCsvFiles} CSV files.", $indent); } diff --git a/src/Commands/ValidateSchema.php b/src/Commands/ValidateSchema.php index 834c4458..db58244b 100644 --- a/src/Commands/ValidateSchema.php +++ b/src/Commands/ValidateSchema.php @@ -16,12 +16,11 @@ namespace JBZoo\CsvBlueprint\Commands; -use JBZoo\CsvBlueprint\Schema; use JBZoo\CsvBlueprint\Utils; -use JBZoo\CsvBlueprint\Validators\Error; use JBZoo\CsvBlueprint\Validators\ErrorSuite; +use 
JBZoo\CsvBlueprint\Workers\Tasks\ValidationSchemaTask; +use JBZoo\CsvBlueprint\Workers\WorkerPool; use Symfony\Component\Console\Input\InputOption; -use Symfony\Component\Yaml\Exception\ParseException; /** * @psalm-suppress PropertyNotSetInConstructor @@ -65,34 +64,34 @@ protected function executeAction(): int $this->out("Found schemas: {$totalFiles}"); $this->out(''); + $workerPool = new WorkerPool($this->getNumberOfThreads()); + foreach ($schemas as $schema) { + $filename = (string)$schema->getRealPath(); + $workerPool->addTask($filename, ValidationSchemaTask::class, [$filename]); + } + $foundIssues = 0; $index = 0; - foreach ($this->findFiles('schema') as $file) { - $index++; - $prefix = self::renderPrefix($index, $totalFiles); - $filename = (string)$file->getRealPath(); - $coloredPath = Utils::printFile($filename); - $schemaErrors = new ErrorSuite($filename); - - try { - $schema = new Schema($filename); - $schemaErrors = $schema->validate($this->isQuickMode()); - $this->printDumpOfSchema(new Schema($filename)); - } catch (ParseException $e) { - $schemaErrors->addError(new Error('schema.syntax', $e->getMessage(), '', $e->getParsedLine())); - } catch (\Throwable $e) { - $schemaErrors->addError(new Error('schema.error', $e->getMessage())); - } - - if ($schemaErrors->count() > 0) { - $this->renderIssues($prefix, $schemaErrors->count(), $coloredPath); - $this->outReport($schemaErrors, 2); - } else { - $this->out("{$prefix}OK {$coloredPath}"); - } - - $foundIssues += $schemaErrors->count(); - } + $workerPool->run( + function (string $filename, ErrorSuite $schemaErrors) use (&$index, &$foundIssues, $totalFiles): void { + $index++; + $prefix = self::renderPrefix($index, $totalFiles); + $coloredPath = Utils::printFile($filename); + + if ($schemaErrors->count() > 0) { + $this->renderIssues($prefix, $schemaErrors->count(), $coloredPath); + $this->outReport($schemaErrors, 2); + } else { + $this->out("{$prefix}OK {$coloredPath}"); + } + + 
$this->printDumpOfSchema($filename); + + $foundIssues += $schemaErrors->count(); + }, + ); + + self::dumpPreloader(); return $foundIssues === 0 ? self::SUCCESS : self::FAILURE; } diff --git a/src/Csv/Exception.php b/src/Csv/Exception.php index 2e1cca41..8f983161 100644 --- a/src/Csv/Exception.php +++ b/src/Csv/Exception.php @@ -16,6 +16,6 @@ namespace JBZoo\CsvBlueprint\Csv; -class Exception extends \JBZoo\CsvBlueprint\Exception +final class Exception extends \JBZoo\CsvBlueprint\Exception { } diff --git a/src/Rules/Aggregate/Exception.php b/src/Rules/Aggregate/Exception.php index 0657c8d9..361731d3 100644 --- a/src/Rules/Aggregate/Exception.php +++ b/src/Rules/Aggregate/Exception.php @@ -16,6 +16,6 @@ namespace JBZoo\CsvBlueprint\Rules\Aggregate; -class Exception extends \JBZoo\CsvBlueprint\Rules\Exception +final class Exception extends \JBZoo\CsvBlueprint\Rules\Exception { } diff --git a/src/Rules/Cell/CountryCode.php b/src/Rules/Cell/CountryCode.php index 5e6662ac..168f5f1d 100644 --- a/src/Rules/Cell/CountryCode.php +++ b/src/Rules/Cell/CountryCode.php @@ -20,7 +20,7 @@ use Respect\Validation\Rules\CountryCode as RespectCountryCode; use Respect\Validation\Validator; -class CountryCode extends AbstractCellRule +final class CountryCode extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/Exception.php b/src/Rules/Cell/Exception.php index a7898c35..79ce7a76 100644 --- a/src/Rules/Cell/Exception.php +++ b/src/Rules/Cell/Exception.php @@ -16,6 +16,6 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class Exception extends \JBZoo\CsvBlueprint\Rules\Exception +final class Exception extends \JBZoo\CsvBlueprint\Rules\Exception { } diff --git a/src/Rules/Cell/Hash.php b/src/Rules/Cell/Hash.php index 5edce141..34b921df 100644 --- a/src/Rules/Cell/Hash.php +++ b/src/Rules/Cell/Hash.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class Hash extends AbstractCellRule +final class Hash extends AbstractCellRule { public 
function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsDate.php b/src/Rules/Cell/IsDate.php index 8d3955f3..44a4f9b1 100644 --- a/src/Rules/Cell/IsDate.php +++ b/src/Rules/Cell/IsDate.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class IsDate extends AbstractCellRule +final class IsDate extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsGeohash.php b/src/Rules/Cell/IsGeohash.php index 648a6598..1813db7e 100644 --- a/src/Rules/Cell/IsGeohash.php +++ b/src/Rules/Cell/IsGeohash.php @@ -18,7 +18,7 @@ use JBZoo\CsvBlueprint\Utils; -class IsGeohash extends AbstractCellRule +final class IsGeohash extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsLeapYear.php b/src/Rules/Cell/IsLeapYear.php index 01ab2bbc..c91afcf8 100644 --- a/src/Rules/Cell/IsLeapYear.php +++ b/src/Rules/Cell/IsLeapYear.php @@ -18,7 +18,7 @@ use Respect\Validation\Validator; -class IsLeapYear extends AbstractCellRule +final class IsLeapYear extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsTime.php b/src/Rules/Cell/IsTime.php index d19b152d..7b720ce8 100644 --- a/src/Rules/Cell/IsTime.php +++ b/src/Rules/Cell/IsTime.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class IsTime extends AbstractCellRule +final class IsTime extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsTimezone.php b/src/Rules/Cell/IsTimezone.php index 775746ec..7ca5e980 100644 --- a/src/Rules/Cell/IsTimezone.php +++ b/src/Rules/Cell/IsTimezone.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class IsTimezone extends AbstractCellRule +final class IsTimezone extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/IsTimezoneOffset.php b/src/Rules/Cell/IsTimezoneOffset.php index d9776b59..3b48642d 100644 --- a/src/Rules/Cell/IsTimezoneOffset.php +++ 
b/src/Rules/Cell/IsTimezoneOffset.php @@ -18,7 +18,7 @@ use JBZoo\CsvBlueprint\Utils; -class IsTimezoneOffset extends AbstractCellRule +final class IsTimezoneOffset extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/LanguageCode.php b/src/Rules/Cell/LanguageCode.php index fd2a8da7..2f010642 100644 --- a/src/Rules/Cell/LanguageCode.php +++ b/src/Rules/Cell/LanguageCode.php @@ -20,7 +20,7 @@ use Respect\Validation\Rules\LanguageCode as RespectLanguageCode; use Respect\Validation\Validator; -class LanguageCode extends AbstractCellRule +final class LanguageCode extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/NotAllowValues.php b/src/Rules/Cell/NotAllowValues.php index bb786bba..58105f0d 100644 --- a/src/Rules/Cell/NotAllowValues.php +++ b/src/Rules/Cell/NotAllowValues.php @@ -16,7 +16,7 @@ namespace JBZoo\CsvBlueprint\Rules\Cell; -class NotAllowValues extends AbstractCellRule +final class NotAllowValues extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Rules/Cell/PostalCode.php b/src/Rules/Cell/PostalCode.php index 5c38992f..69747cdf 100644 --- a/src/Rules/Cell/PostalCode.php +++ b/src/Rules/Cell/PostalCode.php @@ -18,7 +18,7 @@ use Respect\Validation\Validator; -class PostalCode extends AbstractCellRule +final class PostalCode extends AbstractCellRule { public function getHelpMeta(): array { diff --git a/src/Utils.php b/src/Utils.php index c37bb99e..3cd3de1b 100644 --- a/src/Utils.php +++ b/src/Utils.php @@ -29,6 +29,8 @@ final class Utils { public const MAX_DIRECTORY_DEPTH = 10; + private static bool $debugMode = false; + public static function isArrayInOrder(array $array, array $correctOrder): bool { $orderIndex = 0; @@ -70,16 +72,20 @@ public static function printList(null|array|bool|float|int|string $items, string return "[\"<{$color}>" . \implode("\", \"<{$color}>", $items) . 
"\"]"; } - public static function debug(int|string $message): void + public static function debug(string $message): void { - if (\defined('DEBUG_MODE')) { - cli($message); + if (self::$debugMode) { + try { + cli($message); + } catch (\Throwable) { + Cli::out(\strip_tags($message)); + } } } public static function debugSpeed(string $messPrefix, int $lines, float $startTimer): void { - if (\defined('DEBUG_MODE')) { + if (self::$debugMode) { $kiloLines = \round(($lines / (\microtime(true) - $startTimer)) / 1000); self::debug("{$messPrefix} " . \number_format($kiloLines) . 'K lines/sec'); } @@ -292,15 +298,7 @@ public static function testRegex(?string $regex, string $subject): bool return false; } - try { - if (\preg_match($regex, $subject) === 0) { - return true; - } - } catch (\Throwable) { - return false; - } - - return false; + return \preg_match($regex, $subject) === 0; } /** @@ -484,6 +482,35 @@ public static function mergeConfigs(array ...$configs): array return $merged; } + public static function setDebugMode(bool $debugMode): void + { + self::$debugMode = $debugMode; + } + + public static function getDebugMode(): bool + { + return self::$debugMode; + } + + public static function init(): void + { + // Set default timezone + \date_default_timezone_set('UTC'); + + // Convert all errors to exceptions. Looks like we have critical case, and we need to stop or handle it. + // We have to do it becase tool uses 3rd-party libraries, and we can't trust them. + // So, we need to catch all errors and handle them. 
+ \set_error_handler(static function (int $severity, string $message, string $file, int $line): bool { + $severity = match ($severity) { + \E_ERROR, \E_CORE_ERROR, \E_COMPILE_ERROR, \E_USER_ERROR => 'Error', + \E_WARNING, \E_CORE_WARNING, \E_COMPILE_WARNING, \E_USER_WARNING => 'Warning', + \E_NOTICE, \E_USER_NOTICE => 'Notice', + default => 'Unknown', + }; + throw new Exception("Unexpected {$severity}: \"{$message}\" in file \"{$file}:{$line}\""); + }); + } + /** * @param SplFileInfo[] $files */ diff --git a/src/Validators/Exception.php b/src/Validators/Exception.php index 46a595ca..b9bc415b 100644 --- a/src/Validators/Exception.php +++ b/src/Validators/Exception.php @@ -16,6 +16,6 @@ namespace JBZoo\CsvBlueprint\Validators; -class Exception extends \JBZoo\CsvBlueprint\Exception +final class Exception extends \JBZoo\CsvBlueprint\Exception { } diff --git a/src/Workers/Tasks/AbstractTask.php b/src/Workers/Tasks/AbstractTask.php new file mode 100644 index 00000000..2dce2b54 --- /dev/null +++ b/src/Workers/Tasks/AbstractTask.php @@ -0,0 +1,22 @@ +csvFilename, $this->schemaFilename))->validate($this->isQuickMode); + } +} diff --git a/src/Workers/Tasks/ValidationSchemaTask.php b/src/Workers/Tasks/ValidationSchemaTask.php new file mode 100644 index 00000000..efd34e35 --- /dev/null +++ b/src/Workers/Tasks/ValidationSchemaTask.php @@ -0,0 +1,47 @@ +schemaFilename); + + try { + $schema = new Schema($this->schemaFilename); + $schemaErrors = $schema->validate($this->isQuickMode); + } catch (ParseException $e) { + $schemaErrors->addError(new Error('schema.syntax', $e->getMessage(), '', $e->getParsedLine())); + } catch (\Throwable $e) { + $schemaErrors->addError(new Error('schema.error', $e->getMessage())); + } + + return $schemaErrors; + } +} diff --git a/src/Workers/Worker.php b/src/Workers/Worker.php new file mode 100644 index 00000000..28f5351d --- /dev/null +++ b/src/Workers/Worker.php @@ -0,0 +1,59 @@ +key; + } + + public function execute(): mixed + { + $className = 
$this->className; + if (\class_exists($className) === false) { + throw new \InvalidArgumentException("Class '{$className}' not found"); + } + + $task = new $className(...$this->arguments); + if (!$task instanceof AbstractTask) { + throw new \InvalidArgumentException("Class '{$className}' is not allowed"); + } + + return $task->process(); + } + + public function getClass(): string + { + return $this->className; + } + + public function getArguments(): array + { + return $this->arguments; + } +} diff --git a/src/Workers/WorkerPool.php b/src/Workers/WorkerPool.php new file mode 100644 index 00000000..d9fe5a5d --- /dev/null +++ b/src/Workers/WorkerPool.php @@ -0,0 +1,151 @@ +maxThreads = $maxThreads === 0 ? self::getCpuCount() : $maxThreads; + $this->tasksQueue = new \SplQueue(); + } + + public function getMaxThreads(): int + { + return $this->maxThreads; + } + + public function addTask(string $key, string $taskClass, array $arguments = []): void + { + $this->tasksQueue->enqueue(new Worker($key, $taskClass, $arguments)); + } + + public function run(?\Closure $callback = null): array + { + return $this->isParallel() ? $this->runInParallel($callback) : $this->runSequentially($callback); + } + + public function isParallel(): bool + { + return $this->getMaxThreads() > 1 && self::extLoaded(); + } + + public static function extLoaded(): bool + { + return \extension_loaded('parallel'); + } + + public static function setBootstrap(string $autoloader): void + { + if (self::extLoaded() && self::$bootstrap === null) { + $realpath = \realpath($autoloader); + if ($realpath !== false) { + self::$bootstrap = $realpath; + // \parallel\bootstrap($autoloader); // Hm... Does it work? 
+ } + } + } + + public static function getCpuCount(): int + { + try { + return (new CpuCoreCounter())->getCount(); + } catch (\Throwable) { + return self::FALLBACK_CPU_COUNT; + } + } + + private function runSequentially(?\Closure $callback = null): array + { + $results = []; + + while (!$this->tasksQueue->isEmpty()) { + /** @var Worker $worker */ + $worker = $this->tasksQueue->dequeue(); + + if ($callback !== null) { + $callback($worker->getKey(), $worker->execute()); + } else { + $results[$worker->getKey()] = $worker->execute(); + } + } + + return $results; + } + + private function runInParallel(?\Closure $callback = null): array + { + $results = []; + + while (!$this->tasksQueue->isEmpty() || \count($this->runningTasks) > 0) { + $this->maintainTaskPool(); + + foreach ($this->runningTasks as $index => $future) { + if ($future !== null && $future->done()) { + if ($callback !== null) { + $callback($index, $future->value()); + } else { + $results[$index] = $future->value(); + } + unset($this->runningTasks[$index]); + } + } + + \usleep(self::POOL_MAINTENANCE_DELAY); + } + + return $results; + } + + private function maintainTaskPool(): void + { + $bootstrap = self::$bootstrap; + if ($bootstrap === null) { + throw new Exception('Bootstrap file is not set'); + } + + while (\count($this->runningTasks) < $this->maxThreads && !$this->tasksQueue->isEmpty()) { + /** @var Worker $worker */ + $worker = $this->tasksQueue->dequeue(); + $runtime = new Runtime($bootstrap); + $future = $runtime->run( + static function (string $key, string $class, array $args, bool $debugMode): mixed { + Utils::init(); + Utils::setDebugMode($debugMode); + return (new Worker($key, $class, $args))->execute(); + }, + [$worker->getKey(), $worker->getClass(), $worker->getArguments(), Utils::getDebugMode()], + ); + + $this->runningTasks[$worker->getKey()] = $future; + } + } +} diff --git a/tests/Commands/ValidateCsvQuickTest.php b/tests/Commands/ValidateCsvQuickTest.php index d9235acd..6548806c 100644 --- 
a/tests/Commands/ValidateCsvQuickTest.php +++ b/tests/Commands/ValidateCsvQuickTest.php @@ -42,16 +42,13 @@ public function testEnabled(): void CSV file validation: 3 Schema: ./tests/schemas/demo_invalid.yml - (1/3) 1 issue in ./tests/fixtures/batch/demo-1.csv; Size: 123.34 MB - "allow_extra_columns" at line 1. Column(s) not found in CSV: "wrong_column_name". - + Skipped (Quick mode) ./tests/fixtures/batch/demo-1.csv; Size: 123.34 MB Skipped (Quick mode) ./tests/fixtures/batch/demo-2.csv; Size: 123.34 MB Skipped (Quick mode) ./tests/fixtures/batch/sub/demo-3.csv; Size: 123.34 MB Summary: 3 pairs (schema to csv) were found based on `filename_pattern`. Found 1 issues in 1 schemas. - Found 1 issues in 1 out of 3 CSV files. TXT; diff --git a/tests/Commands/ValidateSchemaTest.php b/tests/Commands/ValidateSchemaTest.php index a0e66b13..8779cea3 100644 --- a/tests/Commands/ValidateSchemaTest.php +++ b/tests/Commands/ValidateSchemaTest.php @@ -93,6 +93,60 @@ public function testInvalidSchemas(): void isSame(1, $exitCode, $actual); } + public function testInvalidSchemasWithSchemaDump(): void + { + [$actual, $exitCode] = Tools::virtualExecution('validate:schema', [ + 'schema' => './tests/schemas/broken/*.yml', + 'dump-schema' => null, + ]); + + $expected = <<<'TXT' + CSV Blueprint: Unknown version (PhpUnit) + Found schemas: 2 + + (1/2) 1 issue in ./tests/schemas/broken/invalid_schema.yml + +-------+-----------+--------+----------------------------------+ + | Line | id:Column | Rule | Message | + +-------+-----------+--------+----------------------------------+ + | undef | meta | schema | Unknown key: .unknow_root_option | + +-------+-----------+--------+----------------------------------+ + ```yaml + # File: ./tests/schemas/broken/invalid_schema.yml + name: '' + description: '' + presets: [] + filename_pattern: /invalid-pattern\.csv$/i + csv: + header: true + delimiter: ',' + quote_char: \ + enclosure: '"' + encoding: utf-8 + bom: false + structural_rules: + 
strict_column_order: true + allow_extra_columns: false + columns: [] + unknow_root_option: true + + ``` + (2/2) 1 issue in ./tests/schemas/broken/syntax.yml + +------+-----------+---------------+---------------------------------------------------+ + | Line | id:Column | Rule | Message | + +------+-----------+---------------+---------------------------------------------------+ + | 15 | | schema.syntax | Unable to parse at line 15 (near "(*$#)@(@$*)("). | + +------+-----------+---------------+---------------------------------------------------+ + ```yaml + # File: ./tests/schemas/broken/syntax.yml + Unable to parse schema file: Unable to parse at line 15 (near "(*$#)@(@$*)("). + ``` + + TXT; + + isSame($expected, $actual); + isSame(1, $exitCode, $actual); + } + public function testInvalidSchemasTextReport(): void { [$actual, $exitCode] = Tools::virtualExecution('validate:schema', [ diff --git a/tests/GithubActionsTest.php b/tests/GithubActionsTest.php index 3d92e1ab..e2ea1bdb 100644 --- a/tests/GithubActionsTest.php +++ b/tests/GithubActionsTest.php @@ -55,7 +55,7 @@ public function testGitHubActionsReadMe(): void 'apply-all' => "'auto'", 'quick' => "'no'", 'skip-schema' => "'no'", - 'extra' => "'options: --ansi -v'", + 'extra' => "'options: --ansi'", ]; $expectedMessage = [ diff --git a/tests/TestCase.php b/tests/TestCase.php index 6716e699..384b07ba 100644 --- a/tests/TestCase.php +++ b/tests/TestCase.php @@ -16,6 +16,8 @@ namespace JBZoo\PHPUnit; +use JBZoo\CsvBlueprint\Workers\WorkerPool; + abstract class TestCase extends PHPUnit { protected function setUp(): void @@ -25,5 +27,6 @@ protected function setUp(): void \date_default_timezone_set('UTC'); \putenv('COLUMNS=200'); \chdir(PROJECT_ROOT); + WorkerPool::setBootstrap(PROJECT_ROOT . 
'/vendor/autoload.php'); } } diff --git a/tests/Workers/TaskRunnerTest.php b/tests/Workers/TaskRunnerTest.php new file mode 100644 index 00000000..df214516 --- /dev/null +++ b/tests/Workers/TaskRunnerTest.php @@ -0,0 +1,75 @@ +isParallel()); + + $runner = new WorkerPool(1); + isFalse($runner->isParallel()); + } + + public function testExecuteSequentially(): void + { + $runner = new WorkerPool(1); + $runner->addTask('q', TestTask::class, [1]); + $runner->addTask('qq', TestTask::class, [2]); + $runner->addTask('qqq', TestTask::class, [3]); + + $startTime = \microtime(true); + isSame(['q' => 1, 'qq' => 2, 'qqq' => 3], $runner->run()); + $time = \microtime(true) - $startTime; + + isTrue($time >= TestTask::DELAY * 3, (string)$time); + } + + public function testExecuteParallel(): void + { + self::onlyParallel(); + + $runner = new WorkerPool(); + isTrue($runner->getMaxThreads() > 1); + $runner->addTask('q', TestTask::class, [1]); + $runner->addTask('qq', TestTask::class, [2]); + $runner->addTask('qqq', TestTask::class, [3]); + + $startTime = \microtime(true); + isSame(['q' => 1, 'qq' => 2, 'qqq' => 3], $runner->run()); + $time = \microtime(true) - $startTime; + + isTrue($time < TestTask::DELAY * 3, (string)$time); + } + + private static function onlyParallel(): void + { + if (!\extension_loaded('parallel')) { + skip('The parallel extension is not available.'); + } + } +} diff --git a/tests/Workers/TestTask.php b/tests/Workers/TestTask.php new file mode 100644 index 00000000..0cfc02b7 --- /dev/null +++ b/tests/Workers/TestTask.php @@ -0,0 +1,36 @@ +id; + } +} diff --git a/tests/autoload.php b/tests/autoload.php index 90acf384..4429d093 100644 --- a/tests/autoload.php +++ b/tests/autoload.php @@ -23,3 +23,7 @@ echo 'Please execute "composer update" !' . \PHP_EOL; exit(1); } + +if (\extension_loaded('parallel')) { + \parallel\bootstrap(__DIR__ . 
'/vendor/autoload.php'); +} diff --git a/tests/schemas/demo_invalid.yml b/tests/schemas/demo_invalid.yml index 6aef0ba5..9678d16e 100644 --- a/tests/schemas/demo_invalid.yml +++ b/tests/schemas/demo_invalid.yml @@ -12,7 +12,7 @@ # This schema is invalid because does not match the CSV file (tests/fixtures/demo.csv). -filename_pattern: /(demo-[12]|demo)\.csv)$/i +filename_pattern: /demo(-[123])?\.csv$/ columns: - name: Name diff --git a/tests/stubs/parallel.stub_php b/tests/stubs/parallel.stub_php new file mode 100644 index 00000000..0ef6542a --- /dev/null +++ b/tests/stubs/parallel.stub_php @@ -0,0 +1,392 @@ +