diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8d88a0d6..73c49988 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -37,6 +37,9 @@ jobs:
test-current-versions:
name: Tests
runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ ext-parallel: [ '', 'parallel' ]
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -45,10 +48,13 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
+ id: setup-php
with:
php-version: 8.3
coverage: xdebug
- extensions: ast
+ extensions: ast, ${{ matrix.ext-parallel }}
+ env:
+ phpts: zts
- name: Build project
run: make build --no-print-directory
@@ -56,9 +62,6 @@ jobs:
- name: π§ͺ PHPUnit Tests
run: make test --no-print-directory
- - name: π Code Quality
- run: make codestyle --no-print-directory
-
- name: Uploading coverage to coveralls
continue-on-error: true
env:
@@ -67,10 +70,14 @@ jobs:
- name: SonarCloud Scan
uses: SonarSource/sonarcloud-github-action@master
+ continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+ - name: π Code Quality
+ run: make codestyle --no-print-directory
+
- name: Upload Artifacts
uses: actions/upload-artifact@v4
continue-on-error: true
@@ -266,11 +273,25 @@ jobs:
run: |
! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST
- - name: 🔥 Quick Benchmark 🔥
+ - name: π Valid CSV files (Parallel)
+ run: $BLUEPRINT_DOCKER $CMD_VALIDATE $VALID_TEST --parallel
+
+ - name: π Invalid CSV files (Parallel)
+ run: |
+ ! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST --parallel
+
+ - name: Prepare Benchmark
+ run: |
+ make build --no-print-directory
+ make bench-create-csv --no-print-directory
+
+ - name: 🔥 Benchmark (single thread)
+ run: |
+ ! make bench-docker-quick --no-print-directory
+
+ - name: 🔥 Benchmark (multi threads)
run: |
- make build > /dev/null 2>&1
- make bench-create-csv > /dev/null
- make bench-docker-quick --no-print-directory
+ ! make bench-docker-quick-parallel --no-print-directory
- name: Push Docker Image (master)
uses: docker/build-push-action@v5
diff --git a/.gitignore b/.gitignore
index 068ef66f..236d48a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ vendor
phpunit.xml
/docker/preload.php
/docker/random_data.csv
+/docker/included_files.php
*.cache
*.phar
.version
diff --git a/.phan.php b/.phan.php
index 5fc6b5f2..7d8c295f 100644
--- a/.phan.php
+++ b/.phan.php
@@ -23,15 +23,21 @@
'directory_list' => [
'src',
- 'vendor/jbzoo/ci-report-converter/src',
- 'vendor/jbzoo/cli/src',
'vendor/jbzoo/data/src',
+ 'vendor/jbzoo/cli/src',
'vendor/jbzoo/utils/src',
- 'vendor/league/csv/src',
+ 'vendor/jbzoo/ci-report-converter/src',
+
'vendor/symfony/console',
'vendor/symfony/finder',
'vendor/symfony/yaml',
+
+ 'vendor/league/csv/src',
'vendor/markrogoyski/math-php/src',
'vendor/respect/validation',
+ 'vendor/fidry/cpu-core-counter',
+ ],
+ 'autoload_internal_extension_signatures' => [
+ 'parallel' => 'tests/stubs/parallel.stub_php',
],
]);
diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php
index f79aa7e6..71e595b2 100644
--- a/.php-cs-fixer.php
+++ b/.php-cs-fixer.php
@@ -16,7 +16,20 @@
namespace JBZoo\Codestyle\PhpCsFixer;
-return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig(null, [
+use Symfony\Component\Finder\Finder;
+
+$finder = (new Finder())
+ ->files()
+ ->followLinks()
+ ->ignoreVCS(true)
+ ->ignoreDotFiles(false)
+ ->in(__DIR__)
+ ->exclude('vendor')
+ ->exclude('docker') // preload.php!
+ ->exclude('build')
+ ->name('/\.php$/');
+
+return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig($finder, [
'binary_operator_spaces' => [
'operators' => [
'=' => 'single_space',
diff --git a/Dockerfile b/Dockerfile
index 30f8666c..b4aed754 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,37 +17,42 @@ COPY . /tmp
RUN make build-version
########################################################################################
-FROM php:8.3-cli-alpine
+FROM php:8.3-zts-alpine
# Install PHP extensions
ADD --chmod=0755 https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions /usr/local/bin/
-RUN install-php-extensions opcache @composer
+RUN install-php-extensions opcache parallel @composer
# Install application
-# run `make build-version` before!
WORKDIR /app
ENV COMPOSER_ALLOW_SUPERUSER=1
COPY . /app
COPY --from=preparatory /tmp/.version /app/.version
-RUN composer install --no-dev \
- --classmap-authoritative \
- --no-progress \
- --no-suggest \
- --optimize-autoloader \
- && rm -rf ./.git \
- && composer clear-cache \
- && chmod +x ./csv-blueprint
+RUN composer install --no-dev --classmap-authoritative --no-progress \
+ && rm -rf ./.git \
+ && composer clear-cache \
+ && chmod +x ./csv-blueprint \
+ && chmod +x ./docker/entrypoint.sh
RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
COPY ./docker/php.ini /usr/local/etc/php/conf.d/docker-z99-php.ini
-# Quick test
-RUN time ./csv-blueprint --version --ansi \
- && time ./csv-blueprint validate:csv --help --ansi
-
# Warmup caches
-#RUN php ./docker/build-preloader.php \
-# && php ./docker/preload.php \
+RUN php ./docker/random-csv.php \
+ && JBZOO_BUILD_PRELOADER=1 \
+ ./csv-blueprint validate:csv \
+ --schema=/app/schema-examples/full.yml \
+ --csv=/app/docker/random_data.csv \
+ --apply-all=yes \
+ --report=text --mute-errors | grep issues \
+ && rm ./docker/random_data.csv \
+ && php ./docker/build-preloader.php \
+ && php ./docker/preload.php \
+ && du -sh /app/docker
# && echo "opcache.preload=/app/docker/preload.php" >> /usr/local/etc/php/conf.d/docker-z99-php.ini
-ENTRYPOINT ["/app/csv-blueprint"]
+# Quick test
+RUN time ./csv-blueprint -V
+
+ENTRYPOINT ["/app/docker/entrypoint.sh"]
+#ENTRYPOINT ["/app/csv-blueprint"]
diff --git a/Makefile b/Makefile
index b5f8db92..6a8a5525 100644
--- a/Makefile
+++ b/Makefile
@@ -111,9 +111,15 @@ bench-create-csv: ##@Benchmarks Create CSV file
@time bash ./tests/Benchmarks/create-csv.sh
@echo "::endgroup::"
-bench-docker-quick: ##@Benchmarks Run CSV file with Docker (Quick)
+bench-docker-quick:
+ $(call title,"Single thread mode")
@docker run --rm $(DOCKER_IMAGE) --ansi --version
- -$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS)
+ $(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS)
+
+bench-docker-quick-parallel:
+ $(call title,"Multi-thread mode")
+ @docker run --rm $(DOCKER_IMAGE) --ansi --version
+ $(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) --parallel
bench-docker: ##@Benchmarks Run CSV file with Docker
@docker run --rm $(DOCKER_IMAGE) --ansi --version
diff --git a/README.md b/README.md
index e88d5fbc..9fa5964e 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ specifications, making it invaluable in scenarios where data quality and consist
- [Usage](#usage)
- [Schema definition](#schema-definition)
- [Presets and reusable schemas](#presets-and-reusable-schemas)
+- [Parallel processing](#parallel-processing)
- [Complete CLI help message](#complete-cli-help-message)
- [Report examples](#report-examples)
- [Benchmarks](#benchmarks)
@@ -160,14 +161,15 @@ You can find launch examples in the [workflow demo](https://github.com/JBZoo/Csv
# Extra options for the CSV Blueprint. Only for debbuging and profiling.
# Available options:
- # ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
- # Verbosity level: Available options: `-v`, `-vv`, `-vvv`.
- # Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
- # Add flag `--debug` if you want to see more really deep details.
+ # Add flag `--parallel` if you want to validate CSV files in parallel.
# Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance.
- # Default value: 'options: --ansi -v'
+ # Add flag `--debug` if you want to see more really deep details.
+ # Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
+ # Verbosity level: Available options: `-v`, `-vv`, `-vvv`
+ # ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
+ # Default value: 'options: --ansi'
# You can skip it.
- extra: 'options: --ansi -v'
+ extra: 'options: --ansi'
```
@@ -1412,6 +1414,30 @@ columns:
These are intended solely for demonstration and to illustrate potential configurations and features.
+## Parallel processing
+
+The `--parallel` option is available for speeding up the validation of CSV files by utilizing more CPU resources
+effectively.
+
+### Key Points
+
+- **Experimental Feature:** This feature is currently experimental and requires further debugging and testing. Although
+ it performs well in synthetic autotests and benchmarks, more practical use cases are needed to validate its stability.
+- **Use Case:** This option is beneficial if you are processing dozens of CSV files, with each file taking 1 second or
+ more to process.
+- **Default Behavior:** If you use `--parallel` without specifying a value, it defaults to using the maximum number of
+ available CPU cores.
+- **Thread Pool Size:** You can set a specific number of threads for the pool. For example, `--parallel=10` will set the
+ thread pool size to 10. It doesn't make much sense to specify more than the number of logical cores in your CPU.
+- **Disabling Parallelism:** Using `--parallel=1` disables parallel processing, which is the default setting if the
+ option is not specified.
+- **Implementation:** The feature relies on the `ext-parallel` PHP extension, which enables the creation of lightweight
+ threads rather than processes. This extension is already included in our Docker image. Ensure that you have
+ the `ext-parallel` extension installed if you are not using our Docker image. This extension is crucial for the
+ operation of the parallel processing feature. The application always runs in single-threaded mode if the extension is
+ not installed.
+
+
## Complete CLI help message
This section outlines all available options and commands provided by the tool, leveraging the JBZoo/Cli package for its
@@ -1466,6 +1492,10 @@ Options:
--debug Intended solely for debugging and advanced profiling purposes.
Activating this option provides detailed process insights,
useful for troubleshooting and performance analysis.
+ --parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.
+ You can specify the number of threads.
+ If you do not specify a value, the number of threads will be equal to the number of CPU cores.
+ By default, the process is launched in a single-threaded mode. [default: "1"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has major priority then --non-zero-on-error. It's on your own risk!
@@ -1522,6 +1552,10 @@ Options:
--debug Intended solely for debugging and advanced profiling purposes.
Activating this option provides detailed process insights,
useful for troubleshooting and performance analysis.
+ --parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.
+ You can specify the number of threads.
+ If you do not specify a value, the number of threads will be equal to the number of CPU cores.
+ By default, the process is launched in a single-threaded mode. [default: "1"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has major priority then --non-zero-on-error. It's on your own risk!
diff --git a/action.yml b/action.yml
index b5cfb985..9f971197 100644
--- a/action.yml
+++ b/action.yml
@@ -58,12 +58,13 @@ inputs:
description: |
Extra options for the CSV Blueprint. Only for debbuging and profiling.
Available options:
- ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
- Verbosity level: Available options: `-v`, `-vv`, `-vvv`.
- Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
- Add flag `--debug` if you want to see more really deep details.
+ Add flag `--parallel` if you want to validate CSV files in parallel.
Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance.
- default: 'options: --ansi -v'
+ Add flag `--debug` if you want to see more really deep details.
+ Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
+ Verbosity level: Available options: `-v`, `-vv`, `-vvv`
+ ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
+ default: 'options: --ansi'
runs:
using: 'docker'
diff --git a/composer.json b/composer.json
index 1e304018..57020b12 100644
--- a/composer.json
+++ b/composer.json
@@ -43,7 +43,8 @@
"respect/validation" : "^2.3.6",
"giggsey/libphonenumber-for-php-lite" : "^8.13.34",
"giggsey/locale" : "^2.5",
- "symfony/polyfill-mbstring" : "^1.29.0"
+ "symfony/polyfill-mbstring" : "^1.29.0",
+ "fidry/cpu-core-counter" : "^1.1.0"
},
"require-dev" : {
diff --git a/composer.lock b/composer.lock
index 792694dd..96d04770 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
- "content-hash": "69288ac6734744736672881ea230a32d",
+ "content-hash": "3d13c88792c56b5b98e15821829dbf0e",
"packages": [
{
"name": "bluepsyduck/symfony-process-manager",
@@ -63,6 +63,67 @@
},
"time": "2021-12-03T21:30:28+00:00"
},
+ {
+ "name": "fidry/cpu-core-counter",
+ "version": "1.1.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/theofidry/cpu-core-counter.git",
+ "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/theofidry/cpu-core-counter/zipball/f92996c4d5c1a696a6a970e20f7c4216200fcc42",
+ "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42",
+ "shasum": ""
+ },
+ "require": {
+ "php": "^7.2 || ^8.0"
+ },
+ "require-dev": {
+ "fidry/makefile": "^0.2.0",
+ "fidry/php-cs-fixer-config": "^1.1.2",
+ "phpstan/extension-installer": "^1.2.0",
+ "phpstan/phpstan": "^1.9.2",
+ "phpstan/phpstan-deprecation-rules": "^1.0.0",
+ "phpstan/phpstan-phpunit": "^1.2.2",
+ "phpstan/phpstan-strict-rules": "^1.4.4",
+ "phpunit/phpunit": "^8.5.31 || ^9.5.26",
+ "webmozarts/strict-phpunit": "^7.5"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "Fidry\\CpuCoreCounter\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Théo FIDRY",
+ "email": "theo.fidry@gmail.com"
+ }
+ ],
+ "description": "Tiny utility to get the number of CPU cores.",
+ "keywords": [
+ "CPU",
+ "core"
+ ],
+ "support": {
+ "issues": "https://github.com/theofidry/cpu-core-counter/issues",
+ "source": "https://github.com/theofidry/cpu-core-counter/tree/1.1.0"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/theofidry",
+ "type": "github"
+ }
+ ],
+ "time": "2024-02-07T09:43:46+00:00"
+ },
{
"name": "giggsey/libphonenumber-for-php-lite",
"version": "8.13.34",
@@ -2769,67 +2830,6 @@
},
"time": "2022-03-02T22:36:06+00:00"
},
- {
- "name": "fidry/cpu-core-counter",
- "version": "1.1.0",
- "source": {
- "type": "git",
- "url": "https://github.com/theofidry/cpu-core-counter.git",
- "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42"
- },
- "dist": {
- "type": "zip",
- "url": "https://api.github.com/repos/theofidry/cpu-core-counter/zipball/f92996c4d5c1a696a6a970e20f7c4216200fcc42",
- "reference": "f92996c4d5c1a696a6a970e20f7c4216200fcc42",
- "shasum": ""
- },
- "require": {
- "php": "^7.2 || ^8.0"
- },
- "require-dev": {
- "fidry/makefile": "^0.2.0",
- "fidry/php-cs-fixer-config": "^1.1.2",
- "phpstan/extension-installer": "^1.2.0",
- "phpstan/phpstan": "^1.9.2",
- "phpstan/phpstan-deprecation-rules": "^1.0.0",
- "phpstan/phpstan-phpunit": "^1.2.2",
- "phpstan/phpstan-strict-rules": "^1.4.4",
- "phpunit/phpunit": "^8.5.31 || ^9.5.26",
- "webmozarts/strict-phpunit": "^7.5"
- },
- "type": "library",
- "autoload": {
- "psr-4": {
- "Fidry\\CpuCoreCounter\\": "src/"
- }
- },
- "notification-url": "https://packagist.org/downloads/",
- "license": [
- "MIT"
- ],
- "authors": [
- {
- "name": "Théo FIDRY",
- "email": "theo.fidry@gmail.com"
- }
- ],
- "description": "Tiny utility to get the number of CPU cores.",
- "keywords": [
- "CPU",
- "core"
- ],
- "support": {
- "issues": "https://github.com/theofidry/cpu-core-counter/issues",
- "source": "https://github.com/theofidry/cpu-core-counter/tree/1.1.0"
- },
- "funding": [
- {
- "url": "https://github.com/theofidry",
- "type": "github"
- }
- ],
- "time": "2024-02-07T09:43:46+00:00"
- },
{
"name": "friendsofphp/php-cs-fixer",
"version": "v3.53.0",
@@ -5462,12 +5462,12 @@
"source": {
"type": "git",
"url": "https://github.com/Roave/SecurityAdvisories.git",
- "reference": "00077527dfa5415280a28a044eca385eb3feb7ee"
+ "reference": "31f373849a62ccfe23cba594e91b488e3ec2270b"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/00077527dfa5415280a28a044eca385eb3feb7ee",
- "reference": "00077527dfa5415280a28a044eca385eb3feb7ee",
+ "url": "https://api.github.com/repos/Roave/SecurityAdvisories/zipball/31f373849a62ccfe23cba594e91b488e3ec2270b",
+ "reference": "31f373849a62ccfe23cba594e91b488e3ec2270b",
"shasum": ""
},
"conflict": {
@@ -5554,9 +5554,10 @@
"concrete5/concrete5": "<9.2.8",
"concrete5/core": "<8.5.8|>=9,<9.1",
"contao-components/mediaelement": ">=2.14.2,<2.21.1",
+ "contao/comments-bundle": ">=2,<4.13.40|>=5.0.0.0-RC1-dev,<5.3.4",
"contao/contao": ">=4,<4.4.56|>=4.5,<4.9.40|>=4.10,<4.11.7|>=4.13,<4.13.21|>=5.1,<5.1.4",
"contao/core": ">=2,<3.5.39",
- "contao/core-bundle": ">=3,<3.5.35|>=4,<4.9.42|>=4.10,<4.13.28|>=5,<5.1.10",
+ "contao/core-bundle": "<4.13.40|>=5,<5.3.4",
"contao/listing-bundle": ">=4,<4.4.8",
"contao/managed-edition": "<=1.5",
"corveda/phpsandbox": "<1.3.5",
@@ -6199,7 +6200,7 @@
"type": "tidelift"
}
],
- "time": "2024-04-08T16:06:11+00:00"
+ "time": "2024-04-09T19:04:27+00:00"
},
{
"name": "sabre/event",
diff --git a/csv-blueprint.php b/csv-blueprint.php
index 315221d1..6962ed5c 100644
--- a/csv-blueprint.php
+++ b/csv-blueprint.php
@@ -16,26 +16,26 @@
namespace JBZoo\CsvBlueprint;
+use JBZoo\CsvBlueprint\Workers\WorkerPool;
+
\define('PATH_ROOT', __DIR__);
-require_once __DIR__ . '/vendor/autoload.php';
+require_once PATH_ROOT . '/vendor/autoload.php';
if ('cli' !== \PHP_SAPI) {
throw new Exception('This script must be run from the command line.');
}
+WorkerPool::setBootstrap(
+ \file_exists(PATH_ROOT . '/docker/preload.php')
+ ? PATH_ROOT . '/docker/preload.php'
+ : PATH_ROOT . '/vendor/autoload.php',
+);
+
// Fix for GitHub actions. See action.yml
$_SERVER['argv'] = Utils::fixArgv($_SERVER['argv'] ?? []);
$_SERVER['argc'] = \count($_SERVER['argv']);
-// Set default timezone
-\date_default_timezone_set('UTC');
-
-// Convert all errors to exceptions. Looks like we have critical case, and we need to stop or handle it.
-// We have to do it becase tool uses 3rd-party libraries, and we can't trust them.
-// So, we need to catch all errors and handle them.
-\set_error_handler(static function ($severity, $message, $file, $line): void {
- throw new Exception($message, 0, $severity, $file, $line);
-});
+Utils::init();
(new CliApplication('CSV Blueprint', Utils::getVersion(true)))
->registerCommandsByPath(PATH_ROOT . '/src/Commands', __NAMESPACE__)
diff --git a/docker/build-preloader.php b/docker/build-preloader.php
index 6e57dc7c..562749db 100644
--- a/docker/build-preloader.php
+++ b/docker/build-preloader.php
@@ -14,54 +14,38 @@
declare(strict_types=1);
-$classes = include_once __DIR__ . '/../vendor/composer/autoload_classmap.php';
+$files = include_once __DIR__ . '/included_files.php';
$header = <<<'TEXT'
filePath, 'w');
+
+ \fputcsv($fileHandle, $this->columns);
+
+ for ($i = 0; $i < $this->rows; $i++) {
+ $rowData = [];
+
+ foreach (\array_keys($this->columns) as $columnIndex) {
+ $rowData[$columnIndex] = \random_int(1, 10000);
+ }
+
+ \fputcsv($fileHandle, $rowData);
+ }
+
+ \fclose($fileHandle);
+
+ echo "CSV file created: {$this->filePath}.\n";
+ }
+}
+
+(new CsvGenerator(
+ 1000,
+ __DIR__ . '/random_data.csv',
+ ['Column Name (header)', 'another_column', 'inherited_column_login', 'inherited_column_full_name'],
+))->generateCsv();
diff --git a/phpunit.xml.dist b/phpunit.xml.dist
index 5048883a..65ec38a2 100644
--- a/phpunit.xml.dist
+++ b/phpunit.xml.dist
@@ -31,13 +31,8 @@
-
+
-
diff --git a/psalm.xml b/psalm.xml
index 2fecab37..83498d58 100644
--- a/psalm.xml
+++ b/psalm.xml
@@ -35,4 +35,7 @@
+
+
+
diff --git a/src/CliApplication.php b/src/CliApplication.php
index 9aef3a6f..9236e640 100644
--- a/src/CliApplication.php
+++ b/src/CliApplication.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint;
-class CliApplication extends \JBZoo\Cli\CliApplication
+final class CliApplication extends \JBZoo\Cli\CliApplication
{
private array $appLogo = [
' __________ __ ___ __ _ __ ',
diff --git a/src/Commands/AbstractValidate.php b/src/Commands/AbstractValidate.php
index 8f95a9c7..d093cb09 100644
--- a/src/Commands/AbstractValidate.php
+++ b/src/Commands/AbstractValidate.php
@@ -20,9 +20,12 @@
use JBZoo\CsvBlueprint\Schema;
use JBZoo\CsvBlueprint\Utils;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
+use JBZoo\CsvBlueprint\Workers\WorkerPool;
+use JBZoo\Utils\Env;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Finder\SplFileInfo;
+use function JBZoo\Data\phpArray;
use function JBZoo\Utils\bool;
/**
@@ -75,6 +78,18 @@ protected function configure(): void
'Activating this option provides detailed process insights,',
'useful for troubleshooting and performance analysis.',
]),
+ )
+ ->addOption(
+ 'parallel',
+ null,
+ InputOption::VALUE_OPTIONAL,
+ \implode("\n", [
+ 'EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.',
+ 'You can specify the number of threads.',
+ 'If you do not specify a value, the number of threads will be equal to the number of CPU cores.',
+ 'By default, the process is launched in a single-threaded mode.',
+ ]),
+ '1',
);
parent::configure();
@@ -86,9 +101,16 @@ protected function preparation(): void
$this->_('CSV Blueprint: ' . Utils::getVersion(true));
}
- if ($this->getOptBool('debug')) {
- \define('DEBUG_MODE', true);
+ $threads = $this->getNumberOfThreads();
+ if ($threads !== 1) {
+ $this->_(
+ $threads > 0
+ ? "Parallel mode: {$threads} threads"
+ : 'Parallel mode: ' . WorkerPool::getCpuCount() . ' threads (auto)',
+ );
}
+
+ Utils::setDebugMode($this->getOptBool('debug'));
}
protected function isHumanReadableMode(): bool
@@ -109,6 +131,16 @@ protected function isQuickMode(): bool
return $value === '' || bool($value);
}
+ protected function getNumberOfThreads(): int // thread count requested via the `--parallel` option
+ {
+ $threads = \trim($this->getOptString('parallel')); // raw option value with surrounding whitespace removed
+ if ($threads === '') { // presumably the bare `--parallel` flag without a number - TODO confirm against getOptString()
+ return 0; // auto: preparation() reports this case as WorkerPool::getCpuCount() threads
+ }
+
+ return $this->getOptInt('parallel'); // custom threads number, as parsed by getOptInt()
+ }
+
/**
* @return SplFileInfo[]
*/
@@ -166,22 +198,37 @@ protected function renderIssues(string $prefix, int $number, string $filepath, i
$this->out("{$prefix}{$number} {$issues} in {$filepath}", $indent);
}
- protected function printDumpOfSchema(?Schema $schema): void
+ protected function printDumpOfSchema(?string $schemaFilename): void
{
- if ($schema === null) {
+ if ($schemaFilename === null) {
return;
}
- $dump = $schema->dumpAsYamlString();
- $dump = \preg_replace('/^([ \t]*)([^:\n]+:)/m', '$1$2', $dump);
if ($this->getOptBool('dump-schema')) {
+ $filename = Utils::cutPath($schemaFilename);
+
+ try {
+ $schema = new Schema($schemaFilename);
+ $dump = $schema->dumpAsYamlString();
+ $dump = \preg_replace('/^([ \t]*)([^:\n]+:)/m', '$1$2', $dump);
+ } catch (\Throwable $e) {
+ $dump = 'Unable to parse schema file: ' . $e->getMessage();
+ }
+
$this->_('```yaml');
- $this->_("# File: {$schema->getFilename()}");
+ $this->_("# File: {$filename}");
$this->_($dump);
$this->_('```');
}
}
+ protected static function dumpPreloader(): void // writes the list of included PHP files used to build the Docker preloader
+ {
+ if (Env::bool('JBZOO_BUILD_PRELOADER')) { // opt-in only; the Dockerfile sets this env var for its warm-up run
+ \file_put_contents(__DIR__ . '/../../docker/included_files.php', (string)phpArray(\get_included_files())); // files included so far; docker/build-preloader.php reads this file
+ }
+ }
+
protected static function renderPrefix(int $index, int $totalFiles): string
{
if ($totalFiles <= 1) {
diff --git a/src/Commands/Exception.php b/src/Commands/Exception.php
index f4b84a02..1d45ab73 100644
--- a/src/Commands/Exception.php
+++ b/src/Commands/Exception.php
@@ -16,6 +16,6 @@
namespace JBZoo\CsvBlueprint\Commands;
-class Exception extends \JBZoo\CsvBlueprint\Exception
+final class Exception extends \JBZoo\CsvBlueprint\Exception
{
}
diff --git a/src/Commands/ValidateCsv.php b/src/Commands/ValidateCsv.php
index 9cdbcea8..f6e91343 100644
--- a/src/Commands/ValidateCsv.php
+++ b/src/Commands/ValidateCsv.php
@@ -16,10 +16,11 @@
namespace JBZoo\CsvBlueprint\Commands;
-use JBZoo\CsvBlueprint\Csv\CsvFile;
use JBZoo\CsvBlueprint\Schema;
use JBZoo\CsvBlueprint\Utils;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
+use JBZoo\CsvBlueprint\Workers\Tasks\ValidationCsvTask;
+use JBZoo\CsvBlueprint\Workers\WorkerPool;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Finder\SplFileInfo;
@@ -117,7 +118,7 @@ protected function executeAction(): int
[$invalidFiles, $errorInCsvCounter] = $this->validateCsvFiles($matchedFiles);
- return $this->printSummary(
+ $exitCode = $this->printSummary(
\count($csvFilenames),
\count($schemaFilenames),
$invalidFiles,
@@ -125,6 +126,10 @@ protected function executeAction(): int
$errorInSchemaCounter,
$matchedFiles,
);
+
+ self::dumpPreloader(); // Experimental feature
+
+ return $exitCode;
}
/**
@@ -151,8 +156,6 @@ private function validateSchemas(array $schemaFilenames): int
continue;
}
- $schema = null;
-
try {
$schema = new Schema($schemaFilename->getPathname());
$schemaErrors = $schema->validate($quickCheck);
@@ -170,7 +173,7 @@ private function validateSchemas(array $schemaFilenames): int
"{$prefix}Exception: {$e->getMessage()}",
], 2);
}
- $this->printDumpOfSchema($schema);
+ $this->printDumpOfSchema($schemaFilename->getPathname());
}
$this->out('');
@@ -184,44 +187,67 @@ private function validateCsvFiles(array $matchedFiles): array
$totalFiles = $matchedFiles['count_pairs'];
$invalidFiles = 0;
$errorCounter = 0;
- $errorSuite = null;
$quickCheck = $this->isQuickMode();
+ $workerPool = new WorkerPool($this->getNumberOfThreads());
+ foreach ($matchedFiles['found_pairs'] as $schema => $csvs) {
+ foreach ($csvs as $csv) {
+ $workerPool->addTask("{$csv} => {$schema}", ValidationCsvTask::class, [$csv, $schema, $quickCheck]);
+ }
+ }
+
$this->out("CSV file validation: {$totalFiles}");
$index = 0;
- $isFirst = true;
- foreach ($matchedFiles['found_pairs'] as $schema => $csvs) {
- if ($isFirst) {
- $isFirst = false;
+ $currentSchemaFilename = null;
+
+ $exectionCallback = function (
+ string $pair,
+ ErrorSuite $errorSuite,
+ ) use (
+ &$index,
+ &$currentSchemaFilename,
+ &$invalidFiles,
+ &$errorCounter,
+ $totalFiles,
+ $quickCheck
+ ): void {
+ $index++;
+ $filesAsKey = \explode(' => ', $pair, 2);
+ if (\count($filesAsKey) > 1) {
+ [$csvFilename, $schemaFilename] = $filesAsKey;
} else {
- $this->out(''); // Add empty line between schema files
+ throw new Exception("Invalid pair: {$pair}");
}
- $this->out('Schema: ' . Utils::printFile($schema));
- foreach ($csvs as $csv) {
- $index++;
- $prefix = AbstractValidate::renderPrefix($index, $totalFiles);
-
- $currentCsvTitle = Utils::printFile($csv, 'blue') . '; Size: ' . Utils::getFileSize($csv);
- if ($quickCheck && $errorSuite !== null && $errorSuite->count() > 0) {
- $this->out("Skipped (Quick mode) {$currentCsvTitle}", 2);
- continue;
+ if ($currentSchemaFilename !== $schemaFilename) {
+ $currentSchemaFilename = $schemaFilename;
+ if ($index !== 1) { // Add empty line between schema files
+ $this->out('');
}
+ $this->out('Schema: ' . Utils::printFile($schemaFilename));
+ }
+
+ $prefix = AbstractValidate::renderPrefix($index, $totalFiles);
+ $currentCsvTitle = Utils::printFile($csvFilename, 'blue') . '; Size: ' . Utils::getFileSize($csvFilename);
- $errorSuite = (new CsvFile($csv, $schema))->validate($quickCheck);
+ if ($quickCheck && $errorSuite->count() > 0) {
+ $this->out("Skipped (Quick mode) {$currentCsvTitle}", 2);
+ return;
+ }
- if ($errorSuite->count() > 0) {
- $invalidFiles++;
- $errorCounter += $errorSuite->count();
+ if ($errorSuite->count() > 0) {
+ $invalidFiles++;
+ $errorCounter += $errorSuite->count();
- $this->renderIssues($prefix, $errorSuite->count(), $currentCsvTitle, 2);
- $this->outReport($errorSuite, 4);
- } else {
- $this->out("{$prefix}OK {$currentCsvTitle}", 2);
- }
+ $this->renderIssues($prefix, $errorSuite->count(), $currentCsvTitle, 2);
+ $this->outReport($errorSuite, 4);
+ } else {
+ $this->out("{$prefix}OK {$currentCsvTitle}", 2);
}
- }
+ };
+
+ $workerPool->run($exectionCallback);
return [$invalidFiles, $errorCounter];
}
@@ -261,7 +287,7 @@ private function printSummary(
if ($errorInSchemaCounter > 0) {
$this->out("Found {$errorInSchemaCounter} issues in {$totalSchemaFiles} schemas.", $indent);
- } else {
+ } elseif (!$this->isQuickMode()) {
$this->out("No issues in {$totalSchemaFiles} schemas.", $indent);
}
@@ -271,7 +297,7 @@ private function printSummary(
"out of {$totalCsvFiles} CSV files.",
$indent,
);
- } else {
+ } elseif (!$this->isQuickMode()) {
$this->out("No issues in {$totalCsvFiles} CSV files.", $indent);
}
diff --git a/src/Commands/ValidateSchema.php b/src/Commands/ValidateSchema.php
index 834c4458..db58244b 100644
--- a/src/Commands/ValidateSchema.php
+++ b/src/Commands/ValidateSchema.php
@@ -16,12 +16,11 @@
namespace JBZoo\CsvBlueprint\Commands;
-use JBZoo\CsvBlueprint\Schema;
use JBZoo\CsvBlueprint\Utils;
-use JBZoo\CsvBlueprint\Validators\Error;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
+use JBZoo\CsvBlueprint\Workers\Tasks\ValidationSchemaTask;
+use JBZoo\CsvBlueprint\Workers\WorkerPool;
use Symfony\Component\Console\Input\InputOption;
-use Symfony\Component\Yaml\Exception\ParseException;
/**
* @psalm-suppress PropertyNotSetInConstructor
@@ -65,34 +64,34 @@ protected function executeAction(): int
$this->out("Found schemas: {$totalFiles}");
$this->out('');
+ $workerPool = new WorkerPool($this->getNumberOfThreads());
+ foreach ($schemas as $schema) {
+ $filename = (string)$schema->getRealPath();
+ $workerPool->addTask($filename, ValidationSchemaTask::class, [$filename]);
+ }
+
$foundIssues = 0;
$index = 0;
- foreach ($this->findFiles('schema') as $file) {
- $index++;
- $prefix = self::renderPrefix($index, $totalFiles);
- $filename = (string)$file->getRealPath();
- $coloredPath = Utils::printFile($filename);
- $schemaErrors = new ErrorSuite($filename);
-
- try {
- $schema = new Schema($filename);
- $schemaErrors = $schema->validate($this->isQuickMode());
- $this->printDumpOfSchema(new Schema($filename));
- } catch (ParseException $e) {
- $schemaErrors->addError(new Error('schema.syntax', $e->getMessage(), '', $e->getParsedLine()));
- } catch (\Throwable $e) {
- $schemaErrors->addError(new Error('schema.error', $e->getMessage()));
- }
-
- if ($schemaErrors->count() > 0) {
- $this->renderIssues($prefix, $schemaErrors->count(), $coloredPath);
- $this->outReport($schemaErrors, 2);
- } else {
- $this->out("{$prefix}OK {$coloredPath}");
- }
-
- $foundIssues += $schemaErrors->count();
- }
+ $workerPool->run(
+ function (string $filename, ErrorSuite $schemaErrors) use (&$index, &$foundIssues, $totalFiles): void {
+ $index++;
+ $prefix = self::renderPrefix($index, $totalFiles);
+ $coloredPath = Utils::printFile($filename);
+
+ if ($schemaErrors->count() > 0) {
+ $this->renderIssues($prefix, $schemaErrors->count(), $coloredPath);
+ $this->outReport($schemaErrors, 2);
+ } else {
+ $this->out("{$prefix}OK {$coloredPath}");
+ }
+
+ $this->printDumpOfSchema($filename);
+
+ $foundIssues += $schemaErrors->count();
+ },
+ );
+
+ self::dumpPreloader();
return $foundIssues === 0 ? self::SUCCESS : self::FAILURE;
}
diff --git a/src/Csv/Exception.php b/src/Csv/Exception.php
index 2e1cca41..8f983161 100644
--- a/src/Csv/Exception.php
+++ b/src/Csv/Exception.php
@@ -16,6 +16,6 @@
namespace JBZoo\CsvBlueprint\Csv;
-class Exception extends \JBZoo\CsvBlueprint\Exception
+final class Exception extends \JBZoo\CsvBlueprint\Exception
{
}
diff --git a/src/Rules/Aggregate/Exception.php b/src/Rules/Aggregate/Exception.php
index 0657c8d9..361731d3 100644
--- a/src/Rules/Aggregate/Exception.php
+++ b/src/Rules/Aggregate/Exception.php
@@ -16,6 +16,6 @@
namespace JBZoo\CsvBlueprint\Rules\Aggregate;
-class Exception extends \JBZoo\CsvBlueprint\Rules\Exception
+final class Exception extends \JBZoo\CsvBlueprint\Rules\Exception
{
}
diff --git a/src/Rules/Cell/CountryCode.php b/src/Rules/Cell/CountryCode.php
index 5e6662ac..168f5f1d 100644
--- a/src/Rules/Cell/CountryCode.php
+++ b/src/Rules/Cell/CountryCode.php
@@ -20,7 +20,7 @@
use Respect\Validation\Rules\CountryCode as RespectCountryCode;
use Respect\Validation\Validator;
-class CountryCode extends AbstractCellRule
+final class CountryCode extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/Exception.php b/src/Rules/Cell/Exception.php
index a7898c35..79ce7a76 100644
--- a/src/Rules/Cell/Exception.php
+++ b/src/Rules/Cell/Exception.php
@@ -16,6 +16,6 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class Exception extends \JBZoo\CsvBlueprint\Rules\Exception
+final class Exception extends \JBZoo\CsvBlueprint\Rules\Exception
{
}
diff --git a/src/Rules/Cell/Hash.php b/src/Rules/Cell/Hash.php
index 5edce141..34b921df 100644
--- a/src/Rules/Cell/Hash.php
+++ b/src/Rules/Cell/Hash.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class Hash extends AbstractCellRule
+final class Hash extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsDate.php b/src/Rules/Cell/IsDate.php
index 8d3955f3..44a4f9b1 100644
--- a/src/Rules/Cell/IsDate.php
+++ b/src/Rules/Cell/IsDate.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class IsDate extends AbstractCellRule
+final class IsDate extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsGeohash.php b/src/Rules/Cell/IsGeohash.php
index 648a6598..1813db7e 100644
--- a/src/Rules/Cell/IsGeohash.php
+++ b/src/Rules/Cell/IsGeohash.php
@@ -18,7 +18,7 @@
use JBZoo\CsvBlueprint\Utils;
-class IsGeohash extends AbstractCellRule
+final class IsGeohash extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsLeapYear.php b/src/Rules/Cell/IsLeapYear.php
index 01ab2bbc..c91afcf8 100644
--- a/src/Rules/Cell/IsLeapYear.php
+++ b/src/Rules/Cell/IsLeapYear.php
@@ -18,7 +18,7 @@
use Respect\Validation\Validator;
-class IsLeapYear extends AbstractCellRule
+final class IsLeapYear extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsTime.php b/src/Rules/Cell/IsTime.php
index d19b152d..7b720ce8 100644
--- a/src/Rules/Cell/IsTime.php
+++ b/src/Rules/Cell/IsTime.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class IsTime extends AbstractCellRule
+final class IsTime extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsTimezone.php b/src/Rules/Cell/IsTimezone.php
index 775746ec..7ca5e980 100644
--- a/src/Rules/Cell/IsTimezone.php
+++ b/src/Rules/Cell/IsTimezone.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class IsTimezone extends AbstractCellRule
+final class IsTimezone extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/IsTimezoneOffset.php b/src/Rules/Cell/IsTimezoneOffset.php
index d9776b59..3b48642d 100644
--- a/src/Rules/Cell/IsTimezoneOffset.php
+++ b/src/Rules/Cell/IsTimezoneOffset.php
@@ -18,7 +18,7 @@
use JBZoo\CsvBlueprint\Utils;
-class IsTimezoneOffset extends AbstractCellRule
+final class IsTimezoneOffset extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/LanguageCode.php b/src/Rules/Cell/LanguageCode.php
index fd2a8da7..2f010642 100644
--- a/src/Rules/Cell/LanguageCode.php
+++ b/src/Rules/Cell/LanguageCode.php
@@ -20,7 +20,7 @@
use Respect\Validation\Rules\LanguageCode as RespectLanguageCode;
use Respect\Validation\Validator;
-class LanguageCode extends AbstractCellRule
+final class LanguageCode extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/NotAllowValues.php b/src/Rules/Cell/NotAllowValues.php
index bb786bba..58105f0d 100644
--- a/src/Rules/Cell/NotAllowValues.php
+++ b/src/Rules/Cell/NotAllowValues.php
@@ -16,7 +16,7 @@
namespace JBZoo\CsvBlueprint\Rules\Cell;
-class NotAllowValues extends AbstractCellRule
+final class NotAllowValues extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Rules/Cell/PostalCode.php b/src/Rules/Cell/PostalCode.php
index 5c38992f..69747cdf 100644
--- a/src/Rules/Cell/PostalCode.php
+++ b/src/Rules/Cell/PostalCode.php
@@ -18,7 +18,7 @@
use Respect\Validation\Validator;
-class PostalCode extends AbstractCellRule
+final class PostalCode extends AbstractCellRule
{
public function getHelpMeta(): array
{
diff --git a/src/Utils.php b/src/Utils.php
index c37bb99e..3cd3de1b 100644
--- a/src/Utils.php
+++ b/src/Utils.php
@@ -29,6 +29,8 @@ final class Utils
{
public const MAX_DIRECTORY_DEPTH = 10;
+ private static bool $debugMode = false;
+
public static function isArrayInOrder(array $array, array $correctOrder): bool
{
$orderIndex = 0;
@@ -70,16 +72,20 @@ public static function printList(null|array|bool|float|int|string $items, string
return "[\"<{$color}>" . \implode("{$color}>\", \"<{$color}>", $items) . "{$color}>\"]";
}
+ /**
+ * Prints a debug message, but only while the class-wide debug flag is enabled.
+ * The message is sent through cli(); if that call throws, it is written via Cli::out() with tags stripped.
+ */
- public static function debug(int|string $message): void
+ public static function debug(string $message): void
{
- if (\defined('DEBUG_MODE')) {
- cli($message);
+ if (self::$debugMode) {
+ try {
+ cli($message);
+ } catch (\Throwable) {
+ // Fallback path: cli() failed, so emit the plain-text version of the message instead.
+ Cli::out(\strip_tags($message));
+ }
}
}
+ /**
+ * Reports processing throughput through debug() while the debug flag is enabled.
+ * The value is $lines divided by the seconds elapsed since $startTimer, expressed in thousands of lines per second.
+ */
public static function debugSpeed(string $messPrefix, int $lines, float $startTimer): void
{
- if (\defined('DEBUG_MODE')) {
+ if (self::$debugMode) {
$kiloLines = \round(($lines / (\microtime(true) - $startTimer)) / 1000);
self::debug("{$messPrefix} " . \number_format($kiloLines) . 'K lines/sec');
}
}
@@ -292,15 +298,7 @@ public static function testRegex(?string $regex, string $subject): bool
return false;
}
- try {
- if (\preg_match($regex, $subject) === 0) {
- return true;
- }
- } catch (\Throwable) {
- return false;
- }
-
- return false;
+ return \preg_match($regex, $subject) === 0;
}
/**
@@ -484,6 +482,35 @@ public static function mergeConfigs(array ...$configs): array
return $merged;
}
+ /**
+ * Switches the class-wide debug flag that debug() and debugSpeed() check before printing.
+ */
+ public static function setDebugMode(bool $debugMode): void
+ {
+ self::$debugMode = $debugMode;
+ }
+
+ /**
+ * Returns the current value of the class-wide debug flag.
+ */
+ public static function getDebugMode(): bool
+ {
+ return self::$debugMode;
+ }
+
+ /**
+ * Prepares process-wide runtime settings: forces the UTC timezone and installs an error handler
+ * that turns every PHP error it receives into an exception.
+ */
+ public static function init(): void
+ {
+ // Set default timezone
+ \date_default_timezone_set('UTC');
+
+ // Convert all errors to exceptions, so that a critical case is either handled or stops the run.
+ // We have to do it because the tool uses 3rd-party libraries, and we can't trust them.
+ \set_error_handler(static function (int $severity, string $message, string $file, int $line): bool {
+ // Map the numeric level to a readable label; deprecations and recoverable errors get their own
+ // label instead of falling through to "Unknown".
+ $severity = match ($severity) {
+ \E_ERROR, \E_CORE_ERROR, \E_COMPILE_ERROR, \E_USER_ERROR, \E_RECOVERABLE_ERROR => 'Error',
+ \E_WARNING, \E_CORE_WARNING, \E_COMPILE_WARNING, \E_USER_WARNING => 'Warning',
+ \E_NOTICE, \E_USER_NOTICE => 'Notice',
+ \E_DEPRECATED, \E_USER_DEPRECATED => 'Deprecated',
+ default => 'Unknown',
+ };
+ throw new Exception("Unexpected {$severity}: \"{$message}\" in file \"{$file}:{$line}\"");
+ });
+ }
+
/**
* @param SplFileInfo[] $files
*/
diff --git a/src/Validators/Exception.php b/src/Validators/Exception.php
index 46a595ca..b9bc415b 100644
--- a/src/Validators/Exception.php
+++ b/src/Validators/Exception.php
@@ -16,6 +16,6 @@
namespace JBZoo\CsvBlueprint\Validators;
-class Exception extends \JBZoo\CsvBlueprint\Exception
+final class Exception extends \JBZoo\CsvBlueprint\Exception
{
}
diff --git a/src/Workers/Tasks/AbstractTask.php b/src/Workers/Tasks/AbstractTask.php
new file mode 100644
index 00000000..2dce2b54
--- /dev/null
+++ b/src/Workers/Tasks/AbstractTask.php
@@ -0,0 +1,22 @@
+csvFilename, $this->schemaFilename))->validate($this->isQuickMode);
+ }
+}
diff --git a/src/Workers/Tasks/ValidationSchemaTask.php b/src/Workers/Tasks/ValidationSchemaTask.php
new file mode 100644
index 00000000..efd34e35
--- /dev/null
+++ b/src/Workers/Tasks/ValidationSchemaTask.php
@@ -0,0 +1,47 @@
+schemaFilename);
+
+ try {
+ $schema = new Schema($this->schemaFilename);
+ $schemaErrors = $schema->validate($this->isQuickMode);
+ } catch (ParseException $e) {
+ $schemaErrors->addError(new Error('schema.syntax', $e->getMessage(), '', $e->getParsedLine()));
+ } catch (\Throwable $e) {
+ $schemaErrors->addError(new Error('schema.error', $e->getMessage()));
+ }
+
+ return $schemaErrors;
+ }
+}
diff --git a/src/Workers/Worker.php b/src/Workers/Worker.php
new file mode 100644
index 00000000..28f5351d
--- /dev/null
+++ b/src/Workers/Worker.php
@@ -0,0 +1,59 @@
+key;
+ }
+
+ /**
+ * Instantiates the configured task class with the stored arguments and runs it.
+ *
+ * @return mixed whatever the task's process() call returns
+ * @throws \InvalidArgumentException when the class does not exist or is not an AbstractTask subclass
+ */
+ public function execute(): mixed
+ {
+ $className = $this->className;
+ if (\class_exists($className) === false) {
+ throw new \InvalidArgumentException("Class '{$className}' not found");
+ }
+
+ // Validate the type BEFORE instantiation, so the constructor of a non-task class never runs.
+ if (\is_subclass_of($className, AbstractTask::class) === false) {
+ throw new \InvalidArgumentException("Class '{$className}' is not allowed");
+ }
+
+ /** @var AbstractTask $task */
+ $task = new $className(...$this->arguments);
+
+ return $task->process();
+ }
+
+ /**
+ * Returns the stored task class name, i.e. the class that execute() instantiates.
+ */
+ public function getClass(): string
+ {
+ return $this->className;
+ }
+
+ /**
+ * Returns the argument list that execute() unpacks into the task constructor.
+ */
+ public function getArguments(): array
+ {
+ return $this->arguments;
+ }
+}
diff --git a/src/Workers/WorkerPool.php b/src/Workers/WorkerPool.php
new file mode 100644
index 00000000..d9fe5a5d
--- /dev/null
+++ b/src/Workers/WorkerPool.php
@@ -0,0 +1,151 @@
+maxThreads = $maxThreads === 0 ? self::getCpuCount() : $maxThreads;
+ $this->tasksQueue = new \SplQueue();
+ }
+
+ /**
+ * Returns the thread limit resolved in the constructor (the detected CPU count when 0 was requested).
+ */
+ public function getMaxThreads(): int
+ {
+ return $this->maxThreads;
+ }
+
+ /**
+ * Queues a task for a later run().
+ *
+ * @param string $key identifier under which the task result is reported
+ * @param string $taskClass class name handed to the Worker
+ * @param array $arguments arguments handed to the Worker
+ */
+ public function addTask(string $key, string $taskClass, array $arguments = []): void
+ {
+ $queuedWorker = new Worker($key, $taskClass, $arguments);
+ $this->tasksQueue->enqueue($queuedWorker);
+ }
+
+ /**
+ * Executes all queued tasks, in parallel when isParallel() allows it and one by one otherwise.
+ *
+ * @param null|\Closure $callback optional receiver called with (key, result) for each finished task
+ * @return array results keyed by task key; stays empty when a callback consumes the results
+ */
+ public function run(?\Closure $callback = null): array
+ {
+ if ($this->isParallel()) {
+ return $this->runInParallel($callback);
+ }
+
+ return $this->runSequentially($callback);
+ }
+
+ /**
+ * Tells whether tasks will run in parallel: more than one thread is allowed
+ * and the `parallel` extension is loaded.
+ */
+ public function isParallel(): bool
+ {
+ if ($this->getMaxThreads() <= 1) {
+ return false;
+ }
+
+ return self::extLoaded();
+ }
+
+ /**
+ * Tells whether the `parallel` PHP extension is loaded.
+ */
+ public static function extLoaded(): bool
+ {
+ return \extension_loaded('parallel');
+ }
+
+ /**
+ * Remembers the autoloader file handed to every parallel Runtime.
+ * Only the first resolvable path is stored; the call is a no-op when the `parallel` extension
+ * is missing, a bootstrap is already set, or the path cannot be resolved.
+ */
+ public static function setBootstrap(string $autoloader): void
+ {
+ if (!self::extLoaded() || self::$bootstrap !== null) {
+ return;
+ }
+
+ $resolvedPath = \realpath($autoloader);
+ if ($resolvedPath === false) {
+ return;
+ }
+
+ self::$bootstrap = $resolvedPath;
+ // \parallel\bootstrap($autoloader); // Hm... Does it work?
+ }
+
+ /**
+ * Returns the CPU core count reported by CpuCoreCounter,
+ * or FALLBACK_CPU_COUNT when the detection throws.
+ */
+ public static function getCpuCount(): int
+ {
+ try {
+ $detected = (new CpuCoreCounter())->getCount();
+ } catch (\Throwable) {
+ $detected = self::FALLBACK_CPU_COUNT;
+ }
+
+ return $detected;
+ }
+
+ /**
+ * Drains the queue in the current thread, executing one worker after another.
+ *
+ * @param null|\Closure $callback optional receiver called with (key, result) for each task
+ * @return array results keyed by task key; empty when a callback is given
+ */
+ private function runSequentially(?\Closure $callback = null): array
+ {
+ $collected = [];
+
+ while (!$this->tasksQueue->isEmpty()) {
+ /** @var Worker $current */
+ $current = $this->tasksQueue->dequeue();
+ $taskKey = $current->getKey();
+ $outcome = $current->execute();
+
+ if ($callback === null) {
+ $collected[$taskKey] = $outcome;
+ continue;
+ }
+
+ $callback($taskKey, $outcome);
+ }
+
+ return $collected;
+ }
+
+ /**
+ * Polls the pool until the queue is empty and no task is still running.
+ * Each pass tops up the running set, harvests finished futures, then sleeps for POOL_MAINTENANCE_DELAY.
+ *
+ * @param null|\Closure $callback optional receiver called with (key, result) for each finished task
+ * @return array results keyed by task key, in the order they were harvested; empty when a callback is given
+ */
+ private function runInParallel(?\Closure $callback = null): array
+ {
+ $collected = [];
+
+ while (\count($this->runningTasks) > 0 || !$this->tasksQueue->isEmpty()) {
+ $this->maintainTaskPool();
+
+ foreach ($this->runningTasks as $taskKey => $pending) {
+ if ($pending === null || !$pending->done()) {
+ continue;
+ }
+
+ $outcome = $pending->value();
+ if ($callback === null) {
+ $collected[$taskKey] = $outcome;
+ } else {
+ $callback($taskKey, $outcome);
+ }
+
+ // The future is consumed, so its slot is released for the next queued task.
+ unset($this->runningTasks[$taskKey]);
+ }
+
+ \usleep(self::POOL_MAINTENANCE_DELAY);
+ }
+
+ return $collected;
+ }
+
+ /**
+ * Starts queued workers until either the queue is empty or maxThreads tasks are running.
+ * Every started task gets its own Runtime created from the bootstrap file.
+ *
+ * @throws Exception when no bootstrap file has been set
+ */
+ private function maintainTaskPool(): void
+ {
+ if (self::$bootstrap === null) {
+ throw new Exception('Bootstrap file is not set');
+ }
+ $bootstrapFile = self::$bootstrap;
+
+ // The closure executes inside the new runtime: it re-initializes Utils there, carries over
+ // the debug flag and rebuilds the Worker from plain values.
+ $job = static function (string $key, string $class, array $args, bool $debugMode): mixed {
+ Utils::init();
+ Utils::setDebugMode($debugMode);
+ return (new Worker($key, $class, $args))->execute();
+ };
+
+ while (!$this->tasksQueue->isEmpty() && \count($this->runningTasks) < $this->maxThreads) {
+ /** @var Worker $next */
+ $next = $this->tasksQueue->dequeue();
+ $thread = new Runtime($bootstrapFile);
+ $jobArguments = [$next->getKey(), $next->getClass(), $next->getArguments(), Utils::getDebugMode()];
+
+ $this->runningTasks[$next->getKey()] = $thread->run($job, $jobArguments);
+ }
+ }
+}
diff --git a/tests/Commands/ValidateCsvQuickTest.php b/tests/Commands/ValidateCsvQuickTest.php
index d9235acd..6548806c 100644
--- a/tests/Commands/ValidateCsvQuickTest.php
+++ b/tests/Commands/ValidateCsvQuickTest.php
@@ -42,16 +42,13 @@ public function testEnabled(): void
CSV file validation: 3
Schema: ./tests/schemas/demo_invalid.yml
- (1/3) 1 issue in ./tests/fixtures/batch/demo-1.csv; Size: 123.34 MB
- "allow_extra_columns" at line 1. Column(s) not found in CSV: "wrong_column_name".
-
+ Skipped (Quick mode) ./tests/fixtures/batch/demo-1.csv; Size: 123.34 MB
Skipped (Quick mode) ./tests/fixtures/batch/demo-2.csv; Size: 123.34 MB
Skipped (Quick mode) ./tests/fixtures/batch/sub/demo-3.csv; Size: 123.34 MB
Summary:
3 pairs (schema to csv) were found based on `filename_pattern`.
Found 1 issues in 1 schemas.
- Found 1 issues in 1 out of 3 CSV files.
TXT;
diff --git a/tests/Commands/ValidateSchemaTest.php b/tests/Commands/ValidateSchemaTest.php
index a0e66b13..8779cea3 100644
--- a/tests/Commands/ValidateSchemaTest.php
+++ b/tests/Commands/ValidateSchemaTest.php
@@ -93,6 +93,60 @@ public function testInvalidSchemas(): void
isSame(1, $exitCode, $actual);
}
+ /**
+ * Runs `validate:schema` with the `dump-schema` option against the broken schemas and pins
+ * the complete output (issue tables followed by a YAML dump per file) together with exit code 1.
+ */
+ public function testInvalidSchemasWithSchemaDump(): void
+ {
+ [$actual, $exitCode] = Tools::virtualExecution('validate:schema', [
+ 'schema' => './tests/schemas/broken/*.yml',
+ 'dump-schema' => null,
+ ]);
+
+ $expected = <<<'TXT'
+ CSV Blueprint: Unknown version (PhpUnit)
+ Found schemas: 2
+
+ (1/2) 1 issue in ./tests/schemas/broken/invalid_schema.yml
+ +-------+-----------+--------+----------------------------------+
+ | Line | id:Column | Rule | Message |
+ +-------+-----------+--------+----------------------------------+
+ | undef | meta | schema | Unknown key: .unknow_root_option |
+ +-------+-----------+--------+----------------------------------+
+ ```yaml
+ # File: ./tests/schemas/broken/invalid_schema.yml
+ name: ''
+ description: ''
+ presets: []
+ filename_pattern: /invalid-pattern\.csv$/i
+ csv:
+ header: true
+ delimiter: ','
+ quote_char: \
+ enclosure: '"'
+ encoding: utf-8
+ bom: false
+ structural_rules:
+ strict_column_order: true
+ allow_extra_columns: false
+ columns: []
+ unknow_root_option: true
+
+ ```
+ (2/2) 1 issue in ./tests/schemas/broken/syntax.yml
+ +------+-----------+---------------+---------------------------------------------------+
+ | Line | id:Column | Rule | Message |
+ +------+-----------+---------------+---------------------------------------------------+
+ | 15 | | schema.syntax | Unable to parse at line 15 (near "(*$#)@(@$*)("). |
+ +------+-----------+---------------+---------------------------------------------------+
+ ```yaml
+ # File: ./tests/schemas/broken/syntax.yml
+ Unable to parse schema file: Unable to parse at line 15 (near "(*$#)@(@$*)(").
+ ```
+
+ TXT;
+
+ isSame($expected, $actual);
+ isSame(1, $exitCode, $actual);
+ }
+
public function testInvalidSchemasTextReport(): void
{
[$actual, $exitCode] = Tools::virtualExecution('validate:schema', [
diff --git a/tests/GithubActionsTest.php b/tests/GithubActionsTest.php
index 3d92e1ab..e2ea1bdb 100644
--- a/tests/GithubActionsTest.php
+++ b/tests/GithubActionsTest.php
@@ -55,7 +55,7 @@ public function testGitHubActionsReadMe(): void
'apply-all' => "'auto'",
'quick' => "'no'",
'skip-schema' => "'no'",
- 'extra' => "'options: --ansi -v'",
+ 'extra' => "'options: --ansi'",
];
$expectedMessage = [
diff --git a/tests/TestCase.php b/tests/TestCase.php
index 6716e699..384b07ba 100644
--- a/tests/TestCase.php
+++ b/tests/TestCase.php
@@ -16,6 +16,8 @@
namespace JBZoo\PHPUnit;
+use JBZoo\CsvBlueprint\Workers\WorkerPool;
+
abstract class TestCase extends PHPUnit
{
protected function setUp(): void
@@ -25,5 +27,6 @@ protected function setUp(): void
\date_default_timezone_set('UTC');
\putenv('COLUMNS=200');
\chdir(PROJECT_ROOT);
+ WorkerPool::setBootstrap(PROJECT_ROOT . '/vendor/autoload.php');
}
}
diff --git a/tests/Workers/TaskRunnerTest.php b/tests/Workers/TaskRunnerTest.php
new file mode 100644
index 00000000..df214516
--- /dev/null
+++ b/tests/Workers/TaskRunnerTest.php
@@ -0,0 +1,75 @@
+isParallel());
+
+ $runner = new WorkerPool(1);
+ isFalse($runner->isParallel());
+ }
+
+ /**
+ * A single-thread pool must return every task result keyed by task key and take
+ * at least three task delays, because the tasks run one after another.
+ */
+ public function testExecuteSequentially(): void
+ {
+ $pool = new WorkerPool(1);
+ foreach (['q' => 1, 'qq' => 2, 'qqq' => 3] as $taskKey => $taskId) {
+ $pool->addTask($taskKey, TestTask::class, [$taskId]);
+ }
+
+ $startedAt = \microtime(true);
+ $results = $pool->run();
+ $elapsed = \microtime(true) - $startedAt;
+
+ isSame(['q' => 1, 'qq' => 2, 'qqq' => 3], $results);
+ isTrue($elapsed >= TestTask::DELAY * 3, (string)$elapsed);
+ }
+
+ /**
+ * Runs three tasks through the parallel pool and checks both the collected results and
+ * that the whole run finished faster than three sequential task delays.
+ * Skipped when the `parallel` extension is not available.
+ */
+ public function testExecuteParallel(): void
+ {
+ self::onlyParallel();
+
+ $runner = new WorkerPool();
+ isTrue($runner->getMaxThreads() > 1);
+ $runner->addTask('q', TestTask::class, [1]);
+ $runner->addTask('qq', TestTask::class, [2]);
+ $runner->addTask('qqq', TestTask::class, [3]);
+
+ $startTime = \microtime(true);
+ $results = $runner->run();
+ $time = \microtime(true) - $startTime;
+
+ // The parallel pool stores results in the order tasks finish, which may differ from the order
+ // they were added. The strict array comparison is order-sensitive, so sort by key first.
+ \ksort($results);
+ isSame(['q' => 1, 'qq' => 2, 'qqq' => 3], $results);
+
+ isTrue($time < TestTask::DELAY * 3, (string)$time);
+ }
+
+ /**
+ * Guard for tests that need real threads: skips the current test unless the `parallel` extension is loaded.
+ */
+ private static function onlyParallel(): void
+ {
+ $isAvailable = \extension_loaded('parallel');
+ if ($isAvailable) {
+ return;
+ }
+
+ skip('The parallel extension is not available.');
+ }
+}
diff --git a/tests/Workers/TestTask.php b/tests/Workers/TestTask.php
new file mode 100644
index 00000000..0cfc02b7
--- /dev/null
+++ b/tests/Workers/TestTask.php
@@ -0,0 +1,36 @@
+id;
+ }
+}
diff --git a/tests/autoload.php b/tests/autoload.php
index 90acf384..4429d093 100644
--- a/tests/autoload.php
+++ b/tests/autoload.php
@@ -23,3 +23,7 @@
echo 'Please execute "composer update" !' . \PHP_EOL;
exit(1);
}
+
+if (\extension_loaded('parallel')) {
+ // This file lives in tests/, while Composer's vendor/ directory sits one level up in the project root,
+ // so the autoloader path has to be resolved from the parent directory.
+ \parallel\bootstrap(\dirname(__DIR__) . '/vendor/autoload.php');
+}
diff --git a/tests/schemas/demo_invalid.yml b/tests/schemas/demo_invalid.yml
index 6aef0ba5..9678d16e 100644
--- a/tests/schemas/demo_invalid.yml
+++ b/tests/schemas/demo_invalid.yml
@@ -12,7 +12,7 @@
# This schema is invalid because does not match the CSV file (tests/fixtures/demo.csv).
-filename_pattern: /(demo-[12]|demo)\.csv)$/i
+filename_pattern: /demo(-[123])?\.csv$/
columns:
- name: Name
diff --git a/tests/stubs/parallel.stub_php b/tests/stubs/parallel.stub_php
new file mode 100644
index 00000000..0ef6542a
--- /dev/null
+++ b/tests/stubs/parallel.stub_php
@@ -0,0 +1,392 @@
+