Skip to content

Commit

Permalink
Merge 588cc4b into fcb71a4
Browse files Browse the repository at this point in the history
  • Loading branch information
SmetDenis committed Apr 11, 2024
2 parents fcb71a4 + 588cc4b commit 5774796
Show file tree
Hide file tree
Showing 55 changed files with 1,451 additions and 286 deletions.
37 changes: 29 additions & 8 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ jobs:
test-current-versions:
name: Tests
runs-on: ubuntu-latest
strategy:
matrix:
ext-parallel: [ '', 'parallel' ]
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -45,20 +48,20 @@ jobs:

- name: Setup PHP
uses: shivammathur/setup-php@v2
id: setup-php
with:
php-version: 8.3
coverage: xdebug
extensions: ast
extensions: ast, ${{ matrix.ext-parallel }}
env:
phpts: zts

- name: Build project
run: make build --no-print-directory

- name: 🧪 PHPUnit Tests
run: make test --no-print-directory

- name: 👍 Code Quality
run: make codestyle --no-print-directory

- name: Uploading coverage to coveralls
continue-on-error: true
env:
Expand All @@ -67,10 +70,14 @@ jobs:

- name: SonarCloud Scan
uses: SonarSource/sonarcloud-github-action@master
continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

- name: 👍 Code Quality
run: make codestyle --no-print-directory

- name: Upload Artifacts
uses: actions/upload-artifact@v4
continue-on-error: true
Expand Down Expand Up @@ -266,11 +273,25 @@ jobs:
run: |
! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST
- name: 🔥 Quick Benchmark 🔥
- name: 👍 Valid CSV files (Parallel)
run: $BLUEPRINT_DOCKER $CMD_VALIDATE $VALID_TEST --parallel

- name: 👎 Invalid CSV files (Parallel)
run: |
! $BLUEPRINT_DOCKER $CMD_VALIDATE $INVALID_TEST --parallel
- name: Prepare Benchmark
run: |
make build --no-print-directory
make bench-create-csv --no-print-directory
- name: 🔥 Benchmark (single thread)
run: |
! make bench-docker-quick --no-print-directory
- name: 🔥 Benchmark (multi threads)
run: |
make build > /dev/null 2>&1
make bench-create-csv > /dev/null
make bench-docker-quick --no-print-directory
! make bench-docker-quick-parallel --no-print-directory
- name: Push Docker Image (master)
uses: docker/build-push-action@v5
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ vendor
phpunit.xml
/docker/preload.php
/docker/random_data.csv
/docker/included_files.php
*.cache
*.phar
.version
12 changes: 9 additions & 3 deletions .phan.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,21 @@
'directory_list' => [
'src',

'vendor/jbzoo/ci-report-converter/src',
'vendor/jbzoo/cli/src',
'vendor/jbzoo/data/src',
'vendor/jbzoo/cli/src',
'vendor/jbzoo/utils/src',
'vendor/league/csv/src',
'vendor/jbzoo/ci-report-converter/src',

'vendor/symfony/console',
'vendor/symfony/finder',
'vendor/symfony/yaml',

'vendor/league/csv/src',
'vendor/markrogoyski/math-php/src',
'vendor/respect/validation',
'vendor/fidry/cpu-core-counter',
],
'autoload_internal_extension_signatures' => [
'parallel' => 'tests/stubs/parallel.stub_php',
],
]);
15 changes: 14 additions & 1 deletion .php-cs-fixer.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,20 @@

namespace JBZoo\Codestyle\PhpCsFixer;

return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig(null, [
use Symfony\Component\Finder\Finder;

$finder = (new Finder())
->files()
->followLinks()
->ignoreVCS(true)
->ignoreDotFiles(false)
->in(__DIR__)
->exclude('vendor')
->exclude('docker') // preload.php!
->exclude('build')
->name('/\.php$/');

return (new PhpCsFixerCodingStandard(__DIR__))->getFixerConfig($finder, [
'binary_operator_spaces' => [
'operators' => [
'=' => 'single_space',
Expand Down
41 changes: 23 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,42 @@ COPY . /tmp
RUN make build-version

########################################################################################
FROM php:8.3-cli-alpine
FROM php:8.3-zts-alpine

# Install PHP extensions
ADD --chmod=0755 https://github.com/mlocati/docker-php-extension-installer/releases/latest/download/install-php-extensions /usr/local/bin/
RUN install-php-extensions opcache @composer
RUN install-php-extensions opcache parallel @composer

# Install application
# run `make build-version` before!
WORKDIR /app
ENV COMPOSER_ALLOW_SUPERUSER=1
COPY . /app
COPY --from=preparatory /tmp/.version /app/.version
RUN composer install --no-dev \
--classmap-authoritative \
--no-progress \
--no-suggest \
--optimize-autoloader \
&& rm -rf ./.git \
&& composer clear-cache \
&& chmod +x ./csv-blueprint
RUN composer install --no-dev --classmap-authoritative --no-progress \
&& rm -rf ./.git \
&& composer clear-cache \
&& chmod +x ./csv-blueprint \
&& chmod +x ./docker/entrypoint.sh

RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
COPY ./docker/php.ini /usr/local/etc/php/conf.d/docker-z99-php.ini

# Quick test
RUN time ./csv-blueprint --version --ansi \
&& time ./csv-blueprint validate:csv --help --ansi

# Warmup caches
#RUN php ./docker/build-preloader.php \
# && php ./docker/preload.php \
RUN php ./docker/random-csv.php \
&& JBZOO_BUILD_PRELOADER=1 \
./csv-blueprint validate:csv \
--schema=/app/schema-examples/full.yml \
--csv=/app/docker/random_data.csv \
--apply-all=yes \
--report=text --mute-errors | grep issues \
&& rm ./docker/random_data.csv \
&& php ./docker/build-preloader.php \
&& php ./docker/preload.php \
&& du -sh /app/docker
# && echo "opcache.preload=/app/docker/preload.php" >> /usr/local/etc/php/conf.d/docker-z99-php.ini

ENTRYPOINT ["/app/csv-blueprint"]
# Quick test
RUN time ./csv-blueprint -V

ENTRYPOINT ["/app/docker/entrypoint.sh"]
#ENTRYPOINT ["/app/csv-blueprint"]
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,15 @@ bench-create-csv: ##@Benchmarks Create CSV file
@time bash ./tests/Benchmarks/create-csv.sh
@echo "::endgroup::"

bench-docker-quick: ##@Benchmarks Run CSV file with Docker (Quick)
bench-docker-quick:
$(call title,"Single thread mode")
@docker run --rm $(DOCKER_IMAGE) --ansi --version
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS)
$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS)

bench-docker-quick-parallel:
$(call title,"Multi-thread mode")
@docker run --rm $(DOCKER_IMAGE) --ansi --version
$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMAS_0) $(BENCH_FLAGS) --parallel

bench-docker: ##@Benchmarks Run CSV file with Docker
@docker run --rm $(DOCKER_IMAGE) --ansi --version
Expand Down
50 changes: 42 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ specifications, making it invaluable in scenarios where data quality and consist
- [Usage](#usage)
- [Schema definition](#schema-definition)
- [Presets and reusable schemas](#presets-and-reusable-schemas)
- [Parallel processing](#parallel-processing)
- [Complete CLI help message](#complete-cli-help-message)
- [Report examples](#report-examples)
- [Benchmarks](#benchmarks)
Expand Down Expand Up @@ -160,14 +161,15 @@ You can find launch examples in the [workflow demo](https://github.com/JBZoo/Csv

# Extra options for the CSV Blueprint. Only for debugging and profiling.
# Available options:
# ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
# Verbosity level: Available options: `-v`, `-vv`, `-vvv`.
# Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
# Add flag `--debug` if you want to see more really deep details.
# Add flag `--parallel` if you want to validate CSV files in parallel.
# Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance.
# Default value: 'options: --ansi -v'
# Add flag `--debug` if you want to see more really deep details.
# Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
# Verbosity level: Available options: `-v`, `-vv`, `-vvv`
# ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
# Default value: 'options: --ansi'
# You can skip it.
extra: 'options: --ansi -v'
extra: 'options: --ansi'
```
<!-- auto-update:/github-actions-yml -->

Expand Down Expand Up @@ -1412,6 +1414,31 @@ columns:
These are intended solely for demonstration and to illustrate potential configurations and features.


## Parallel processing

The `--parallel` option is available for speeding up the validation of CSV files by utilizing more CPU resources
effectively.

### Key Points

- **Experimental Feature:** This feature is currently experimental and requires further debugging and testing. Although
it performs well in synthetic autotests and benchmarks, more practical use cases are needed to validate its stability.
- **Use Case:** This option is beneficial if you are processing dozens of CSV files, with each file taking 1 second or
more to process.
- **Default Behavior:** If you use `--parallel` without specifying a value, it defaults to using the maximum number of
available CPU cores.
- **Thread Pool Size:** You can set a specific number of threads for the pool. For example, `--parallel=10` will set the
thread pool size to 10. It doesn't make much sense to specify more than the number of logical cores in your CPU.
Otherwise, it will only slow things down a bit due to the system overhead to handle multithreading.
- **Disabling Parallelism:** Using `--parallel=1` disables parallel processing, which is the default setting if the
option is not specified.
- **Implementation:** The feature relies on the `ext-parallel` PHP extension, which enables the creation of lightweight
threads rather than processes. This extension is already included in our Docker image. Ensure that you have
the `ext-parallel` extension installed if you are not using our Docker image. This extension is crucial for the
operation of the parallel processing feature. The application always runs in single-threaded mode if the extension is
not installed.


## Complete CLI help message

This section outlines all available options and commands provided by the tool, leveraging the JBZoo/Cli package for its
Expand Down Expand Up @@ -1466,6 +1493,10 @@ Options:
--debug Intended solely for debugging and advanced profiling purposes.
Activating this option provides detailed process insights,
useful for troubleshooting and performance analysis.
--parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.
You can specify the number of threads.
If you do not specify a value, the number of threads will be equal to the number of CPU cores.
By default, the process is launched in a single-threaded mode. [default: "1"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has major priority then --non-zero-on-error. It's on your own risk!
Expand Down Expand Up @@ -1522,6 +1553,10 @@ Options:
--debug Intended solely for debugging and advanced profiling purposes.
Activating this option provides detailed process insights,
useful for troubleshooting and performance analysis.
--parallel[=PARALLEL] EXPERIMENTAL! Launches the process in parallel mode (if possible). Works only with ext-parallel.
You can specify the number of threads.
If you do not specify a value, the number of threads will be equal to the number of CPU cores.
By default, the process is launched in a single-threaded mode. [default: "1"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has major priority then --non-zero-on-error. It's on your own risk!
Expand Down Expand Up @@ -1901,8 +1936,7 @@ It's random ideas and plans. No promises and deadlines. Feel free to [help me!](

* **Performance and optimization**
* Using [vectors](https://www.php.net/manual/en/class.ds-vector.php) instead of arrays to optimize memory usage and speed of access.
* Parallel validation of schema by columns. You won't believe this, but modern PHP has multithreading support.
* Parallel validation of multiple files at once.
* Multithreading support for parallel validation of CSV by columns.

* **Mock data generation**
* Create CSV files based on the schema (like "create 1000 rows with random data based on schema and rules").
Expand Down
11 changes: 6 additions & 5 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ inputs:
description: |
Extra options for the CSV Blueprint. Only for debugging and profiling.
Available options:
ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
Verbosity level: Available options: `-v`, `-vv`, `-vvv`.
Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
Add flag `--debug` if you want to see more really deep details.
Add flag `--parallel` if you want to validate CSV files in parallel.
Add flag `--dump-schema` if you want to see the final schema after all includes and inheritance.
default: 'options: --ansi -v'
Add flag `--debug` if you want to see more really deep details.
Add flag `--profile` if you want to see profiling info. Add details with `-vvv`.
Verbosity level: Available options: `-v`, `-vv`, `-vvv`
ANSI output. You can disable ANSI colors if you want with `--no-ansi`.
default: 'options: --ansi'

runs:
using: 'docker'
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
"respect/validation" : "^2.3.6",
"giggsey/libphonenumber-for-php-lite" : "^8.13.34",
"giggsey/locale" : "^2.5",
"symfony/polyfill-mbstring" : "^1.29.0"
"symfony/polyfill-mbstring" : "^1.29.0",
"fidry/cpu-core-counter" : "^1.1.0"
},

"require-dev" : {
Expand Down
Loading

0 comments on commit 5774796

Please sign in to comment.