Skip to content

Commit

Permalink
Update README with new utility introduction and use cases (#105)
Browse files Browse the repository at this point in the history
This commit revises the introduction in the README to include detailed
information about the console utility and its broader data quality
management strategy. In addition, the primary use cases for the utility
have been outlined for better understanding of its applications.
  • Loading branch information
SmetDenis committed Mar 27, 2024
1 parent 24c28a9 commit 6a5a291
Show file tree
Hide file tree
Showing 16 changed files with 263 additions and 153 deletions.
55 changes: 55 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#
# JBZoo Toolbox - Csv-Blueprint.
#
# This file is part of the JBZoo Toolbox project.
# For the full copyright and license information, please view the LICENSE
# file that was distributed with this source code.
#
# @license MIT
# @copyright Copyright (C) JBZoo.com, All rights reserved.
# @see https://github.com/JBZoo/Csv-Blueprint
#

name: Benchmark

# Runs on every push to master, and again each time the "Publish Docker"
# workflow finishes.
on:
  push:
    branches:
      - 'master'
  workflow_run:
    workflows:
      - 'Publish Docker'
    types:
      - completed

jobs:
  benchmark:
    name: Benchmark
    runs-on: ubuntu-latest
    # `workflow_run` with `types: completed` fires for every conclusion
    # (success, failure, cancelled). Skip the benchmark when the image
    # publication did not succeed, otherwise a stale image would be measured.
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    env:
      # Image tag pulled below. It is exported to every step, so `make` also
      # sees it as DOCKER_IMAGE.
      DOCKER_IMAGE: jbzoo/csv-blueprint:master
    steps:
      # No explicit `ref`: this workflow is never triggered by pull_request, so
      # `github.event.pull_request.head.ref` was always empty and checkout
      # already fell back to the default ref of the triggering event.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup PHP
        uses: shivammathur/setup-php@v2
        with:
          # Quoted so the version is never read as a float (e.g. 8.10 -> 8.1).
          php-version: '8.3'
          coverage: none
          tools: composer
          extensions: opcache

      - name: Build project
        run: make build --no-print-directory

      - name: Create random CSV files with 5M rows
        run: make bench-create-csv --no-print-directory

      # Use the shell variable instead of interpolating an expression into the
      # script text.
      - name: Pull latest Docker image
        run: docker pull "$DOCKER_IMAGE"

      - name: 🔥 Check 5M rows with Docker 🔥
        run: make bench-docker --no-print-directory
23 changes: 0 additions & 23 deletions .github/workflows/demo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ env:
CSV_FILES: './tests/fixtures/batch/*.csv'
VALID_SCHEMA: './tests/schemas/demo_valid.yml'
INVALID_SCHEMA: './tests/schemas/demo_*.yml'
BLUEPRINT_DOCKER: 'docker run --rm --workdir=/parent-host -v .:/parent-host jbzoo/csv-blueprint:latest'


jobs:
Expand Down Expand Up @@ -86,25 +85,3 @@ jobs:
schema: ${{ env.INVALID_SCHEMA }}
report: junit
continue-on-error: true


# Smoke-test job for the published Docker image.
# $BLUEPRINT_DOCKER, $CSV_FILES, $VALID_SCHEMA and $INVALID_SCHEMA are read
# from the workflow-level `env` block.
# The last step prefixes its command with `!`, so that step succeeds only
# when validation against the invalid schemas exits non-zero.
docker:
name: Docker
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

# Pull explicitly so the download happens in its own step.
- name: Pull Docker Image
run: docker pull jbzoo/csv-blueprint:latest

# No sub-command is passed here, only the --ansi flag.
- name: 🎨 Test help and logo
run: $BLUEPRINT_DOCKER --ansi

# Validation of the fixture CSV files against the valid schema.
- name: 👍 Valid CSV files
run: |
$BLUEPRINT_DOCKER validate:csv --ansi -vvv --csv=$CSV_FILES --schema=$VALID_SCHEMA
- name: 👎 Invalid CSV files
run: |
! $BLUEPRINT_DOCKER validate:csv --ansi -vvv --csv=$CSV_FILES --schema=$INVALID_SCHEMA
37 changes: 34 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
php-version: 8.3
coverage: xdebug
tools: composer
extensions: ast
extensions: ast, opcache

- name: Build project
run: make build --no-print-directory
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
php-version: 8.1
coverage: none
tools: composer
extensions: ast
extensions: ast, opcache

- name: Install project
run: make build --no-print-directory
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:
php-version: 8.3
coverage: none
tools: composer
extensions: ast
extensions: ast, opcache

- name: Install project
run: make build --no-print-directory
Expand Down Expand Up @@ -219,6 +219,7 @@ jobs:
with:
php-version: ${{ matrix.php-version }}
tools: composer
extensions: opcache

- name: Build project in production mode
run: make build-prod build-phar-file --no-print-directory
Expand Down Expand Up @@ -309,3 +310,33 @@ jobs:
csv: ./tests/fixtures/batch/*.csv
schema: ./tests/schemas/demo_*.yml
continue-on-error: true


# Benchmark job: builds the project with PHP 8.3, generates a large random CSV
# and validates it with the PHP binary (no Docker involved).
benchmark:
  name: Benchmark
  runs-on: ubuntu-latest
  env:
    DOCKER_IMAGE: jbzoo/csv-blueprint:master
  steps:
    # NOTE(review): `ref` is only populated on pull_request events; on other
    # events the expression is empty and checkout presumably uses its default
    # ref. Confirm against this workflow's triggers.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}

    # Coverage driver disabled; opcache is the only extension requested.
    - name: Setup PHP
      uses: shivammathur/setup-php@v2
      with:
        php-version: 8.3
        coverage: none
        tools: composer
        extensions: opcache

    - name: Build project
      run: make build --no-print-directory

    # Generates the input file consumed by the benchmark step below.
    - name: Create random CSV files with 5M rows
      run: make bench-create-csv --no-print-directory

    - name: 🔥 Check 5M rows with PHP Binary 🔥
      run: make bench-php --no-print-directory
78 changes: 47 additions & 31 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ ifneq (, $(wildcard ./vendor/jbzoo/codestyle/src/init.Makefile))
include ./vendor/jbzoo/codestyle/src/init.Makefile
endif

CMD_VALIDATE ?= validate:csv --ansi -vvv
DOCKER_IMAGE ?= jbzoo/csv-blueprint:local
BLUEPRINT ?= COLUMNS=300 time $(PHP_BIN) ./csv-blueprint $(CMD_VALIDATE)
BLUEPRINT_DOCKER ?= docker run --rm --workdir=/parent-host -v .:/parent-host $(DOCKER_IMAGE) $(CMD_VALIDATE)
BENCH_BIN ?= time ${PHP_BIN} ./tests/Benchmarks/bench.php
CMD_VALIDATE := validate:csv --ansi -vvv
BLUEPRINT := COLUMNS=300 time $(PHP_BIN) ./csv-blueprint $(CMD_VALIDATE)
BLUEPRINT_DOCKER := time docker run --rm --workdir=/parent-host -v .:/parent-host $(DOCKER_IMAGE) $(CMD_VALIDATE)
BENCH_BIN := time $(PHP_BIN) ./tests/Benchmarks/bench.php

VALID_CSV ?= --csv='./tests/fixtures/demo.csv'
VALID_SCHEMA ?= --schema='./tests/schemas/demo_valid.yml'
INVALID_CSV ?= --csv='./tests/fixtures/batch/*.csv'
INVALID_SCHEMA ?= --schema='./tests/schemas/demo_invalid.yml'
VALID_CSV := --csv='./tests/fixtures/demo.csv'
VALID_SCHEMA := --schema='./tests/schemas/demo_valid.yml'
INVALID_CSV := --csv='./tests/fixtures/batch/*.csv'
INVALID_SCHEMA := --schema='./tests/schemas/demo_invalid.yml'

# Build/install ########################################################################################################
build: ##@Project Build project in development mode
Expand Down Expand Up @@ -62,13 +62,13 @@ update: ##@Project Update dependencies
# Demo #################################################################################################################
demo: ##@Demo Run demo via PHP binary
$(call title,"Demo - Valid CSV \(PHP binary\)")
@${BLUEPRINT} ${VALID_CSV} ${VALID_SCHEMA}
@$(BLUEPRINT) $(VALID_CSV) $(VALID_SCHEMA)
$(call title,"Demo - Invalid CSV \(PHP binary\)")
@${BLUEPRINT} ${INVALID_CSV} ${INVALID_SCHEMA}
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA)

REPORT ?= table
demo-github: ##@Demo Run demo invalid CSV for GitHub Actions
@${BLUEPRINT} ${INVALID_CSV} ${INVALID_SCHEMA} --report=$(REPORT)
@$(BLUEPRINT) $(INVALID_CSV) $(INVALID_SCHEMA) --report=$(REPORT)


# Docker ###############################################################################################################
Expand All @@ -79,42 +79,58 @@ docker-build: ##@Docker (Re-)build Docker image

docker-demo: ##@Docker Run demo via Docker
$(call title,"Demo - Valid CSV \(via Docker\)")
@${BLUEPRINT_DOCKER} ${VALID_CSV} ${VALID_SCHEMA}
@$(BLUEPRINT_DOCKER) $(VALID_CSV) $(VALID_SCHEMA)
$(call title,"Demo - Invalid CSV \(via Docker\)")
@${BLUEPRINT_DOCKER} ${INVALID_CSV} ${INVALID_SCHEMA}
@$(BLUEPRINT_DOCKER) $(INVALID_CSV) $(INVALID_SCHEMA)

docker-in: ##@Docker Enter into Docker container
@docker run -it --entrypoint /bin/sh $(DOCKER_IMAGE)


# Benchmarks ###########################################################################################################
BENCH_CSV ?= --csv=./build/bench/20_1000000_header.csv
BENCH_SCHEMA ?= --schema=./tests/benchmarks/benchmark.yml
BENCH_ROWS ?= 5000000
BENCH_CSV := --csv=./build/bench/5_$(BENCH_ROWS)_header.csv
BENCH_SCHEMA_CELL := --schema=./tests/Benchmarks/benchmark-cell.yml
BENCH_SCHEMA_AGG := --schema=./tests/Benchmarks/benchmark-agg.yml

bench-php: ##@Benchmarks Run PHP binary benchmarks
$(call title,"PHP Benchmarks - PHP binary")
${BLUEPRINT} $(BENCH_CSV) $(BENCH_SCHEMA) --profile
bench-create-csv: ##@Benchmarks Create CSV file
$(call title,"PHP Benchmarks - Create $(BENCH_ROWS) CSV file")
@mkdir -pv ./build/bench/
$(BENCH_BIN) --add-header --columns=5 --rows=$(BENCH_ROWS) --ansi
ls -lah ./build/bench/*.csv;


bench-docker: ##@Benchmarks Run CSV file with Docker
$(call title,"PHP Benchmarks - CSV file with Docker")
$(call title,"Only one cell rule")
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMA_CELL) --profile
$(call title,"Only one aggregation rule")
-$(BLUEPRINT_DOCKER) $(BENCH_CSV) $(BENCH_SCHEMA_AGG) --profile

bench-docker: ##@Benchmarks Run Docker benchmarks
$(call title,"PHP Benchmarks - Docker")
@time ${BLUEPRINT_DOCKER} $(BENCH_CSV) $(BENCH_SCHEMA) --profile

bench-php: ##@Benchmarks Run CSV file with PHP binary
$(call title,"PHP Benchmarks - CSV file with PHP binary")
$(call title,"Only one cell rule")
-$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMA_CELL) --profile
$(call title,"Only one aggregation rule")
-$(BLUEPRINT) $(BENCH_CSV) $(BENCH_SCHEMA_AGG) --profile

BENCH_ROWS := 1000 100000 1000000

BENCH_ROWS_LIST := 100000 1000000
bench-prepare: ##@Benchmarks Create CSV files
$(call title,"PHP Benchmarks - Prepare CSV files")
exit 1; # Disabled for now. Enable if you need to generate CSV files.
@echo "Remove old CSV files"
@mkdir -pv ./build/bench/
@rm -fv ./build/bench/*.csv
@$(foreach rows,$(BENCH_ROWS), \
mkdir -pv ./build/bench/
rm -fv ./build/bench/*.csv
@$(foreach rows,$(BENCH_ROWS_LIST), \
echo "Generate CSV: rows=$(rows)"; \
${BENCH_BIN} -H --columns=1 --rows=$(rows) -q & \
${BENCH_BIN} -H --columns=3 --rows=$(rows) -q & \
${BENCH_BIN} -H --columns=5 --rows=$(rows) -q & \
${BENCH_BIN} -H --columns=10 --rows=$(rows) -q & \
${BENCH_BIN} -H --columns=20 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=1 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=3 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=5 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=10 --rows=$(rows) -q & \
$(BENCH_BIN) -H --columns=20 --rows=$(rows) -q & \
wait; \
echo "Generate CSV: rows=$(rows) - done"; \
)
@ls -lh ./build/bench/*.csv;
ls -lh ./build/bench/*.csv;

0 comments on commit 6a5a291

Please sign in to comment.