Skip to content

Commit

Permalink
Refactor CSV validation and update Docker settings
Browse files Browse the repository at this point in the history
Optimizes the CSV validation process by improving both sequential and parallel validation. Also changes the Docker configuration, mainly the PHP INI settings and the preparation steps, including cache warmup and script permission changes. Enhances the Dockerfile to generate random CSV data for testing and to perform environment cleanup such as removing git files and clearing the Composer cache.
  • Loading branch information
SmetDenis committed Apr 10, 2024
1 parent aa611fd commit bf9c31c
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 96 deletions.
25 changes: 11 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,10 @@ WORKDIR /app
ENV COMPOSER_ALLOW_SUPERUSER=1
COPY . /app
COPY --from=preparatory /tmp/.version /app/.version
RUN composer install --no-dev \
--classmap-authoritative \
--no-progress \
--no-suggest \
--optimize-autoloader \
&& rm -rf ./.git \
&& composer clear-cache \
&& chmod +x ./csv-blueprint \
RUN composer install --no-dev --classmap-authoritative --no-progress \
&& rm -rf ./.git \
&& composer clear-cache \
&& chmod +x ./csv-blueprint \
&& chmod +x ./docker/entrypoint.sh

RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
Expand All @@ -44,19 +40,20 @@ COPY ./docker/php.ini /usr/local/etc/php/conf.d/docker-z99-php.ini

# Warmup caches
RUN php ./docker/random-csv.php \
&& ./csv-blueprint validate:csv \
&& JBZOO_BUILD_PRELOADER=1 \
./csv-blueprint validate:csv \
--schema=/app/schema-examples/full.yml \
--csv=/app/docker/random_data.csv \
--apply-all=yes \
--report=text --mute-errors > /dev/null \
&& rm ./docker/random_data.csv \
&& php ./docker/build-preloader.php \
&& php ./docker/preload.php \
&& du -sh /app/docker \
&& echo "opcache.preload=/app/docker/preload.php" >> /usr/local/etc/php/conf.d/docker-z99-php.ini
&& du -sh /app/docker
# && echo "opcache.preload=/app/docker/preload.php" >> /usr/local/etc/php/conf.d/docker-z99-php.ini

# Quick test
RUN time ./csv-blueprint validate:csv --help --ansi
RUN time ./csv-blueprint validate:csv --help

ENTRYPOINT ["/app/docker/entrypoint.sh"]
CMD [""]
#ENTRYPOINT ["/app/docker/entrypoint.sh"]
ENTRYPOINT ["/app/csv-blueprint"]
2 changes: 1 addition & 1 deletion csv-blueprint.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
throw new Exception('This script must be run from the command line.');
}

WorkerPool::setBootstrap(__DIR__ . '/vendor/autoload.php');
WorkerPool::setBootstrap(__DIR__ . '/docker/preload.php');

// Fix for GitHub actions. See action.yml
$_SERVER['argv'] = Utils::fixArgv($_SERVER['argv'] ?? []);
Expand Down
26 changes: 9 additions & 17 deletions docker/build-preloader.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,13 @@
$header = <<<'TEXT'
<?php
declare(strict_types=1);
\set_error_handler(static function ($severity, $message, $file, $line): void {
throw new \ErrorException($message, 0, $severity, $file, $line);
});
if (!\function_exists('opcache_compile_file') || !\ini_get('opcache.enable')) {
echo 'Opcache is not available.';
die(1);
}
if ('cli' === \PHP_SAPI && !\ini_get('opcache.enable_cli')) {
echo 'Opcache is not enabled for CLI applications.';
die(2);
}
// if (!\function_exists('opcache_compile_file') ||
// !\ini_get('opcache.enable') ||
// !\ini_get('opcache.enable_cli')
// ) {
// echo 'Opcache is not available.';
// die(1);
// }
TEXT;

Expand All @@ -51,8 +43,8 @@
}
}

$result[] = "\\opcache_compile_file('{$path}');";
// $result[] = "require_once '{$path}';";
// $result[] = "\\opcache_compile_file('{$path}');";
$result[] = "require_once '{$path}';";
}

\file_put_contents(__DIR__ . '/preload.php', \implode(\PHP_EOL, $result) . \PHP_EOL);
14 changes: 13 additions & 1 deletion docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
#!/bin/sh
#!/usr/bin/env sh

#
# JBZoo Toolbox - Csv-Blueprint.
#
# This file is part of the JBZoo Toolbox project.
# For the full copyright and license information, please view the LICENSE
# file that was distributed with this source code.
#
# @license MIT
# @copyright Copyright (C) JBZoo.com, All rights reserved.
# @see https://github.com/JBZoo/Csv-Blueprint
#

# Check for the presence of the "--parallel" option in the command line arguments
# If the option is present, disable the opcache for the script execution
Expand Down
32 changes: 15 additions & 17 deletions docker/php.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,37 @@ opcache.interned_strings_buffer = 32
opcache.max_accelerated_files = 10000
opcache.fast_shutdown = 1
opcache.save_comments = 1
opcache.jit_buffer_size=200M
opcache.jit=tracing
opcache.jit_buffer_size = 100M
opcache.jit = tracing

;opcache.memory_consumption = 200
opcache.memory_consumption = 200

; Experimental: for a really quick script start.
; Save opcache data as files on disk inside the Docker image
opcache.memory_consumption = 0
opcache.lockfile_path = /app/docker/opcache.lock
opcache.file_cache = /app/docker/
opcache.file_cache_only = 1
;opcache.memory_consumption = 0
;opcache.lockfile_path = /app/docker/opcache.lock
;opcache.file_cache = /app/docker/
;opcache.file_cache_only = 1

; Enable aggressive opcache optimization
opcache.validate_timestamps = 0
opcache.revalidate_freq = 0
opcache.use_cwd = 0
opcache.file_cache_consistency_checks = 0
opcache.validate_permission = 0
opcache.validate_root = 0
opcache.revalidate_freq = 0
opcache.validate_timestamps = 0
opcache.validate_permission = 0
opcache.enable_file_override = 0
opcache.file_cache_consistency_checks = 0

; Base limits
max_execution_time=1200
max_input_time=60
memory_limit=2G
max_execution_time = 3600
memory_limit = 2G
realpath_cache_size = 64M
realpath_cache_ttl = 100000
allow_url_fopen=0
allow_url_include=0
allow_url_fopen = 0
allow_url_include = 0

error_reporting = E_ALL
display_errors = On
display_startup_errors = On


;opcache.preload=/app/docker/preload.php
9 changes: 9 additions & 0 deletions src/Commands/AbstractValidate.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
use JBZoo\CsvBlueprint\Utils;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
use JBZoo\CsvBlueprint\Workers\WorkerPool;
use JBZoo\Utils\Env;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Finder\SplFileInfo;

use function JBZoo\Data\phpArray;
use function JBZoo\Utils\bool;

/**
Expand Down Expand Up @@ -222,6 +224,13 @@ protected function printDumpOfSchema(?string $schemaFilename): void
}
}

/**
 * Experimental helper for building the preloader file list.
 *
 * When the JBZOO_BUILD_PRELOADER environment flag evaluates to true, the
 * list of files PHP has included so far (as reported by get_included_files())
 * is written to docker/included_files.php, resolved relative to this class
 * file. Without the flag this method does nothing.
 *
 * NOTE(review): the string cast of phpArray() presumably renders the list as
 * PHP source code - confirm against the JBZoo\Data implementation.
 */
protected static function dumpPreloader(): void
{
    // Only dump when explicitly requested through the environment.
    if (!Env::bool('JBZOO_BUILD_PRELOADER')) {
        return;
    }

    $targetFile    = __DIR__ . '/../../docker/included_files.php';
    $includedFiles = \get_included_files();

    \file_put_contents($targetFile, (string)phpArray($includedFiles));
}

protected static function renderPrefix(int $index, int $totalFiles): string
{
if ($totalFiles <= 1) {
Expand Down
49 changes: 32 additions & 17 deletions src/Commands/ValidateCsv.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Finder\SplFileInfo;

use function JBZoo\Data\phpArray;
use function JBZoo\Utils\bool;

/**
Expand Down Expand Up @@ -128,7 +127,7 @@ protected function executeAction(): int
$matchedFiles,
);

\file_put_contents(__DIR__ . '/../../docker/included_files.php', (string)phpArray(\get_included_files()));
self::dumpPreloader(); // Experimental feature

return $exitCode;
}
Expand Down Expand Up @@ -188,7 +187,6 @@ private function validateCsvFiles(array $matchedFiles): array
$totalFiles = $matchedFiles['count_pairs'];
$invalidFiles = 0;
$errorCounter = 0;
$errorSuite = null;
$quickCheck = $this->isQuickMode();

$workerPool = new WorkerPool($this->getNumberOfThreads());
Expand All @@ -197,30 +195,45 @@ private function validateCsvFiles(array $matchedFiles): array
$workerPool->addTask("{$csv} => {$schema}", ValidationCsvTask::class, [$csv, $schema, $quickCheck]);
}
}
$reports = $workerPool->run();

$this->out("CSV file validation: {$totalFiles}");

$index = 0;
$currentSchema = null;
foreach ($reports as $pair => $errorSuite) {
$currentSchemaFilename = null;

$exectionCallback = function (
string $pair,
ErrorSuite $errorSuite,
) use (
&$index,
&$currentSchemaFilename,
&$invalidFiles,
&$errorCounter,
$totalFiles,
$quickCheck
): void {
$index++;
$filesAsKey = \explode(' => ', $pair, 2);
if (\count($filesAsKey) > 1) {
[$csvFilename, $schemaFilename] = $filesAsKey;
} else {
throw new Exception("Invalid pair: {$pair}");
}

[$csv, $schema] = \explode(' => ', $pair, 2);

if ($currentSchema !== $schema) {
$currentSchema = $schema;
if ($currentSchemaFilename !== $schemaFilename) {
$currentSchemaFilename = $schemaFilename;
if ($index !== 1) { // Add empty line between schema files
$this->out('');
}
$this->out('Schema: ' . Utils::printFile($schema));
$this->out('Schema: ' . Utils::printFile($schemaFilename));
}

$prefix = AbstractValidate::renderPrefix($index, $totalFiles);
$currentCsvTitle = Utils::printFile($csv, 'blue') . '; Size: ' . Utils::getFileSize($csv);
$currentCsvTitle = Utils::printFile($csvFilename, 'blue') . '; Size: ' . Utils::getFileSize($csvFilename);

if ($quickCheck && $errorSuite !== null && $errorSuite->count() > 0) {
if ($quickCheck && $errorSuite->count() > 0) {
$this->out("<yellow>Skipped (Quick mode)</yellow> {$currentCsvTitle}", 2);
continue;
return;
}

if ($errorSuite->count() > 0) {
Expand All @@ -232,7 +245,9 @@ private function validateCsvFiles(array $matchedFiles): array
} else {
$this->out("{$prefix}<green>OK</green> {$currentCsvTitle}", 2);
}
}
};

$workerPool->run($exectionCallback);

return [$invalidFiles, $errorCounter];
}
Expand Down Expand Up @@ -272,7 +287,7 @@ private function printSummary(

if ($errorInSchemaCounter > 0) {
$this->out("Found <c>{$errorInSchemaCounter}</c> issues in {$totalSchemaFiles} schemas.", $indent);
} else {
} elseif (!$this->isQuickMode()) {
$this->out("<green>No issues in {$totalSchemaFiles} schemas.</green>", $indent);
}

Expand All @@ -282,7 +297,7 @@ private function printSummary(
"out of {$totalCsvFiles} CSV files.",
$indent,
);
} else {
} elseif (!$this->isQuickMode()) {
$this->out("<green>No issues in {$totalCsvFiles} CSV files.</green>", $indent);
}

Expand Down
40 changes: 21 additions & 19 deletions src/Commands/ValidateSchema.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
namespace JBZoo\CsvBlueprint\Commands;

use JBZoo\CsvBlueprint\Utils;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
use JBZoo\CsvBlueprint\Workers\Tasks\ValidationSchemaTask;
use JBZoo\CsvBlueprint\Workers\WorkerPool;
use Symfony\Component\Console\Input\InputOption;
Expand Down Expand Up @@ -69,27 +70,28 @@ protected function executeAction(): int
$workerPool->addTask($filename, ValidationSchemaTask::class, [$filename]);
}

$reports = $workerPool->run();

$foundIssues = 0;
$index = 0;
foreach ($reports as $filename => $schemaErrors) {
$filename = (string)$filename;
$index++;
$prefix = self::renderPrefix($index, $totalFiles);
$coloredPath = Utils::printFile($filename);

if ($schemaErrors->count() > 0) {
$this->renderIssues($prefix, $schemaErrors->count(), $coloredPath);
$this->outReport($schemaErrors, 2);
} else {
$this->out("{$prefix}<green>OK</green> {$coloredPath}");
}

$this->printDumpOfSchema($filename);

$foundIssues += $schemaErrors->count();
}
$workerPool->run(
function (string $filename, ErrorSuite $schemaErrors) use (&$index, &$foundIssues, $totalFiles): void {
$index++;
$prefix = self::renderPrefix($index, $totalFiles);
$coloredPath = Utils::printFile($filename);

if ($schemaErrors->count() > 0) {
$this->renderIssues($prefix, $schemaErrors->count(), $coloredPath);
$this->outReport($schemaErrors, 2);
} else {
$this->out("{$prefix}<green>OK</green> {$coloredPath}");
}

$this->printDumpOfSchema($filename);

$foundIssues += $schemaErrors->count();
},
);

self::dumpPreloader();

return $foundIssues === 0 ? self::SUCCESS : self::FAILURE;
}
Expand Down
Loading

0 comments on commit bf9c31c

Please sign in to comment.