Skip to content

Commit

Permalink
Improve filename pattern validation and adjust opcache settings
Browse files Browse the repository at this point in the history
Enhanced error handling in filename pattern validation to identify regex related exceptions. Updated PHP settings for opcache to ensure thread-safety and prevent segmentation faults during parallel execution. Included modifications to basic configurations and addition of experimental elements in the php.ini file.
  • Loading branch information
SmetDenis committed Apr 11, 2024
1 parent 9f2883f commit d0f16ba
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 26 deletions.
12 changes: 0 additions & 12 deletions csv-blueprint.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,13 @@

namespace JBZoo\CsvBlueprint;

use JBZoo\CsvBlueprint\Workers\WorkerPool;

\define('PATH_ROOT', __DIR__);
require_once PATH_ROOT . '/vendor/autoload.php';

if ('cli' !== \PHP_SAPI) {
throw new Exception('This script must be run from the command line.');
}

WorkerPool::setBootstrap(
\file_exists(PATH_ROOT . '/docker/preload.php')
? PATH_ROOT . '/docker/preload.php'
: PATH_ROOT . '/vendor/autoload.php',
);

// Fix for GitHub actions. See action.yml
$_SERVER['argv'] = Utils::fixArgv($_SERVER['argv'] ?? []);
$_SERVER['argc'] = \count($_SERVER['argv']);

Utils::init();

(new CliApplication('CSV Blueprint', Utils::getVersion(true)))
Expand Down
3 changes: 2 additions & 1 deletion docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

# Check for the presence of the "--parallel" option in the command line arguments
# If the option is present, disable the opcache for the script execution
# This is necessary because the opcache is not thread-safe and will cause segfaults
# This is necessary because the opcache is not thread-safe and will cause segfaults.
# We have to debug the segfaults and fix them, but for now, this is a workaround.
if [[ " $* " =~ " --parallel" ]] || [[ " $* " =~ " --parallel=" ]] || [[ " $* " =~ " --parallel " ]]; then
php -d opcache.enable_cli=0 /app/csv-blueprint.php "$@"
else
Expand Down
13 changes: 10 additions & 3 deletions docker/php.ini
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ opcache.memory_consumption = 200

; Experimantal for really quick script start.
; Save opcache data as files on disk inside the Docker image
; Doesn't work properly in multi-trhead mode.
;opcache.memory_consumption = 0
;opcache.lockfile_path = /app/docker/opcache.lock
;opcache.file_cache = /app/docker/
Expand All @@ -39,17 +40,23 @@ opcache.validate_timestamps = 0
opcache.validate_permission = 0
opcache.enable_file_override = 0
opcache.file_cache_consistency_checks = 0
realpath_cache_size = 64M
realpath_cache_ttl = 100000

; Base limits
; Base config
max_execution_time = 3600
memory_limit = 2G
realpath_cache_size = 64M
realpath_cache_ttl = 100000
date.timezone = UTC
precision = 14

; Security things
allow_url_fopen = 0
allow_url_include = 0

; Error handling
error_reporting = E_ALL
display_errors = On
display_startup_errors = On

; Experimental
;opcache.preload=/app/docker/preload.php
2 changes: 1 addition & 1 deletion phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
</include>
<report>
<clover outputFile="build/coverage_xml/main.xml"/>
<html outputDirectory="build/coverage_html" lowUpperBound="75" highLowerBound="95"/>
<html outputDirectory="build/coverage_html"/>
<php outputFile="build/coverage_cov/main.cov"/>
</report>
</coverage>
Expand Down
23 changes: 21 additions & 2 deletions src/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

namespace JBZoo\CsvBlueprint;

use JBZoo\CsvBlueprint\Workers\WorkerPool;
use JBZoo\Utils\Cli;
use JBZoo\Utils\Env;
use JBZoo\Utils\FS;
Expand Down Expand Up @@ -298,7 +299,11 @@ public static function testRegex(?string $regex, string $subject): bool
return false;
}

return \preg_match($regex, $subject) === 0;
try {
return \preg_match($regex, $subject) === 0;
} catch (\Throwable $exception) {
throw new Exception("Invalid regex: \"{$regex}\". Error: \"{$exception->getMessage()}\"");
}
}

/**
Expand Down Expand Up @@ -492,9 +497,23 @@ public static function getDebugMode(): bool
return self::$debugMode;
}

/**
* @SuppressWarnings(PHPMD.Superglobals)
*/
public static function init(): void
{
// Set default timezone
// Fix for GitHub actions. See action.yml
$_SERVER['argv'] = self::fixArgv($_SERVER['argv'] ?? []);
$_SERVER['argc'] = \count($_SERVER['argv']);

// Init WorkerPool aotoloader (experimental feature)
WorkerPool::setBootstrap(
\file_exists(__DIR__ . '/../docker/preload.php')
? __DIR__ . '/../docker/preload.php'
: __DIR__ . '/../vendor/autoload.php',
);

// Set default timezone to compare dates in UTC by default
\date_default_timezone_set('UTC');

// Convert all errors to exceptions. Looks like we have critical case, and we need to stop or handle it.
Expand Down
28 changes: 21 additions & 7 deletions src/Validators/ValidatorCsv.php
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,29 @@ private function validateFile(bool $quickStop = false): ErrorSuite
$errors = new ErrorSuite();

$filenamePattern = $this->schema->getFilenamePattern();
if (
$filenamePattern !== null
&& $filenamePattern !== ''
&& Utils::testRegex($filenamePattern, $this->csv->getCsvFilename())
) {

try {
if (
$filenamePattern !== null
&& $filenamePattern !== ''
&& Utils::testRegex($filenamePattern, $this->csv->getCsvFilename())
) {
$error = new Error(
'filename_pattern',
'Filename "<c>' . Utils::cutPath($this->csv->getCsvFilename()) . '</c>" ' .
"does not match pattern: \"<c>{$filenamePattern}</c>\"",
);

$errors->addError($error);

if ($quickStop && $errors->count() > 0) {
return $errors;
}
}
} catch (\Exception $e) {
$error = new Error(
'filename_pattern',
'Filename "<c>' . Utils::cutPath($this->csv->getCsvFilename()) . '</c>" ' .
"does not match pattern: \"<c>{$filenamePattern}</c>\"",
'Filename pattern error: ' . $e->getMessage(),
);

$errors->addError($error);
Expand Down
11 changes: 11 additions & 0 deletions tests/Validators/CsvValidatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -582,4 +582,15 @@ public function testRequiredColumnInvalid(): void
isSame([0 => 'Name', 3 => 'Birthday'], $csv->getColumnsMappedByHeaderNamesOnly());
isSame(null, $csv->validate()->render(cleanOutput: true));
}

public function testInvalidRegex(): void
{
$csv = new CsvFile(Tools::DEMO_CSV, ['filename_pattern' => '/.*())))\.csv$/']);

isSame(
'"filename_pattern". Filename pattern error: Invalid regex: "/.*())))\.csv$/". ' .
'Error: "preg_match(): Compilation failed: unmatched closing parenthesis at offset 4".',
$csv->validate()->render(cleanOutput: true),
);
}
}

0 comments on commit d0f16ba

Please sign in to comment.