diff --git a/.github/workflows/demo.yml b/.github/workflows/demo.yml index 36533d0f..fcdbf71d 100644 --- a/.github/workflows/demo.yml +++ b/.github/workflows/demo.yml @@ -105,13 +105,12 @@ jobs: - name: 👎 Invalid CSV file run: | - docker run \ + ! docker run \ -v `pwd`:/parent-host \ --rm jbzoo/csv-blueprint \ validate:csv \ --csv=/parent-host/tests/fixtures/batch/*.csv \ --schema=/parent-host/tests/schemas/demo_invalid.yml - continue-on-error: true phar: @@ -138,11 +137,10 @@ jobs: - name: 👎 Invalid CSV file run: | - ./build/csv-blueprint.phar \ + ! ./build/csv-blueprint.phar \ validate:csv \ --csv=./tests/fixtures/batch/*.csv \ --schema=./tests/schemas/demo_invalid.yml - continue-on-error: true php: @@ -169,8 +167,7 @@ jobs: - name: 👎 Invalid CSV file run: | - ./csv-blueprint \ + ! ./csv-blueprint \ validate:csv \ --csv=./tests/fixtures/batch/*.csv \ --schema=./tests/schemas/demo_invalid.yml - continue-on-error: true diff --git a/README.md b/README.md index bb648791..c88fea3e 100644 --- a/README.md +++ b/README.md @@ -254,9 +254,16 @@ Found CSV files: 3 | 7 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 | +------+------------+------------+----- demo-2.csv ---------------------------------------+ -(3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv +(3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv ++------+-----------+------------------+---- demo-3.csv -------------------------------------------+ +| Line | id:Column | Rule | Message | ++------+-----------+------------------+-----------------------------------------------------------+ +| 0 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not | +| | | | match pattern: "/demo-[12].csv$/i" | ++------+-----------+------------------+---- demo-3.csv -------------------------------------------+ -Found 7 issues in 2 out of 3 CSV files. + +Found 8 issues in 3 out of 3 CSV files. 
``` @@ -307,6 +314,11 @@ This gives you great flexibility when validating CSV files. ```yml # It's a full example of the CSV schema file in YAML format. +# Regular expression to match the file name. If not set, then no pattern check +# This way you can validate the file name before the validation process. +# Feel free to check parent directories as well. +filename_pattern: /demo(-\d+)?\.csv$/i + csv: # Here are default values. You can skip this section if you don't need to override the default values header: true # If the first row is a header. If true, name of each column is required delimiter: , # Delimiter character in CSV file @@ -362,6 +374,8 @@ columns: cardinal_direction: true # Valid cardinal direction. Examples: "N", "S", "NE", "SE", "none", "" usa_market_name: true # Check if the value is a valid USA market name. Example: "New York, NY" + - name: "another_column" + ``` @@ -370,7 +384,8 @@ columns: ```json { - "csv" : { + "filename_pattern" : "/demo(-\\d+)?\\.csv$/i", + "csv" : { "header" : true, "delimiter" : ",", "quote_char" : "\\", @@ -378,7 +393,7 @@ columns: "encoding" : "utf-8", "bom" : false }, - "columns" : [ + "columns" : [ { "name" : "csv_header_name", "description" : "Lorem ipsum", @@ -412,7 +427,8 @@ columns: "cardinal_direction" : true, "usa_market_name" : true } - } + }, + {"name" : "another_column"} ] } @@ -422,6 +438,7 @@ columns: +
Click to see: PHP Format @@ -430,6 +447,8 @@ columns: declare(strict_types=1); return [ + 'filename_pattern' => '/demo(-\\d+)?\\.csv$/i', + 'csv' => [ 'header' => true, 'delimiter' => ',', @@ -438,6 +457,7 @@ return [ 'encoding' => 'utf-8', 'bom' => false, ], + 'columns' => [ [ 'name' => 'csv_header_name', @@ -473,6 +493,7 @@ return [ 'usa_market_name' => true, ], ], + ['name' => 'another_column'], ], ]; @@ -481,6 +502,7 @@ return [
+ ## Coming soon It's random ideas and plans. No orderings and deadlines. But batch processing is the priority #1. @@ -494,7 +516,7 @@ Batch processing * [ ] Discovering CSV files by `filename_pattern` in the schema file. In case you have a lot of schemas and a lot of CSV files and want to automate the process as one command. Validation -* [ ] `filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`"). +* [x] ~~`filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").~~ * [ ] Agregate rules (like "at least one of the fields should be not empty" or "all values must be unique"). * [ ] Handle empty files and files with only a header row, or only with one line of data. One column wthout header is also possible. * [ ] Using multiple schemas for one csv file. diff --git a/schema-examples/full.json b/schema-examples/full.json index 51bf4158..17131304 100644 --- a/schema-examples/full.json +++ b/schema-examples/full.json @@ -1,5 +1,6 @@ { - "csv" : { + "filename_pattern" : "/demo(-\\d+)?\\.csv$/i", + "csv" : { "header" : true, "delimiter" : ",", "quote_char" : "\\", @@ -7,7 +8,7 @@ "encoding" : "utf-8", "bom" : false }, - "columns" : [ + "columns" : [ { "name" : "csv_header_name", "description" : "Lorem ipsum", @@ -41,6 +42,7 @@ "cardinal_direction" : true, "usa_market_name" : true } - } + }, + {"name" : "another_column"} ] } diff --git a/schema-examples/full.php b/schema-examples/full.php index b5bae14a..66c7542a 100644 --- a/schema-examples/full.php +++ b/schema-examples/full.php @@ -15,6 +15,8 @@ declare(strict_types=1); return [ + 'filename_pattern' => '/demo(-\\d+)?\\.csv$/i', + 'csv' => [ 'header' => true, 'delimiter' => ',', @@ -23,6 +25,7 @@ 'encoding' => 'utf-8', 'bom' => false, ], + 'columns' => [ [ 'name' => 'csv_header_name', @@ -58,5 +61,6 @@ 'usa_market_name' => true, ], ], + ['name' => 'another_column'], ], ]; 
diff --git a/schema-examples/full.yml b/schema-examples/full.yml index 402d526d..98036c1c 100644 --- a/schema-examples/full.yml +++ b/schema-examples/full.yml @@ -12,6 +12,11 @@ # It's a full example of the CSV schema file in YAML format. +# Regular expression to match the file name. If not set, then no pattern check +# This way you can validate the file name before the validation process. +# Feel free to check parent directories as well. +filename_pattern: /demo(-\d+)?\.csv$/i + csv: # Here are default values. You can skip this section if you don't need to override the default values header: true # If the first row is a header. If true, name of each column is required delimiter: , # Delimiter character in CSV file @@ -66,3 +71,5 @@ columns: is_longitude: true # Can be integer or float. Example: -89.123456 cardinal_direction: true # Valid cardinal direction. Examples: "N", "S", "NE", "SE", "none", "" usa_market_name: true # Check if the value is a valid USA market name. Example: "New York, NY" + + - name: "another_column" diff --git a/src/Csv/CsvFile.php b/src/Csv/CsvFile.php index c6372e2c..65d52eb1 100644 --- a/src/Csv/CsvFile.php +++ b/src/Csv/CsvFile.php @@ -17,6 +17,7 @@ namespace JBZoo\CsvBlueprint\Csv; use JBZoo\CsvBlueprint\Schema; +use JBZoo\CsvBlueprint\Utils; use JBZoo\CsvBlueprint\Validators\Error; use JBZoo\CsvBlueprint\Validators\ErrorSuite; use League\Csv\Reader as LeagueReader; @@ -82,7 +83,9 @@ public function validate(bool $quickStop = false): ErrorSuite { $errors = new ErrorSuite($this->getCsvFilename()); - $errors->addErrorSuit($this->validateHeader()) + $errors + ->addErrorSuit($this->validateFile($quickStop)) + ->addErrorSuit($this->validateHeader($quickStop)) ->addErrorSuit($this->validateEachCell($quickStop)) ->addErrorSuit(self::validateAggregateRules($quickStop)); @@ -106,7 +109,7 @@ private function prepareReader(): LeagueReader return $reader; } - private function validateHeader(): ErrorSuite + private function validateHeader(bool 
$quickStop = false): ErrorSuite { $errors = new ErrorSuite(); @@ -125,6 +128,10 @@ private function validateHeader(): ErrorSuite $errors->addError($error); } + + if ($quickStop && $errors->count() > 0) { + return $errors; + } } return $errors; @@ -152,6 +159,34 @@ private function validateEachCell(bool $quickStop = false): ErrorSuite return $errors; } + private function validateFile(bool $quickStop = false): ErrorSuite + { + $errors = new ErrorSuite(); + + $filenamePattern = $this->schema->getFilenamePattern(); + if ( + $filenamePattern !== null + && $filenamePattern !== '' + && \preg_match($filenamePattern, $this->csvFilename) !== 1 /* 0 = no match, false = invalid pattern; neither may pass as a match */ + ) { + $error = new Error( + 'filename_pattern', + 'Filename "' . Utils::cutPath($this->csvFilename) . + "\" does not match pattern: \"{$filenamePattern}\"", + '', + 0, + ); + + $errors->addError($error); + + if ($quickStop && $errors->count() > 0) { + return $errors; + } + } + + return $errors; + } + private static function validateAggregateRules(bool $quickStop = false): ErrorSuite { $errors = new ErrorSuite(); diff --git a/src/Schema.php b/src/Schema.php index 1ebf508f..815d6708 100644 --- a/src/Schema.php +++ b/src/Schema.php @@ -114,9 +114,9 @@ public function getColumn(int|string $columNameOrId): ?Column return $column; } - public function getFinenamePattern(): ?string + public function getFilenamePattern(): ?string { - return $this->data->getStringNull('finename_pattern'); + return Utils::prepareRegex($this->data->getStringNull('filename_pattern')); } public function getIncludes(): array diff --git a/src/Utils.php b/src/Utils.php index b18b8a70..02b99f78 100644 --- a/src/Utils.php +++ b/src/Utils.php @@ -47,7 +47,7 @@ public static function prepareRegex(?string $pattern, string $addDelimiter = '/' } } - return $addDelimiter . $pattern . $addDelimiter . 'u'; + return $addDelimiter . $pattern . 
$addDelimiter; } /** diff --git a/tests/Blueprint/MiscTest.php b/tests/Blueprint/MiscTest.php index 8c114b5d..04b0d277 100644 --- a/tests/Blueprint/MiscTest.php +++ b/tests/Blueprint/MiscTest.php @@ -47,7 +47,7 @@ public function testPrepareRegex(): void { isSame(null, Utils::prepareRegex(null)); isSame(null, Utils::prepareRegex('')); - isSame('/.*/u', Utils::prepareRegex('.*')); + isSame('/.*/', Utils::prepareRegex('.*')); isSame('#.*#u', Utils::prepareRegex('#.*#u')); isSame('/.*/', Utils::prepareRegex('/.*/')); isSame('/.*/ius', Utils::prepareRegex('/.*/ius')); diff --git a/tests/Blueprint/RulesTest.php b/tests/Blueprint/RulesTest.php index 43eb878f..69f97883 100644 --- a/tests/Blueprint/RulesTest.php +++ b/tests/Blueprint/RulesTest.php @@ -591,7 +591,7 @@ public function testRegex(): void isSame(null, $rule->validate('aaa')); isSame(null, $rule->validate('a')); isSame( - '"regex" at line 0, column "prop". Value "1bc" does not match the pattern "/^a/u".', + '"regex" at line 0, column "prop". 
Value "1bc" does not match the pattern "/^a/".', \strip_tags((string)$rule->validate('1bc')), ); } diff --git a/tests/Blueprint/SchemaTest.php b/tests/Blueprint/SchemaTest.php index aea2416a..98e85a44 100644 --- a/tests/Blueprint/SchemaTest.php +++ b/tests/Blueprint/SchemaTest.php @@ -44,10 +44,10 @@ public function testFilename(): void public function testGetFinenamePattern(): void { $schemaEmpty = new Schema(self::SCHEMA_EXAMPLE_EMPTY); - isSame(null, $schemaEmpty->getFinenamePattern()); + isSame(null, $schemaEmpty->getFilenamePattern()); $schemaFull = new Schema(self::SCHEMA_EXAMPLE_FULL); - isSame('^example\.csv$', $schemaFull->getFinenamePattern()); + isSame('/^example\.csv$/', $schemaFull->getFilenamePattern()); } public function testScvStruture(): void diff --git a/tests/Blueprint/ValidateCsvTest.php b/tests/Blueprint/ValidateCsvTest.php index 6ef0cb9e..a7bc0ec8 100644 --- a/tests/Blueprint/ValidateCsvTest.php +++ b/tests/Blueprint/ValidateCsvTest.php @@ -64,24 +64,26 @@ public function testValidateOneFileNegativeTable(): void Found CSV files: 1 (1/1) Invalid file: ./tests/fixtures/demo.csv - +------+------------------+--------------+-- demo.csv --------------------------------------------+ - | Line | id:Column | Rule | Message | - +------+------------------+--------------+--------------------------------------------------------+ - | 5 | 2:Float | max | Value "74605.944" is greater than "74605" | - | 5 | 4:Favorite color | allow_values | Value "blue" is not allowed. Allowed values: ["red", | - | | | | "green", "Blue"] | - | 6 | 0:Name | min_length | Value "Carl" (length: 4) is too short. 
Min length is 5 | - | 6 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date | - | | | | "1955-05-15T00:00:00.000+00:00" | - | 8 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date | - | | | | "1955-05-15T00:00:00.000+00:00" | - | 9 | 3:Birthday | max_date | Value "2010-07-20" is more than the maximum date | - | | | | "2009-01-01T00:00:00.000+00:00" | - | 11 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 | - +------+------------------+--------------+-- demo.csv --------------------------------------------+ - - - Found 7 issues in CSV file. + +------+------------------+------------------+--- demo.csv -------------------------------------------------+ + | Line | id:Column | Rule | Message | + +------+------------------+------------------+--------------------------------------------------------------+ + | 0 | | filename_pattern | Filename "./tests/fixtures/demo.csv" does not match pattern: | + | | | | "/demo-[12].csv$/i" | + | 5 | 2:Float | max | Value "74605.944" is greater than "74605" | + | 5 | 4:Favorite color | allow_values | Value "blue" is not allowed. Allowed values: ["red", | + | | | | "green", "Blue"] | + | 6 | 0:Name | min_length | Value "Carl" (length: 4) is too short. Min length is 5 | + | 6 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date | + | | | | "1955-05-15T00:00:00.000+00:00" | + | 8 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date | + | | | | "1955-05-15T00:00:00.000+00:00" | + | 9 | 3:Birthday | max_date | Value "2010-07-20" is more than the maximum date | + | | | | "2009-01-01T00:00:00.000+00:00" | + | 11 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 | + +------+------------------+------------------+--- demo.csv -------------------------------------------------+ + + + Found 8 issues in CSV file. 
TXT; @@ -103,7 +105,7 @@ public function testValidateManyFileNegativeTable(): void $expected = <<<'TXT' Schema: ./tests/schemas/demo_invalid.yml Found CSV files: 3 - + (1/3) Invalid file: ./tests/fixtures/batch/demo-1.csv +------+------------------+--------------+ demo-1.csv ------------------------------------------+ | Line | id:Column | Rule | Message | @@ -127,9 +129,16 @@ public function testValidateManyFileNegativeTable(): void | 7 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 | +------+------------+------------+----- demo-2.csv ---------------------------------------+ - (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv + (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv + +------+-----------+------------------+---- demo-3.csv -------------------------------------------+ + | Line | id:Column | Rule | Message | + +------+-----------+------------------+-----------------------------------------------------------+ + | 0 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not | + | | | | match pattern: "/demo-[12].csv$/i" | + +------+-----------+------------------+---- demo-3.csv -------------------------------------------+ + - Found 7 issues in 2 out of 3 CSV files. + Found 8 issues in 3 out of 3 CSV files. TXT; @@ -152,6 +161,7 @@ public function testValidateOneFileNegativeText(): void Found CSV files: 1 (1/1) Invalid file: ./tests/fixtures/demo.csv + "filename_pattern" at line 0, column "". Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i". "max" at line 5, column "2:Float". Value "74605.944" is greater than "74605". "allow_values" at line 5, column "4:Favorite color". Value "blue" is not allowed. Allowed values: ["red", "green", "Blue"]. "min_length" at line 6, column "0:Name". Value "Carl" (length: 4) is too short. Min length is 5. @@ -161,7 +171,7 @@ public function testValidateOneFileNegativeText(): void "min_length" at line 11, column "0:Name". 
Value "Lois" (length: 4) is too short. Min length is 5. - Found 7 issues in CSV file. + Found 8 issues in CSV file. TXT; @@ -249,9 +259,11 @@ public function testValidateManyFilesNegativeTextQuick(): void "max_date" at line 5, column "3:Birthday". Value "2010-07-20" is more than the maximum date "2009-01-01T00:00:00.000+00:00". "min_length" at line 7, column "0:Name". Value "Lois" (length: 4) is too short. Min length is 5. - (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv + (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv + "filename_pattern" at line 0, column "". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i". - Found 7 issues in 2 out of 3 CSV files. + + Found 8 issues in 3 out of 3 CSV files. TXT; @@ -319,9 +331,20 @@ public function testCreateValidateNegativeTeamcity(): void ##teamcity[testSuiteFinished name='demo-2.csv' flowId='42'] - (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv + (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv + + ##teamcity[testCount count='1' flowId='42'] + + ##teamcity[testSuiteStarted name='demo-3.csv' flowId='42'] + + ##teamcity[testStarted name='filename_pattern at column' locationHint='php_qn://./tests/fixtures/batch/sub/demo-3.csv' flowId='42'] + "filename_pattern" at line 0, column "". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i". + ##teamcity[testFinished name='filename_pattern at column' flowId='42'] + + ##teamcity[testSuiteFinished name='demo-3.csv' flowId='42'] + - Found 7 issues in 2 out of 3 CSV files. + Found 8 issues in 3 out of 3 CSV files. 
TXT; diff --git a/tests/Blueprint/ValidatorTest.php b/tests/Blueprint/ValidatorTest.php index 259b0b0a..4c6a0ffa 100644 --- a/tests/Blueprint/ValidatorTest.php +++ b/tests/Blueprint/ValidatorTest.php @@ -141,7 +141,7 @@ public function testRegex(): void $csv = new CsvFile(self::CSV_COMPLEX, $this->getRule('seq', 'regex', '[a-z]')); isSame( - '"regex" at line 2, column "0:seq". Value "1" does not match the pattern "/[a-z]/u".', + '"regex" at line 2, column "0:seq". Value "1" does not match the pattern "/[a-z]/".', \strip_tags((string)$csv->validate()->get(0)), ); @@ -564,6 +564,25 @@ public function testGetAvaiableRenderFormats(): void ], ErrorSuite::getAvaiableRenderFormats()); } + public function testFilenamePattern(): void + { + $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '/demo(-\\d+)?\\.csv$/']); + isSame( + '"filename_pattern" at line 0, column "". ' . + 'Filename "./tests/fixtures/complex_header.csv" does not match pattern: "/demo(-\d+)?\.csv$/".', + \strip_tags((string)$csv->validate()->get(0)), + ); + + $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '']); + isSame('', (string)$csv->validate()); + + $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => null]); + isSame('', (string)$csv->validate()); + + $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '/.*\.csv$/']); + isSame('', (string)$csv->validate()); + } + private function getRule(?string $columnName, ?string $ruleName, array|bool|float|int|string $options): array { return ['columns' => [['name' => $columnName, 'rules' => [$ruleName => $options]]]]; diff --git a/tests/schemas/demo_invalid.yml b/tests/schemas/demo_invalid.yml index e6034d35..eee67f29 100644 --- a/tests/schemas/demo_invalid.yml +++ b/tests/schemas/demo_invalid.yml @@ -12,6 +12,8 @@ # This schema is invalid because does not match the CSV file (tests/fixtures/demo.csv). 
+filename_pattern: /demo-[12].csv$/i + columns: - name: Name rules: diff --git a/tests/schemas/demo_valid.yml b/tests/schemas/demo_valid.yml index 3dd44214..d366cfa9 100644 --- a/tests/schemas/demo_valid.yml +++ b/tests/schemas/demo_valid.yml @@ -12,6 +12,8 @@ # This schema is valid because match the CSV file (tests/fixtures/demo.csv) perfectly. +filename_pattern: /demo(-\d+)?\.csv$/ + columns: - name: Name rules: diff --git a/tests/schemas/example_full.yml b/tests/schemas/example_full.yml index 51fdb55f..7c6c5e1e 100644 --- a/tests/schemas/example_full.yml +++ b/tests/schemas/example_full.yml @@ -17,7 +17,7 @@ # Created: 2023-03-09 # File name pattern to match. If not set, then no pattern check -finename_pattern: ^example\.csv$ +filename_pattern: ^example\.csv$ # Include another schemas