diff --git a/.github/workflows/demo.yml b/.github/workflows/demo.yml
index 36533d0f..fcdbf71d 100644
--- a/.github/workflows/demo.yml
+++ b/.github/workflows/demo.yml
@@ -105,13 +105,12 @@ jobs:
- name: 👎 Invalid CSV file
run: |
- docker run \
+ ! docker run \
-v `pwd`:/parent-host \
--rm jbzoo/csv-blueprint \
validate:csv \
--csv=/parent-host/tests/fixtures/batch/*.csv \
--schema=/parent-host/tests/schemas/demo_invalid.yml
- continue-on-error: true
phar:
@@ -138,11 +137,10 @@ jobs:
- name: 👎 Invalid CSV file
run: |
- ./build/csv-blueprint.phar \
+ ! ./build/csv-blueprint.phar \
validate:csv \
--csv=./tests/fixtures/batch/*.csv \
--schema=./tests/schemas/demo_invalid.yml
- continue-on-error: true
php:
@@ -169,8 +167,7 @@ jobs:
- name: 👎 Invalid CSV file
run: |
- ./csv-blueprint \
+ ! ./csv-blueprint \
validate:csv \
--csv=./tests/fixtures/batch/*.csv \
--schema=./tests/schemas/demo_invalid.yml
- continue-on-error: true
diff --git a/README.md b/README.md
index bb648791..c88fea3e 100644
--- a/README.md
+++ b/README.md
@@ -254,9 +254,16 @@ Found CSV files: 3
| 7 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 |
+------+------------+------------+----- demo-2.csv ---------------------------------------+
-(3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv
+(3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
++------+-----------+------------------+---- demo-3.csv -------------------------------------------+
+| Line | id:Column | Rule | Message |
++------+-----------+------------------+-----------------------------------------------------------+
+| 0 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not |
+| | | | match pattern: "/demo-[12].csv$/i" |
++------+-----------+------------------+---- demo-3.csv -------------------------------------------+
-Found 7 issues in 2 out of 3 CSV files.
+
+Found 8 issues in 3 out of 3 CSV files.
```
@@ -307,6 +314,11 @@ This gives you great flexibility when validating CSV files.
```yml
# It's a full example of the CSV schema file in YAML format.
+# Regular expression to match the file name. If not set, then no pattern check
+# This way you can validate the file name before the validation process.
+# Feel free to check parent directories as well.
+filename_pattern: /demo(-\d+)?\.csv$/i
+
csv: # Here are default values. You can skip this section if you don't need to override the default values
header: true # If the first row is a header. If true, name of each column is required
delimiter: , # Delimiter character in CSV file
@@ -362,6 +374,8 @@ columns:
cardinal_direction: true # Valid cardinal direction. Examples: "N", "S", "NE", "SE", "none", ""
usa_market_name: true # Check if the value is a valid USA market name. Example: "New York, NY"
+ - name: "another_column"
+
```
@@ -370,7 +384,8 @@ columns:
```json
{
- "csv" : {
+ "filename_pattern" : "/demo(-\\d+)?\\.csv$/i",
+ "csv" : {
"header" : true,
"delimiter" : ",",
"quote_char" : "\\",
@@ -378,7 +393,7 @@ columns:
"encoding" : "utf-8",
"bom" : false
},
- "columns" : [
+ "columns" : [
{
"name" : "csv_header_name",
"description" : "Lorem ipsum",
@@ -412,7 +427,8 @@ columns:
"cardinal_direction" : true,
"usa_market_name" : true
}
- }
+ },
+ {"name" : "another_column"}
]
}
@@ -422,6 +438,7 @@ columns:
+
Click to see: PHP Format
@@ -430,6 +447,8 @@ columns:
declare(strict_types=1);
return [
+ 'filename_pattern' => '/demo(-\\d+)?\\.csv$/i',
+
'csv' => [
'header' => true,
'delimiter' => ',',
@@ -438,6 +457,7 @@ return [
'encoding' => 'utf-8',
'bom' => false,
],
+
'columns' => [
[
'name' => 'csv_header_name',
@@ -473,6 +493,7 @@ return [
'usa_market_name' => true,
],
],
+ ['name' => 'another_column'],
],
];
@@ -481,6 +502,7 @@ return [
+
## Coming soon
It's random ideas and plans. No orderings and deadlines. But batch processing is the priority #1.
@@ -494,7 +516,7 @@ Batch processing
* [ ] Discovering CSV files by `filename_pattern` in the schema file. In case you have a lot of schemas and a lot of CSV files and want to automate the process as one command.
Validation
-* [ ] `filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").
+* [x] ~~`filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").~~
* [ ] Agregate rules (like "at least one of the fields should be not empty" or "all values must be unique").
* [ ] Handle empty files and files with only a header row, or only with one line of data. One column wthout header is also possible.
* [ ] Using multiple schemas for one csv file.
diff --git a/schema-examples/full.json b/schema-examples/full.json
index 51bf4158..17131304 100644
--- a/schema-examples/full.json
+++ b/schema-examples/full.json
@@ -1,5 +1,6 @@
{
- "csv" : {
+ "filename_pattern" : "/demo(-\\d+)?\\.csv$/i",
+ "csv" : {
"header" : true,
"delimiter" : ",",
"quote_char" : "\\",
@@ -7,7 +8,7 @@
"encoding" : "utf-8",
"bom" : false
},
- "columns" : [
+ "columns" : [
{
"name" : "csv_header_name",
"description" : "Lorem ipsum",
@@ -41,6 +42,7 @@
"cardinal_direction" : true,
"usa_market_name" : true
}
- }
+ },
+ {"name" : "another_column"}
]
}
diff --git a/schema-examples/full.php b/schema-examples/full.php
index b5bae14a..66c7542a 100644
--- a/schema-examples/full.php
+++ b/schema-examples/full.php
@@ -15,6 +15,8 @@
declare(strict_types=1);
return [
+ 'filename_pattern' => '/demo(-\\d+)?\\.csv$/i',
+
'csv' => [
'header' => true,
'delimiter' => ',',
@@ -23,6 +25,7 @@
'encoding' => 'utf-8',
'bom' => false,
],
+
'columns' => [
[
'name' => 'csv_header_name',
@@ -58,5 +61,6 @@
'usa_market_name' => true,
],
],
+ ['name' => 'another_column'],
],
];
diff --git a/schema-examples/full.yml b/schema-examples/full.yml
index 402d526d..98036c1c 100644
--- a/schema-examples/full.yml
+++ b/schema-examples/full.yml
@@ -12,6 +12,11 @@
# It's a full example of the CSV schema file in YAML format.
+# Regular expression to match the file name. If not set, then no pattern check
+# This way you can validate the file name before the validation process.
+# Feel free to check parent directories as well.
+filename_pattern: /demo(-\d+)?\.csv$/i
+
csv: # Here are default values. You can skip this section if you don't need to override the default values
header: true # If the first row is a header. If true, name of each column is required
delimiter: , # Delimiter character in CSV file
@@ -66,3 +71,5 @@ columns:
is_longitude: true # Can be integer or float. Example: -89.123456
cardinal_direction: true # Valid cardinal direction. Examples: "N", "S", "NE", "SE", "none", ""
usa_market_name: true # Check if the value is a valid USA market name. Example: "New York, NY"
+
+ - name: "another_column"
diff --git a/src/Csv/CsvFile.php b/src/Csv/CsvFile.php
index c6372e2c..65d52eb1 100644
--- a/src/Csv/CsvFile.php
+++ b/src/Csv/CsvFile.php
@@ -17,6 +17,7 @@
namespace JBZoo\CsvBlueprint\Csv;
use JBZoo\CsvBlueprint\Schema;
+use JBZoo\CsvBlueprint\Utils;
use JBZoo\CsvBlueprint\Validators\Error;
use JBZoo\CsvBlueprint\Validators\ErrorSuite;
use League\Csv\Reader as LeagueReader;
@@ -82,7 +83,9 @@ public function validate(bool $quickStop = false): ErrorSuite
{
$errors = new ErrorSuite($this->getCsvFilename());
- $errors->addErrorSuit($this->validateHeader())
+ $errors
+ ->addErrorSuit($this->validateFile($quickStop))
+ ->addErrorSuit($this->validateHeader($quickStop))
->addErrorSuit($this->validateEachCell($quickStop))
->addErrorSuit(self::validateAggregateRules($quickStop));
@@ -106,7 +109,7 @@ private function prepareReader(): LeagueReader
return $reader;
}
- private function validateHeader(): ErrorSuite
+ private function validateHeader(bool $quickStop = false): ErrorSuite
{
$errors = new ErrorSuite();
@@ -125,6 +128,10 @@ private function validateHeader(): ErrorSuite
$errors->addError($error);
}
+
+ if ($quickStop && $errors->count() > 0) {
+ return $errors;
+ }
}
return $errors;
@@ -152,6 +159,34 @@ private function validateEachCell(bool $quickStop = false): ErrorSuite
return $errors;
}
+    /**
+     * Validates the CSV file path against the schema's optional "filename_pattern".
+     *
+     * @param bool $quickStop Return as soon as the first error has been recorded
+     *
+     * @return ErrorSuite Empty when no pattern is configured or the path matches it
+     */
+    private function validateFile(bool $quickStop = false): ErrorSuite
+    {
+        $errors = new ErrorSuite();
+
+        $filenamePattern = $this->schema->getFilenamePattern();
+        if ($filenamePattern === null || $filenamePattern === '') {
+            // No pattern configured: the file name is not checked at all.
+            return $errors;
+        }
+
+        // preg_match() returns 1 on a match, 0 on no match and false when the pattern itself
+        // is broken. Comparing with "!== 1" reports a broken pattern as a mismatch; a "=== 0"
+        // comparison would silently accept the file in that case.
+        if (\preg_match($filenamePattern, $this->csvFilename) !== 1) {
+            $errors->addError(new Error(
+                'filename_pattern',
+                'Filename "' . Utils::cutPath($this->csvFilename) .
+                "\" does not match pattern: \"{$filenamePattern}\"",
+                '',
+                0,
+            ));
+
+            // Currently the only file-level check; the guard keeps the same quick-stop
+            // shape as the other validate*() steps for any checks added below it.
+            if ($quickStop && $errors->count() > 0) {
+                return $errors;
+            }
+        }
+
+        return $errors;
+    }
+
private static function validateAggregateRules(bool $quickStop = false): ErrorSuite
{
$errors = new ErrorSuite();
diff --git a/src/Schema.php b/src/Schema.php
index 1ebf508f..815d6708 100644
--- a/src/Schema.php
+++ b/src/Schema.php
@@ -114,9 +114,9 @@ public function getColumn(int|string $columNameOrId): ?Column
return $column;
}
+ /**
+ * Returns the schema's "filename_pattern" value passed through Utils::prepareRegex().
+ * Null is returned when no pattern is set in the schema.
+ */
- public function getFinenamePattern(): ?string
+ public function getFilenamePattern(): ?string
{
- return $this->data->getStringNull('finename_pattern');
+ return Utils::prepareRegex($this->data->getStringNull('filename_pattern'));
}
public function getIncludes(): array
diff --git a/src/Utils.php b/src/Utils.php
index b18b8a70..02b99f78 100644
--- a/src/Utils.php
+++ b/src/Utils.php
@@ -47,7 +47,7 @@ public static function prepareRegex(?string $pattern, string $addDelimiter = '/'
}
}
- return $addDelimiter . $pattern . $addDelimiter . 'u';
+ return $addDelimiter . $pattern . $addDelimiter;
}
/**
diff --git a/tests/Blueprint/MiscTest.php b/tests/Blueprint/MiscTest.php
index 8c114b5d..04b0d277 100644
--- a/tests/Blueprint/MiscTest.php
+++ b/tests/Blueprint/MiscTest.php
@@ -47,7 +47,7 @@ public function testPrepareRegex(): void
{
isSame(null, Utils::prepareRegex(null));
isSame(null, Utils::prepareRegex(''));
- isSame('/.*/u', Utils::prepareRegex('.*'));
+ isSame('/.*/', Utils::prepareRegex('.*'));
isSame('#.*#u', Utils::prepareRegex('#.*#u'));
isSame('/.*/', Utils::prepareRegex('/.*/'));
isSame('/.*/ius', Utils::prepareRegex('/.*/ius'));
diff --git a/tests/Blueprint/RulesTest.php b/tests/Blueprint/RulesTest.php
index 43eb878f..69f97883 100644
--- a/tests/Blueprint/RulesTest.php
+++ b/tests/Blueprint/RulesTest.php
@@ -591,7 +591,7 @@ public function testRegex(): void
isSame(null, $rule->validate('aaa'));
isSame(null, $rule->validate('a'));
isSame(
- '"regex" at line 0, column "prop". Value "1bc" does not match the pattern "/^a/u".',
+ '"regex" at line 0, column "prop". Value "1bc" does not match the pattern "/^a/".',
\strip_tags((string)$rule->validate('1bc')),
);
}
diff --git a/tests/Blueprint/SchemaTest.php b/tests/Blueprint/SchemaTest.php
index aea2416a..98e85a44 100644
--- a/tests/Blueprint/SchemaTest.php
+++ b/tests/Blueprint/SchemaTest.php
@@ -44,10 +44,10 @@ public function testFilename(): void
public function testGetFinenamePattern(): void
{
$schemaEmpty = new Schema(self::SCHEMA_EXAMPLE_EMPTY);
- isSame(null, $schemaEmpty->getFinenamePattern());
+ isSame(null, $schemaEmpty->getFilenamePattern());
$schemaFull = new Schema(self::SCHEMA_EXAMPLE_FULL);
- isSame('^example\.csv$', $schemaFull->getFinenamePattern());
+ isSame('/^example\.csv$/', $schemaFull->getFilenamePattern());
}
public function testScvStruture(): void
diff --git a/tests/Blueprint/ValidateCsvTest.php b/tests/Blueprint/ValidateCsvTest.php
index 6ef0cb9e..a7bc0ec8 100644
--- a/tests/Blueprint/ValidateCsvTest.php
+++ b/tests/Blueprint/ValidateCsvTest.php
@@ -64,24 +64,26 @@ public function testValidateOneFileNegativeTable(): void
Found CSV files: 1
(1/1) Invalid file: ./tests/fixtures/demo.csv
- +------+------------------+--------------+-- demo.csv --------------------------------------------+
- | Line | id:Column | Rule | Message |
- +------+------------------+--------------+--------------------------------------------------------+
- | 5 | 2:Float | max | Value "74605.944" is greater than "74605" |
- | 5 | 4:Favorite color | allow_values | Value "blue" is not allowed. Allowed values: ["red", |
- | | | | "green", "Blue"] |
- | 6 | 0:Name | min_length | Value "Carl" (length: 4) is too short. Min length is 5 |
- | 6 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date |
- | | | | "1955-05-15T00:00:00.000+00:00" |
- | 8 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date |
- | | | | "1955-05-15T00:00:00.000+00:00" |
- | 9 | 3:Birthday | max_date | Value "2010-07-20" is more than the maximum date |
- | | | | "2009-01-01T00:00:00.000+00:00" |
- | 11 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 |
- +------+------------------+--------------+-- demo.csv --------------------------------------------+
-
-
- Found 7 issues in CSV file.
+ +------+------------------+------------------+--- demo.csv -------------------------------------------------+
+ | Line | id:Column | Rule | Message |
+ +------+------------------+------------------+--------------------------------------------------------------+
+ | 0 | | filename_pattern | Filename "./tests/fixtures/demo.csv" does not match pattern: |
+ | | | | "/demo-[12].csv$/i" |
+ | 5 | 2:Float | max | Value "74605.944" is greater than "74605" |
+ | 5 | 4:Favorite color | allow_values | Value "blue" is not allowed. Allowed values: ["red", |
+ | | | | "green", "Blue"] |
+ | 6 | 0:Name | min_length | Value "Carl" (length: 4) is too short. Min length is 5 |
+ | 6 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date |
+ | | | | "1955-05-15T00:00:00.000+00:00" |
+ | 8 | 3:Birthday | min_date | Value "1955-05-14" is less than the minimum date |
+ | | | | "1955-05-15T00:00:00.000+00:00" |
+ | 9 | 3:Birthday | max_date | Value "2010-07-20" is more than the maximum date |
+ | | | | "2009-01-01T00:00:00.000+00:00" |
+ | 11 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 |
+ +------+------------------+------------------+--- demo.csv -------------------------------------------------+
+
+
+ Found 8 issues in CSV file.
TXT;
@@ -103,7 +105,7 @@ public function testValidateManyFileNegativeTable(): void
$expected = <<<'TXT'
Schema: ./tests/schemas/demo_invalid.yml
Found CSV files: 3
-
+
(1/3) Invalid file: ./tests/fixtures/batch/demo-1.csv
+------+------------------+--------------+ demo-1.csv ------------------------------------------+
| Line | id:Column | Rule | Message |
@@ -127,9 +129,16 @@ public function testValidateManyFileNegativeTable(): void
| 7 | 0:Name | min_length | Value "Lois" (length: 4) is too short. Min length is 5 |
+------+------------+------------+----- demo-2.csv ---------------------------------------+
- (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv
+ (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
+ +------+-----------+------------------+---- demo-3.csv -------------------------------------------+
+ | Line | id:Column | Rule | Message |
+ +------+-----------+------------------+-----------------------------------------------------------+
+ | 0 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not |
+ | | | | match pattern: "/demo-[12].csv$/i" |
+ +------+-----------+------------------+---- demo-3.csv -------------------------------------------+
+
- Found 7 issues in 2 out of 3 CSV files.
+ Found 8 issues in 3 out of 3 CSV files.
TXT;
@@ -152,6 +161,7 @@ public function testValidateOneFileNegativeText(): void
Found CSV files: 1
(1/1) Invalid file: ./tests/fixtures/demo.csv
+ "filename_pattern" at line 0, column "". Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i".
"max" at line 5, column "2:Float". Value "74605.944" is greater than "74605".
"allow_values" at line 5, column "4:Favorite color". Value "blue" is not allowed. Allowed values: ["red", "green", "Blue"].
"min_length" at line 6, column "0:Name". Value "Carl" (length: 4) is too short. Min length is 5.
@@ -161,7 +171,7 @@ public function testValidateOneFileNegativeText(): void
"min_length" at line 11, column "0:Name". Value "Lois" (length: 4) is too short. Min length is 5.
- Found 7 issues in CSV file.
+ Found 8 issues in CSV file.
TXT;
@@ -249,9 +259,11 @@ public function testValidateManyFilesNegativeTextQuick(): void
"max_date" at line 5, column "3:Birthday". Value "2010-07-20" is more than the maximum date "2009-01-01T00:00:00.000+00:00".
"min_length" at line 7, column "0:Name". Value "Lois" (length: 4) is too short. Min length is 5.
- (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv
+ (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
+ "filename_pattern" at line 0, column "". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i".
- Found 7 issues in 2 out of 3 CSV files.
+
+ Found 8 issues in 3 out of 3 CSV files.
TXT;
@@ -319,9 +331,20 @@ public function testCreateValidateNegativeTeamcity(): void
##teamcity[testSuiteFinished name='demo-2.csv' flowId='42']
- (3/3) OK: ./tests/fixtures/batch/sub/demo-3.csv
+ (3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
+
+ ##teamcity[testCount count='1' flowId='42']
+
+ ##teamcity[testSuiteStarted name='demo-3.csv' flowId='42']
+
+ ##teamcity[testStarted name='filename_pattern at column' locationHint='php_qn://./tests/fixtures/batch/sub/demo-3.csv' flowId='42']
+ "filename_pattern" at line 0, column "". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i".
+ ##teamcity[testFinished name='filename_pattern at column' flowId='42']
+
+ ##teamcity[testSuiteFinished name='demo-3.csv' flowId='42']
+
- Found 7 issues in 2 out of 3 CSV files.
+ Found 8 issues in 3 out of 3 CSV files.
TXT;
diff --git a/tests/Blueprint/ValidatorTest.php b/tests/Blueprint/ValidatorTest.php
index 259b0b0a..4c6a0ffa 100644
--- a/tests/Blueprint/ValidatorTest.php
+++ b/tests/Blueprint/ValidatorTest.php
@@ -141,7 +141,7 @@ public function testRegex(): void
$csv = new CsvFile(self::CSV_COMPLEX, $this->getRule('seq', 'regex', '[a-z]'));
isSame(
- '"regex" at line 2, column "0:seq". Value "1" does not match the pattern "/[a-z]/u".',
+ '"regex" at line 2, column "0:seq". Value "1" does not match the pattern "/[a-z]/".',
\strip_tags((string)$csv->validate()->get(0)),
);
@@ -564,6 +564,25 @@ public function testGetAvaiableRenderFormats(): void
], ErrorSuite::getAvaiableRenderFormats());
}
+ /**
+ * Covers the schema-level "filename_pattern" check: mismatching, empty, null and matching patterns.
+ */
+ public function testFilenamePattern(): void
+ {
+ // Pattern that the fixture path does not match: the first reported error is "filename_pattern" at line 0.
+ $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '/demo(-\\d+)?\\.csv$/']);
+ isSame(
+ '"filename_pattern" at line 0, column "". ' .
+ 'Filename "./tests/fixtures/complex_header.csv" does not match pattern: "/demo(-\d+)?\.csv$/".',
+ \strip_tags((string)$csv->validate()->get(0)),
+ );
+
+ // An empty pattern yields an empty validation report.
+ $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '']);
+ isSame('', (string)$csv->validate());
+
+ // A null pattern yields an empty validation report as well.
+ $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => null]);
+ isSame('', (string)$csv->validate());
+
+ // A pattern that matches the fixture path produces no errors.
+ $csv = new CsvFile(self::CSV_COMPLEX, ['filename_pattern' => '/.*\.csv$/']);
+ isSame('', (string)$csv->validate());
+ }
+
private function getRule(?string $columnName, ?string $ruleName, array|bool|float|int|string $options): array
{
return ['columns' => [['name' => $columnName, 'rules' => [$ruleName => $options]]]];
diff --git a/tests/schemas/demo_invalid.yml b/tests/schemas/demo_invalid.yml
index e6034d35..eee67f29 100644
--- a/tests/schemas/demo_invalid.yml
+++ b/tests/schemas/demo_invalid.yml
@@ -12,6 +12,8 @@
# This schema is invalid because does not match the CSV file (tests/fixtures/demo.csv).
+filename_pattern: /demo-[12].csv$/i
+
columns:
- name: Name
rules:
diff --git a/tests/schemas/demo_valid.yml b/tests/schemas/demo_valid.yml
index 3dd44214..d366cfa9 100644
--- a/tests/schemas/demo_valid.yml
+++ b/tests/schemas/demo_valid.yml
@@ -12,6 +12,8 @@
# This schema is valid because match the CSV file (tests/fixtures/demo.csv) perfectly.
+filename_pattern: /demo(-\d+)?\.csv$/
+
columns:
- name: Name
rules:
diff --git a/tests/schemas/example_full.yml b/tests/schemas/example_full.yml
index 51fdb55f..7c6c5e1e 100644
--- a/tests/schemas/example_full.yml
+++ b/tests/schemas/example_full.yml
@@ -17,7 +17,7 @@
# Created: 2023-03-09
# File name pattern to match. If not set, then no pattern check
-finename_pattern: ^example\.csv$
+filename_pattern: ^example\.csv$
# Include another schemas