diff --git a/README.md b/README.md index 4a984806..0ff73c7c 100644 --- a/README.md +++ b/README.md @@ -285,6 +285,8 @@ This gives you great flexibility when validating CSV files. ### Schema file examples +Available formats: [YAML](schema-examples/full.yml), [JSON](schema-examples/full.json), [PHP](schema-examples/full.php). + ```yml # It's a full example of the CSV schema file in YAML format. @@ -323,6 +325,14 @@ columns: only_lowercase: true # String is only lower-case. Example: "hello world" only_uppercase: true # String is only upper-case. Example: "HELLO WORLD" only_capitalize: true # String is only capitalized. Example: "Hello World" + word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word) + min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word) + max_word_count: 5 # Integer only. Max count of words in the string Example: "Hello World! 123" - 2 words only (123 is not a word) + at_least_contains: [ a, b ] # At least one of the string must be in the CSV value. Case-sensitive. + all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive. + str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix" + str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World" + # Decimal and integer numbers min: 10 # Can be integer or float, negative and positive @@ -355,134 +365,6 @@ columns: ``` -
- Click to see: JSON Format - -```json -{ - "filename_pattern" : "/demo(-\\d+)?\\.csv$/i", - "csv" : { - "header" : true, - "delimiter" : ",", - "quote_char" : "\\", - "enclosure" : "\"", - "encoding" : "utf-8", - "bom" : false - }, - "columns" : [ - { - "name" : "csv_header_name", - "description" : "Lorem ipsum", - "rules" : { - "not_empty" : true, - "exact_value" : "Some string", - "allow_values" : ["y", "n", ""], - "regex" : "\/^[\\d]{2}$\/", - "min_length" : 1, - "max_length" : 10, - "only_trimed" : true, - "only_lowercase" : true, - "only_uppercase" : true, - "only_capitalize" : true, - "min" : 10, - "max" : 100.5, - "precision" : 3, - "min_precision" : 2, - "max_precision" : 4, - "date_format" : "Y-m-d", - "min_date" : "2000-01-02", - "max_date" : "+1 day", - "is_bool" : true, - "is_int" : true, - "is_float" : true, - "is_ip" : true, - "is_url" : true, - "is_email" : true, - "is_domain" : true, - "is_uuid4" : true, - "is_latitude" : true, - "is_longitude" : true, - "cardinal_direction" : true, - "usa_market_name" : true - } - }, - {"name" : "another_column"} - ] -} - -``` - -
- - - - -
- Click to see: PHP Format - -```php - '/demo(-\\d+)?\\.csv$/i', - - 'csv' => [ - 'header' => true, - 'delimiter' => ',', - 'quote_char' => '\\', - 'enclosure' => '"', - 'encoding' => 'utf-8', - 'bom' => false, - ], - - 'columns' => [ - [ - 'name' => 'csv_header_name', - 'description' => 'Lorem ipsum', - 'rules' => [ - 'not_empty' => true, - 'exact_value' => 'Some string', - 'allow_values' => ['y', 'n', ''], - 'regex' => '/^[\\d]{2}$/', - 'min_length' => 1, - 'max_length' => 10, - 'only_trimed' => true, - 'only_lowercase' => true, - 'only_uppercase' => true, - 'only_capitalize' => true, - 'min' => 10, - 'max' => 100.5, - 'precision' => 3, - 'min_precision' => 2, - 'max_precision' => 4, - 'date_format' => 'Y-m-d', - 'min_date' => '2000-01-02', - 'max_date' => '+1 day', - 'is_bool' => true, - 'is_int' => true, - 'is_float' => true, - 'is_ip' => true, - 'is_url' => true, - 'is_email' => true, - 'is_domain' => true, - 'is_uuid4' => true, - 'is_latitude' => true, - 'is_longitude' => true, - 'cardinal_direction' => true, - 'usa_market_name' => true, - ], - ], - ['name' => 'another_column'], - ], -]; - -``` - -
- - - ## Coming soon It's random ideas and plans. No orderings and deadlines. But batch processing is the priority #1. @@ -498,6 +380,7 @@ Batch processing Validation * [x] ~~`filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").~~ * [ ] Flag to ignore file name pattern. It's useful when you have a lot of files and you don't want to validate the file name. +* [ ] Keyword for null value. Configurable. By default, it's an empty string. But you can use `null`, `nil`, `none`, `empty`, etc. * [ ] Aggregate rules (like "at least one of the fields should be not empty" or "all values must be unique"). * [ ] Handle empty files and files with only a header row, or only with one line of data. One column without header is also possible. * [ ] Using multiple schemas for one csv file. diff --git a/schema-examples/full.json b/schema-examples/full.json index 6897256e..0f186c76 100644 --- a/schema-examples/full.json +++ b/schema-examples/full.json @@ -23,6 +23,13 @@ "only_lowercase" : true, "only_uppercase" : true, "only_capitalize" : true, + "word_count" : 10, + "min_word_count" : 1, + "max_word_count" : 5, + "at_least_contains" : ["a", "b"], + "all_must_contain" : ["a", "b", "c"], + "str_ends_with" : " suffix", + "str_starts_with" : "prefix ", "min" : 10, "max" : 100.5, "precision" : 3, diff --git a/schema-examples/full.php b/schema-examples/full.php index e80e81ad..2019eaea 100644 --- a/schema-examples/full.php +++ b/schema-examples/full.php @@ -41,6 +41,13 @@ 'only_lowercase' => true, 'only_uppercase' => true, 'only_capitalize' => true, + 'word_count' => 10, + 'min_word_count' => 1, + 'max_word_count' => 5, + 'at_least_contains' => ['a', 'b'], + 'all_must_contain' => ['a', 'b', 'c'], + 'str_ends_with' => ' suffix', + 'str_starts_with' => 'prefix ', 'min' => 10, 'max' => 100.5, 'precision' => 3, diff --git a/schema-examples/full.yml b/schema-examples/full.yml index cc69b868..d15adfb1 100644 --- 
a/schema-examples/full.yml +++ b/schema-examples/full.yml @@ -47,6 +47,14 @@ columns: only_lowercase: true # String is only lower-case. Example: "hello world" only_uppercase: true # String is only upper-case. Example: "HELLO WORLD" only_capitalize: true # String is only capitalized. Example: "Hello World" + word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word) + min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word) + max_word_count: 5 # Integer only. Max count of words in the string Example: "Hello World! 123" - 2 words only (123 is not a word) + at_least_contains: [ a, b ] # At least one of the string must be in the CSV value. Case-sensitive. + all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive. + str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix" + str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World" + # Decimal and integer numbers min: 10 # Can be integer or float, negative and positive diff --git a/src/Rules/AllMustContain.php b/src/Rules/AllMustContain.php new file mode 100644 index 00000000..507d97a3 --- /dev/null +++ b/src/Rules/AllMustContain.php @@ -0,0 +1,37 @@ +getOptionAsArray(); + if (\count($inclusions) === 0) { + return null; + } + + foreach ($inclusions as $inclusion) { + if (\strpos((string)$cellValue, (string)$inclusion) === false) { + return "Value \"{$cellValue}\" must contain all of the following:" . + ' "["' . \implode('", "', $inclusions) . 
'"]"'; + } + } + + return null; + } +} diff --git a/src/Rules/AtLeastContains.php b/src/Rules/AtLeastContains.php new file mode 100644 index 00000000..5fec1baa --- /dev/null +++ b/src/Rules/AtLeastContains.php @@ -0,0 +1,37 @@ +getOptionAsArray(); + if (\count($inclusions) === 0) { + return null; + } + + foreach ($inclusions as $inclusion) { + if (\strpos((string)$cellValue, (string)$inclusion) !== false) { + return null; + } + } + + return "Value \"{$cellValue}\" must contain one of the following:" . + ' "["' . \implode('", "', $inclusions) . '"]"'; + } +} diff --git a/src/Rules/MaxWordCount.php b/src/Rules/MaxWordCount.php new file mode 100644 index 00000000..d7ea0734 --- /dev/null +++ b/src/Rules/MaxWordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count > $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . + "but must have no more than {$wordCount} words"; + } + + return null; + } +} diff --git a/src/Rules/MinWordCount.php b/src/Rules/MinWordCount.php new file mode 100644 index 00000000..1c80f4d2 --- /dev/null +++ b/src/Rules/MinWordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count < $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . 
+ "but must have at least {$wordCount} words"; + } + + return null; + } +} diff --git a/src/Rules/StrEndsWith.php b/src/Rules/StrEndsWith.php new file mode 100644 index 00000000..639e742a --- /dev/null +++ b/src/Rules/StrEndsWith.php @@ -0,0 +1,34 @@ +getOptionAsString(); + if ($prefix === '') { + return null; + } + + if (!\str_ends_with((string)$cellValue, $prefix)) { + return "Value \"{$cellValue}\" must end with \"{$prefix}\""; + } + + return null; + } +} diff --git a/src/Rules/StrStartsWith.php b/src/Rules/StrStartsWith.php new file mode 100644 index 00000000..50a22f09 --- /dev/null +++ b/src/Rules/StrStartsWith.php @@ -0,0 +1,34 @@ +getOptionAsString(); + if ($prefix === '') { + return null; + } + + if (!\str_starts_with((string)$cellValue, $prefix)) { + return "Value \"{$cellValue}\" must start with \"{$prefix}\""; + } + + return null; + } +} diff --git a/src/Rules/WordCount.php b/src/Rules/WordCount.php new file mode 100644 index 00000000..2dbc66c8 --- /dev/null +++ b/src/Rules/WordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count !== $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . + "but must have exactly {$wordCount} words"; + } + + return null; + } +} diff --git a/tests/Blueprint/MiscTest.php b/tests/Blueprint/MiscTest.php index 84e48744..2ac6f844 100644 --- a/tests/Blueprint/MiscTest.php +++ b/tests/Blueprint/MiscTest.php @@ -82,7 +82,13 @@ public function testFullListOfRules(): void } \sort($rulesInCode); - isSame($rulesInCode, $rulesInConfig); + $diffAsErrMessage = \array_reduce( + \array_diff($rulesInCode, $rulesInConfig), + static fn (string $carry, string $item) => $carry . 
"{$item}: FIXME\n", + '', + ); + + isSame($rulesInCode, $rulesInConfig, $diffAsErrMessage); } public function testCsvStrutureDefaultValues(): void @@ -105,15 +111,15 @@ public function testCheckYmlSchemaExampleInReadme(): void ); } - public function testCheckPhpSchemaExampleInReadme(): void - { - $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14); - } - - public function testCheckJsonSchemaExampleInReadme(): void - { - $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0); - } + // public function testCheckPhpSchemaExampleInReadme(): void + // { + // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14); + // } + // + // public function testCheckJsonSchemaExampleInReadme(): void + // { + // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0); + // } public function testCompareExamplesWithOrig(): void { @@ -125,8 +131,8 @@ public function testCompareExamplesWithOrig(): void // file_put_contents("{$basepath}.php", (string)phpArray($origYml)); // file_put_contents("{$basepath}.json", (string)json($origYml)); - isSame($origYml, phpArray("{$basepath}.php")->getArrayCopy(), 'PHP config is invalid'); - isSame($origYml, json("{$basepath}.json")->getArrayCopy(), 'JSON config is invalid'); + isSame((string)phpArray($origYml), (string)phpArray("{$basepath}.php"), 'PHP config is invalid'); + isSame((string)json($origYml), (string)json("{$basepath}.json"), 'JSON config is invalid'); } public function testFindFiles(): void diff --git a/tests/Blueprint/RulesTest.php b/tests/Blueprint/RulesTest.php index bd3a12da..1074fb18 100644 --- a/tests/Blueprint/RulesTest.php +++ b/tests/Blueprint/RulesTest.php @@ -16,7 +16,9 @@ namespace JBZoo\PHPUnit\Blueprint; +use JBZoo\CsvBlueprint\Rules\AllMustContain; use JBZoo\CsvBlueprint\Rules\AllowValues; +use JBZoo\CsvBlueprint\Rules\AtLeastContains; 
use JBZoo\CsvBlueprint\Rules\CardinalDirection; use JBZoo\CsvBlueprint\Rules\DateFormat; use JBZoo\CsvBlueprint\Rules\ExactValue; @@ -34,17 +36,22 @@ use JBZoo\CsvBlueprint\Rules\MaxDate; use JBZoo\CsvBlueprint\Rules\MaxLength; use JBZoo\CsvBlueprint\Rules\MaxPrecision; +use JBZoo\CsvBlueprint\Rules\MaxWordCount; use JBZoo\CsvBlueprint\Rules\Min; use JBZoo\CsvBlueprint\Rules\MinDate; use JBZoo\CsvBlueprint\Rules\MinLength; use JBZoo\CsvBlueprint\Rules\MinPrecision; +use JBZoo\CsvBlueprint\Rules\MinWordCount; use JBZoo\CsvBlueprint\Rules\NotEmpty; use JBZoo\CsvBlueprint\Rules\OnlyCapitalize; use JBZoo\CsvBlueprint\Rules\OnlyLowercase; use JBZoo\CsvBlueprint\Rules\OnlyUppercase; use JBZoo\CsvBlueprint\Rules\Precision; use JBZoo\CsvBlueprint\Rules\Regex; +use JBZoo\CsvBlueprint\Rules\StrEndsWith; +use JBZoo\CsvBlueprint\Rules\StrStartsWith; use JBZoo\CsvBlueprint\Rules\UsaMarketName; +use JBZoo\CsvBlueprint\Rules\WordCount; use JBZoo\PHPUnit\PHPUnit; use JBZoo\Utils\Str; @@ -714,4 +721,136 @@ public function testIsUuid4(): void $rule = new IsUuid4('prop', false); isSame(null, $rule->validate('123')); } + + public function testMustContain(): void + { + $rule = new AtLeastContains('prop', ['a', 'b', 'c']); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('abc')); + isSame(null, $rule->validate('adasdasdasdc')); + + isSame( + '"at_least_contains" at line 0, column "prop". ' . + 'Value "123" must contain one of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('123')), + ); + } + + public function testAllMustContain(): void + { + $rule = new AllMustContain('prop', ['a', 'b', 'c']); + isSame(null, $rule->validate('abc')); + isSame(null, $rule->validate('abdasadasdasdc')); + + isSame( + '"all_must_contain" at line 0, column "prop". ' . + 'Value "ab" must contain all of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('ab')), + ); + isSame( + '"all_must_contain" at line 0, column "prop". ' . 
+ 'Value "ac" must contain all of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('ac')), + ); + } + + public function testStrStartsWith(): void + { + $rule = new StrStartsWith('prop', 'a'); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('abc')); + + isSame( + '"str_starts_with" at line 0, column "prop". Value "" must start with "a".', + \strip_tags((string)$rule->validate('')), + ); + + isSame( + '"str_starts_with" at line 0, column "prop". Value " a" must start with "a".', + \strip_tags((string)$rule->validate(' a')), + ); + } + + public function testStrEndsWith(): void + { + $rule = new StrEndsWith('prop', 'a'); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('cba')); + + isSame( + '"str_ends_with" at line 0, column "prop". Value "" must end with "a".', + \strip_tags((string)$rule->validate('')), + ); + + isSame( + '"str_ends_with" at line 0, column "prop". Value "a " must end with "a".', + \strip_tags((string)$rule->validate('a ')), + ); + } + + public function testStrWordCount(): void + { + $rule = new WordCount('prop', 0); + isSame(null, $rule->validate('')); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have exactly 0 words.', + \strip_tags((string)$rule->validate('cba')), + ); + + $rule = new WordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('cba')), + ); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba 123, 123123" has 1 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('cba 123, 123123')), + ); + + isSame( + '"word_count" at line 0, column "prop". 
Value "a b c" has 3 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('a b c')), + ); + } + + public function testMinWordCount(): void + { + $rule = new MinWordCount('prop', 0); + isSame(null, $rule->validate('cba')); + + $rule = new MinWordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame(null, $rule->validate('asd, asdasd asd')); + isSame(null, $rule->validate('asd, asdasd 1232 asdas')); + isSame( + '"min_word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have at least 2 words.', + \strip_tags((string)$rule->validate('cba')), + ); + isSame( + '"min_word_count" at line 0, column "prop". ' . + 'Value "cba 123, 123123" has 1 words, but must have at least 2 words.', + \strip_tags((string)$rule->validate('cba 123, 123123')), + ); + } + + public function testMaxWordCount(): void + { + $rule = new MaxWordCount('prop', 0); + isSame(null, $rule->validate('')); + + $rule = new MaxWordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame(null, $rule->validate('asd, 1232')); + isSame(null, $rule->validate('asd, 1232 113234324 342 . ..')); + isSame( + '"max_word_count" at line 0, column "prop". ' . + 'Value "asd, asdasd asd 1232 asdas" has 4 words, but must have no more than 2 words.', + \strip_tags((string)$rule->validate('asd, asdasd asd 1232 asdas')), + ); + } }