From 5f4c4ac9932cbea7ef3901d48aec72e92c4b3a18 Mon Sep 17 00:00:00 2001 From: Denis Smet Date: Thu, 14 Mar 2024 03:36:10 +0400 Subject: [PATCH] Add new string and word count validation rules Introduced new validation rules related to string content and word count. The changes include rules for word counting, string beginning and ending content, and required content presence in strings. These modifications expand the ability of the library to perform more intricate validations, aiding in ensuring data consistency and accuracy in CSV files. --- README.md | 139 +++------------------------------- schema-examples/full.json | 7 ++ schema-examples/full.php | 7 ++ schema-examples/full.yml | 8 ++ src/Rules/AllMustContain.php | 37 +++++++++ src/Rules/AtLeastContains.php | 37 +++++++++ src/Rules/MaxWordCount.php | 33 ++++++++ src/Rules/MinWordCount.php | 33 ++++++++ src/Rules/StrEndsWith.php | 34 +++++++++ src/Rules/StrStartsWith.php | 34 +++++++++ src/Rules/WordCount.php | 33 ++++++++ tests/Blueprint/MiscTest.php | 30 +++++--- tests/Blueprint/RulesTest.php | 139 ++++++++++++++++++++++++++++++++++ 13 files changed, 431 insertions(+), 140 deletions(-) create mode 100644 src/Rules/AllMustContain.php create mode 100644 src/Rules/AtLeastContains.php create mode 100644 src/Rules/MaxWordCount.php create mode 100644 src/Rules/MinWordCount.php create mode 100644 src/Rules/StrEndsWith.php create mode 100644 src/Rules/StrStartsWith.php create mode 100644 src/Rules/WordCount.php diff --git a/README.md b/README.md index 4a984806..0ff73c7c 100644 --- a/README.md +++ b/README.md @@ -285,6 +285,8 @@ This gives you great flexibility when validating CSV files. ### Schema file examples +Available formats: [YAML](schema-examples/full.yml), [JSON](schema-examples/full.json), [PHP](schema-examples/full.php). + ```yml # It's a full example of the CSV schema file in YAML format. @@ -323,6 +325,14 @@ columns: only_lowercase: true # String is only lower-case. 
Example: "hello world" only_uppercase: true # String is only upper-case. Example: "HELLO WORLD" only_capitalize: true # String is only capitalized. Example: "Hello World" + word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word) + min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word) + max_word_count: 5 # Integer only. Max count of words in the string. Example: "Hello World! 123" - 2 words only (123 is not a word) + at_least_contains: [ a, b ] # At least one of the strings must be in the CSV value. Case-sensitive. + all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive. + str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix" + str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World" + # Decimal and integer numbers min: 10 # Can be integer or float, negative and positive @@ -355,134 +365,6 @@ columns: ``` -
- Click to see: JSON Format - -```json -{ - "filename_pattern" : "/demo(-\\d+)?\\.csv$/i", - "csv" : { - "header" : true, - "delimiter" : ",", - "quote_char" : "\\", - "enclosure" : "\"", - "encoding" : "utf-8", - "bom" : false - }, - "columns" : [ - { - "name" : "csv_header_name", - "description" : "Lorem ipsum", - "rules" : { - "not_empty" : true, - "exact_value" : "Some string", - "allow_values" : ["y", "n", ""], - "regex" : "\/^[\\d]{2}$\/", - "min_length" : 1, - "max_length" : 10, - "only_trimed" : true, - "only_lowercase" : true, - "only_uppercase" : true, - "only_capitalize" : true, - "min" : 10, - "max" : 100.5, - "precision" : 3, - "min_precision" : 2, - "max_precision" : 4, - "date_format" : "Y-m-d", - "min_date" : "2000-01-02", - "max_date" : "+1 day", - "is_bool" : true, - "is_int" : true, - "is_float" : true, - "is_ip" : true, - "is_url" : true, - "is_email" : true, - "is_domain" : true, - "is_uuid4" : true, - "is_latitude" : true, - "is_longitude" : true, - "cardinal_direction" : true, - "usa_market_name" : true - } - }, - {"name" : "another_column"} - ] -} - -``` - -
- - - - -
- Click to see: PHP Format - -```php - '/demo(-\\d+)?\\.csv$/i', - - 'csv' => [ - 'header' => true, - 'delimiter' => ',', - 'quote_char' => '\\', - 'enclosure' => '"', - 'encoding' => 'utf-8', - 'bom' => false, - ], - - 'columns' => [ - [ - 'name' => 'csv_header_name', - 'description' => 'Lorem ipsum', - 'rules' => [ - 'not_empty' => true, - 'exact_value' => 'Some string', - 'allow_values' => ['y', 'n', ''], - 'regex' => '/^[\\d]{2}$/', - 'min_length' => 1, - 'max_length' => 10, - 'only_trimed' => true, - 'only_lowercase' => true, - 'only_uppercase' => true, - 'only_capitalize' => true, - 'min' => 10, - 'max' => 100.5, - 'precision' => 3, - 'min_precision' => 2, - 'max_precision' => 4, - 'date_format' => 'Y-m-d', - 'min_date' => '2000-01-02', - 'max_date' => '+1 day', - 'is_bool' => true, - 'is_int' => true, - 'is_float' => true, - 'is_ip' => true, - 'is_url' => true, - 'is_email' => true, - 'is_domain' => true, - 'is_uuid4' => true, - 'is_latitude' => true, - 'is_longitude' => true, - 'cardinal_direction' => true, - 'usa_market_name' => true, - ], - ], - ['name' => 'another_column'], - ], -]; - -``` - -
- - - ## Coming soon It's random ideas and plans. No orderings and deadlines. But batch processing is the priority #1. @@ -498,6 +380,7 @@ Batch processing Validation * [x] ~~`filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").~~ * [ ] Flag to ignore file name pattern. It's useful when you have a lot of files and you don't want to validate the file name. +* [ ] Keyword for null value. Configurable. By default, it's an empty string. But you can use `null`, `nil`, `none`, `empty`, etc. * [ ] Agregate rules (like "at least one of the fields should be not empty" or "all values must be unique"). * [ ] Handle empty files and files with only a header row, or only with one line of data. One column wthout header is also possible. * [ ] Using multiple schemas for one csv file. diff --git a/schema-examples/full.json b/schema-examples/full.json index 6897256e..0f186c76 100644 --- a/schema-examples/full.json +++ b/schema-examples/full.json @@ -23,6 +23,13 @@ "only_lowercase" : true, "only_uppercase" : true, "only_capitalize" : true, + "word_count" : 10, + "min_word_count" : 1, + "max_word_count" : 5, + "at_least_contains" : ["a", "b"], + "all_must_contain" : ["a", "b", "c"], + "str_ends_with" : " suffix", + "str_starts_with" : "prefix ", "min" : 10, "max" : 100.5, "precision" : 3, diff --git a/schema-examples/full.php b/schema-examples/full.php index e80e81ad..2019eaea 100644 --- a/schema-examples/full.php +++ b/schema-examples/full.php @@ -41,6 +41,13 @@ 'only_lowercase' => true, 'only_uppercase' => true, 'only_capitalize' => true, + 'word_count' => 10, + 'min_word_count' => 1, + 'max_word_count' => 5, + 'at_least_contains' => ['a', 'b'], + 'all_must_contain' => ['a', 'b', 'c'], + 'str_ends_with' => ' suffix', + 'str_starts_with' => 'prefix ', 'min' => 10, 'max' => 100.5, 'precision' => 3, diff --git a/schema-examples/full.yml b/schema-examples/full.yml index cc69b868..d15adfb1 100644 --- 
a/schema-examples/full.yml +++ b/schema-examples/full.yml @@ -47,6 +47,14 @@ columns: only_lowercase: true # String is only lower-case. Example: "hello world" only_uppercase: true # String is only upper-case. Example: "HELLO WORLD" only_capitalize: true # String is only capitalized. Example: "Hello World" + word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word) + min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word) + max_word_count: 5 # Integer only. Max count of words in the string. Example: "Hello World! 123" - 2 words only (123 is not a word) + at_least_contains: [ a, b ] # At least one of the strings must be in the CSV value. Case-sensitive. + all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive. + str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix" + str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World" + # Decimal and integer numbers min: 10 # Can be integer or float, negative and positive diff --git a/src/Rules/AllMustContain.php b/src/Rules/AllMustContain.php new file mode 100644 index 00000000..507d97a3 --- /dev/null +++ b/src/Rules/AllMustContain.php @@ -0,0 +1,37 @@ +getOptionAsArray(); + if (\count($inclusions) === 0) { + return null; + } + + foreach ($inclusions as $inclusion) { + if (\strpos((string)$cellValue, (string)$inclusion) === false) { + return "Value \"{$cellValue}\" must contain all of the following:" . + ' "["' . \implode('", "', $inclusions) . 
'"]"'; + } + } + + return null; + } +} diff --git a/src/Rules/AtLeastContains.php b/src/Rules/AtLeastContains.php new file mode 100644 index 00000000..5fec1baa --- /dev/null +++ b/src/Rules/AtLeastContains.php @@ -0,0 +1,37 @@ +getOptionAsArray(); + if (\count($inclusions) === 0) { + return null; + } + + foreach ($inclusions as $inclusion) { + if (\strpos((string)$cellValue, (string)$inclusion) !== false) { + return null; + } + } + + return "Value \"{$cellValue}\" must contain one of the following:" . + ' "["' . \implode('", "', $inclusions) . '"]"'; + } +} diff --git a/src/Rules/MaxWordCount.php b/src/Rules/MaxWordCount.php new file mode 100644 index 00000000..d7ea0734 --- /dev/null +++ b/src/Rules/MaxWordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count > $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . + "but must have no more than {$wordCount} words"; + } + + return null; + } +} diff --git a/src/Rules/MinWordCount.php b/src/Rules/MinWordCount.php new file mode 100644 index 00000000..1c80f4d2 --- /dev/null +++ b/src/Rules/MinWordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count < $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . 
+ "but must have at least {$wordCount} words"; + } + + return null; + } +} diff --git a/src/Rules/StrEndsWith.php b/src/Rules/StrEndsWith.php new file mode 100644 index 00000000..639e742a --- /dev/null +++ b/src/Rules/StrEndsWith.php @@ -0,0 +1,34 @@ +getOptionAsString(); + if ($prefix === '') { + return null; + } + + if (!\str_ends_with((string)$cellValue, $prefix)) { + return "Value \"{$cellValue}\" must end with \"{$prefix}\""; + } + + return null; + } +} diff --git a/src/Rules/StrStartsWith.php b/src/Rules/StrStartsWith.php new file mode 100644 index 00000000..50a22f09 --- /dev/null +++ b/src/Rules/StrStartsWith.php @@ -0,0 +1,34 @@ +getOptionAsString(); + if ($prefix === '') { + return null; + } + + if (!\str_starts_with((string)$cellValue, $prefix)) { + return "Value \"{$cellValue}\" must start with \"{$prefix}\""; + } + + return null; + } +} diff --git a/src/Rules/WordCount.php b/src/Rules/WordCount.php new file mode 100644 index 00000000..2dbc66c8 --- /dev/null +++ b/src/Rules/WordCount.php @@ -0,0 +1,33 @@ +getOptionAsInt(); + $count = \str_word_count((string)$cellValue); + + if ($count !== $wordCount) { + return "Value \"{$cellValue}\" has {$count} words, " . + "but must have exactly {$wordCount} words"; + } + + return null; + } +} diff --git a/tests/Blueprint/MiscTest.php b/tests/Blueprint/MiscTest.php index 84e48744..2ac6f844 100644 --- a/tests/Blueprint/MiscTest.php +++ b/tests/Blueprint/MiscTest.php @@ -82,7 +82,13 @@ public function testFullListOfRules(): void } \sort($rulesInCode); - isSame($rulesInCode, $rulesInConfig); + $diffAsErrMessage = \array_reduce( + \array_diff($rulesInCode, $rulesInConfig), + static fn (string $carry, string $item) => $carry . 
"{$item}: FIXME\n", + '', + ); + + isSame($rulesInCode, $rulesInConfig, $diffAsErrMessage); } public function testCsvStrutureDefaultValues(): void @@ -105,15 +111,15 @@ public function testCheckYmlSchemaExampleInReadme(): void ); } - public function testCheckPhpSchemaExampleInReadme(): void - { - $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14); - } - - public function testCheckJsonSchemaExampleInReadme(): void - { - $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0); - } + // public function testCheckPhpSchemaExampleInReadme(): void + // { + // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14); + // } + // + // public function testCheckJsonSchemaExampleInReadme(): void + // { + // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0); + // } public function testCompareExamplesWithOrig(): void { @@ -125,8 +131,8 @@ public function testCompareExamplesWithOrig(): void // file_put_contents("{$basepath}.php", (string)phpArray($origYml)); // file_put_contents("{$basepath}.json", (string)json($origYml)); - isSame($origYml, phpArray("{$basepath}.php")->getArrayCopy(), 'PHP config is invalid'); - isSame($origYml, json("{$basepath}.json")->getArrayCopy(), 'JSON config is invalid'); + isSame((string)phpArray($origYml), (string)phpArray("{$basepath}.php"), 'PHP config is invalid'); + isSame((string)json($origYml), (string)json("{$basepath}.json"), 'JSON config is invalid'); } public function testFindFiles(): void diff --git a/tests/Blueprint/RulesTest.php b/tests/Blueprint/RulesTest.php index bd3a12da..1074fb18 100644 --- a/tests/Blueprint/RulesTest.php +++ b/tests/Blueprint/RulesTest.php @@ -16,7 +16,9 @@ namespace JBZoo\PHPUnit\Blueprint; +use JBZoo\CsvBlueprint\Rules\AllMustContain; use JBZoo\CsvBlueprint\Rules\AllowValues; +use JBZoo\CsvBlueprint\Rules\AtLeastContains; 
use JBZoo\CsvBlueprint\Rules\CardinalDirection; use JBZoo\CsvBlueprint\Rules\DateFormat; use JBZoo\CsvBlueprint\Rules\ExactValue; @@ -34,17 +36,22 @@ use JBZoo\CsvBlueprint\Rules\MaxDate; use JBZoo\CsvBlueprint\Rules\MaxLength; use JBZoo\CsvBlueprint\Rules\MaxPrecision; +use JBZoo\CsvBlueprint\Rules\MaxWordCount; use JBZoo\CsvBlueprint\Rules\Min; use JBZoo\CsvBlueprint\Rules\MinDate; use JBZoo\CsvBlueprint\Rules\MinLength; use JBZoo\CsvBlueprint\Rules\MinPrecision; +use JBZoo\CsvBlueprint\Rules\MinWordCount; use JBZoo\CsvBlueprint\Rules\NotEmpty; use JBZoo\CsvBlueprint\Rules\OnlyCapitalize; use JBZoo\CsvBlueprint\Rules\OnlyLowercase; use JBZoo\CsvBlueprint\Rules\OnlyUppercase; use JBZoo\CsvBlueprint\Rules\Precision; use JBZoo\CsvBlueprint\Rules\Regex; +use JBZoo\CsvBlueprint\Rules\StrEndsWith; +use JBZoo\CsvBlueprint\Rules\StrStartsWith; use JBZoo\CsvBlueprint\Rules\UsaMarketName; +use JBZoo\CsvBlueprint\Rules\WordCount; use JBZoo\PHPUnit\PHPUnit; use JBZoo\Utils\Str; @@ -714,4 +721,136 @@ public function testIsUuid4(): void $rule = new IsUuid4('prop', false); isSame(null, $rule->validate('123')); } + + public function testMustContain(): void + { + $rule = new AtLeastContains('prop', ['a', 'b', 'c']); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('abc')); + isSame(null, $rule->validate('adasdasdasdc')); + + isSame( + '"at_least_contains" at line 0, column "prop". ' . + 'Value "123" must contain one of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('123')), + ); + } + + public function testAllMustContain(): void + { + $rule = new AllMustContain('prop', ['a', 'b', 'c']); + isSame(null, $rule->validate('abc')); + isSame(null, $rule->validate('abdasadasdasdc')); + + isSame( + '"all_must_contain" at line 0, column "prop". ' . + 'Value "ab" must contain all of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('ab')), + ); + isSame( + '"all_must_contain" at line 0, column "prop". ' . 
+ 'Value "ac" must contain all of the following: "["a", "b", "c"]".', + \strip_tags((string)$rule->validate('ac')), + ); + } + + public function testStrStartsWith(): void + { + $rule = new StrStartsWith('prop', 'a'); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('abc')); + + isSame( + '"str_starts_with" at line 0, column "prop". Value "" must start with "a".', + \strip_tags((string)$rule->validate('')), + ); + + isSame( + '"str_starts_with" at line 0, column "prop". Value " a" must start with "a".', + \strip_tags((string)$rule->validate(' a')), + ); + } + + public function testStrEndsWith(): void + { + $rule = new StrEndsWith('prop', 'a'); + isSame(null, $rule->validate('a')); + isSame(null, $rule->validate('cba')); + + isSame( + '"str_ends_with" at line 0, column "prop". Value "" must end with "a".', + \strip_tags((string)$rule->validate('')), + ); + + isSame( + '"str_ends_with" at line 0, column "prop". Value "a " must end with "a".', + \strip_tags((string)$rule->validate('a ')), + ); + } + + public function testStrWordCount(): void + { + $rule = new WordCount('prop', 0); + isSame(null, $rule->validate('')); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have exactly 0 words.', + \strip_tags((string)$rule->validate('cba')), + ); + + $rule = new WordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('cba')), + ); + isSame( + '"word_count" at line 0, column "prop". ' . + 'Value "cba 123, 123123" has 1 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('cba 123, 123123')), + ); + + isSame( + '"word_count" at line 0, column "prop". 
Value "a b c" has 3 words, but must have exactly 2 words.', + \strip_tags((string)$rule->validate('a b c')), + ); + } + + public function testMinWordCount(): void + { + $rule = new MinWordCount('prop', 0); + isSame(null, $rule->validate('cba')); + + $rule = new MinWordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame(null, $rule->validate('asd, asdasd asd')); + isSame(null, $rule->validate('asd, asdasd 1232 asdas')); + isSame( + '"min_word_count" at line 0, column "prop". ' . + 'Value "cba" has 1 words, but must have at least 2 words.', + \strip_tags((string)$rule->validate('cba')), + ); + isSame( + '"min_word_count" at line 0, column "prop". ' . + 'Value "cba 123, 123123" has 1 words, but must have at least 2 words.', + \strip_tags((string)$rule->validate('cba 123, 123123')), + ); + } + + public function testMaxWordCount(): void + { + $rule = new MaxWordCount('prop', 0); + isSame(null, $rule->validate('')); + + $rule = new MaxWordCount('prop', 2); + isSame(null, $rule->validate('asd, asdasd')); + isSame(null, $rule->validate('asd, 1232')); + isSame(null, $rule->validate('asd, 1232 113234324 342 . ..')); + isSame( + '"max_word_count" at line 0, column "prop". ' . + 'Value "asd, asdasd asd 1232 asdas" has 4 words, but must have no more than 2 words.', + \strip_tags((string)$rule->validate('asd, asdasd asd 1232 asdas')), + ); + } }