diff --git a/README.md b/README.md
index 4a984806..0ff73c7c 100644
--- a/README.md
+++ b/README.md
@@ -285,6 +285,8 @@ This gives you great flexibility when validating CSV files.
### Schema file examples
+Available formats: [YAML](schema-examples/full.yml), [JSON](schema-examples/full.json), [PHP](schema-examples/full.php).
+
```yml
# It's a full example of the CSV schema file in YAML format.
@@ -323,6 +325,14 @@ columns:
only_lowercase: true # String is only lower-case. Example: "hello world"
only_uppercase: true # String is only upper-case. Example: "HELLO WORLD"
only_capitalize: true # String is only capitalized. Example: "Hello World"
+ word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word)
+ min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word)
+ max_word_count: 5 # Integer only. Max count of words in the string. Example: "Hello World! 123" - 2 words only (123 is not a word)
+ at_least_contains: [ a, b ] # At least one of the strings must be in the CSV value. Case-sensitive.
+ all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive.
+ str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix"
+ str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World"
+
# Decimal and integer numbers
min: 10 # Can be integer or float, negative and positive
@@ -355,134 +365,6 @@ columns:
```
-
- Click to see: JSON Format
-
-```json
-{
- "filename_pattern" : "/demo(-\\d+)?\\.csv$/i",
- "csv" : {
- "header" : true,
- "delimiter" : ",",
- "quote_char" : "\\",
- "enclosure" : "\"",
- "encoding" : "utf-8",
- "bom" : false
- },
- "columns" : [
- {
- "name" : "csv_header_name",
- "description" : "Lorem ipsum",
- "rules" : {
- "not_empty" : true,
- "exact_value" : "Some string",
- "allow_values" : ["y", "n", ""],
- "regex" : "\/^[\\d]{2}$\/",
- "min_length" : 1,
- "max_length" : 10,
- "only_trimed" : true,
- "only_lowercase" : true,
- "only_uppercase" : true,
- "only_capitalize" : true,
- "min" : 10,
- "max" : 100.5,
- "precision" : 3,
- "min_precision" : 2,
- "max_precision" : 4,
- "date_format" : "Y-m-d",
- "min_date" : "2000-01-02",
- "max_date" : "+1 day",
- "is_bool" : true,
- "is_int" : true,
- "is_float" : true,
- "is_ip" : true,
- "is_url" : true,
- "is_email" : true,
- "is_domain" : true,
- "is_uuid4" : true,
- "is_latitude" : true,
- "is_longitude" : true,
- "cardinal_direction" : true,
- "usa_market_name" : true
- }
- },
- {"name" : "another_column"}
- ]
-}
-
-```
-
-
-
-
-
-
-
- Click to see: PHP Format
-
-```php
- '/demo(-\\d+)?\\.csv$/i',
-
- 'csv' => [
- 'header' => true,
- 'delimiter' => ',',
- 'quote_char' => '\\',
- 'enclosure' => '"',
- 'encoding' => 'utf-8',
- 'bom' => false,
- ],
-
- 'columns' => [
- [
- 'name' => 'csv_header_name',
- 'description' => 'Lorem ipsum',
- 'rules' => [
- 'not_empty' => true,
- 'exact_value' => 'Some string',
- 'allow_values' => ['y', 'n', ''],
- 'regex' => '/^[\\d]{2}$/',
- 'min_length' => 1,
- 'max_length' => 10,
- 'only_trimed' => true,
- 'only_lowercase' => true,
- 'only_uppercase' => true,
- 'only_capitalize' => true,
- 'min' => 10,
- 'max' => 100.5,
- 'precision' => 3,
- 'min_precision' => 2,
- 'max_precision' => 4,
- 'date_format' => 'Y-m-d',
- 'min_date' => '2000-01-02',
- 'max_date' => '+1 day',
- 'is_bool' => true,
- 'is_int' => true,
- 'is_float' => true,
- 'is_ip' => true,
- 'is_url' => true,
- 'is_email' => true,
- 'is_domain' => true,
- 'is_uuid4' => true,
- 'is_latitude' => true,
- 'is_longitude' => true,
- 'cardinal_direction' => true,
- 'usa_market_name' => true,
- ],
- ],
- ['name' => 'another_column'],
- ],
-];
-
-```
-
-
-
-
-
## Coming soon
These are random ideas and plans, in no particular order and without deadlines. However, batch processing is priority #1.
@@ -498,6 +380,7 @@ Batch processing
Validation
* [x] ~~`filename_pattern` validation with regex (like "all files in the folder should be in the format `/^[\d]{4}-[\d]{2}-[\d]{2}\.csv$/`").~~
* [ ] Flag to ignore file name pattern. It's useful when you have a lot of files and you don't want to validate the file name.
+* [ ] Configurable keyword for null values. By default, it's an empty string, but you can use `null`, `nil`, `none`, `empty`, etc.
* [ ] Aggregate rules (like "at least one of the fields should be not empty" or "all values must be unique").
* [ ] Handle empty files and files with only a header row, or with only one line of data. One column without a header is also possible.
* [ ] Using multiple schemas for one csv file.
diff --git a/schema-examples/full.json b/schema-examples/full.json
index 6897256e..0f186c76 100644
--- a/schema-examples/full.json
+++ b/schema-examples/full.json
@@ -23,6 +23,13 @@
"only_lowercase" : true,
"only_uppercase" : true,
"only_capitalize" : true,
+ "word_count" : 10,
+ "min_word_count" : 1,
+ "max_word_count" : 5,
+ "at_least_contains" : ["a", "b"],
+ "all_must_contain" : ["a", "b", "c"],
+ "str_ends_with" : " suffix",
+ "str_starts_with" : "prefix ",
"min" : 10,
"max" : 100.5,
"precision" : 3,
diff --git a/schema-examples/full.php b/schema-examples/full.php
index e80e81ad..2019eaea 100644
--- a/schema-examples/full.php
+++ b/schema-examples/full.php
@@ -41,6 +41,13 @@
'only_lowercase' => true,
'only_uppercase' => true,
'only_capitalize' => true,
+ 'word_count' => 10,
+ 'min_word_count' => 1,
+ 'max_word_count' => 5,
+ 'at_least_contains' => ['a', 'b'],
+ 'all_must_contain' => ['a', 'b', 'c'],
+ 'str_ends_with' => ' suffix',
+ 'str_starts_with' => 'prefix ',
'min' => 10,
'max' => 100.5,
'precision' => 3,
diff --git a/schema-examples/full.yml b/schema-examples/full.yml
index cc69b868..d15adfb1 100644
--- a/schema-examples/full.yml
+++ b/schema-examples/full.yml
@@ -47,6 +47,14 @@ columns:
only_lowercase: true # String is only lower-case. Example: "hello world"
only_uppercase: true # String is only upper-case. Example: "HELLO WORLD"
only_capitalize: true # String is only capitalized. Example: "Hello World"
+ word_count: 10 # Integer only. Exact count of words in the string. Example: "Hello World, 123" - 2 words only (123 is not a word)
+ min_word_count: 1 # Integer only. Min count of words in the string. Example: "Hello World. 123" - 2 words only (123 is not a word)
+ max_word_count: 5 # Integer only. Max count of words in the string. Example: "Hello World! 123" - 2 words only (123 is not a word)
+ at_least_contains: [ a, b ] # At least one of the strings must be in the CSV value. Case-sensitive.
+ all_must_contain: [ a, b, c ] # All the strings must be part of a CSV value. Case-sensitive.
+ str_ends_with: " suffix" # Case-sensitive. Example: "Hello World suffix"
+ str_starts_with: "prefix " # Case-sensitive. Example: "prefix Hello World"
+
# Decimal and integer numbers
min: 10 # Can be integer or float, negative and positive
diff --git a/src/Rules/AllMustContain.php b/src/Rules/AllMustContain.php
new file mode 100644
index 00000000..507d97a3
--- /dev/null
+++ b/src/Rules/AllMustContain.php
@@ -0,0 +1,37 @@
+getOptionAsArray();
+ if (\count($inclusions) === 0) {
+ return null;
+ }
+
+ foreach ($inclusions as $inclusion) {
+ if (\strpos((string)$cellValue, (string)$inclusion) === false) {
+ return "Value \"{$cellValue}\" must contain all of the following:" .
+ ' "["' . \implode('", "', $inclusions) . '"]"';
+ }
+ }
+
+ return null;
+ }
+}
diff --git a/src/Rules/AtLeastContains.php b/src/Rules/AtLeastContains.php
new file mode 100644
index 00000000..5fec1baa
--- /dev/null
+++ b/src/Rules/AtLeastContains.php
@@ -0,0 +1,37 @@
+getOptionAsArray();
+ if (\count($inclusions) === 0) {
+ return null;
+ }
+
+ foreach ($inclusions as $inclusion) {
+ if (\strpos((string)$cellValue, (string)$inclusion) !== false) {
+ return null;
+ }
+ }
+
+ return "Value \"{$cellValue}\" must contain one of the following:" .
+ ' "["' . \implode('", "', $inclusions) . '"]"';
+ }
+}
diff --git a/src/Rules/MaxWordCount.php b/src/Rules/MaxWordCount.php
new file mode 100644
index 00000000..d7ea0734
--- /dev/null
+++ b/src/Rules/MaxWordCount.php
@@ -0,0 +1,33 @@
+getOptionAsInt();
+ $count = \str_word_count((string)$cellValue);
+
+ if ($count > $wordCount) {
+ return "Value \"{$cellValue}\" has {$count} words, " .
+ "but must have no more than {$wordCount} words";
+ }
+
+ return null;
+ }
+}
diff --git a/src/Rules/MinWordCount.php b/src/Rules/MinWordCount.php
new file mode 100644
index 00000000..1c80f4d2
--- /dev/null
+++ b/src/Rules/MinWordCount.php
@@ -0,0 +1,33 @@
+getOptionAsInt();
+ $count = \str_word_count((string)$cellValue);
+
+ if ($count < $wordCount) {
+ return "Value \"{$cellValue}\" has {$count} words, " .
+ "but must have at least {$wordCount} words";
+ }
+
+ return null;
+ }
+}
diff --git a/src/Rules/StrEndsWith.php b/src/Rules/StrEndsWith.php
new file mode 100644
index 00000000..639e742a
--- /dev/null
+++ b/src/Rules/StrEndsWith.php
@@ -0,0 +1,34 @@
+getOptionAsString();
+ if ($prefix === '') {
+ return null;
+ }
+
+ if (!\str_ends_with((string)$cellValue, $prefix)) {
+ return "Value \"{$cellValue}\" must end with \"{$prefix}\"";
+ }
+
+ return null;
+ }
+}
diff --git a/src/Rules/StrStartsWith.php b/src/Rules/StrStartsWith.php
new file mode 100644
index 00000000..50a22f09
--- /dev/null
+++ b/src/Rules/StrStartsWith.php
@@ -0,0 +1,34 @@
+getOptionAsString();
+ if ($prefix === '') {
+ return null;
+ }
+
+ if (!\str_starts_with((string)$cellValue, $prefix)) {
+ return "Value \"{$cellValue}\" must start with \"{$prefix}\"";
+ }
+
+ return null;
+ }
+}
diff --git a/src/Rules/WordCount.php b/src/Rules/WordCount.php
new file mode 100644
index 00000000..2dbc66c8
--- /dev/null
+++ b/src/Rules/WordCount.php
@@ -0,0 +1,33 @@
+getOptionAsInt();
+ $count = \str_word_count((string)$cellValue);
+
+ if ($count !== $wordCount) {
+ return "Value \"{$cellValue}\" has {$count} words, " .
+ "but must have exactly {$wordCount} words";
+ }
+
+ return null;
+ }
+}
diff --git a/tests/Blueprint/MiscTest.php b/tests/Blueprint/MiscTest.php
index 84e48744..2ac6f844 100644
--- a/tests/Blueprint/MiscTest.php
+++ b/tests/Blueprint/MiscTest.php
@@ -82,7 +82,13 @@ public function testFullListOfRules(): void
}
\sort($rulesInCode);
- isSame($rulesInCode, $rulesInConfig);
+ $diffAsErrMessage = \array_reduce(
+ \array_diff($rulesInCode, $rulesInConfig),
+ static fn (string $carry, string $item) => $carry . "{$item}: FIXME\n",
+ '',
+ );
+
+ isSame($rulesInCode, $rulesInConfig, $diffAsErrMessage);
}
public function testCsvStrutureDefaultValues(): void
@@ -105,15 +111,15 @@ public function testCheckYmlSchemaExampleInReadme(): void
);
}
- public function testCheckPhpSchemaExampleInReadme(): void
- {
- $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14);
- }
-
- public function testCheckJsonSchemaExampleInReadme(): void
- {
- $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0);
- }
+ // public function testCheckPhpSchemaExampleInReadme(): void
+ // {
+ // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.php', 'php', 'PHP Format', 14);
+ // }
+ //
+ // public function testCheckJsonSchemaExampleInReadme(): void
+ // {
+ // $this->testCheckExampleInReadme(PROJECT_ROOT . '/schema-examples/full.json', 'json', 'JSON Format', 0);
+ // }
public function testCompareExamplesWithOrig(): void
{
@@ -125,8 +131,8 @@ public function testCompareExamplesWithOrig(): void
// file_put_contents("{$basepath}.php", (string)phpArray($origYml));
// file_put_contents("{$basepath}.json", (string)json($origYml));
- isSame($origYml, phpArray("{$basepath}.php")->getArrayCopy(), 'PHP config is invalid');
- isSame($origYml, json("{$basepath}.json")->getArrayCopy(), 'JSON config is invalid');
+ isSame((string)phpArray($origYml), (string)phpArray("{$basepath}.php"), 'PHP config is invalid');
+ isSame((string)json($origYml), (string)json("{$basepath}.json"), 'JSON config is invalid');
}
public function testFindFiles(): void
diff --git a/tests/Blueprint/RulesTest.php b/tests/Blueprint/RulesTest.php
index bd3a12da..1074fb18 100644
--- a/tests/Blueprint/RulesTest.php
+++ b/tests/Blueprint/RulesTest.php
@@ -16,7 +16,9 @@
namespace JBZoo\PHPUnit\Blueprint;
+use JBZoo\CsvBlueprint\Rules\AllMustContain;
use JBZoo\CsvBlueprint\Rules\AllowValues;
+use JBZoo\CsvBlueprint\Rules\AtLeastContains;
use JBZoo\CsvBlueprint\Rules\CardinalDirection;
use JBZoo\CsvBlueprint\Rules\DateFormat;
use JBZoo\CsvBlueprint\Rules\ExactValue;
@@ -34,17 +36,22 @@
use JBZoo\CsvBlueprint\Rules\MaxDate;
use JBZoo\CsvBlueprint\Rules\MaxLength;
use JBZoo\CsvBlueprint\Rules\MaxPrecision;
+use JBZoo\CsvBlueprint\Rules\MaxWordCount;
use JBZoo\CsvBlueprint\Rules\Min;
use JBZoo\CsvBlueprint\Rules\MinDate;
use JBZoo\CsvBlueprint\Rules\MinLength;
use JBZoo\CsvBlueprint\Rules\MinPrecision;
+use JBZoo\CsvBlueprint\Rules\MinWordCount;
use JBZoo\CsvBlueprint\Rules\NotEmpty;
use JBZoo\CsvBlueprint\Rules\OnlyCapitalize;
use JBZoo\CsvBlueprint\Rules\OnlyLowercase;
use JBZoo\CsvBlueprint\Rules\OnlyUppercase;
use JBZoo\CsvBlueprint\Rules\Precision;
use JBZoo\CsvBlueprint\Rules\Regex;
+use JBZoo\CsvBlueprint\Rules\StrEndsWith;
+use JBZoo\CsvBlueprint\Rules\StrStartsWith;
use JBZoo\CsvBlueprint\Rules\UsaMarketName;
+use JBZoo\CsvBlueprint\Rules\WordCount;
use JBZoo\PHPUnit\PHPUnit;
use JBZoo\Utils\Str;
@@ -714,4 +721,136 @@ public function testIsUuid4(): void
$rule = new IsUuid4('prop', false);
isSame(null, $rule->validate('123'));
}
+
+ public function testMustContain(): void // Covers the AtLeastContains rule ("at_least_contains").
+ {
+ $rule = new AtLeastContains('prop', ['a', 'b', 'c']);
+ isSame(null, $rule->validate('a')); // null means the rule passed: a single needle is enough
+ isSame(null, $rule->validate('abc'));
+ isSame(null, $rule->validate('adasdasdasdc'));
+ // A value containing none of the needles yields an error that lists every needle.
+ isSame(
+ '"at_least_contains" at line 0, column "prop". ' .
+ 'Value "123" must contain one of the following: "["a", "b", "c"]".',
+ \strip_tags((string)$rule->validate('123')),
+ );
+ }
+
+ public function testAllMustContain(): void // Covers the AllMustContain rule ("all_must_contain").
+ {
+ $rule = new AllMustContain('prop', ['a', 'b', 'c']);
+ isSame(null, $rule->validate('abc')); // null means the rule passed: every needle occurs in the value
+ isSame(null, $rule->validate('abdasadasdasdc'));
+ // Missing any single needle ("c" in "ab", "b" in "ac") fails; the message lists all needles.
+ isSame(
+ '"all_must_contain" at line 0, column "prop". ' .
+ 'Value "ab" must contain all of the following: "["a", "b", "c"]".',
+ \strip_tags((string)$rule->validate('ab')),
+ );
+ isSame(
+ '"all_must_contain" at line 0, column "prop". ' .
+ 'Value "ac" must contain all of the following: "["a", "b", "c"]".',
+ \strip_tags((string)$rule->validate('ac')),
+ );
+ }
+
+ public function testStrStartsWith(): void // Covers the StrStartsWith rule ("str_starts_with").
+ {
+ $rule = new StrStartsWith('prop', 'a');
+ isSame(null, $rule->validate('a')); // null means the rule passed
+ isSame(null, $rule->validate('abc'));
+ // An empty value does not start with the required prefix.
+ isSame(
+ '"str_starts_with" at line 0, column "prop". Value "" must start with "a".',
+ \strip_tags((string)$rule->validate('')),
+ );
+ // The prefix must be the very first character: a leading space makes the value fail.
+ isSame(
+ '"str_starts_with" at line 0, column "prop". Value " a" must start with "a".',
+ \strip_tags((string)$rule->validate(' a')),
+ );
+ }
+
+ public function testStrEndsWith(): void // Covers the StrEndsWith rule ("str_ends_with").
+ {
+ $rule = new StrEndsWith('prop', 'a');
+ isSame(null, $rule->validate('a')); // null means the rule passed
+ isSame(null, $rule->validate('cba'));
+ // An empty value does not end with the required suffix.
+ isSame(
+ '"str_ends_with" at line 0, column "prop". Value "" must end with "a".',
+ \strip_tags((string)$rule->validate('')),
+ );
+ // The suffix must be the very last character: a trailing space makes the value fail.
+ isSame(
+ '"str_ends_with" at line 0, column "prop". Value "a " must end with "a".',
+ \strip_tags((string)$rule->validate('a ')),
+ );
+ }
+
+ public function testStrWordCount(): void // Covers the WordCount rule ("word_count"): exact number of words.
+ {
+ $rule = new WordCount('prop', 0); // expected count 0: the empty string passes, a one-word value fails
+ isSame(null, $rule->validate(''));
+ isSame(
+ '"word_count" at line 0, column "prop". ' .
+ 'Value "cba" has 1 words, but must have exactly 0 words.',
+ \strip_tags((string)$rule->validate('cba')),
+ );
+ // Expected count 2: digit-only tokens are not counted, so "cba 123, 123123" is reported as 1 word.
+ $rule = new WordCount('prop', 2);
+ isSame(null, $rule->validate('asd, asdasd'));
+ isSame(
+ '"word_count" at line 0, column "prop". ' .
+ 'Value "cba" has 1 words, but must have exactly 2 words.',
+ \strip_tags((string)$rule->validate('cba')),
+ );
+ isSame(
+ '"word_count" at line 0, column "prop". ' .
+ 'Value "cba 123, 123123" has 1 words, but must have exactly 2 words.',
+ \strip_tags((string)$rule->validate('cba 123, 123123')),
+ );
+ // Having more words than expected fails just like having fewer.
+ isSame(
+ '"word_count" at line 0, column "prop". Value "a b c" has 3 words, but must have exactly 2 words.',
+ \strip_tags((string)$rule->validate('a b c')),
+ );
+ }
+
+ public function testMinWordCount(): void // Covers the MinWordCount rule ("min_word_count").
+ {
+ $rule = new MinWordCount('prop', 0); // a minimum of 0 is satisfied by a one-word value
+ isSame(null, $rule->validate('cba'));
+ // Minimum of 2: values with 2 or more words pass; digit-only tokens are not counted as words.
+ $rule = new MinWordCount('prop', 2);
+ isSame(null, $rule->validate('asd, asdasd'));
+ isSame(null, $rule->validate('asd, asdasd asd'));
+ isSame(null, $rule->validate('asd, asdasd 1232 asdas'));
+ isSame(
+ '"min_word_count" at line 0, column "prop". ' .
+ 'Value "cba" has 1 words, but must have at least 2 words.',
+ \strip_tags((string)$rule->validate('cba')),
+ );
+ isSame(
+ '"min_word_count" at line 0, column "prop". ' .
+ 'Value "cba 123, 123123" has 1 words, but must have at least 2 words.',
+ \strip_tags((string)$rule->validate('cba 123, 123123')),
+ );
+ }
+
+ public function testMaxWordCount(): void // Covers the MaxWordCount rule ("max_word_count").
+ {
+ $rule = new MaxWordCount('prop', 0); // a maximum of 0 is satisfied by the empty string
+ isSame(null, $rule->validate(''));
+ // Maximum of 2: digit-only tokens and bare punctuation are not counted toward the limit.
+ $rule = new MaxWordCount('prop', 2);
+ isSame(null, $rule->validate('asd, asdasd'));
+ isSame(null, $rule->validate('asd, 1232'));
+ isSame(null, $rule->validate('asd, 1232 113234324 342 . ..'));
+ isSame(
+ '"max_word_count" at line 0, column "prop". ' .
+ 'Value "asd, asdasd asd 1232 asdas" has 4 words, but must have no more than 2 words.',
+ \strip_tags((string)$rule->validate('asd, asdasd asd 1232 asdas')),
+ );
+ }
}