Skip to content

Commit

Permalink
Implement column validation and enhance error handling (#65)
Browse files Browse the repository at this point in the history
This commit introduces a new feature to validate column data in CSV
files. A new method, `validateColumn`, has been added to the
CsvValidator class, and the `validateRule` method in the AbstractRule
class now includes exception handling. Additionally, the interpretation
of validation results has been adjusted and some variable and format
inconsistencies were corrected across multiple classes and files. Tests
have been updated to reflect these changes.
  • Loading branch information
SmetDenis committed Mar 20, 2024
1 parent 14bd208 commit fa8ed9e
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 32 deletions.
3 changes: 2 additions & 1 deletion README.md
Expand Up @@ -530,6 +530,7 @@ Schema is invalid: ./tests/schemas/demo_invalid.yml
| Line | id:Column | Rule | Message |
+-------+------------------+------------------+------------------------------------------------------------------------------------------------------+
| undef | | filename_pattern | Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i" |
| 1 | | csv.header | Columns not found in CSV: "wrong_column_name" |
| 6 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 11 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 1 | 1:City | ag:is_unique | Column has non-unique values. Unique: 9, total: 10 |
Expand All @@ -544,7 +545,7 @@ Schema is invalid: ./tests/schemas/demo_invalid.yml
+-------+------------------+------------------+----------------------- demo.csv ---------------------------------------------------------------------+
Found 9 issues in CSV file.
Found 10 issues in CSV file.
Found 2 issues in schema.
```
Expand Down
17 changes: 13 additions & 4 deletions src/Rules/AbstarctRule.php
Expand Up @@ -70,10 +70,19 @@ public function validate(array|string $cellValue, int $line = ColumnValidator::F
}

if (\method_exists($this, 'validateRule')) {
/** @phan-suppress-next-line PhanUndeclaredMethod */
$error = $this->validateRule($cellValue);
if ($error !== null) {
return new Error($this->ruleCode, $error, $this->columnNameId, $line);
try {
/** @phan-suppress-next-line PhanUndeclaredMethod */
$error = $this->validateRule($cellValue);
if ($error !== null) {
return new Error($this->ruleCode, $error, $this->columnNameId, $line);
}
} catch (\Exception $e) {
return new Error(
$this->ruleCode,
"Unexpected error: {$e->getMessage()}",
$this->columnNameId,
$line,
);
}
} else {
throw new \RuntimeException('Method "validateRule" not found in ' . static::class);
Expand Down
42 changes: 37 additions & 5 deletions src/Validators/CsvValidator.php
Expand Up @@ -38,6 +38,7 @@ public function validate(bool $quickStop = false): ErrorSuite
return $this->errors
->addErrorSuit($this->validateFile($quickStop))
->addErrorSuit($this->validateHeader($quickStop))
->addErrorSuit($this->validateColumn($quickStop))
->addErrorSuit($this->validateLines($quickStop));
}

Expand Down Expand Up @@ -72,10 +73,10 @@ private function validateHeader(bool $quickStop = false): ErrorSuite

private function validateLines(bool $quickStop = false): ErrorSuite
{
$errors = new ErrorSuite();
$columns = $this->schema->getColumnsMappedByHeader($this->csv->getHeader());
$errors = new ErrorSuite();
$realColumns = $this->schema->getColumnsMappedByHeader($this->csv->getHeader());

foreach ($columns as $column) {
foreach ($realColumns as $column) {
$columValues = [];
if ($column === null) {
continue;
Expand Down Expand Up @@ -107,8 +108,8 @@ private function validateFile(bool $quickStop = false): ErrorSuite
) {
$error = new Error(
'filename_pattern',
'Filename "<c>' . Utils::cutPath($this->csv->getCsvFilename()) .
"</c>\" does not match pattern: \"<c>{$filenamePattern}</c>\"",
'Filename "<c>' . Utils::cutPath($this->csv->getCsvFilename()) . '</c>" ' .
"does not match pattern: \"<c>{$filenamePattern}</c>\"",
'',
Error::UNDEFINED_LINE,
);
Expand All @@ -122,4 +123,35 @@ private function validateFile(bool $quickStop = false): ErrorSuite

return $errors;
}

/**
 * Reports schema columns that are absent from the CSV header.
 *
 * Nothing is checked when the schema's CSV structure declares that the file has no header.
 * Otherwise the keys of the schema columns are compared with the keys of the columns
 * mapped from the actual CSV header; every schema key without a counterpart is listed
 * in a single "csv.header" error.
 *
 * @param bool $quickStop Kept for parity with the sibling validate* methods. This check adds
 *                        at most one error, so the flag does not change the returned suite.
 *
 * @return ErrorSuite Empty suite, or a suite holding one "csv.header" error.
 */
private function validateColumn(bool $quickStop): ErrorSuite
{
    $suite = new ErrorSuite();

    if ($this->schema->getCsvStructure()->isHeader()) {
        $csvColumnKeys    = \array_keys($this->schema->getColumnsMappedByHeader($this->csv->getHeader()));
        $schemaColumnKeys = \array_keys($this->schema->getColumns());

        // Schema keys that have no matching entry among the columns mapped from the header.
        $missingKeys = \array_diff($schemaColumnKeys, $csvColumnKeys);

        if ($missingKeys !== []) {
            $suite->addError(
                new Error(
                    'csv.header',
                    'Columns not found in CSV: "<c>' . \implode(', ', $missingKeys) . '</c>"',
                    '',
                    ColumnValidator::FALLBACK_LINE,
                ),
            );
        }
    }

    return $suite;
}
}
42 changes: 32 additions & 10 deletions tests/Commands/ValidateCsvTest.php
Expand Up @@ -75,6 +75,7 @@ public function testValidateOneFileNegativeTable(): void
| Line | id:Column | Rule | Message |
+-------+------------------+------------------+------------------------------------------------------------------------------------------------------+
| undef | | filename_pattern | Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i" |
| 1 | | csv.header | Columns not found in CSV: "wrong_column_name" |
| 6 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 11 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 1 | 1:City | ag:is_unique | Column has non-unique values. Unique: 9, total: 10 |
Expand All @@ -89,7 +90,7 @@ public function testValidateOneFileNegativeTable(): void
+-------+------------------+------------------+----------------------- demo.csv ---------------------------------------------------------------------+
Found 9 issues in CSV file.
Found 10 issues in CSV file.
Found 2 issues in schema.
TXT;
Expand Down Expand Up @@ -123,6 +124,7 @@ public function testValidateManyFileNegativeTable(): void
+------+------------------+--------------+--------- demo-1.csv --------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+------------------+--------------+-----------------------------------------------------------------------+
| 1 | | csv.header | Columns not found in CSV: "wrong_column_name" |
| 1 | 1:City | ag:is_unique | Column has non-unique values. Unique: 1, total: 2 |
| 3 | 4:Favorite color | allow_values | Value "blue" is not allowed. Allowed values: ["red", "green", "Blue"] |
+------+------------------+--------------+--------- demo-1.csv --------------------------------------------------+
Expand All @@ -131,6 +133,7 @@ public function testValidateManyFileNegativeTable(): void
+------+------------+------------+---------------------------- demo-2.csv --------------------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+------------+------------+------------------------------------------------------------------------------------------------------+
| 1 | | csv.header | Columns not found in CSV: "wrong_column_name" |
| 2 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 7 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 2 | 3:Birthday | date_min | The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", which is less than the |
Expand All @@ -146,10 +149,11 @@ public function testValidateManyFileNegativeTable(): void
| Line | id:Column | Rule | Message |
+-------+-----------+------------------+----------------------------------------------------------------------------------------------+
| undef | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i" |
| 1 | | csv.header | Columns not found in CSV: "wrong_column_name" |
+-------+-----------+------------------+--------------------- demo-3.csv -------------------------------------------------------------+
Found 8 issues in 3 out of 3 CSV files.
Found 11 issues in 3 out of 3 CSV files.
Found 2 issues in schema.
TXT;
Expand All @@ -176,6 +180,7 @@ public function testValidateOneFileNegativeText(): void
(1/1) Invalid file: ./tests/fixtures/demo.csv
"filename_pattern". Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i".
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
"length_min" at line 6, column "0:Name". The length of the value "Carl" is 4, which is less than the expected "5".
"length_min" at line 11, column "0:Name". The length of the value "Lois" is 4, which is less than the expected "5".
"ag:is_unique" at line 1, column "1:City". Column has non-unique values. Unique: 9, total: 10.
Expand All @@ -186,7 +191,7 @@ public function testValidateOneFileNegativeText(): void
"allow_values" at line 5, column "4:Favorite color". Value "blue" is not allowed. Allowed values: ["red", "green", "Blue"].
Found 9 issues in CSV file.
Found 10 issues in CSV file.
Found 2 issues in schema.
TXT;
Expand All @@ -206,12 +211,13 @@ public function testValidateManyFilesNegativeTextQuick(): void
"allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"].
(1/3) Invalid file: ./tests/fixtures/batch/demo-1.csv
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
"ag:is_unique" at line 1, column "1:City". Column has non-unique values. Unique: 1, total: 2.
(2/3) Skipped: ./tests/fixtures/batch/demo-2.csv
(3/3) Skipped: ./tests/fixtures/batch/sub/demo-3.csv
Found 1 issues in 1 out of 3 CSV files.
Found 2 issues in 1 out of 3 CSV files.
Found 2 issues in schema.
TXT;
Expand Down Expand Up @@ -274,10 +280,12 @@ public function testValidateManyFilesNegativeTextQuick(): void
"allow_values", column "4:Favorite color". Value "123" is not allowed. Allowed values: ["red", "green", "Blue"].
(1/3) Invalid file: ./tests/fixtures/batch/demo-1.csv
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
"ag:is_unique" at line 1, column "1:City". Column has non-unique values. Unique: 1, total: 2.
"allow_values" at line 3, column "4:Favorite color". Value "blue" is not allowed. Allowed values: ["red", "green", "Blue"].
(2/3) Invalid file: ./tests/fixtures/batch/demo-2.csv
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
"length_min" at line 2, column "0:Name". The length of the value "Carl" is 4, which is less than the expected "5".
"length_min" at line 7, column "0:Name". The length of the value "Lois" is 4, which is less than the expected "5".
"date_min" at line 2, column "3:Birthday". The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", which is less than the expected "1955-05-15 00:00:00 +00:00 (1955-05-15)".
Expand All @@ -286,9 +294,10 @@ public function testValidateManyFilesNegativeTextQuick(): void
(3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
"filename_pattern". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i".
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
Found 8 issues in 3 out of 3 CSV files.
Found 11 issues in 3 out of 3 CSV files.
Found 2 issues in schema.
TXT;
Expand Down Expand Up @@ -329,10 +338,14 @@ public function testCreateValidateNegativeTeamcity(): void
(1/3) Invalid file: ./tests/fixtures/batch/demo-1.csv
##teamcity[testCount count='2' flowId='42']
##teamcity[testCount count='3' flowId='42']
##teamcity[testSuiteStarted name='demo-1.csv' flowId='42']
##teamcity[testStarted name='csv.header at column' locationHint='php_qn://./tests/fixtures/batch/demo-1.csv' flowId='42']
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
##teamcity[testFinished name='csv.header at column' flowId='42']
##teamcity[testStarted name='ag:is_unique at column 1:City' locationHint='php_qn://./tests/fixtures/batch/demo-1.csv' flowId='42']
"ag:is_unique" at line 1, column "1:City". Column has non-unique values. Unique: 1, total: 2.
##teamcity[testFinished name='ag:is_unique at column 1:City' flowId='42']
Expand All @@ -345,10 +358,14 @@ public function testCreateValidateNegativeTeamcity(): void
(2/3) Invalid file: ./tests/fixtures/batch/demo-2.csv
##teamcity[testCount count='5' flowId='42']
##teamcity[testCount count='6' flowId='42']
##teamcity[testSuiteStarted name='demo-2.csv' flowId='42']
##teamcity[testStarted name='csv.header at column' locationHint='php_qn://./tests/fixtures/batch/demo-2.csv' flowId='42']
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
##teamcity[testFinished name='csv.header at column' flowId='42']
##teamcity[testStarted name='length_min at column 0:Name' locationHint='php_qn://./tests/fixtures/batch/demo-2.csv' flowId='42']
"length_min" at line 2, column "0:Name". The length of the value "Carl" is 4, which is less than the expected "5".
##teamcity[testFinished name='length_min at column 0:Name' flowId='42']
Expand All @@ -373,18 +390,22 @@ public function testCreateValidateNegativeTeamcity(): void
(3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
##teamcity[testCount count='1' flowId='42']
##teamcity[testCount count='2' flowId='42']
##teamcity[testSuiteStarted name='demo-3.csv' flowId='42']
##teamcity[testStarted name='filename_pattern at column' locationHint='php_qn://./tests/fixtures/batch/sub/demo-3.csv' flowId='42']
"filename_pattern". Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i".
##teamcity[testFinished name='filename_pattern at column' flowId='42']
##teamcity[testStarted name='csv.header at column' locationHint='php_qn://./tests/fixtures/batch/sub/demo-3.csv' flowId='42']
"csv.header" at line 1. Columns not found in CSV: "wrong_column_name".
##teamcity[testFinished name='csv.header at column' flowId='42']
##teamcity[testSuiteFinished name='demo-3.csv' flowId='42']
Found 8 issues in 3 out of 3 CSV files.
Found 11 issues in 3 out of 3 CSV files.
Found 2 issues in schema.
TXT;
Expand Down Expand Up @@ -454,6 +475,7 @@ public function testInvalidSchema(): void
+-------+------------+------------------+------------------------------------------------------------------------------------------------------+
| undef | | filename_pattern | Filename "./tests/fixtures/demo.csv" does not match pattern: "/demo-[12].csv$/i" |
| 1 | 4: | csv.header | Property "name" is not defined in schema: "./tests/schemas/invalid_schema.yml" |
| 1 | | csv.header | Columns not found in CSV: "4" |
| 6 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 11 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 1 | 1:City | ag:is_unique | Column has non-unique values. Unique: 9, total: 10 |
Expand Down Expand Up @@ -491,7 +513,7 @@ public function testInvalidSchema(): void
+-------+------------+------------------+-------------------------- demo.csv ------------------------------------------------------------------+
Found 26 issues in CSV file.
Found 27 issues in CSV file.
Found 8 issues in schema.
TXT;
Expand Down
10 changes: 5 additions & 5 deletions tests/Rules/Cell/AllowValuesTest.php
Expand Up @@ -56,11 +56,11 @@ public function testNegative(): void

public function testInvalidOption(): void
{
$this->expectExceptionMessage(
'Invalid option "qwe" for the "allow_values" rule. It should be array of strings.',
);

$rule = $this->create('qwe');
$rule->validate('true');
isSame(
'"allow_values" at line <red>1</red>, column "prop". ' .
'Unexpected error: Invalid option "qwe" for the "allow_values" rule. It should be array of strings.',
(string)$rule->validate('true'),
);
}
}
10 changes: 5 additions & 5 deletions tests/Rules/Cell/NotAllowValuesTest.php
Expand Up @@ -49,11 +49,11 @@ public function testNegative(): void

public function testInvalidOption(): void
{
$this->expectExceptionMessage(
'Invalid option "qwe" for the "not_allow_values" rule. It should be array of strings.',
);

$rule = $this->create('qwe');
$rule->validate('true');
isSame(
'"not_allow_values" at line <red>1</red>, column "prop". ' .
'Unexpected error: Invalid option "qwe" for the "not_allow_values" rule. It should be array of strings.',
(string)$rule->validate('true'),
);
}
}

0 comments on commit fa8ed9e

Please sign in to comment.