-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add midhinge rules for data validation
This commit introduces the new 'midhinge' aggregate rules for data validation. These rules calculate the midhinge of a column, i.e. the average of the first and third quartiles of its values, providing a useful measure of central location. The new rules make it possible to flag columns whose midhinge does not meet the expected value, ensuring better data consistency and quality. Corresponding adjustments have been made in schema files and unit tests.
- Loading branch information
Showing
10 changed files
with
137 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
<?php | ||
|
||
/** | ||
* JBZoo Toolbox - Csv-Blueprint. | ||
* | ||
* This file is part of the JBZoo Toolbox project. | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
* | ||
* @license MIT | ||
* @copyright Copyright (C) JBZoo.com, All rights reserved. | ||
* @see https://github.com/JBZoo/Csv-Blueprint | ||
*/ | ||
|
||
declare(strict_types=1); | ||
|
||
namespace JBZoo\CsvBlueprint\Rules\Aggregate; | ||
|
||
use JBZoo\CsvBlueprint\Rules\AbstarctRule; | ||
use MathPHP\Statistics\Descriptive; | ||
|
||
/**
 * Aggregate rule that checks the midhinge of a column.
 *
 * The actual value is computed by MathPHP's Descriptive::midhinge(); how it is
 * compared with the expected value is handled by the parent combo class.
 */
final class ComboMidhinge extends AbstarctAggregateRuleCombo
{
    public const INPUT_TYPE = AbstarctRule::INPUT_TYPE_FLOATS;

    protected const NAME = 'midhinge';

    /**
     * Help texts for this rule.
     *
     * @return array a two-element array: the list of description lines, then an
     *               empty array (its purpose is defined by the parent class —
     *               presumably per-mode extras; confirm there)
     */
    public function getHelpMeta(): array
    {
        // NOTE(review): the last line reads "(first quartile, third quartile) / 2";
        // mathematically the midhinge is (Q1 + Q3) / 2. The text is kept verbatim here
        // because generated schema/help files may embed it — fix both together.
        $descriptionLines = [
            'Midhinge. The average of the first and third quartiles and is thus a measure of location.',
            'Equivalently, it is the 25% trimmed mid-range or 25% midsummary; it is an L-estimator.',
            'See: https://en.wikipedia.org/wiki/Midhinge',
            'Midhinge = (first quartile, third quartile) / 2',
        ];

        return [$descriptionLines, []];
    }

    /**
     * Computes the midhinge of the given column values.
     *
     * @param  array      $colValues raw column values; converted with self::stringsToFloat()
     * @return null|float null when the column has no values, otherwise the midhinge
     */
    protected function getActualAggregate(array $colValues): ?float
    {
        // Nothing to aggregate: signal "no value" instead of calling the library on an empty set.
        if ($colValues === []) {
            return null;
        }

        $numbers = self::stringsToFloat($colValues);

        return Descriptive::midhinge($numbers);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<?php | ||
|
||
/** | ||
* JBZoo Toolbox - Csv-Blueprint. | ||
* | ||
* This file is part of the JBZoo Toolbox project. | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
* | ||
* @license MIT | ||
* @copyright Copyright (C) JBZoo.com, All rights reserved. | ||
* @see https://github.com/JBZoo/Csv-Blueprint | ||
*/ | ||
|
||
declare(strict_types=1); | ||
|
||
namespace JBZoo\PHPUnit\Rules\Aggregate; | ||
|
||
use JBZoo\CsvBlueprint\Rules\AbstarctRule as Combo; | ||
use JBZoo\CsvBlueprint\Rules\Aggregate\ComboMidhinge; | ||
use JBZoo\PHPUnit\Rules\TestAbstractAggregateRuleCombo; | ||
|
||
use function JBZoo\PHPUnit\isSame; | ||
|
||
/**
 * Tests for the "midhinge" aggregate rule in its "equal" mode.
 */
class ComboMidhingeTest extends TestAbstractAggregateRuleCombo
{
    protected string $ruleClass = ComboMidhinge::class;

    /**
     * An empty column and a column whose midhinge equals the expected value yield
     * no error; a different expected value yields the mismatch message.
     */
    public function testEqual(): void
    {
        // The integers 1..35; the assertions below expect their midhinge to be 18.
        $column = \range(1, 35);

        $matchingRule = $this->create(18, Combo::EQ);
        isSame('', $matchingRule->test([]));
        isSame('', $matchingRule->test($column));

        $mismatchingRule = $this->create(3, Combo::EQ);
        isSame(
            'The midhinge in the column is "18", which is not equal than the expected "3"',
            $mismatchingRule->test($column),
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters