Skip to content

Commit

Permalink
Add new "quartiles" and "midhinge" rules (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
SmetDenis committed Mar 25, 2024
1 parent 85ce369 commit a56119d
Show file tree
Hide file tree
Showing 12 changed files with 426 additions and 16 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Stable Version](https://poser.pugx.org/jbzoo/csv-blueprint/version)](https://packagist.org/packages/jbzoo/csv-blueprint/) [![Total Downloads](https://poser.pugx.org/jbzoo/csv-blueprint/downloads)](https://packagist.org/packages/jbzoo/csv-blueprint/stats) [![Docker Pulls](https://img.shields.io/docker/pulls/jbzoo/csv-blueprint.svg)](https://hub.docker.com/r/jbzoo/csv-blueprint/tags) [![GitHub License](https://img.shields.io/github/license/jbzoo/csv-blueprint)](https://github.com/JBZoo/Csv-Blueprint/blob/master/LICENSE)

<!-- rules-counter -->
[![Static Badge](https://img.shields.io/badge/Rules-180-green?label=Total%20Number%20of%20Rules&labelColor=darkgreen&color=gray)](schema-examples/full.yml) [![Static Badge](https://img.shields.io/badge/Rules-66-green?label=Cell%20Value&labelColor=blue&color=gray)](src/Rules/Cell) [![Static Badge](https://img.shields.io/badge/Rules-109-green?label=Aggregate%20Column&labelColor=blue&color=gray)](src/Rules/Aggregate) [![Static Badge](https://img.shields.io/badge/Rules-5-green?label=Extra%20Checks&labelColor=blue&color=gray)](#extra-checks) [![Static Badge](https://img.shields.io/badge/Rules-293-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml)
[![Static Badge](https://img.shields.io/badge/Rules-192-green?label=Total%20Number%20of%20Rules&labelColor=darkgreen&color=gray)](schema-examples/full.yml) [![Static Badge](https://img.shields.io/badge/Rules-66-green?label=Cell%20Value&labelColor=blue&color=gray)](src/Rules/Cell) [![Static Badge](https://img.shields.io/badge/Rules-121-green?label=Aggregate%20Column&labelColor=blue&color=gray)](src/Rules/Aggregate) [![Static Badge](https://img.shields.io/badge/Rules-5-green?label=Extra%20Checks&labelColor=blue&color=gray)](#extra-checks) [![Static Badge](https://img.shields.io/badge/Rules-323-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml)
<!-- /rules-counter -->

## Introduction
Expand Down Expand Up @@ -369,6 +369,29 @@ columns:
percentile_less: [ 95, 8.0 ] # x < 8.0
percentile_max: [ 95, 9.0 ] # x <= 9.0

# Quartiles. Three points that divide the data set into four equal groups, each group comprising a quarter of the data.
# See: https://en.wikipedia.org/wiki/Quartile
# There are multiple methods for computing quartiles: "exclusive", "inclusive". Exclusive is usually classic.
# Available types: "0%", "Q1", "Q2", "Q3", "100%", "IQR" (aka Interquartile Range)
# Example: `[ inclusive, 'Q3', 42.0 ]` - the Q3 inclusive quartile is 42.0
quartiles_min: [ 'exclusive', '0%', 1.0 ] # x >= 1.0
quartiles_greater: [ 'inclusive', 'Q1', 2.0 ] # x > 2.0
quartiles_not: [ 'exclusive', 'Q2', 5.0 ] # x != 5.0
quartiles: [ 'inclusive', 'Q3', 7.0 ] # x == 7.0
quartiles_less: [ 'exclusive', '100%', 8.0 ] # x < 8.0
quartiles_max: [ 'inclusive', 'IQR', 9.0 ] # x <= 9.0

# Midhinge. The average of the first and third quartiles and is thus a measure of location.
# Equivalently, it is the 25% trimmed mid-range or 25% midsummary; it is an L-estimator.
# See: https://en.wikipedia.org/wiki/Midhinge
# Midhinge = (first quartile + third quartile) / 2
midhinge_min: 1.0 # x >= 1.0
midhinge_greater: 2.0 # x > 2.0
midhinge_not: 5.0 # x != 5.0
midhinge: 7.0 # x == 7.0
midhinge_less: 8.0 # x < 8.0
midhinge_max: 9.0 # x <= 9.0

# MAD - mean absolute deviation. The average of the absolute deviations from a central point.
# It is a summary statistic of statistical dispersion or variability.
# See: https://en.wikipedia.org/wiki/Average_absolute_deviation
Expand Down
14 changes: 14 additions & 0 deletions schema-examples/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,20 @@
"percentile_less" : [95, 8],
"percentile_max" : [95, 9],

"quartiles_min" : ["exclusive", "0%", 1],
"quartiles_greater" : ["inclusive", "Q1", 2],
"quartiles_not" : ["exclusive", "Q2", 5],
"quartiles" : ["inclusive", "Q3", 7],
"quartiles_less" : ["exclusive", "100%", 8],
"quartiles_max" : ["inclusive", "IQR", 9],

"midhinge_min" : 1,
"midhinge_greater" : 2,
"midhinge_not" : 5,
"midhinge" : 7,
"midhinge_less" : 8,
"midhinge_max" : 9,

"mean_abs_dev_min" : 1,
"mean_abs_dev_greater" : 2,
"mean_abs_dev_not" : 5,
Expand Down
14 changes: 14 additions & 0 deletions schema-examples/full.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,20 @@
'percentile_less' => [95, 8.0],
'percentile_max' => [95, 9.0],

'quartiles_min' => ['exclusive', '0%', 1.0],
'quartiles_greater' => ['inclusive', 'Q1', 2.0],
'quartiles_not' => ['exclusive', 'Q2', 5.0],
'quartiles' => ['inclusive', 'Q3', 7.0],
'quartiles_less' => ['exclusive', '100%', 8.0],
'quartiles_max' => ['inclusive', 'IQR', 9.0],

'midhinge_min' => 1.0,
'midhinge_greater' => 2.0,
'midhinge_not' => 5.0,
'midhinge' => 7.0,
'midhinge_less' => 8.0,
'midhinge_max' => 9.0,

'mean_abs_dev_min' => 1.0,
'mean_abs_dev_greater' => 2.0,
'mean_abs_dev_not' => 5.0,
Expand Down
23 changes: 23 additions & 0 deletions schema-examples/full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,29 @@ columns:
percentile_less: [ 95, 8.0 ] # x < 8.0
percentile_max: [ 95, 9.0 ] # x <= 9.0

# Quartiles. Three points that divide the data set into four equal groups, each group comprising a quarter of the data.
# See: https://en.wikipedia.org/wiki/Quartile
# There are multiple methods for computing quartiles: "exclusive", "inclusive". Exclusive is usually classic.
# Available types: "0%", "Q1", "Q2", "Q3", "100%", "IQR" (aka Interquartile Range)
# Example: `[ inclusive, 'Q3', 42.0 ]` - the Q3 inclusive quartile is 42.0
quartiles_min: [ 'exclusive', '0%', 1.0 ] # x >= 1.0
quartiles_greater: [ 'inclusive', 'Q1', 2.0 ] # x > 2.0
quartiles_not: [ 'exclusive', 'Q2', 5.0 ] # x != 5.0
quartiles: [ 'inclusive', 'Q3', 7.0 ] # x == 7.0
quartiles_less: [ 'exclusive', '100%', 8.0 ] # x < 8.0
quartiles_max: [ 'inclusive', 'IQR', 9.0 ] # x <= 9.0

# Midhinge. The average of the first and third quartiles and is thus a measure of location.
# Equivalently, it is the 25% trimmed mid-range or 25% midsummary; it is an L-estimator.
# See: https://en.wikipedia.org/wiki/Midhinge
# Midhinge = (first quartile + third quartile) / 2
midhinge_min: 1.0 # x >= 1.0
midhinge_greater: 2.0 # x > 2.0
midhinge_not: 5.0 # x != 5.0
midhinge: 7.0 # x == 7.0
midhinge_less: 8.0 # x < 8.0
midhinge_max: 9.0 # x <= 9.0

# MAD - mean absolute deviation. The average of the absolute deviations from a central point.
# It is a summary statistic of statistical dispersion or variability.
# See: https://en.wikipedia.org/wiki/Average_absolute_deviation
Expand Down
32 changes: 32 additions & 0 deletions schema-examples/full_clean.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,38 @@ columns:
- 95
- 9.0

quartiles_min:
- exclusive
- 0%
- 1.0
quartiles_greater:
- inclusive
- Q1
- 2.0
quartiles_not:
- exclusive
- Q2
- 5.0
quartiles:
- inclusive
- Q3
- 7.0
quartiles_less:
- exclusive
- 100%
- 8.0
quartiles_max:
- inclusive
- IQR
- 9.0

midhinge_min: 1.0
midhinge_greater: 2.0
midhinge_not: 5.0
midhinge: 7.0
midhinge_less: 8.0
midhinge_max: 9.0

mean_abs_dev_min: 1.0
mean_abs_dev_greater: 2.0
mean_abs_dev_not: 5.0
Expand Down
49 changes: 49 additions & 0 deletions src/Rules/Aggregate/ComboMidhinge.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

/**
* JBZoo Toolbox - Csv-Blueprint.
*
* This file is part of the JBZoo Toolbox project.
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
* @license MIT
* @copyright Copyright (C) JBZoo.com, All rights reserved.
* @see https://github.com/JBZoo/Csv-Blueprint
*/

declare(strict_types=1);

namespace JBZoo\CsvBlueprint\Rules\Aggregate;

use JBZoo\CsvBlueprint\Rules\AbstarctRule;
use MathPHP\Statistics\Descriptive;

/**
 * Aggregate rule that checks the midhinge of a column.
 *
 * The midhinge is the average of the first and third quartiles:
 * (Q1 + Q3) / 2. The computation itself is delegated to MathPHP.
 */
final class ComboMidhinge extends AbstarctAggregateRuleCombo
{
    public const INPUT_TYPE = AbstarctRule::INPUT_TYPE_FLOATS;

    protected const NAME = 'midhinge';

    /**
     * Returns the help metadata of the rule.
     *
     * The first element is the list of description lines. The second element
     * holds per-mode examples and is left empty here.
     * NOTE(review): presumably an empty example list makes the doc builder fall
     * back to its default examples - confirm against the parent class.
     *
     * @return array{0: list<string>, 1: array}
     */
    public function getHelpMeta(): array
    {
        return [
            [
                'Midhinge. The average of the first and third quartiles and is thus a measure of location.',
                'Equivalently, it is the 25% trimmed mid-range or 25% midsummary; it is an L-estimator.',
                'See: https://en.wikipedia.org/wiki/Midhinge',
                // The midhinge is a mean of two quartiles, so they are summed before halving.
                'Midhinge = (first quartile + third quartile) / 2',
            ],
            [],
        ];
    }

    /**
     * Calculates the midhinge of the column values.
     *
     * @param array $colValues raw column values; they are converted to floats before the calculation
     *
     * @return null|float the midhinge, or null when the column has no values
     */
    protected function getActualAggregate(array $colValues): ?float
    {
        // Nothing to aggregate: return null instead of calling the statistics library.
        if (\count($colValues) === 0) {
            return null;
        }

        return Descriptive::midhinge(self::stringsToFloat($colValues));
    }
}
124 changes: 124 additions & 0 deletions src/Rules/Aggregate/ComboQuartiles.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
<?php

/**
* JBZoo Toolbox - Csv-Blueprint.
*
* This file is part of the JBZoo Toolbox project.
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
* @license MIT
* @copyright Copyright (C) JBZoo.com, All rights reserved.
* @see https://github.com/JBZoo/Csv-Blueprint
*/

declare(strict_types=1);

namespace JBZoo\CsvBlueprint\Rules\Aggregate;

use JBZoo\CsvBlueprint\Rules\AbstarctRule;
use MathPHP\Statistics\Descriptive;

use function JBZoo\Utils\float;

/**
 * Aggregate rule that checks one value of the quartile summary of a column.
 *
 * The rule option is a list of exactly three items:
 *   0 - computing method (see self::METHODS),
 *   1 - which value of the summary to check (see self::TYPES),
 *   2 - expected value.
 *
 * The computation itself is delegated to MathPHP.
 */
final class ComboQuartiles extends AbstarctAggregateRuleCombo
{
    public const INPUT_TYPE = AbstarctRule::INPUT_TYPE_FLOATS;

    protected const NAME = 'quartile';

    // Single source of truth for the allowed values: used by help text, validation and error messages.
    private const TYPES   = ['0%', 'Q1', 'Q2', 'Q3', '100%', 'IQR'];
    private const METHODS = ['exclusive', 'inclusive'];

    // Number of expected params and the position of each one inside the rule option.
    private const ARGS   = 3;
    private const METHOD = 0;
    private const TYPE   = 1;
    private const VAL    = 2;

    /**
     * Returns the help metadata of the rule.
     *
     * The first element is the list of description lines, the second one maps
     * every comparison mode to a pair "[example value, comment]".
     *
     * @return array{0: list<string>, 1: array}
     */
    public function getHelpMeta(): array
    {
        return [
            [
                'Quartiles. Three points that divide the data set into four equal groups, ' .
                'each group comprising a quarter of the data.',
                'See: https://en.wikipedia.org/wiki/Quartile',
                // Options
                'There are multiple methods for computing quartiles: "' . \implode('", "', self::METHODS) . '". ' .
                'Exclusive is usually classic.',
                'Available types: "' . \implode('", "', self::TYPES) . '" (aka Interquartile Range)',
                // Example. The explained value has to match the expected value of the sample.
                'Example: `[ ' . self::METHODS[1] . ", '" . self::TYPES[3] . "', 42.0 ]`" .
                ' - the ' . self::TYPES[3] . ' ' . self::METHODS[1] . ' quartile is 42.0',
            ],
            [
                self::MIN     => ["[ '" . self::METHODS[0] . "', '" . self::TYPES[0] . "', 1.0 ]", 'x >= 1.0'],
                self::GREATER => ["[ '" . self::METHODS[1] . "', '" . self::TYPES[1] . "', 2.0 ]", 'x > 2.0'],
                self::NOT     => ["[ '" . self::METHODS[0] . "', '" . self::TYPES[2] . "', 5.0 ]", 'x != 5.0'],
                self::EQ      => ["[ '" . self::METHODS[1] . "', '" . self::TYPES[3] . "', 7.0 ]", 'x == 7.0'],
                self::LESS    => ["[ '" . self::METHODS[0] . "', '" . self::TYPES[4] . "', 8.0 ]", 'x < 8.0'],
                self::MAX     => ["[ '" . self::METHODS[1] . "', '" . self::TYPES[5] . "', 9.0 ]", 'x <= 9.0'],
            ],
        ];
    }

    /**
     * Returns the expected value: the third rule param converted to float.
     *
     * @throws \RuntimeException when the rule option does not contain exactly three params
     */
    protected function getExpected(): float
    {
        return float($this->getParams()[self::VAL]);
    }

    /**
     * Calculates the requested value of the quartile summary.
     *
     * @param array $colValues raw column values; they are converted to floats before the calculation
     *
     * @return null|float the requested value, or null when the column has no values
     *
     * @throws \RuntimeException when the rule params are invalid (only checked for a non-empty column)
     */
    protected function getActualAggregate(array $colValues): ?float
    {
        // Nothing to aggregate: return null before the params are even looked at.
        if (\count($colValues) === 0) {
            return null;
        }

        $method = $this->getMethod();
        $type   = $this->getType();
        $result = Descriptive::quartiles(self::stringsToFloat($colValues), $method);

        // The summary is indexed by the type name.
        return $result[$type];
    }

    /**
     * Returns the validated type of the value to check (second rule param).
     *
     * @throws \RuntimeException when the type is not listed in self::TYPES
     */
    private function getType(): string
    {
        $type = $this->getParams()[self::TYPE];

        if (!\in_array($type, self::TYPES, true)) {
            throw new \RuntimeException(
                "Unknown quartile type: \"{$type}\". Allowed: \"" . \implode('", "', self::TYPES) . '"',
            );
        }

        return $type;
    }

    /**
     * Returns the validated computing method (first rule param).
     *
     * @throws \RuntimeException when the method is not listed in self::METHODS
     */
    private function getMethod(): string
    {
        $method = $this->getParams()[self::METHOD];

        if (!\in_array($method, self::METHODS, true)) {
            throw new \RuntimeException(
                "Unknown quartile method: \"{$method}\". Allowed: \"" . \implode('", "', self::METHODS) . '"',
            );
        }

        return $method;
    }

    /**
     * Returns the rule option as a list of exactly three params.
     *
     * @throws \RuntimeException when the number of params differs from self::ARGS
     */
    private function getParams(): array
    {
        $params = $this->getOptionAsArray();
        if (\count($params) !== self::ARGS) {
            // Built from the constants so the message cannot drift from the validation lists.
            throw new \RuntimeException(
                'The rule expects exactly three params: ' .
                'method (' . \implode(', ', self::METHODS) . '), ' .
                'type (' . \implode(', ', self::TYPES) . '), expected value (float)',
            );
        }

        return $params;
    }
}
7 changes: 6 additions & 1 deletion src/Rules/DocBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,18 @@ private static function getYmlRuleCodeClean(string $origRuleCode): string
private static function renderLine(string $ruleCode, array $row, string $mode): string
{
$leftPad = \str_repeat(' ', self::HELP_LEFT_PAD);
$descPad = self::HELP_DESC_PAD;

$baseKeyVal = $mode === ''
? "{$leftPad}{$ruleCode}: {$row[0]}"
: "{$leftPad}{$ruleCode}_{$mode}: {$row[0]}";

if (\strlen($baseKeyVal) > $descPad) {
$descPad = 60;
}

if (isset($row[1]) && $row[1] !== '') {
return \str_pad($baseKeyVal, self::HELP_DESC_PAD, ' ', \STR_PAD_RIGHT) . "# {$row[1]}";
return \str_pad($baseKeyVal, $descPad, ' ', \STR_PAD_RIGHT) . "# {$row[1]}";
}

return $baseKeyVal;
Expand Down
2 changes: 1 addition & 1 deletion tests/ReadmeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public function testBadgeOfRules(): void

$todoYml = yml(Tools::SCHEMA_TODO);
$planToAdd = \count($todoYml->findArray('columns.0.rules')) +
(\count($todoYml->findArray('columns.0.aggregate_rules')) * 4)
(\count($todoYml->findArray('columns.0.aggregate_rules')) * 6)
+ \count([
'required',
'null_values',
Expand Down
41 changes: 41 additions & 0 deletions tests/Rules/Aggregate/ComboMidhingeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

/**
* JBZoo Toolbox - Csv-Blueprint.
*
* This file is part of the JBZoo Toolbox project.
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
* @license MIT
* @copyright Copyright (C) JBZoo.com, All rights reserved.
* @see https://github.com/JBZoo/Csv-Blueprint
*/

declare(strict_types=1);

namespace JBZoo\PHPUnit\Rules\Aggregate;

use JBZoo\CsvBlueprint\Rules\AbstarctRule as Combo;
use JBZoo\CsvBlueprint\Rules\Aggregate\ComboMidhinge;
use JBZoo\PHPUnit\Rules\TestAbstractAggregateRuleCombo;

use function JBZoo\PHPUnit\isSame;

/**
 * Tests of the "midhinge" aggregate rule.
 */
class ComboMidhingeTest extends TestAbstractAggregateRuleCombo
{
    protected string $ruleClass = ComboMidhinge::class;

    /**
     * The "equal" mode passes on an empty column and on a matching midhinge,
     * and reports an error when the midhinge differs from the expected value.
     */
    public function testEqual(): void
    {
        // The midhinge of the numbers from 1 to 35 is 18.
        $column = \range(1, 35);

        $matchingRule = $this->create(18, Combo::EQ);
        isSame('', $matchingRule->test([]));
        isSame('', $matchingRule->test($column));

        $mismatchingRule = $this->create(3, Combo::EQ);
        $expectedError   = 'The midhinge in the column is "18", which is not equal than the expected "3"';
        isSame($expectedError, $mismatchingRule->test($column));
    }
}

0 comments on commit a56119d

Please sign in to comment.