Skip to content

Commit

Permalink
Add Hash validation rule for cell values (#104)
Browse files Browse the repository at this point in the history
This commit introduces the Hash class to the Csv-Blueprint project's
rules for cell validation. The new Hash rule validates whether a cell
value correctly represents a hash by checking against a variety of
supported algorithms. The addition of this rule allows the project to
provide a more comprehensive and diverse range of data validation
options.
  • Loading branch information
SmetDenis committed Mar 27, 2024
1 parent 246f2c5 commit 24c28a9
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 83 deletions.
1 change: 0 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ jobs:
context: .
push: true
tags: jbzoo/csv-blueprint:master
platforms: linux/amd64,linux/arm64/v8,linux/386


verify-ga:
Expand Down
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![GitHub Release](https://img.shields.io/github/v/release/jbzoo/csv-blueprint?label=Latest)](https://github.com/jbzoo/csv-blueprint/releases) [![Total Downloads](https://poser.pugx.org/jbzoo/csv-blueprint/downloads)](https://packagist.org/packages/jbzoo/csv-blueprint/stats) [![Docker Pulls](https://img.shields.io/docker/pulls/jbzoo/csv-blueprint.svg)](https://hub.docker.com/r/jbzoo/csv-blueprint/tags) [![Docker Image Size](https://img.shields.io/docker/image-size/jbzoo/csv-blueprint)](https://hub.docker.com/r/jbzoo/csv-blueprint/tags)

<!-- rules-counter -->
[![Static Badge](https://img.shields.io/badge/Rules-304-green?label=Total%20number%20of%20rules&labelColor=darkgreen&color=gray)](schema-examples/full.yml) [![Static Badge](https://img.shields.io/badge/Rules-93-green?label=Cell%20rules&labelColor=blue&color=gray)](src/Rules/Cell) [![Static Badge](https://img.shields.io/badge/Rules-206-green?label=Aggregate%20rules&labelColor=blue&color=gray)](src/Rules/Aggregate) [![Static Badge](https://img.shields.io/badge/Rules-5-green?label=Extra%20checks&labelColor=blue&color=gray)](#extra-checks) [![Static Badge](https://img.shields.io/badge/Rules-119/54/8-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml)
[![Static Badge](https://img.shields.io/badge/Rules-305-green?label=Total%20number%20of%20rules&labelColor=darkgreen&color=gray)](schema-examples/full.yml) [![Static Badge](https://img.shields.io/badge/Rules-94-green?label=Cell%20rules&labelColor=blue&color=gray)](src/Rules/Cell) [![Static Badge](https://img.shields.io/badge/Rules-206-green?label=Aggregate%20rules&labelColor=blue&color=gray)](src/Rules/Aggregate) [![Static Badge](https://img.shields.io/badge/Rules-5-green?label=Extra%20checks&labelColor=blue&color=gray)](#extra-checks) [![Static Badge](https://img.shields.io/badge/Rules-42/54/8-green?label=Plan%20to%20add&labelColor=gray&color=gray)](tests/schemas/todo.yml)
<!-- /rules-counter -->

## Introduction
Expand Down Expand Up @@ -304,6 +304,15 @@ columns:
is_alnum: true # Validates whether the input is only alphanumeric. Example: "aBc123".
is_alpha: true # This is similar to `is_alnum`, but it does not allow numbers. Example: "aBc".

# Check if the value is a valid hash. Supported algorithms:
# - md2, md4, md5, sha1, sha224, sha256, sha384, sha512/224, sha512/256, sha512
# - sha3-224, sha3-256, sha3-384, sha3-512, ripemd128, ripemd160, ripemd256, ripemd320, whirlpool, tiger128,3
# - tiger160,3, tiger192,3, tiger128,4, tiger160,4, tiger192,4, snefru, snefru256, gost, gost-crypto, adler32
# - crc32, crc32b, crc32c, fnv132, fnv1a32, fnv164, fnv1a64, joaat, murmur3a, murmur3c
# - murmur3f, xxh32, xxh64, xxh3, xxh128, haval128,3, haval160,3, haval192,3, haval224,3, haval256,3
# - haval128,4, haval160,4, haval192,4, haval224,4, haval256,4, haval128,5, haval160,5, haval192,5, haval224,5, haval256,5
hash: set_algo # Example: "1234567890abcdef".

####################################################################################################################
# Data validation for the entire(!) column using different data aggregation methods.
# Depending on the file size and the chosen aggregation method - this can use a lot of RAM and time.
Expand Down
3 changes: 2 additions & 1 deletion schema-examples/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@
"is_vowel" : true,
"is_consonant" : true,
"is_alnum" : true,
"is_alpha" : true
"is_alpha" : true,
"hash" : "set_algo"
},
"aggregate_rules" : {
"is_unique" : true,
Expand Down
2 changes: 2 additions & 0 deletions schema-examples/full.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@
'is_consonant' => true,
'is_alnum' => true,
'is_alpha' => true,

'hash' => 'set_algo',
],

'aggregate_rules' => [
Expand Down
9 changes: 9 additions & 0 deletions schema-examples/full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ columns:
is_alnum: true # Validates whether the input is only alphanumeric. Example: "aBc123".
is_alpha: true # This is similar to `is_alnum`, but it does not allow numbers. Example: "aBc".

# Check if the value is a valid hash. Supported algorithms:
# - md2, md4, md5, sha1, sha224, sha256, sha384, sha512/224, sha512/256, sha512
# - sha3-224, sha3-256, sha3-384, sha3-512, ripemd128, ripemd160, ripemd256, ripemd320, whirlpool, tiger128,3
# - tiger160,3, tiger192,3, tiger128,4, tiger160,4, tiger192,4, snefru, snefru256, gost, gost-crypto, adler32
# - crc32, crc32b, crc32c, fnv132, fnv1a32, fnv164, fnv1a64, joaat, murmur3a, murmur3c
# - murmur3f, xxh32, xxh64, xxh3, xxh128, haval128,3, haval160,3, haval192,3, haval224,3, haval256,3
# - haval128,4, haval160,4, haval192,4, haval224,4, haval256,4, haval128,5, haval160,5, haval192,5, haval224,5, haval256,5
hash: set_algo # Example: "1234567890abcdef".

####################################################################################################################
# Data validation for the entire(!) column using different data aggregation methods.
# Depending on the file size and the chosen aggregation method - this can use a lot of RAM and time.
Expand Down
2 changes: 2 additions & 0 deletions schema-examples/full_clean.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ columns:
is_alnum: true
is_alpha: true

hash: set_algo

aggregate_rules:
is_unique: true
is_sorted:
Expand Down
147 changes: 147 additions & 0 deletions src/Rules/Cell/Hash.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<?php

/**
* JBZoo Toolbox - Csv-Blueprint.
*
* This file is part of the JBZoo Toolbox project.
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
* @license MIT
* @copyright Copyright (C) JBZoo.com, All rights reserved.
* @see https://github.com/JBZoo/Csv-Blueprint
*/

declare(strict_types=1);

namespace JBZoo\CsvBlueprint\Rules\Cell;

/**
 * Cell rule that checks whether a value has the textual shape of a hash digest
 * for the algorithm named in the rule option (e.g. `hash: md5`).
 *
 * The check is purely structural: the value must consist of exactly the number
 * of hexadecimal characters (case-insensitive) listed for the algorithm in
 * getRegexList(). It does not (and cannot) verify that the digest was actually
 * produced by that algorithm.
 */
class Hash extends AbstractCellRule
{
    /**
     * Returns the help metadata for this rule: the multi-line help title
     * (including the list of supported algorithms) followed by the option
     * examples keyed by self::DEFAULT.
     */
    public function getHelpMeta(): array
    {
        return [
            self::getHelpTitle(),
            [self::DEFAULT => ['set_algo', 'Example: "1234567890abcdef".']],
        ];
    }

    /**
     * Validates a single cell value against the configured hash algorithm.
     *
     * @param  string      $cellValue Raw cell value. An empty string is skipped (treated as valid).
     * @return null|string Null when the value is acceptable, otherwise an error message
     *                     (unsupported algorithm, or value not matching the expected digest shape).
     */
    public function validateRule(string $cellValue): ?string
    {
        if ($cellValue === '') {
            return null;
        }

        $regex   = self::getRegexList();
        $hashAlg = $this->getOptionAsString();

        if (!isset($regex[$hashAlg])) {
            return "The algorithm \"{$hashAlg}\" is not supported.";
        }

        // Compare against 1 rather than 0: preg_match() returns false on a PCRE
        // error, and such a value must not be silently reported as a valid hash.
        if (\preg_match($regex[$hashAlg], $cellValue) !== 1) {
            return "The value \"<c>{$cellValue}</c>\" is not a valid hash for the " .
                "algorithm \"<green>{$hashAlg}</green>\"";
        }

        return null;
    }

    /**
     * Builds a regex matching exactly $length characters from $charset.
     *
     * Modifiers: `i` makes the match case-insensitive; `D` (PCRE_DOLLAR_ENDONLY)
     * makes `$` match only at the very end of the subject, so a value with a
     * trailing newline is rejected instead of being accepted as a valid hash.
     *
     * @param int    $length  Expected number of characters in the digest.
     * @param string $charset Regex character class allowed for each character.
     */
    private static function getRegex(int $length, string $charset = '[a-f0-9]'): string
    {
        return "/^{$charset}{{$length}}\$/iD";
    }

    /**
     * Map of supported algorithm name => regex describing its hex digest.
     *
     * The list is built once and memoized, because validateRule() needs it
     * for every validated cell.
     *
     * @return array<string, string>
     */
    private static function getRegexList(): array
    {
        static $regexList = null;

        if ($regexList !== null) {
            return $regexList;
        }

        $regexList = [
            'md2' => self::getRegex(32),
            'md4' => self::getRegex(32),
            'md5' => self::getRegex(32),

            'sha1'       => self::getRegex(40),
            'sha224'     => self::getRegex(56),
            'sha256'     => self::getRegex(64),
            'sha384'     => self::getRegex(96),
            'sha512/224' => self::getRegex(56),
            'sha512/256' => self::getRegex(64),
            'sha512'     => self::getRegex(128),
            'sha3-224'   => self::getRegex(56),
            'sha3-256'   => self::getRegex(64),
            'sha3-384'   => self::getRegex(96),
            'sha3-512'   => self::getRegex(128),

            'ripemd128' => self::getRegex(32),
            'ripemd160' => self::getRegex(40),
            'ripemd256' => self::getRegex(64),
            'ripemd320' => self::getRegex(80),

            'whirlpool' => self::getRegex(128),

            'tiger128,3' => self::getRegex(32),
            'tiger160,3' => self::getRegex(40),
            'tiger192,3' => self::getRegex(48),
            'tiger128,4' => self::getRegex(32),
            'tiger160,4' => self::getRegex(40),
            'tiger192,4' => self::getRegex(48),

            'snefru'    => self::getRegex(64),
            'snefru256' => self::getRegex(64),

            'gost'        => self::getRegex(64),
            'gost-crypto' => self::getRegex(64),

            'adler32' => self::getRegex(8),

            'crc32'  => self::getRegex(8),
            'crc32b' => self::getRegex(8),
            'crc32c' => self::getRegex(8),

            'fnv132'  => self::getRegex(8),
            'fnv1a32' => self::getRegex(8),

            'fnv164'  => self::getRegex(16),
            'fnv1a64' => self::getRegex(16),

            'joaat' => self::getRegex(8),

            'murmur3a' => self::getRegex(8),
            'murmur3c' => self::getRegex(32),
            'murmur3f' => self::getRegex(32),

            'xxh32'  => self::getRegex(8),
            'xxh64'  => self::getRegex(16),
            'xxh3'   => self::getRegex(16),
            'xxh128' => self::getRegex(32),

            'haval128,3' => self::getRegex(32),
            'haval160,3' => self::getRegex(40),
            'haval192,3' => self::getRegex(48),
            'haval224,3' => self::getRegex(56),
            'haval256,3' => self::getRegex(64),
            'haval128,4' => self::getRegex(32),
            'haval160,4' => self::getRegex(40),
            'haval192,4' => self::getRegex(48),
            'haval224,4' => self::getRegex(56),
            'haval256,4' => self::getRegex(64),
            'haval128,5' => self::getRegex(32),
            'haval160,5' => self::getRegex(40),
            'haval192,5' => self::getRegex(48),
            'haval224,5' => self::getRegex(56),
            'haval256,5' => self::getRegex(64),
        ];

        return $regexList;
    }

    /**
     * Builds the help title: an intro line followed by the supported algorithm
     * names, wrapped at ten names per line.
     *
     * @return string[]
     */
    private static function getHelpTitle(): array
    {
        $maxOnLine = 10;
        $lines     = \array_chunk(\array_keys(self::getRegexList()), $maxOnLine);

        $result = ['Check if the value is a valid hash. Supported algorithms:'];
        foreach ($lines as $line) {
            $result[] = '  - ' . \implode(', ', $line);
        }

        return $result;
    }
}
54 changes: 54 additions & 0 deletions tests/Rules/Cell/HashTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?php

/**
* JBZoo Toolbox - Csv-Blueprint.
*
* This file is part of the JBZoo Toolbox project.
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
* @license MIT
* @copyright Copyright (C) JBZoo.com, All rights reserved.
* @see https://github.com/JBZoo/Csv-Blueprint
*/

declare(strict_types=1);

namespace JBZoo\PHPUnit\Rules\Cell;

use JBZoo\CsvBlueprint\Rules\Cell\Hash;
use JBZoo\PHPUnit\Rules\TestAbstractCellRule;
use JBZoo\Utils\Str;

use function JBZoo\PHPUnit\isSame;

/**
 * Tests for the Hash cell rule.
 */
final class HashTest extends TestAbstractCellRule
{
    protected string $ruleClass = Hash::class;

    /**
     * Every algorithm reported by hash_algos() must be accepted by the rule:
     * an empty value passes, and so do digests of random input strings.
     */
    public function testPositive(): void
    {
        $samplesPerAlgo = 100;

        foreach (\hash_algos() as $algoName) {
            $rule = $this->create($algoName);
            isSame('', $rule->test(''));

            for ($attempt = 1; $attempt <= $samplesPerAlgo; $attempt++) {
                $digest       = \hash($algoName, Str::random(32));
                $digestLength = \strlen($digest);

                // The failure message is the exact line to add to Hash::getRegexList()
                // for an algorithm that is missing or has the wrong length.
                isSame(
                    '',
                    $rule->test($digest),
                    "'{$algoName}' => \$this->getRegex({$digestLength}),",
                );
            }
        }
    }

    /**
     * An unknown algorithm name must be reported as unsupported.
     */
    public function testNegative(): void
    {
        $expectedError = 'The algorithm "qwerty" is not supported.';
        $rule          = $this->create('qwerty');

        isSame($expectedError, $rule->test('qwerty'));
    }
}
80 changes: 0 additions & 80 deletions tests/schemas/todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,86 +73,6 @@ columns:
is_iban: true
is_card_number: true

# Hashes
is_md2: true # Example: "c2cb085c24f850986e55f1c44abe6876"
is_md4: true # Example: "2a4bbeffd06c016ab4134cc7963496d2"
is_md5: true # Example: "d8578edf8458ce06fbc5bb76a58c5ca4"
is_sha1: true # Example: "b1b3773a05c0ed0176787a4f1574ff0075f7521e"
is_sha224: true # Example: "5154aaa49392fb275ce7e12a7d3e00901cf9cf3ab10491673f97322f"
is_sha256: true # Example: "65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5"
is_sha384: true # Example: "1ab60e110d41a9aac5e30d086c490819bfe3461b38c76b9602fe9686aa0aa3d28c63c96a1019e3788c40a14f4292e50f"
is_sha512_224: true # Example: "55c4880932da9f6d518ab3d07bc2526c3a358ddbdd4b792c440d2af8"
is_sha512_256: true # Example: "e8b1e383dd63a3fb50ec8e4e582b8a6e5412269eff82ffb892dd00b3cb157daf"
is_sha512: true # Example: "0dd3e512642c97ca3f747f9a76e374fbda73f9292823c0313be9d78add7cdd8f72235af0c553dd26797e78e1854edee0ae002f8aba074b066dfce1af114e32f8"
is_sha3_224: true # Example: "13783bdfa4a63b202d9aa1992eccdd68a9fa5e44539273d8c2b797cd"
is_sha3_256: true # Example: "f171cbb35dd1166a20f99b5ad226553e122f3c0f2fe981915fb9e4517aac9038"
is_sha3_384: true # Example: "6729a614db5c5c97920e15501d361ba2f445758012e181af1c6300a99d9a951553fcc4e14aa614db164f61a758c6d6c9"
is_sha3_512: true # Example: "f6d1015e17df348f2d84b3b603648ae4bd14011f4e5b82f885e45587bcad48947d37d64501dc965c0f201171c44b656ee28ed9a5060aea1f2a336025320683d6"
is_ripemd128: true # Example: "aa0a5264bd784fcec22026886608ff37"
is_ripemd160: true # Example: "3a0ede1791358f307ae1f211d3fc4acf677644d8"
is_ripemd256: true # Example: "2e90f47814527f95933dffc1da9eebc1eba2bbca75095b6a4718d4cf6b2a5b89"
is_ripemd320: true # Example: "bdcd4765bd5050f8bd0fe1c1b9728c5ad14c2121d9d9069ce446e1d3e7e3f4e60e65c62e64d8d643"
is_whirlpool: true # Example: "4925da7da7a56260baf1c37925a8fa24e46ad8b107dcd21f44e39e4751bae1304fc70de7acb847ffa96126bb372de005f5320f1ede6f9df07c7d53f9c160f022"
is_tiger128_3: true # Example: "00722492524aaac42f4119227927f12f"
is_tiger160_3: true # Example: "00722492524aaac42f4119227927f12f51847b37"
is_tiger192_3: true # Example: "00722492524aaac42f4119227927f12f51847b37d400e819"
is_tiger128_4: true # Example: "2a581c1ffc93dba6065d1a037f3acc50"
is_tiger160_4: true # Example: "2a581c1ffc93dba6065d1a037f3acc50915d3621"
is_tiger192_4: true # Example: "2a581c1ffc93dba6065d1a037f3acc50915d3621dbe3d5c6"
is_snefru: true # Example: "68880d55903beb3001eea8222aaedece6d1fa57cd142b1c217eafa817d5dc6a6"
is_snefru256: true # Example: "68880d55903beb3001eea8222aaedece6d1fa57cd142b1c217eafa817d5dc6a6"
is_gost: true # Example: "20930ee319a54c7c97d29f373208134087998e29dff356f9dd3caafcc16a76eb"
is_gost_crypto: true # Example: "649f84af9970f16ba391dc1b3c4d959e44bb5f908fce01a1028a51593f2bd8d9"
is_adler32: true # Example: "094a02ad"
is_crc32: true # Example: "30143f93"
is_crc32b: true # Example: "03498d7d"
is_crc32c: true # Example: "c3ba6452"
is_fnv132: true # Example: "55549221"
is_fnv1a32: true # Example: "1ae54459"
is_fnv164: true # Example: "5f10a08319d448c1"
is_fnv1a64: true # Example: "3eb459c7c3501ff9"
is_joaat: true # Example: "956cea9b"
is_murmur3a: true # Example: "63a94b17"
is_murmur3c: true # Example: "65aeaed61ddeddd529b0f29229b0f292"
is_murmur3f: true # Example: "1ac8c76fab899b563590e4cdc1dffc37"
is_xxh32: true # Example: "a7d1a491"
is_xxh64: true # Example: "8afe1e67d48f917e"
is_xxh3: true # Example: "e26e47d406128241"
is_xxh128: true # Example: "7526ee965137b2687ad5cd960682dc28"
is_haval128_3: true # Example: "a63fc1b65651aad6ccc86880b0e4361b"
is_haval160_3: true # Example: "216bbac7189ccbf07e66367bc881f5e575629794"
is_haval192_3: true # Example: "af8f66b04be701f16febaa06882afe02ad51d4e1ec89d3f5"
is_haval224_3: true # Example: "1443a48e5f5c0f6481fdf3cb46eaf9fbcf8d838438917ca1daae78a4"
is_haval256_3: true # Example: "1fd87cdf95e66a64276f6445f5685dfc460c91e331bf104b6b2bb6bcbb7900f5"
is_haval128_4: true # Example: "2c0eb363ea8e206ccc38672a65ae94b6"
is_haval160_4: true # Example: "23806f6579d791f93ec31fffbf948cefa8ff6591"
is_haval192_4: true # Example: "91df271ea9f0027cfc3c751d00877b341d04606d4bd0f5ea"
is_haval224_4: true # Example: "42e45f9c516cb5e6fbff0b39c42a93d839019a690b8c867e25d380f4"
is_haval256_4: true # Example: "4672dd42d83c762bd7d0b6e3a6d220617d2e0678b8d616c4cfdeded0401a2bc5"
is_haval128_5: true # Example: "a85ba773f7a94bee515b8e8ff05b5985"
is_haval160_5: true # Example: "1becea7d3da8214210931b862535d5d4c32370cc"
is_haval192_5: true # Example: "08a07c4cdf3fb10da40b9cf3a43aa4c1f38f8e00bec3056a"
is_haval224_5: true # Example: "29f42b4f63899fd1b623437db7f5f30e3493190fad2e32fc795f01e5"

is_ntlm: true # example: "3dbde697d71690a769204beb12283678"
is_md6_128: true # example: "650cdaa202ca22b1d8d7697f98267ae5"
is_md6_256: true # example: "4a56c5d61a2bf080e4bb945b0b6cd8a98e8812749dbc104881e35c35e29202d1"
is_md6_512: true # example: "4054d49a4fefce5a1958242fee07a2a25e831347a351aef7e28d1e31922e9fc1f1634523bfcfb35db78292e3b20a3b8f300c6e63c143d88462d4733b520333cc"
is_ripemd_128: true # example: "781f357c35df1fef3138f6d29670365a"
is_ripemd_160: true # example: "e3431a8e0adbf96fd140103dc6f63a3f8fa343ab"
is_ripemd_256: true # example: "8536753ad7bface2dba89fb318c95b1b42890016057d4c3a2f351cec3acbb28b"
is_ripemd_320: true # example: "bfa11b73ad4e6421a8ba5a1223d9c9f58a5ad456be98bee5bfcd19a3ecdc6140ce4c700be860fda9"
is_sha_224: true # example: "78d8045d684abd2eece923758f3cd781489df3a48e1278982466017f"
is_sha_256: true # example: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"
is_sha_384: true # example: "9a0a82f0c0cf31470d7affede3406cc9aa8410671520b727044eda15b4c25532a9b5cd8aaf9cec4919d76255b6bfb00f"
is_sha_512: true # example: "3c9909afec25354d551dae21590bb26e38d53f2173b8d3dc3eee4c047e7ab1c1eb8b85103e3be7ba613b31bb5c9c36214dc9f14a42fd7a2fdb84856bca5c44c2"
is_crc16: true # example: "ba04"
is_hmac: true
is_hmac_md5: true
is_hmac_sha1: true
is_hmac_sha256: true
is_hmac_sha512: true

# URL
url_scheme: https # Can be set of schemes [http, https, ftp]
url_host: example.com # Can be regex
Expand Down

0 comments on commit 24c28a9

Please sign in to comment.