Skip to content

Commit

Permalink
Average calculation function based on MathPHP library (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
SmetDenis committed Mar 17, 2024
1 parent 41ae59a commit 195c3b4
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 112 deletions.
6 changes: 3 additions & 3 deletions .phan.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
'directory_list' => [
'src',

'vendor/jbzoo/data/src',
'vendor/jbzoo/ci-report-converter/src',
'vendor/jbzoo/cli/src',
'vendor/jbzoo/data/src',
'vendor/jbzoo/utils/src',
'vendor/jbzoo/ci-report-converter/src',
'vendor/league/csv/src',
'vendor/fakerphp/faker/src',
'vendor/markrogoyski/math-php/src',
'vendor/symfony/console',
'vendor/symfony/finder',
],
Expand Down
41 changes: 23 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,26 +105,26 @@ columns:
- name: "Column Name (header)" # Any custom name of the column in the CSV file (first row). Required if "csv_structure.header" is true.
description: "Lorem ipsum" # Optional. Description of the column. Not used in the validation process.

# Important notes about the validation rules.
# 1. All rules except "not_empty" are ignored for empty strings (length 0).
# If the value must be non-empty, use "not_empty" as an extra rule!
# 2. The rules don't depend on each other. They are independent.
# They know nothing about each other and cannot influence each other.
# 3. You can use the rules in any combination. Or not use any of them.
# They are grouped below simply for ease of navigation and reading.
# 4. If you see the value for the rule is "true" - that's just an enable flag.
# In other cases, these are rule parameters.
# 5. The rules are executed in the same order as in the schema. But it doesn't matter.
# The result will be the same in any order.
# 6. Most of the rules are case-sensitive, unless otherwise specified.
# 7. As a backup plan, you can always use the "regex" rule.

####################################################################################################################
# Data validation for each(!) value in the column.
# Of course, this can greatly affect the speed of checking.
# It depends on the number of checks and CSV file size.
# TODO: There are several ways to optimize this process, but the author needs time to test it carefully.
rules:
# Important notes:
# 1. All rules except "not_empty" are ignored for empty strings (length 0).
# If the value must be non-empty, use "not_empty" as an extra rule!
# 2. The rules don't depend on each other. They are independent.
# They know nothing about each other and cannot influence each other.
# 3. You can use the rules in any combination. Or not use any of them.
# They are grouped below simply for ease of navigation and reading.
# 4. If you see the value for the rule is "true" - that's just an enable flag.
# In other cases, these are rule parameters.
# 5. The rules are executed in the same order as in the schema. But it doesn't matter.
# The result will be the same in any order.
# 6. Most of the rules are case-sensitive, unless otherwise specified.
# 7. As a backup plan, you can always use the "regex" rule.

# General rules
not_empty: true # Value is not an empty string. Actually checks if the string length is not 0.
exact_value: Some string # Exact value for string in the column.
Expand Down Expand Up @@ -170,6 +170,8 @@ columns:
num_not: 4
num_min: 1
num_max: 10
is_int: true # Check format only. Can be negative and positive. Without any separators.
is_float: true # Check format only. Can be negative and positive. Dot as decimal separator.

# Number of digits after the decimal point (with zeros).
precision: 5
Expand All @@ -192,8 +194,6 @@ columns:

# Specific formats
is_bool: true # Allow only boolean values "true" and "false", case-insensitive.
is_int: true # Check format only. Can be negative and positive. Without any separators.
is_float: true # Check format only. Can be negative and positive. Dot as decimal separator.
is_ip4: true # Only IPv4. Example: "127.0.0.1".
is_url: true # Only URL format. Example: "https://example.com/page?query=string#anchor".
is_email: true # Only email format. Example: "user@example.com".
Expand All @@ -217,13 +217,18 @@ columns:
aggregate_rules:
is_unique: true # All values in the column are unique.

# Assumes that all values in the column are int/float only.
# An empty string is converted to null.
# Sum of the numbers in the column. Example: [1, 2, 3] => 6.
sum: 5
sum_not: 4
sum_min: 1
sum_max: 10

# Regular arithmetic mean: the sum of the numbers divided by their count.
average: 5
average_not: 4
average_min: 1
average_max: 10

- name: "another_column"

- name: "third_column"
Expand Down
8 changes: 6 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@
"php" : "^8.1",
"ext-mbstring" : "*",

"league/csv" : "^9.15",

"jbzoo/data" : "^7.1",
"jbzoo/cli" : "^7.1",
"jbzoo/utils" : "^7.1",
"jbzoo/ci-report-converter" : "^7.2",
"league/csv" : "^9.15",

"symfony/yaml" : ">=6.4",
"symfony/filesystem" : ">=6.4",
"symfony/finder" : ">=6.4"
"symfony/finder" : ">=6.4",

"markrogoyski/math-php" : "^2.9"
},

"require-dev" : {
Expand Down
89 changes: 82 additions & 7 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 26 additions & 7 deletions schema-examples/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,62 +17,81 @@
"exact_value" : "Some string",
"allow_values" : ["y", "n", ""],
"regex" : "\/^[\\d]{2}$\/",

"length" : 5,
"length_not" : 4,
"length_min" : 1,
"length_max" : 10,

"is_trimmed" : true,
"is_lowercase" : true,
"is_uppercase" : true,
"is_capitalize" : true,

"word_count" : 5,
"word_count_not" : 4,
"word_count_min" : 1,
"word_count_max" : 10,

"contains" : "Hello",
"contains_one" : ["a", "b"],
"contains_all" : ["a", "b", "c"],
"starts_with" : "prefix ",
"ends_with" : " suffix",

"num" : 5,
"num_not" : 4,
"num_min" : 1,
"num_max" : 10,
"is_int" : true,
"is_float" : true,

"precision" : 5,
"precision_not" : 4,
"precision_min" : 1,
"precision_max" : 10,

"date" : "01 Jan 2000",
"date_not" : "2006-01-02 15:04:05 -0700 Europe\/Rome",
"date_min" : "+1 day",
"date_max" : "now",
"date_format" : "Y-m-d",
"is_date" : true,

"is_bool" : true,
"is_int" : true,
"is_float" : true,
"is_ip4" : true,
"is_url" : true,
"is_email" : true,
"is_domain" : true,
"is_uuid" : true,
"is_alias" : true,

"is_latitude" : true,
"is_longitude" : true,
"is_geohash" : true,
"is_cardinal_direction" : true,
"is_usa_market_name" : true
},

"aggregate_rules" : {
"is_unique" : true,
"sum" : 5,
"sum_not" : 4,
"sum_min" : 1,
"sum_max" : 10
"is_unique" : true,

"sum" : 5,
"sum_not" : 4,
"sum_min" : 1,
"sum_max" : 10,

"average" : 5,
"average_not" : 4,
"average_min" : 1,
"average_max" : 10
}
},

{"name" : "another_column"},

{"name" : "third_column"},

{"description" : "Column with description only. Undefined header name."}
]
}

0 comments on commit 195c3b4

Please sign in to comment.