"""
Z-Score Normalization: Standardizes data by converting each value to the number
of standard deviations it is from the mean. The result has a mean of 0 and a
standard deviation of 1.

Formula: z = (x - mean) / standard_deviation

Z-score normalization is widely used in machine learning preprocessing,
statistics, and data analysis to bring features to the same scale.

Reference: https://en.wikipedia.org/wiki/Standard_score
"""


def z_score_normalization(data: list[float]) -> list[float]:
    """
    Normalize a list of numbers using Z-score normalization.

    Every value ``x`` is mapped to ``(x - mean) / std_dev``, where ``std_dev``
    is the population standard deviation (the squared deviations are divided
    by ``len(data)``, not ``len(data) - 1``). Each z-score is rounded to 10
    decimal places.

    Parameters
    ----------
    data: list[float], the input list of numbers

    Returns
    -------
    list[float]: list of z-scores for each element

    Raises
    ------
    ValueError: if ``data`` is empty, has fewer than two elements, or has no
        spread (all elements identical, so the standard deviation is zero)

    >>> z_score_normalization([2, 4, 4, 4, 5, 5, 7, 9])
    [-1.5, -0.5, -0.5, -0.5, 0.0, 0.0, 1.0, 2.0]
    >>> z_score_normalization([1, 1, 1, 1])
    Traceback (most recent call last):
        ...
    ValueError: standard deviation is zero — all elements are identical
    >>> z_score_normalization([0.1, 0.1, 0.1])
    Traceback (most recent call last):
        ...
    ValueError: standard deviation is zero — all elements are identical
    >>> z_score_normalization([])
    Traceback (most recent call last):
        ...
    ValueError: data cannot be empty
    >>> z_score_normalization([10])
    Traceback (most recent call last):
        ...
    ValueError: data must contain at least two elements
    >>> z_score_normalization([0, 0, 1, 1])
    [-1.0, -1.0, 1.0, 1.0]
    >>> z_score_normalization([-5, 0, 5])
    [-1.2247448714, 0.0, 1.2247448714]
    """
    if not data:
        raise ValueError("data cannot be empty")
    if len(data) < 2:
        raise ValueError("data must contain at least two elements")

    zero_spread_message = "standard deviation is zero — all elements are identical"

    # Detect identical elements by comparing the extremes directly. The float
    # mean of identical values is not always exact (the mean of three 0.1s is
    # 0.10000000000000002), which would leave a tiny non-zero standard
    # deviation and produce meaningless z-scores instead of an error.
    if min(data) == max(data):
        raise ValueError(zero_spread_message)

    count = len(data)
    mean = sum(data) / count
    variance = sum((x - mean) ** 2 for x in data) / count
    std_dev = variance**0.5

    # The squared deviations of extremely close values can still underflow to
    # zero; guard the division below against that case as well.
    if std_dev == 0:
        raise ValueError(zero_spread_message)

    return [round((x - mean) / std_dev, 10) for x in data]


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    data = [2, 4, 4, 4, 5, 5, 7, 9]
    print(f"Original data: {data}")
    print(f"Z-score normalized: {z_score_normalization(data)}")