Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions data_structures/z_score_normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Z-Score Normalization: Standardizes data by converting each value to the number
of standard deviations it is from the mean. The result has a mean of 0 and a
standard deviation of 1.

Formula: z = (x - mean) / standard_deviation

Z-score normalization is widely used in machine learning preprocessing,
statistics, and data analysis to bring features to the same scale.

Reference: https://en.wikipedia.org/wiki/Standard_score
"""


def z_score_normalization(data: list[float]) -> list[float]:
"""
Normalize a list of numbers using Z-score normalization.

Parameters
----------
data: list[float], the input list of numbers

Returns
-------
list[float]: list of z-scores for each element

>>> z_score_normalization([2, 4, 4, 4, 5, 5, 7, 9])
[-1.5, -0.5, -0.5, -0.5, 0.0, 0.0, 1.0, 2.0]
>>> z_score_normalization([1, 1, 1, 1])
Traceback (most recent call last):
...
ValueError: standard deviation is zero — all elements are identical
>>> z_score_normalization([])
Traceback (most recent call last):
...
ValueError: data cannot be empty
>>> z_score_normalization([10])
Traceback (most recent call last):
...
ValueError: data must contain at least two elements
>>> z_score_normalization([0, 0, 1, 1])
[-1.0, -1.0, 1.0, 1.0]
>>> z_score_normalization([-5, 0, 5])
[-1.2247448714, 0.0, 1.2247448714]
"""
if not data:
raise ValueError("data cannot be empty")
if len(data) < 2:
raise ValueError("data must contain at least two elements")

mean = sum(data) / len(data)
variance = sum((x - mean) ** 2 for x in data) / len(data)
std_dev = variance**0.5

if std_dev == 0:
raise ValueError("standard deviation is zero — all elements are identical")

return [round((x - mean) / std_dev, 10) for x in data]


if __name__ == "__main__":
import doctest

doctest.testmod()

data = [2, 4, 4, 4, 5, 5, 7, 9]
print(f"Original data: {data}")
print(f"Z-score normalized: {z_score_normalization(data)}")
Loading