In [None]:
import numpy as np

def standardize_data(X):
    """Standardize ``X`` column-wise to zero mean and unit standard deviation.

    Each value is transformed as ``(x - mean) / std``, where ``mean`` and
    ``std`` are computed along ``axis=0``. For NumPy arrays ``std`` is the
    population standard deviation (``ddof=0``).

    Parameters
    ----------
    X : numpy.ndarray, list or tuple
        Data to standardize. A 2-D input is treated as rows of samples and
        columns of features; a 1-D input is standardized as a single feature.
        Lists and tuples are converted to a NumPy array first.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``X``. Columns whose
        standard deviation is exactly zero (constant columns) are only
        centered, so they come out as all zeros instead of NaN/inf.
    """
    # Plain Python sequences have no .mean()/.std() methods; convert them.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)

    mean_of_cols = X.mean(axis=0)
    std_of_cols = X.std(axis=0)

    # Replace zero deviations with 1 to avoid division by zero. np.where is
    # used instead of in-place boolean-mask assignment because for 1-D input
    # the std is a NumPy scalar, which does not support item assignment.
    safe_std = np.where(std_of_cols == 0, 1, std_of_cols)

    # Vectorized (broadcast) standardization.
    return (X - mean_of_cols) / safe_std

# --- Example Usage ---
# Step 1: build a small sample matrix whose three columns live on very
# different scales (tens, hundreds, single digits).
X = np.array([[10, 200, 3], [20, 150, 6], [30, 250, 9], [40, 100, 12]])

# Show the raw input, followed by a 30-character divider line.
print("Original Dataset (X):", X, sep="\n")
print("-" * 30)

# Step 2: run the matrix through the standardization helper.
X_standardized = standardize_data(X)

# Show the transformed matrix, followed by the same divider.
print("Standardized Dataset (X_standardized):", X_standardized, sep="\n")
print("-" * 30)

# Step 3: sanity-check the result. After standardization every column
# should have a mean close to 0 and a standard deviation close to 1.
print(
    "Mean of each column in the standardized data:",
    X_standardized.mean(axis=0),
    sep="\n",
)
print(
    "\nStandard deviation of each column in the standardized data:",
    X_standardized.std(axis=0),
    sep="\n",
)


Original Dataset (X):
[[ 10 200   3]
 [ 20 150   6]
 [ 30 250   9]
 [ 40 100  12]]
------------------------------
Standardized Dataset (X_standardized):
[[-1.34164079  0.4472136  -1.34164079]
 [-0.4472136  -0.4472136  -0.4472136 ]
 [ 0.4472136   1.34164079  0.4472136 ]
 [ 1.34164079 -1.34164079  1.34164079]]
------------------------------
Mean of each column in the standardized data:
[0. 0. 0.]

Standard deviation of each column in the standardized data:
[1. 1. 1.]
