<a href="https://colab.research.google.com/github/Samsul2121/221230021-Pengantar-ML/blob/main/221230021_Pengantar_ML/week_02/latihan_praktikum_2_numpy_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np

# TODO 1: Z-score Normalization
def z_score_normalization(data):
    """Standardize data with the z-score transform.

    Every value is mapped to ``(x - mean) / std``, where the mean and the
    standard deviation (NumPy's default, ``ddof=0``) are computed along
    axis 0, i.e. independently for each column of a 2-D input.

    Args:
        data: Array-like of numbers (NumPy array, list, tuple, ...).

    Returns:
        A NumPy array with the same shape as ``data``.

    Raises:
        ValueError: If ``data`` is empty, or if any standard deviation
            along axis 0 is zero (the division would be undefined).
    """
    # Coerce first so plain Python sequences work, not only ndarrays.
    data = np.asarray(data)
    if data.size == 0:
        raise ValueError("Input array kosong.")

    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has std == 0; fail loudly instead of emitting inf/nan.
    if np.any(std == 0):
        raise ValueError("Std dev = 0, normalisasi tidak bisa dilakukan.")

    return (data - mean) / std


# TODO 2: Handle Outliers
def handle_outliers(data, std_threshold=3):
    """Clip values that lie too far from the mean.

    Values outside ``mean ± std_threshold * std`` are replaced by the
    nearest boundary. Mean and standard deviation are computed along
    axis 0, so for 2-D input each column gets its own bounds.

    Args:
        data: Array-like of numbers (NumPy array, list, tuple, ...).
        std_threshold: Non-negative number of standard deviations that
            defines the allowed band around the mean.

    Returns:
        A NumPy array with the same shape as ``data`` and all values
        inside the allowed band.

    Raises:
        ValueError: If ``data`` is empty or ``std_threshold`` is negative.
    """
    # Coerce first so plain Python sequences work, not only ndarrays.
    data = np.asarray(data)
    if data.size == 0:
        raise ValueError("Input array kosong.")
    # A negative threshold would make lower > upper, which np.clip does not
    # reject but which yields meaningless output.
    if std_threshold < 0:
        raise ValueError("std_threshold tidak boleh negatif.")

    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    lower, upper = mean - std_threshold * std, mean + std_threshold * std
    return np.clip(data, lower, upper)


# TODO 3: One-hot Encoding
def one_hot_encoding(labels):
    """Convert integer class labels into a one-hot encoded array.

    The number of classes is ``max(labels) + 1``; label ``k`` becomes a row
    with a 1 at position ``k`` and 0 elsewhere.

    Example:
        ``[0, 1, 2] -> [[1, 0, 0], [0, 1, 0], [0, 0, 1]]``

    Args:
        labels: Array-like of non-negative integer class indices.

    Returns:
        A float NumPy array of shape ``labels.shape + (n_classes,)``.

    Raises:
        ValueError: If ``labels`` is empty or contains a negative value.
        TypeError: If ``labels`` does not have an integer dtype.
    """
    # Coerce first so plain Python sequences work, not only ndarrays.
    labels = np.asarray(labels)
    if labels.size == 0:
        raise ValueError("Labels kosong.")
    # Float labels cannot index an array, and boolean labels would be
    # interpreted as a mask instead of class indices.
    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError("Labels harus bertipe integer.")
    # Negative indices would silently wrap around to the last classes.
    if np.min(labels) < 0:
        raise ValueError("Labels tidak boleh negatif.")

    # Convert to a Python int before adding 1 so the class count cannot
    # overflow a narrow fixed-width label dtype.
    n_classes = int(np.max(labels)) + 1
    return np.eye(n_classes)[labels]


# TODO 4: Train-test split manual
def train_test_split_numpy(X, y, test_size=0.2, seed=42):
    """Split a dataset into train and test subsets without scikit-learn.

    The sample indices are shuffled with a random generator seeded by
    ``seed``; the first ``int(test_size * n_samples)`` shuffled indices
    form the test set and the remainder the train set.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of targets with the same first-axis length as ``X``.
        test_size: Fraction of samples to put in the test set, in [0, 1].
        seed: Seed for the shuffle, making the split reproducible.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` have different numbers of samples,
            or if ``test_size`` is outside [0, 1].
    """
    # Coerce first so plain Python sequences support fancy indexing below.
    X = np.asarray(X)
    y = np.asarray(y)
    if X.shape[0] != y.shape[0]:
        raise ValueError("Jumlah sampel X dan y tidak sama.")
    # Out-of-range fractions would silently produce nonsensical splits
    # (e.g. a negative count slices from the end of the index array).
    if not 0 <= test_size <= 1:
        raise ValueError("test_size harus berada di antara 0 dan 1.")

    # Use a private generator instead of np.random.seed so the caller's
    # global random state is left untouched; RandomState(seed) produces the
    # same shuffle as seeding the global generator with the same seed.
    rng = np.random.RandomState(seed)
    indices = np.arange(X.shape[0])
    rng.shuffle(indices)

    test_count = int(test_size * X.shape[0])
    test_idx, train_idx = indices[:test_count], indices[test_count:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]


# === Usage example ===
# Seed the global NumPy RNG so the synthetic data below is reproducible.
np.random.seed(42)
# 100x5 matrix of standard-normal draws, scaled by 10 and shifted by 5.
X = np.random.randn(100, 5) * 10 + 5
labels = np.array([0, 1, 2, 0, 1, 2, 0])

# Z-score normalization (statistics are taken along axis 0, i.e. per column)
X_normalized = z_score_normalization(X)

# Handle outliers: clipping is applied to the already-normalized data,
# using the default threshold of 3 standard deviations.
X_cleaned = handle_outliers(X_normalized)

# One-hot encoding
one_hot_labels = one_hot_encoding(labels)

# Train-test split
# y holds 100 random integer targets in {0, 1, 2}, drawn from the global RNG
# after X; the split itself uses the function's default test_size and seed.
y = np.random.randint(0, 3, 100)
X_train, X_test, y_train, y_test = train_test_split_numpy(X, y)

# === Unit Tests ===
# Normalization must preserve the shape of its input.
assert X_normalized.shape == X.shape
# NOTE: mean()/std() are called without an axis, so these two checks cover
# the statistics over all elements rather than each column separately.
assert np.allclose(X_normalized.mean(), 0, atol=1e-10)
assert np.allclose(X_normalized.std(), 1, atol=1e-10)
# One row per label, one column per class (max label + 1).
assert one_hot_labels.shape == (labels.shape[0], np.max(labels)+1)

print("✅ Semua fungsi berjalan dengan baik!")

# Show the first five rows of each transformed matrix and the full encoding.
print("✅ NumPy operations completed")
print("X_normalized:\n", X_normalized[:5])
print("X_cleaned:\n", X_cleaned[:5])
print("One-hot labels:\n", one_hot_labels)

✅ Semua fungsi berjalan dengan baik!
✅ NumPy operations completed
X_normalized:
 [[ 0.604418   -0.21979528  0.75746006  1.461092   -0.18919425]
 [-0.21141045  1.53420502  0.87977344 -0.62585547  0.53845484]
 [-0.46735006 -0.55422449  0.34303561 -2.13809665 -1.58578836]
 [-0.57771567 -1.1129603   0.41687036 -1.08519209 -1.2929218 ]
 [ 1.68601234 -0.30916828  0.16486193 -1.62640854 -0.47982667]]
X_cleaned:
 [[ 0.604418   -0.21979528  0.75746006  1.461092   -0.18919425]
 [-0.21141045  1.53420502  0.87977344 -0.62585547  0.53845484]
 [-0.46735006 -0.55422449  0.34303561 -2.13809665 -1.58578836]
 [-0.57771567 -1.1129603   0.41687036 -1.08519209 -1.2929218 ]
 [ 1.68601234 -0.30916828  0.16486193 -1.62640854 -0.47982667]]
One-hot labels:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]]
