<a href="https://colab.research.google.com/github/221230003-coder/221230003-pengantar-ML/blob/main/week-02/latihan_praktikum_2_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# 🎯 SOLUSI PRAKTIKUM 2: NUMPY - Data Preprocessing

# Simulated dataset: 100 samples, 5 features.
# The global NumPy RNG is seeded first so the draw below is reproducible.
np.random.seed(42)
X = np.random.randn(100, 5) * 10 + 5  # standard-normal draws scaled by 10, shifted by 5 (target mean=5, std=10)

# 1. Z-score Normalization
def z_score_normalization(data):
    """Standardize ``data`` to zero mean and unit standard deviation along axis 0.

    Parameters
    ----------
    data : np.ndarray
        Array whose first axis indexes the samples. Mean and standard
        deviation are computed along axis 0, so a 2-D array is normalized
        column by column and a 1-D array as a single feature.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``data`` holding ``(data - mean) / std``.
        Features with zero standard deviation are divided by 1 instead, so
        they come out as all zeros rather than NaN/inf.

    Raises
    ------
    TypeError
        If ``data`` is not a NumPy array.
    """
    if not isinstance(data, np.ndarray):
        raise TypeError("Input harus berupa NumPy array")
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # Build the guarded divisor with np.where rather than in-place masking:
    # for 1-D input np.std returns a scalar, which does not support item
    # assignment. ones_like keeps the dtype of std unchanged.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (data - mean) / safe_std

# Standardize every column of the simulated dataset.
X_normalized = z_score_normalization(X)

# 2. Outlier Handling (clipping)
def handle_outliers(data, std_threshold=3):
    """Clip values that lie too far from the mean, measured along axis 0.

    Parameters
    ----------
    data : np.ndarray
        Input array; mean and standard deviation are taken along axis 0.
    std_threshold : int or float, optional
        Number of standard deviations around the mean that is kept
        (default 3). Values outside that band are clipped to its edge.

    Returns
    -------
    np.ndarray
        Clipped copy of ``data``.

    Raises
    ------
    TypeError
        If ``data`` is not a NumPy array.
    """
    if isinstance(data, np.ndarray):
        center = np.mean(data, axis=0)
        # Half-width of the accepted band around the mean.
        margin = std_threshold * np.std(data, axis=0)
        return np.clip(data, center - margin, center + margin)
    raise TypeError("Input harus berupa NumPy array")

# Clip the already-normalized data using the default threshold of handle_outliers.
X_cleaned = handle_outliers(X_normalized)

# 3. One-hot Encoding
def one_hot_encoding(labels, n_classes=None):
    """Convert a 1-D array of integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : np.ndarray
        1-D array of non-negative integer class indices.
    n_classes : int, optional
        Number of columns in the result. Defaults to ``max(labels) + 1``.
        Pass it explicitly when some classes may be absent from ``labels``.

    Returns
    -------
    np.ndarray
        Integer array of shape ``(len(labels), n_classes)`` with exactly one
        1 per row. Empty ``labels`` yields an array with zero rows (and zero
        columns unless ``n_classes`` is given).

    Raises
    ------
    TypeError
        If ``labels`` is not a NumPy array or is not of integer dtype.
    ValueError
        If ``labels`` is not 1-D, contains negative values, or contains a
        label that does not fit in ``n_classes`` columns.
    """
    if not isinstance(labels, np.ndarray):
        raise TypeError("Labels harus berupa NumPy array")
    if labels.ndim != 1:
        raise ValueError("Labels harus berupa array 1D")

    n_samples = labels.shape[0]
    if n_samples == 0:
        # np.max would fail on an empty array; there is nothing to encode.
        return np.zeros((0, 0 if n_classes is None else n_classes), dtype=int)

    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError("Labels harus bertipe integer")
    if labels.min() < 0:
        # Negative indices would silently wrap around to the last columns.
        raise ValueError("Labels tidak boleh negatif")

    # Convert to a Python int so that max + 1 cannot overflow small dtypes.
    max_label = int(labels.max())
    if n_classes is None:
        n_classes = max_label + 1
    elif n_classes <= max_label:
        raise ValueError("n_classes harus lebih besar dari label maksimum")

    one_hot = np.zeros((n_samples, n_classes), dtype=int)
    # Fancy indexing sets one_hot[i, labels[i]] = 1 for every row i.
    one_hot[np.arange(n_samples), labels] = 1
    return one_hot

# Small example: seven integer labels covering classes 0, 1 and 2.
labels = np.array([0, 1, 2, 0, 1, 2, 0])
one_hot_labels = one_hot_encoding(labels)

# 4. Train-Test Split (manual)
def train_test_split_numpy(X, y, test_size=0.2, random_state=None):
    """Randomly split ``X`` and ``y`` into train and test subsets.

    Parameters
    ----------
    X : np.ndarray
        Feature array; rows (axis 0) are the samples.
    y : np.ndarray
        Targets, one entry per row of ``X``.
    test_size : float, optional
        Fraction of the samples placed in the test set, between 0 and 1
        (default 0.2). The test count is ``int(n_samples * test_size)``.
    random_state : int, optional
        Seed for a reproducible shuffle. When given, a private generator is
        used so the global NumPy RNG state is left untouched; the resulting
        permutation is the same one ``np.random.seed(random_state)`` followed
        by ``np.random.shuffle`` would produce. When ``None``, the global
        NumPy RNG is used.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``test_size`` is outside ``[0, 1]`` or ``X`` and ``y`` have a
        different number of samples.
    """
    if not 0 <= test_size <= 1:
        raise ValueError("test_size harus berada di antara 0 dan 1")
    n_samples = X.shape[0]
    if len(y) != n_samples:
        raise ValueError("X dan y harus memiliki jumlah sampel yang sama")

    # A local RandomState avoids reseeding the process-wide generator as a
    # hidden side effect of calling this function.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    indices = np.arange(n_samples)
    rng.shuffle(indices)

    n_test = int(n_samples * test_size)
    test_idx = indices[:n_test]
    train_idx = indices[n_test:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

# Random integer targets in {0, 1, 2}, one per row of X (drawn from the global RNG).
y = np.random.randint(0, 3, 100)
# Split with 20% of the samples held out for testing, using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split_numpy(X, y, test_size=0.2, random_state=42)


# 🔍 Validation & output
# Sanity checks: normalization keeps the shape and yields per-column mean ~0
# and std ~1; the one-hot matrix has one row per label and one column per class.
assert X_normalized.shape == X.shape, "Shape harus sama"
assert np.allclose(X_normalized.mean(axis=0), 0, atol=1e-10), "Mean ~0 setelah normalisasi"
assert np.allclose(X_normalized.std(axis=0), 1, atol=1e-10), "Std ~1 setelah normalisasi"
assert one_hot_labels.shape == (7, 3), "Shape one-hot salah"

print("✅ Semua fungsi berjalan dengan baik!")
print("✅ NumPy operations completed")
print("X_normalized:\n", np.round(X_normalized[:5], 5))  # show the first 5 rows
print("X_cleaned:\n", np.round(X_cleaned[:5], 5))        # show the first 5 rows
print("One-hot labels:\n", one_hot_labels)


✅ Semua fungsi berjalan dengan baik!
✅ NumPy operations completed
X_normalized:
 [[ 0.60442 -0.2198   0.75746  1.46109 -0.18919]
 [-0.21141  1.53421  0.87977 -0.62586  0.53845]
 [-0.46735 -0.55422  0.34304 -2.1381  -1.58579]
 [-0.57772 -1.11296  0.41687 -1.08519 -1.29292]
 [ 1.68601 -0.30917  0.16486 -1.62641 -0.47983]]
X_cleaned:
 [[ 0.60442 -0.2198   0.75746  1.46109 -0.18919]
 [-0.21141  1.53421  0.87977 -0.62586  0.53845]
 [-0.46735 -0.55422  0.34304 -2.1381  -1.58579]
 [-0.57772 -1.11296  0.41687 -1.08519 -1.29292]
 [ 1.68601 -0.30917  0.16486 -1.62641 -0.47983]]
One-hot labels:
 [[1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]]
