In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score
from tabulate import tabulate

# ==============================================================================
# ANSWER NO. 1: LOAD DATA & EXPLORATION
# ==============================================================================
banner = "=" * 40
print("\n" + banner)
print("1. DATA LOADING & EXPLORATION")
print(banner)

# Load the scikit-learn wine dataset; keep the feature matrix and labels as
# plain arrays because the later cells split and scale them directly.
wine = datasets.load_wine()
X, y = wine.data, wine.target

# DataFrame view of the same data: one column per feature plus the class label.
df = pd.DataFrame(X, columns=wine.feature_names).assign(target=y)

# A. Number of samples (rows) and features (columns)
n_samples, n_features = X.shape
print(f"Jumlah Data (Samples) : {n_samples}")
print(f"Jumlah Fitur (Columns): {n_features}")

# B. Feature names, shown as an indexed table
fitur_df = pd.DataFrame({'Nama Fitur': wine.feature_names})
print(tabulate(fitur_df, headers='keys', tablefmt='fancy_grid', showindex=True))

# C. Class distribution: rows per target label (ascending label order),
#    with the human-readable class name attached to each label.
target_counts = (
    df.groupby('target')
    .size()
    .rename_axis('Kelas (Target)')
    .reset_index(name='Jumlah')
)
label_to_name = dict(enumerate(wine.target_names))
target_counts['Nama Kelas'] = target_counts['Kelas (Target)'].map(label_to_name)

print("\nDistribusi Kelas Target:")
print(tabulate(target_counts, headers='keys', tablefmt='fancy_grid', showindex=False))

# D. Value ranges of three features, to show how different their scales are.
print("\nContoh Perbedaan Skala Data (Statistik):")
stats = df[['alcohol', 'magnesium', 'proline']].agg(['min', 'max', 'mean'])

# Two-decimal grid table of the min / max / mean rows.
print(tabulate(stats, headers='keys', tablefmt='fancy_grid', floatfmt=".2f"))


1. DATA LOADING & EXPLORATION
Jumlah Data (Samples) : 178
Jumlah Fitur (Columns): 13
╒════╤══════════════════════════════╕
│    │ Nama Fitur                   │
╞════╪══════════════════════════════╡
│  0 │ alcohol                      │
├────┼──────────────────────────────┤
│  1 │ malic_acid                   │
├────┼──────────────────────────────┤
│  2 │ ash                          │
├────┼──────────────────────────────┤
│  3 │ alcalinity_of_ash            │
├────┼──────────────────────────────┤
│  4 │ magnesium                    │
├────┼──────────────────────────────┤
│  5 │ total_phenols                │
├────┼──────────────────────────────┤
│  6 │ flavanoids                   │
├────┼──────────────────────────────┤
│  7 │ nonflavanoid_phenols         │
├────┼──────────────────────────────┤
│  8 │ proanthocyanins              │
├────┼──────────────────────────────┤
│  9 │ color_intensity              │
├────┼──────────────────────────────┤
│ 10 │ hue                          │
├─

In [7]:
# ==============================================================================
# ANSWER NO. 2: PREPROCESSING (SPLIT & SCALING)
# ==============================================================================
section_rule = "=" * 40
print(f"\n{section_rule}")
print("2. SPLITTING & SCALING")
print(section_rule)

# A. 70% train / 30% test split.
#    stratify=y keeps the class proportions the same in both partitions;
#    random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

n_train, n_test = X_train.shape[0], X_test.shape[0]
print(f"Jumlah Data Training: {n_train}")
print(f"Jumlah Data Testing : {n_test}")

# B. Feature scaling.
#    The scaler learns its mean/std from the training partition only, so no
#    information from the test partition leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
# The test partition is transformed with the training-set parameters.
X_test_scaled = scaler.transform(X_test)

# Sanity check over the whole scaled training matrix (all features pooled).
train_mean = X_train_scaled.mean()
train_std = X_train_scaled.std()

print("\nStatus Scaling: Selesai.")
print(f"Mean data training setelah scaling: {train_mean:.2f} (Mendekati 0)")
print(f"Std data training setelah scaling : {train_std:.2f} (Mendekati 1)")


2. SPLITTING & SCALING
Jumlah Data Training: 124
Jumlah Data Testing : 54

Status Scaling: Selesai.
Mean data training setelah scaling: 0.00 (Mendekati 0)
Std data training setelah scaling : 1.00 (Mendekati 1)


In [8]:
# ==============================================================================
# ANSWER NO. 3: MODELING (KNN), EVALUATION & MANUAL INPUT
# ==============================================================================
print("\n" + "="*40)
print("3. IMPLEMENTASI KNN & INPUT MANUAL")
print("="*40)

# A. Train the model on the scaled training partition.
k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train_scaled, y_train)
print(f"Model KNN (k={k}) berhasil dilatih.")

# B. Evaluate on the scaled test partition.
y_pred = knn.predict(X_test_scaled)

acc = accuracy_score(y_test, y_pred)
# average='weighted' because this is a multiclass problem (3 classes):
# per-class scores are averaged, weighted by each class's support.
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\n--- Metrik Evaluasi ---")
print(f"Accuracy : {acc:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1-Score : {f1:.4f}")
print("\nClassification Report Lengkap:")
print(classification_report(y_test, y_pred, target_names=wine.target_names))

# C. Interactive manual input
print("\n--- Simulasi Prediksi Manual ---")
print("Masukkan nilai fitur satu per satu:")


def _read_feature_value(feature):
    """Prompt until the user enters a finite number for ``feature``.

    ``float()`` happily parses "nan", "inf" and "-inf". Those values would
    pass a plain ``ValueError`` guard and only fail later, inside
    ``scaler.transform`` / ``knn.predict``, after every feature had already
    been typed in. They are therefore rejected here and the prompt repeats.

    Args:
        feature: Feature name shown in the prompt.

    Returns:
        The entered value as a finite ``float``.
    """
    while True:
        try:
            val = float(input(f"Masukkan nilai '{feature}': "))
            if not np.isfinite(val):
                # Route through the same handler as unparsable text.
                raise ValueError(f"non-finite value: {val!r}")
            return val
        except ValueError:
            print("  [Error] Harap masukkan angka yang valid.")


# One value per feature, in the same order as the training columns.
input_data = [_read_feature_value(feature) for feature in wine.feature_names]

# The estimator API expects a 2-D array: one row (sample) x n_features columns.
input_data_array = np.array(input_data).reshape(1, -1)

# IMPORTANT: scale the manual input with the scaler fitted on the training
# data, so it lives in the same feature space the model was trained in.
input_data_scaled = scaler.transform(input_data_array)

# Predict the class label and look up its name.
hasil_prediksi = knn.predict(input_data_scaled)[0]
nama_kelas = wine.target_names[hasil_prediksi]

print("\n" + "-"*30)
print(f"Hasil Prediksi: {nama_kelas.upper()} (Class {hasil_prediksi})")
print("-"*30)


3. IMPLEMENTASI KNN & INPUT MANUAL
Model KNN (k=5) berhasil dilatih.

--- Metrik Evaluasi ---
Accuracy : 0.9444
Recall   : 0.9444
F1-Score : 0.9448

Classification Report Lengkap:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        18
     class_1       1.00      0.86      0.92        21
     class_2       0.83      1.00      0.91        15

    accuracy                           0.94        54
   macro avg       0.94      0.95      0.94        54
weighted avg       0.95      0.94      0.94        54


--- Simulasi Prediksi Manual ---
Masukkan nilai fitur satu per satu:
Masukkan nilai 'alcohol': 13.5
Masukkan nilai 'malic_acid': 1.8
Masukkan nilai 'ash': 2.4
Masukkan nilai 'alcalinity_of_ash': 16.0
Masukkan nilai 'magnesium': 120
Masukkan nilai 'total_phenols': 2.8
Masukkan nilai 'flavanoids': 3.0
Masukkan nilai 'nonflavanoid_phenols': 0.3
Masukkan nilai 'proanthocyanins': 2.0
Masukkan nilai 'color_intensity': 5.5
Masukkan nilai 'h