# KNN Klasifikasi (disesuaikan dengan alur Tugas 10)
Notebook ini memakai `dataset.csv` (Bangladesh Dengue) dan mengikuti alur seperti Tugas 10: load data → encoding fitur kategorikal → split → scaling → training → evaluasi → prediksi data baru.

In [5]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Location of the dengue dataset, resolved relative to the notebook's working directory.
DATA_PATH = "dataset.csv"
df = pd.read_csv(filepath_or_buffer=DATA_PATH)

# Quick sanity check: report the frame's dimensions, then preview its leading rows.
print("Shape:", df.shape)
display(df.head())

Shape: (1000, 10)


Unnamed: 0,Gender,Age,NS1,IgG,IgM,Area,AreaType,HouseType,District,Outcome
0,Female,45,0,0,0,Mirpur,Undeveloped,Building,Dhaka,0
1,Male,17,0,0,1,Chawkbazar,Developed,Building,Dhaka,0
2,Female,29,0,0,0,Paltan,Undeveloped,Other,Dhaka,0
3,Female,63,1,1,0,Motijheel,Developed,Other,Dhaka,1
4,Male,22,0,0,0,Gendaria,Undeveloped,Building,Dhaka,0


In [None]:
# Label column; every remaining column of df is used as a predictor.
target_col = "Outcome"

y = df[target_col]
X_raw = df.drop(columns=[target_col])

# One-hot encode the non-numeric predictors. drop_first=True omits the first
# level of each encoded column, so that level is represented by all-zero dummies.
X = pd.get_dummies(data=X_raw, drop_first=True)

print("Fitur setelah encoding:", len(X.columns))
display(X.head())

Fitur setelah encoding: 43


Unnamed: 0,Age,NS1,IgG,IgM,Gender_Male,Area_Badda,Area_Banasree,Area_Bangshal,Area_Biman Bandar,Area_Bosila,...,Area_Rampura,Area_Sabujbagh,Area_Shahbagh,Area_Sher-e-Bangla Nagar,Area_Shyampur,Area_Sutrapur,Area_Tejgaon,AreaType_Undeveloped,HouseType_Other,HouseType_Tinshed
0,45,0,0,0,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,17,0,0,1,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,29,0,0,0,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
3,63,1,1,0,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,22,0,0,0,True,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

print("Train:", X_train.shape, "Test:", X_test.shape)

Train: (800, 43) Test: (200, 43)


In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions so the test set never influences the fit.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Number of neighbours that vote on each prediction.
k = 5

# fit() returns the estimator itself, so construction and training can be chained.
model = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)

print("Model trained with k =", k)

Model trained with k = 5


In [None]:
# Score the held-out rows with the fitted classifier.
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

print("Accuracy:", acc)

# Each heading is followed by its report on the next line (sep="\n").
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.97

Confusion Matrix:
[[ 90   3]
 [  3 104]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        93
           1       0.97      0.97      0.97       107

    accuracy                           0.97       200
   macro avg       0.97      0.97      0.97       200
weighted avg       0.97      0.97      0.97       200



In [None]:
# Demonstrate inference on "new" data, using the first raw row as a stand-in sample.
new_sample_raw = X_raw.iloc[[0]].copy()

# Encode WITHOUT drop_first. In a one-row frame every categorical column has a
# single level, so drop_first=True would discard all of its dummy columns; the
# reindex below would then fill them with 0 and the sample would be encoded as
# the baseline category of every feature, regardless of its actual values.
new_sample = pd.get_dummies(new_sample_raw, drop_first=False)

# Align to the training feature layout: dummy columns that were not kept in X
# are removed, and training columns missing from this sample are filled with 0.
# NOTE: a category that has no column in X is dropped here as well, so it ends
# up encoded as all zeros.
new_sample = new_sample.reindex(columns=X.columns, fill_value=0)

# Re-use the scaler fitted on the training data; never refit on new samples.
new_sample_scaled = scaler.transform(new_sample)

new_pred = model.predict(new_sample_scaled)
# Column 1 of predict_proba is read as P(Outcome=1); this assumes the fitted
# classes are ordered [0, 1].
new_proba = model.predict_proba(new_sample_scaled)[:, 1]

print("Prediksi (Outcome):", int(new_pred[0]))
print("Probabilitas Outcome=1:", float(new_proba[0]))

Prediksi (Outcome): 0
Probabilitas Outcome=1: 0.2
