# In [None]:
import joblib  # Used to persist the trained model and scaler
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# 1. Load the training and test feature tables from CSV
train_df = pd.read_csv('train_features.csv')
test_df = pd.read_csv('test_features.csv')

# 2. Split each table into a feature matrix and a label vector.
# The identifier, the label itself, and the two motion-artifact columns are
# excluded from the feature matrix; the same list is applied to both tables
# so train and test end up with identical feature columns.
non_feature_cols = ['Set_ID', 'Label', 'jumlah_motion_artifacts', 'siklus_motion_artifacts']

X_train, y_train = train_df.drop(columns=non_feature_cols), train_df['Label']
X_test, y_test = test_df.drop(columns=non_feature_cols), test_df['Label']

# 3. Normalise the features with MinMaxScaler.
# The scaler learns its ranges from the training features only and is then
# applied unchanged to both the training and the test features.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Initialise the KNN classifier (k=5, uniform vote weights, Manhattan distance)
knn_model = KNeighborsClassifier(n_neighbors=5, weights='uniform', metric='manhattan')

# 5. Cross-validate on the training data only.
# The scaler and classifier are wrapped in a pipeline and the *unscaled*
# training features are passed in, so every fold fits its own scaler on that
# fold's training portion. Cross-validating on features that were scaled with
# statistics from the whole training set would leak each validation fold's
# min/max into the model and give optimistic scores.
# cross_val_score fits clones of the estimator, so the already-fitted `scaler`
# and the `knn_model` object themselves are left untouched by this step.
cv_pipeline = make_pipeline(scaler, knn_model)
cv_scores_train = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='accuracy')
print("=== Cross-Validation KNN (Data Train) ===")
print(f"Skor tiap fold: {cv_scores_train}")
print(f"Rata-rata Akurasi CV (Train): {cv_scores_train.mean():.4f}")

# 6. Train the final model on the full (scaled) training set
knn_model.fit(X_train_scaled, y_train)

# 7. Persist the model and the scaler.
# The paths are held in variables so the confirmation message always reports
# the file names that were actually written (the message previously named
# 'model_knn.pkl' while the file on disk was 'knn_model.pkl').
model_path = 'knn_model.pkl'
scaler_path = 'scaler.pkl'
joblib.dump(knn_model, model_path)
joblib.dump(scaler, scaler_path)
print(f"\n✅ Model dan Scaler berhasil disimpan sebagai '{model_path}' dan '{scaler_path}'")

# 8. Predict on the test data
y_pred = knn_model.predict(X_test_scaled)

# Class labels and their display names, paired by position.
# The labels come from the fitted model so the report rows and the confusion
# matrix always cover every class seen in training, even if one of them does
# not occur in y_test or y_pred. Without an explicit `labels` argument the
# class set is inferred from y_test/y_pred, and a missing class would no
# longer line up with the two hard-coded display names.
# NOTE(review): assumes the first sorted label means "Normal" and the second
# means "Stress" — confirm against the dataset's label encoding.
class_labels = knn_model.classes_
class_names = ['Normal', 'Stress']

# 9. Evaluate on the test data
print("\n=== Hasil Evaluasi Akhir ===")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))

# 10. Confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Prediksi')
plt.ylabel('Aktual')
plt.title('Confusion Matrix - KNN (Data Test)')
plt.tight_layout()
plt.show()

print("=== Pelatihan dan Evaluasi Akhir Selesai ===")
