In [587]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
import numpy as np


In [588]:

# Membaca file Excel (.xlsx)
df = pd.read_excel("CPC5.xlsx")

# Menyimpan DataFrame ke file CSV
df.to_csv("CPC5.csv", index=False)

In [589]:
# df = pd.read_csv("CPC1.csv")

df = df.replace(",", ".", regex=True)
df.iloc[:, :-1] = df.iloc[:, :-1].astype(float)

In [590]:
df = pd.read_csv('CPC5.csv')

In [591]:
label_encoder = LabelEncoder()
df['kelas'] = label_encoder.fit_transform(df['kelas'])
# print(df)

In [592]:
X = df.iloc[:, :-1]  # Fitur
y = df.iloc[:, -1]   # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [593]:
svm_classifier = SVC()
svm_param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf']}
svm_grid_search = GridSearchCV(svm_classifier, svm_param_grid, cv=10, scoring='accuracy')
svm_grid_search.fit(X_train_scaled, y_train)
svm_best_params = svm_grid_search.best_params_

svm_classifier = SVC(**svm_best_params)
svm_classifier.fit(X_train_scaled, y_train)

svm_predictions = svm_classifier.predict(X_test_scaled)

svm_conf_matrix = confusion_matrix(y_test, svm_predictions)
# svm_TN, svm_FP, svm_FN, svm_TP = svm_conf_matrix.ravel() #nyalain ini kalo 2 targetnya
svm_TN, svm_FP, svm_FN, svm_TP, _, _, _, _, _ = svm_conf_matrix.ravel() #nyalain kalo 3 targetnya


In [594]:
print("Classes in y_test:", y_test.unique())
print("Classes in svm_predictions:", np.unique(svm_predictions))
print("Missing values in y_test:", y_test.isnull().sum())
print("Missing values in svm_predictions:", pd.Series(svm_predictions).isnull().sum())
print("Difference in classes between y_test and svm_predictions:", set(y_test.unique()) - set(np.unique(svm_predictions)))
print("Confusion Matrix:")
print(confusion_matrix(y_test, svm_predictions))

svm_TN = confusion_matrix(y_test, svm_predictions)[0, 0]
svm_FP = confusion_matrix(y_test, svm_predictions)[0, 1]
svm_FN = confusion_matrix(y_test, svm_predictions)[1, 0]
svm_TP = confusion_matrix(y_test, svm_predictions)[1, 1]



Classes in y_test: [1 0 2]
Classes in svm_predictions: [0 1 2]
Missing values in y_test: 0
Missing values in svm_predictions: 0
Difference in classes between y_test and svm_predictions: set()
Confusion Matrix:
[[36  9  6]
 [ 5 37  5]
 [13  4 10]]


In [595]:
knn_classifier = KNeighborsClassifier()

knn_param_grid = {'n_neighbors': [3, 5, 7, 9, 11], 'weights': ['uniform', 'distance']}
knn_grid_search = GridSearchCV(knn_classifier, knn_param_grid, cv=10, scoring='accuracy')
knn_grid_search.fit(X_train_scaled, y_train)
knn_best_params = knn_grid_search.best_params_

knn_classifier = KNeighborsClassifier(**knn_best_params)
knn_classifier.fit(X_train_scaled, y_train)

knn_predictions = knn_classifier.predict(X_test_scaled)

knn_conf_matrix = confusion_matrix(y_test, knn_predictions)
knn_TN, knn_FP, knn_FN, knn_TP, _, _, _, _, _ = knn_conf_matrix.ravel() #hapus , _, _, _, _, _ kalo 2 targetnya

In [596]:
svm_predictions = svm_classifier.predict(X_test_scaled)
knn_predictions = knn_classifier.predict(X_test_scaled)

svm_accuracy = accuracy_score(y_test, svm_predictions)
knn_accuracy = accuracy_score(y_test, knn_predictions)

svm_report = classification_report(y_test, svm_predictions, target_names=label_encoder.classes_)
knn_report = classification_report(y_test, knn_predictions, target_names=label_encoder.classes_)

svm_sensitivity = svm_TP / (svm_TP + svm_FN)
svm_specificity = svm_TN / (svm_TN + svm_FP)
knn_specificity = knn_TN / (knn_TN + knn_FP)
knn_sensitivity = knn_TP / (knn_TP + knn_FN)

svm_f1 = f1_score(y_test, svm_predictions, average='weighted')
knn_f1 = f1_score(y_test, knn_predictions, average='weighted')


In [597]:
print("Akurasi Klasifikasi SVM:", svm_accuracy)
print("Sensitivity SVM:", svm_sensitivity)
print("Specificity SVM:", svm_specificity)
print("F1-Score Klasifikasi SVM:", svm_f1)
print("Akurasi Klasifikasi K-NN:", knn_accuracy)

#print("\nLaporan Klasifikasi SVM:")
#print(svm_report)

#print("\nLaporan Klasifikasi K-NN:")
#print(knn_report)
print("Specificity K-NN:", knn_specificity)
print("Sensitivity K-NN:", knn_sensitivity)
print("F1-Score Klasifikasi K-NN:", knn_f1)


Akurasi Klasifikasi SVM: 0.664
Sensitivity SVM: 0.8809523809523809
Specificity SVM: 0.8
F1-Score Klasifikasi SVM: 0.6566167893961709
Akurasi Klasifikasi K-NN: 0.64
Specificity K-NN: 0.8260869565217391
Sensitivity K-NN: 0.5454545454545454
F1-Score Klasifikasi K-NN: 0.6307963097232292


# Validasi

In [598]:
X = df.iloc[:, :-1]  # Fitur
y = df.iloc[:, -1]   # Target

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.5, random_state=42)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [599]:
# Evaluasi model
svm_predictions = svm_classifier.predict(X_val_scaled)
knn_predictions = knn_classifier.predict(X_val_scaled)

svm_accuracy = accuracy_score(y_val, svm_predictions)
knn_accuracy = accuracy_score(y_val, knn_predictions)

svm_report = classification_report(y_val, svm_predictions, target_names=label_encoder.classes_)
knn_report = classification_report(y_val, knn_predictions, target_names=label_encoder.classes_)

svm_sensitivity = svm_TP / (svm_TP + svm_FN)
svm_specificity = svm_TN / (svm_TN + svm_FP)
knn_specificity = knn_TN / (knn_TN + knn_FP)
knn_sensitivity = knn_TP / (knn_TP + knn_FN)
svm_f1 = f1_score(y_test, svm_predictions, average='weighted')
knn_f1 = f1_score(y_test, knn_predictions, average='weighted')


In [600]:
svm_accuracy = accuracy_score(y_val, svm_predictions)
knn_accuracy = accuracy_score(y_val, knn_predictions)

svm_report = classification_report(y_val, svm_predictions, target_names=label_encoder.classes_)
knn_report = classification_report(y_val, knn_predictions, target_names=label_encoder.classes_)


In [601]:
print("Akurasi Klasifikasi SVM:", svm_accuracy)
print("Sensitivity SVM:", svm_sensitivity)
print("Specificity SVM:", svm_specificity)
print("F1-Score Klasifikasi SVM:", svm_f1)
print("Akurasi Klasifikasi K-NN:", knn_accuracy)

#print("\nLaporan Klasifikasi SVM:")
#print(svm_report)

#print("\nLaporan Klasifikasi K-NN:")
#print(knn_report)
print("Specificity K-NN:", knn_specificity)
print("Sensitivity K-NN:", knn_sensitivity)
print("F1-Score Klasifikasi K-NN:", knn_f1)


Akurasi Klasifikasi SVM: 0.816
Sensitivity SVM: 0.8809523809523809
Specificity SVM: 0.8
F1-Score Klasifikasi SVM: 0.43063654776024085
Akurasi Klasifikasi K-NN: 0.8
Specificity K-NN: 0.8260869565217391
Sensitivity K-NN: 0.5454545454545454
F1-Score Klasifikasi K-NN: 0.4644229519963293


# 10-fold CV

In [602]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)

svm_accuracies = []
knn_accuracies = []

In [603]:
# Inisialisasi model SVM dan K-NN
svm_classifier = SVC(**svm_best_params)
knn_classifier = KNeighborsClassifier(**knn_best_params)

# Inisialisasi skalar untuk penskalaan fitur
scaler = StandardScaler()

# Penskalaan fitur
X_scaled = scaler.fit_transform(X)

In [604]:
# Mengukur performa SVM dengan 10-fold cross-validation
svm_predictions = cross_val_predict(svm_classifier, X_scaled, y, cv=10)
svm_accuracy = accuracy_score(y, svm_predictions)
svm_precision = precision_score(y, svm_predictions, average='weighted')
svm_recall = recall_score(y, svm_predictions, average='weighted')
svm_f1 = f1_score(y, svm_predictions, average='weighted')

In [605]:
# Menghitung confusion matrix untuk SVM
svm_conf_matrix = confusion_matrix(y, svm_predictions)
svm_TN, svm_FP, svm_FN, svm_TP, _, _, _, _, _ = svm_conf_matrix.ravel() #hapus , _, _, _, _, _ kalo 2 targetnya
svm_specificity = svm_TN / (svm_TN + svm_FP)

In [606]:
# Mengukur performa K-NN dengan 10-fold cross-validation
knn_predictions = cross_val_predict(knn_classifier, X_scaled, y, cv=10)
knn_accuracy = accuracy_score(y, knn_predictions)
knn_precision = precision_score(y, knn_predictions, average='weighted')
knn_recall = recall_score(y, knn_predictions, average='weighted')
knn_f1 = f1_score(y, knn_predictions, average='weighted')

In [607]:
# Menghitung confusion matrix untuk K-NN
knn_conf_matrix = confusion_matrix(y, knn_predictions)
knn_TN, knn_FP, knn_FN, knn_TP, _, _, _, _, _ = knn_conf_matrix.ravel() #hapus , _, _, _, _, _ kalo 2 targetnya
knn_specificity = knn_TN / (knn_TN + knn_FP)

In [608]:
# Menampilkan hasil
print("SVM Metrics (10-fold CV):")
print("Accuracy:", svm_accuracy)
print("Sensitivity (Recall):", svm_recall)
print("Specificity:", svm_specificity)
print("Precision:", svm_precision)
print("F1-Score:", svm_f1)

print("\nK-NN Metrics (10-fold CV):")
print("Accuracy:", knn_accuracy)
print("Sensitivity (Recall):", knn_recall)
print("Specificity:", knn_specificity)
print("Precision:", knn_precision)
print("F1-Score:", knn_f1)

SVM Metrics (10-fold CV):
Accuracy: 0.748
Sensitivity (Recall): 0.748
Specificity: 0.8044692737430168
Precision: 0.7411632253687023
F1-Score: 0.7408155086491023

K-NN Metrics (10-fold CV):
Accuracy: 0.726
Sensitivity (Recall): 0.726
Specificity: 0.8324324324324325
Precision: 0.7197890330578768
F1-Score: 0.7187307078642128
