In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from knn import KNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [3]:
# Load the dataset; the path is relative to the current working directory.
DATASET_CSV = 'dataset.csv'
dataset = pd.read_csv(DATASET_CSV)

In [4]:
# Columns used as model inputs, and the column holding the label to predict.
FEATURE_COLUMNS = [
    'Date',
    'Day of the week',
    'CarCount',
    'BikeCount',
    'BusCount',
    'TruckCount',
    'Total',
    'Kategori Waktu',
]
TARGET_COLUMN = 'Traffic Situation'

X = dataset[FEATURE_COLUMNS]
y = dataset[TARGET_COLUMN]

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Instantiate the project's own KNN classifier (imported from the local
# `knn` module, not scikit-learn).
# NOTE(review): k is presumably the number of neighbours consulted per
# prediction -- confirm against knn.py.
model = KNN(k=5)

In [7]:
# 5-fold cross-validation splitter; rows are shuffled before folding and the
# fixed random_state makes the fold assignment reproducible.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [8]:
# Train on the training part of the hold-out split, then predict labels for
# the held-out test rows.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [9]:
# Report the hold-out metrics. Each metric is computed and printed in turn,
# always comparing the true test labels with the predictions.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))


Accuracy: 0.9312080536912751
Classification Report:
               precision    recall  f1-score   support

           1       0.82      0.82      0.82        66
           2       0.94      0.94      0.94       340
           3       0.96      1.00      0.98       127
           4       0.93      0.87      0.90        63

    accuracy                           0.93       596
   macro avg       0.91      0.91      0.91       596
weighted avg       0.93      0.93      0.93       596

Confusion Matrix:
 [[ 54  12   0   0]
 [ 12 319   5   4]
 [  0   0 127   0]
 [  0   8   0  55]]


In [10]:
# Containers for the per-fold results: accuracy, confusion matrix and
# classification report (one entry is appended to each per fold).
accuracies, confusion_matrices, classification_reports = [], [], []

In [11]:
# Run K-fold cross-validation and collect the metrics of every fold.
#
# The result lists are (re)initialised in this cell so that running the cell
# again starts from empty lists instead of appending a second set of folds,
# which would skew the average accuracy computed afterwards.
accuracies = []
confusion_matrices = []
classification_reports = []

# NOTE: X_train / X_test / y_train / y_test / y_pred are reassigned on every
# iteration, so after this loop they hold the LAST fold's data rather than the
# earlier hold-out split. The names are kept because a later cell reads
# y_test and y_pred.
for train_index, test_index in kf.split(X):
    # kf.split yields positional indices, so select rows with .iloc
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training rows
    model.fit(X_train, y_train)

    # Predict this fold's held-out rows
    y_pred = model.predict(X_test)

    # Store this fold's metrics
    accuracies.append(accuracy_score(y_test, y_pred))
    confusion_matrices.append(confusion_matrix(y_test, y_pred))
    classification_reports.append(classification_report(y_test, y_pred, output_dict=True))

In [12]:
# Overall evaluation: mean accuracy across all folds, followed by the
# confusion matrix and classification report of the last fold.
mean_accuracy = sum(accuracies) / len(accuracies)
print("Rata-rata Akurasi:", mean_accuracy)

last_fold_matrix = confusion_matrices[-1]
last_fold_report = classification_reports[-1]
print("\nConfusion Matrix dari lipatan terakhir:", last_fold_matrix, sep="\n")
print("\nClassification Report dari lipatan terakhir:", last_fold_report, sep="\n")

Rata-rata Akurasi: 0.9368298460323727

Confusion Matrix dari lipatan terakhir:
[[ 49  11   0   0]
 [  7 315   1   4]
 [  0   3 151   4]
 [  0   9   1  40]]

Classification Report dari lipatan terakhir:
{'1': {'precision': 0.875, 'recall': 0.8166666666666667, 'f1-score': 0.8448275862068966, 'support': 60.0}, '2': {'precision': 0.9319526627218935, 'recall': 0.963302752293578, 'f1-score': 0.9473684210526315, 'support': 327.0}, '3': {'precision': 0.9869281045751634, 'recall': 0.9556962025316456, 'f1-score': 0.9710610932475884, 'support': 158.0}, '4': {'precision': 0.8333333333333334, 'recall': 0.8, 'f1-score': 0.8163265306122449, 'support': 50.0}, 'accuracy': 0.9327731092436975, 'macro avg': {'precision': 0.9068035251575975, 'recall': 0.8839164053729724, 'f1-score': 0.8948959077798404, 'support': 595.0}, 'weighted avg': {'precision': 0.9325207191589943, 'recall': 0.9327731092436975, 'f1-score': 0.9323077447400935, 'support': 595.0}}


In [13]:
# Show the last fold's classification report as a table.
#
# The report is taken from the results already stored by the cross-validation
# loop instead of being recomputed from y_test / y_pred. Those two names are
# loop leftovers and silently change meaning if the hold-out cells are re-run,
# whereas classification_reports[-1] always refers to the last fold.
report_dict = classification_reports[-1]

# Transpose so that each class / average is a row and each metric a column.
report_df = pd.DataFrame(report_dict).transpose()

report_df

Unnamed: 0,precision,recall,f1-score,support
1,0.875,0.816667,0.844828,60.0
2,0.931953,0.963303,0.947368,327.0
3,0.986928,0.955696,0.971061,158.0
4,0.833333,0.8,0.816327,50.0
accuracy,0.932773,0.932773,0.932773,0.932773
macro avg,0.906804,0.883916,0.894896,595.0
weighted avg,0.932521,0.932773,0.932308,595.0
