In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from knn import KNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
# Read the raw dataset from a CSV file; the relative path is resolved against
# the current working directory.
DATASET_PATH = 'dataset.csv'
dataset = pd.read_csv(DATASET_PATH)

In [3]:
# Columns used as model inputs, and the column holding the class label.
feature_columns = [
    'Date',
    'Day of the week',
    'CarCount',
    'BikeCount',
    'BusCount',
    'TruckCount',
    'Total',
    'Kategori Waktu',
]
target_column = 'Traffic Situation'

# Feature matrix (DataFrame) and target vector (Series).
X = dataset[feature_columns]
y = dataset[target_column]

In [4]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Instantiate the project-local KNN classifier (imported from the `knn` module).
# NOTE(review): k=7 is presumably the number of neighbours consulted per
# prediction — confirm against knn.KNN.
model = KNN(k=7)

In [6]:
# Cross-validation splitter: 5 folds, rows shuffled before splitting, fixed
# seed so the fold assignment is reproducible.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [7]:
# Fit the classifier on the training portion of the hold-out split.
model.fit(X_train, y_train)
# Predict labels for the held-out test rows; y_pred is consumed by the
# evaluation cell that follows.
y_pred = model.predict(X_test)

In [8]:
# Score the hold-out predictions first, then print each metric under its label.
holdout_accuracy = accuracy_score(y_test, y_pred)
holdout_report = classification_report(y_test, y_pred)
holdout_confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", holdout_accuracy)
print("Classification Report:\n", holdout_report)
print("Confusion Matrix:\n", holdout_confusion)


Accuracy: 0.9312080536912751
Classification Report:
               precision    recall  f1-score   support

           1       0.82      0.83      0.83        66
           2       0.94      0.94      0.94       340
           3       0.96      1.00      0.98       127
           4       0.93      0.86      0.89        63

    accuracy                           0.93       596
   macro avg       0.91      0.91      0.91       596
weighted avg       0.93      0.93      0.93       596

Confusion Matrix:
 [[ 55  11   0   0]
 [ 12 319   5   4]
 [  0   0 127   0]
 [  0   9   0  54]]


In [9]:
# Containers for per-fold evaluation results: accuracy scores, confusion
# matrices and classification reports. Each gets its own independent list.
accuracies, confusion_matrices, classification_reports = [], [], []

In [10]:
# Cross-validate the model over the full feature matrix using the `kf`
# splitter and collect per-fold metrics.
#
# The accumulators are (re)created in this cell so that re-running it starts
# from empty lists instead of appending a second set of fold results to the
# lists left over from a previous run.
accuracies = []
confusion_matrices = []
classification_reports = []

# NOTE: X_train / X_test / y_train / y_test / y_pred are reassigned on every
# iteration, so after the loop they hold the LAST fold's data and no longer the
# earlier hold-out split. The names are kept because a later cell reads them.
for train_index, test_index in kf.split(X):
    # kf.split yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train on this fold's training rows.
    # NOTE(review): assumes KNN.fit discards state from any previous fit — confirm.
    model.fit(X_train, y_train)

    # Predict on this fold's validation rows.
    y_pred = model.predict(X_test)

    # Record this fold's metrics; output_dict=True stores the report as a
    # nested dict so it can be turned into a DataFrame later.
    accuracies.append(accuracy_score(y_test, y_pred))
    confusion_matrices.append(confusion_matrix(y_test, y_pred))
    classification_reports.append(classification_report(y_test, y_pred, output_dict=True))

In [11]:
# Evaluasi keseluruhan
print("Rata-rata Akurasi:", sum(accuracies) / len(accuracies))
print("\nConfusion Matrix dari lipatan terakhir:")
print(confusion_matrices[-1])
print("\nClassification Report dari lipatan terakhir:")
print(classification_reports[-1])

Rata-rata Akurasi: 0.9354853082172466

Confusion Matrix dari lipatan terakhir:
[[ 45  15   0   0]
 [  7 316   2   2]
 [  0   3 152   3]
 [  0  10   1  39]]

Classification Report dari lipatan terakhir:
{'1': {'precision': 0.8653846153846154, 'recall': 0.75, 'f1-score': 0.8035714285714286, 'support': 60.0}, '2': {'precision': 0.9186046511627907, 'recall': 0.9663608562691132, 'f1-score': 0.9418777943368107, 'support': 327.0}, '3': {'precision': 0.9806451612903225, 'recall': 0.9620253164556962, 'f1-score': 0.9712460063897763, 'support': 158.0}, '4': {'precision': 0.8863636363636364, 'recall': 0.78, 'f1-score': 0.8297872340425532, 'support': 50.0}, 'accuracy': 0.9277310924369748, 'macro avg': {'precision': 0.9127495160503412, 'recall': 0.8645965431812024, 'f1-score': 0.8866206158351422, 'support': 595.0}, 'weighted avg': {'precision': 0.927003218748508, 'recall': 0.9277310924369748, 'f1-score': 0.9263101767632523, 'support': 595.0}}


In [12]:
# Tabulate the classification report of the last cross-validation fold.
#
# The report dict stored by the cross-validation loop is reused instead of
# being recomputed from y_test / y_pred: those names are loop variables that
# merely leaked out of the loop, and they would refer to different data if the
# hold-out cells were re-run in between.
report_dict = classification_reports[-1]

# The dict is keyed by class / average name with the metrics nested inside, so
# transpose to get one row per class and one column per metric.
report_df = pd.DataFrame(report_dict).transpose()

report_df

Unnamed: 0,precision,recall,f1-score,support
1,0.865385,0.75,0.803571,60.0
2,0.918605,0.966361,0.941878,327.0
3,0.980645,0.962025,0.971246,158.0
4,0.886364,0.78,0.829787,50.0
accuracy,0.927731,0.927731,0.927731,0.927731
macro avg,0.91275,0.864597,0.886621,595.0
weighted avg,0.927003,0.927731,0.92631,595.0
