In [None]:
# =====================================================
# KNN MURNI - TANPA CNN
# EKSTRAKSI RGB + HSV
# HASIL DISIMPAN KE CSV
# =====================================================

import numpy as np
import pandas as pd
import os
import cv2
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

# Directory holding the reference images; each CSV "Tempat" value is joined onto it.
DATASET_DIR = "/content/drive/MyDrive/Kolong/dataset_citra"
# CSV describing the reference set; the script reads its "Tempat" and "Warna" columns.
CSV_FILE    = "/content/drive/MyDrive/Kolong/Dataset_skripsi.csv"
# Directory whose entries are treated as the unknown (test) images.
TEST_DIR    = "/content/drive/MyDrive/Kolong/Citra tidak diketahui"

# =========================
# EKSTRAKSI FITUR MANUAL
# =========================
def extract_feature(img_path):
    """Return a 6-element colour feature vector for the image at *img_path*.

    The vector holds, in order, the mean R, G and B channel values scaled to
    [0, 1] followed by the mean H, S and V channel values divided by 179, 255
    and 255 respectively (the 8-bit OpenCV channel maxima).

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        ``np.ndarray`` of six floats: mean R, G, B, H, S, V (all normalised).

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        # imread signals failure by returning None instead of raising, which
        # would otherwise surface as an opaque error inside cvtColor.
        raise ValueError(f"Cannot read image: {img_path}")

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    # Convert to HSV straight from the 8-bit image: going through float and
    # back with a truncating uint8 cast can shift a channel value down by one.
    hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)
    img = rgb / 255.0

    return np.array([
        np.mean(img[:, :, 0]),
        np.mean(img[:, :, 1]),
        np.mean(img[:, :, 2]),
        np.mean(hsv[:, :, 0]) / 179,
        np.mean(hsv[:, :, 1]) / 255,
        np.mean(hsv[:, :, 2]) / 255,
    ])

# =========================
# FEATURE DATABASE
# =========================
df = pd.read_csv(CSV_FILE)

# Reference image names and their colour labels, kept in CSV row order so
# that position i in every list refers to the same reference image.
files = list(df["Tempat"])
labels = list(df["Warna"])

# One raw feature vector per reference image.
features = [
    extract_feature(os.path.join(DATASET_DIR, ref_name))
    for ref_name in files
]

# Rescale each feature column to [0, 1] over the reference set; the fitted
# scaler is reused later for the test images.
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# =========================
# KNN MODEL
# =========================
# fit() returns the estimator itself, so construction and fitting can chain.
knn = NearestNeighbors(n_neighbors=1, metric="euclidean").fit(features)

# =========================
# CSV ACCUMULATORS
# =========================
summary_knn = []  # one row per test image: its nearest reference image
detail_knn = []   # one row per (test image, reference image) pair

SUMMARY_COLUMNS = ["citra_uji", "paling_mirip", "warna_prediksi", "jarak_knn"]
DETAIL_COLUMNS = ["citra_uji", "citra_referensi", "warna_referensi", "jarak"]

# =========================
# TESTING
# =========================
distances_all = []
# Keep regular files only: a sub-directory inside TEST_DIR cannot be decoded
# as an image and would abort the whole run.
test_images = sorted(
    entry for entry in os.listdir(TEST_DIR)
    if os.path.isfile(os.path.join(TEST_DIR, entry))
)

for test_img in test_images:
    test_path = os.path.join(TEST_DIR, test_img)
    test_feat = scaler.transform([extract_feature(test_path)])

    distances, indices = knn.kneighbors(test_feat)
    best_idx = indices[0][0]
    best_dist = distances[0][0]
    distances_all.append(best_dist)

    # ===== SUMMARY ROW =====
    summary_knn.append({
        "citra_uji": test_img,
        "paling_mirip": files[best_idx],
        "warna_prediksi": labels[best_idx],
        "jarak_knn": best_dist
    })

    # ===== DETAIL ROWS =====
    # kneighbors returns distances sorted by increasing distance, not in
    # reference order, so the returned indices are needed to map each
    # distance back to the reference image it belongs to.
    all_dist, all_idx = knn.kneighbors(test_feat, n_neighbors=len(features))
    dist_by_ref = np.empty(len(files))
    dist_by_ref[all_idx[0]] = all_dist[0]

    for ref_file, ref_label, ref_dist in zip(files, labels, dist_by_ref):
        detail_knn.append({
            "citra_uji": test_img,
            "citra_referensi": ref_file,
            "warna_referensi": ref_label,
            "jarak": ref_dist
        })

# =========================
# SAVE CSV
# =========================
# Explicit column lists keep the header row even when no test image was found.
pd.DataFrame(summary_knn, columns=SUMMARY_COLUMNS).to_csv(
    "/content/drive/MyDrive/Kolong/hasil_knn_summary.csv",
    index=False
)

pd.DataFrame(detail_knn, columns=DETAIL_COLUMNS).to_csv(
    "/content/drive/MyDrive/Kolong/hasil_knn_detail.csv",
    index=False
)

# Avoid NumPy's empty-slice warning when there were no test images; the
# printed value is "nan" either way.
mean_dist = np.mean(distances_all) if distances_all else float("nan")
print(f"Rata-rata Jarak KNN : {mean_dist:.4f}")
print("=== KNN MURNI SELESAI ===")


Rata-rata Jarak KNN : 0.2297
=== KNN MURNI SELESAI ===
