In [10]:
import json
import os
import random
from collections import defaultdict

from skimage import io
from skimage.color import gray2rgb
from skimage.transform import resize

# Dataset locations passed to load_data_from_json_and_images.
# NOTE(review): both are absolute paths on a Windows D: drive, so the notebook
# only runs unchanged on a machine with this exact folder layout.
# Folder that each record's file_name is joined onto when reading images.
image_folder = 'D:/Serba Serbi Kuliah/MATERI KULIAH SMT 5/AI/UAS_AI/Container-Detector-Use-KNN/data new'
# Annotation file; the loader opens it and parses it with json.load.
json_file = 'D:/Serba Serbi Kuliah/MATERI KULIAH SMT 5/AI/UAS_AI/Container-Detector-Use-KNN/data_new.json'


In [11]:
def load_data_from_json_and_images(image_folder, json_file, image_size=(64, 64)):
    """Load every annotated image as a flat feature vector with its label.

    The JSON file must contain a list of records. Each record is read for
    ``item['file_name']`` (image name, joined onto ``image_folder``) and
    ``item['annotations']['color_label']`` (the class label).

    Each image is brought to exactly three channels before it is resized, so
    all returned vectors have the same length. Without that step a grayscale
    or RGBA file would produce a vector of a different length than an RGB
    file, and a distance computed between them would be meaningless.

    Args:
        image_folder: Directory that contains the image files.
        json_file: Path to the JSON annotation file.
        image_size: ``(rows, cols)`` every image is resized to.

    Returns:
        A tuple ``(images, labels, filenames)`` of three parallel lists:
        flattened pixel values (plain Python lists), labels, and file names.

    Raises:
        ValueError: If an image is not 2-D grayscale, RGB, or RGBA.
    """
    images = []
    labels = []
    filenames = []

    # The annotation file is a list of records, not a dictionary.
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    for item in data:
        image_filename = item['file_name']
        label = item['annotations']['color_label']

        image_path = os.path.join(image_folder, image_filename)
        image = io.imread(image_path)

        # Normalise the channel layout so every feature vector has equal length.
        if image.ndim == 2:
            # Grayscale: replicate the single channel into R, G and B.
            image = gray2rgb(image)
        elif image.ndim == 3 and image.shape[-1] == 4:
            # RGBA: drop the alpha channel.
            image = image[..., :3]
        elif image.ndim != 3 or image.shape[-1] != 3:
            raise ValueError(
                f"Unsupported image shape {image.shape} for file {image_filename!r}"
            )

        # Only rows/cols are given; resize keeps the channel dimension.
        image_resized = resize(image, image_size)

        # Flatten to a 1-D plain list so the pure-Python KNN code can use it.
        image_flattened = image_resized.flatten().tolist()

        images.append(image_flattened)
        labels.append(label)
        filenames.append(image_filename)

    return images, labels, filenames


In [12]:
def euclidean_distance(vec1, vec2):
    """Return the Euclidean (L2) distance between two equal-length vectors.

    Args:
        vec1: Sequence of numbers.
        vec2: Sequence of numbers with the same length as ``vec1``.

    Returns:
        The square root of the sum of squared element-wise differences.

    Raises:
        ValueError: If the two vectors differ in length.
    """
    # zip() stops at the shorter input, so without this check a length
    # mismatch would be silently truncated into a wrong distance.
    if len(vec1) != len(vec2):
        raise ValueError(
            f"vectors must have the same length, got {len(vec1)} and {len(vec2)}"
        )
    sum_squared = sum((a - b) ** 2 for a, b in zip(vec1, vec2))
    return sum_squared ** 0.5


In [13]:
def knn_classify(test_data, train_data, train_labels, k=3):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are computed with ``euclidean_distance``. When several labels
    receive the same number of votes, the winner is the tied label whose
    first vote came from the nearest neighbour (votes are counted in order
    of increasing distance and ``max`` returns the first maximum).

    Args:
        test_data: Feature vector of the sample to classify.
        train_data: Sequence of training feature vectors.
        train_labels: Labels parallel to ``train_data``.
        k: Number of neighbours that vote. If it exceeds the number of
            training samples, all of them vote.

    Returns:
        The label with the most votes among the k nearest samples.

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty,
            or ``train_data`` and ``train_labels`` differ in length.
    """
    # A negative k would otherwise be used as a slice bound and silently
    # select "all but the last |k|" neighbours instead of failing.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(train_data) == 0:
        raise ValueError("train_data must contain at least one sample")
    if len(train_data) != len(train_labels):
        raise ValueError(
            f"train_data and train_labels differ in length: "
            f"{len(train_data)} vs {len(train_labels)}"
        )

    distances = [
        (euclidean_distance(test_data, sample), label)
        for sample, label in zip(train_data, train_labels)
    ]

    # Sort on the distance only; the sort is stable, so equally distant
    # samples keep their training order.
    distances.sort(key=lambda pair: pair[0])

    label_votes = {}
    for _, label in distances[:k]:
        label_votes[label] = label_votes.get(label, 0) + 1

    return max(label_votes, key=label_votes.get)


In [14]:
# Load every image as a flat feature vector, together with its label and file name.
images, labels, filenames = load_data_from_json_and_images(image_folder, json_file)

# Group sample indices by label so each class is split on its own (stratification).
label_to_indices = defaultdict(list)
for i, label in enumerate(labels):
    label_to_indices[label].append(i)

# Dedicated seeded generator: the split (and therefore every accuracy figure
# computed from it) stays the same across reruns, and the global `random`
# state is left untouched.
split_rng = random.Random(42)

# Split each label's samples 80% train / 20% test.
# NOTE: int() rounds down, so a label with a single sample ends up only in
# the test set and has no training example.
train_indices, test_indices = [], []
for label, indices in label_to_indices.items():
    split_rng.shuffle(indices)  # shuffle within the label
    split_point = int(0.8 * len(indices))
    train_indices.extend(indices[:split_point])
    test_indices.extend(indices[split_point:])

# Materialise the train and test sets from the chosen indices.
train_data = [images[i] for i in train_indices]
train_labels = [labels[i] for i in train_indices]
train_filenames = [filenames[i] for i in train_indices]

test_data = [images[i] for i in test_indices]
test_labels = [labels[i] for i in test_indices]
test_filenames = [filenames[i] for i in test_indices]

In [15]:
# Evaluate the KNN classifier on the held-out test split.
k = 1  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: label -> {"correct": hits, "total": samples seen}.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the prediction and the ground truth.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its true label's total; hits also count
    # towards the overall number of correct predictions.
    tally = label_accuracy[test_labels[i]]
    tally["total"] += 1
    if predicted_label == test_labels[i]:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy as a percentage of all test samples.
accuracy = (correct_predictions / len(test_labels)) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image VOLU2107100-5-_jpg.rf.19f19fe346a34df8aee60e312db8d6be.jpg: Predicted Label = Green, True Label = Green
Test Image IMG163_jpg.rf.fea2833584d2188641332467a806b320.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2162999-7-_jpg.rf.c7292f12682a4708ffb3c5c33ab89bf0.jpg: Predicted Label = Green, True Label = Green
Test Image 1-155749001-OCR-AH-A01_jpg.rf.640b5b08c1e688f699230a6ac549861d.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2118804-4-_jpg.rf.af072cde26606140f35b191f95787ea7.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2135756-3-_jpg.rf.dbca7bcf22850c0f8f9e480af7eb7daa.jpg: Predicted Label = Red, True Label = Green
Test Image SLVU4655444-4-_361x480_jpg.rf.ef0efb313898251aa8577464de2fb121.jpg: Predicted Label = Red, True Label = Green
Test Image VOLU2125296-4-_jpg.rf.c93343cc344ae2dfd60bb6712c2734be.jpg: Predicted Label = Green, True Label = Green
Test Image SLVU4656841-5-_jpg.rf.2aee0af12f188c46d42be97316e9d080.jpg: Predic

In [16]:
# Evaluate the KNN classifier on the held-out test split.
k = 3  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: label -> {"correct": hits, "total": samples seen}.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the prediction and the ground truth.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its true label's total; hits also count
    # towards the overall number of correct predictions.
    tally = label_accuracy[test_labels[i]]
    tally["total"] += 1
    if predicted_label == test_labels[i]:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy as a percentage of all test samples.
accuracy = (correct_predictions / len(test_labels)) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image VOLU2107100-5-_jpg.rf.19f19fe346a34df8aee60e312db8d6be.jpg: Predicted Label = Green, True Label = Green
Test Image IMG163_jpg.rf.fea2833584d2188641332467a806b320.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2162999-7-_jpg.rf.c7292f12682a4708ffb3c5c33ab89bf0.jpg: Predicted Label = Green, True Label = Green
Test Image 1-155749001-OCR-AH-A01_jpg.rf.640b5b08c1e688f699230a6ac549861d.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2118804-4-_jpg.rf.af072cde26606140f35b191f95787ea7.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2135756-3-_jpg.rf.dbca7bcf22850c0f8f9e480af7eb7daa.jpg: Predicted Label = Red, True Label = Green
Test Image SLVU4655444-4-_361x480_jpg.rf.ef0efb313898251aa8577464de2fb121.jpg: Predicted Label = Red, True Label = Green
Test Image VOLU2125296-4-_jpg.rf.c93343cc344ae2dfd60bb6712c2734be.jpg: Predicted Label = Red, True Label = Green
Test Image SLVU4656841-5-_jpg.rf.2aee0af12f188c46d42be97316e9d080.jpg: Predicte

In [17]:
# Evaluate the KNN classifier on the held-out test split.
k = 5  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: label -> {"correct": hits, "total": samples seen}.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the prediction and the ground truth.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its true label's total; hits also count
    # towards the overall number of correct predictions.
    tally = label_accuracy[test_labels[i]]
    tally["total"] += 1
    if predicted_label == test_labels[i]:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy as a percentage of all test samples.
accuracy = (correct_predictions / len(test_labels)) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image VOLU2107100-5-_jpg.rf.19f19fe346a34df8aee60e312db8d6be.jpg: Predicted Label = Green, True Label = Green
Test Image IMG163_jpg.rf.fea2833584d2188641332467a806b320.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2162999-7-_jpg.rf.c7292f12682a4708ffb3c5c33ab89bf0.jpg: Predicted Label = Green, True Label = Green
Test Image 1-155749001-OCR-AH-A01_jpg.rf.640b5b08c1e688f699230a6ac549861d.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2118804-4-_jpg.rf.af072cde26606140f35b191f95787ea7.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2135756-3-_jpg.rf.dbca7bcf22850c0f8f9e480af7eb7daa.jpg: Predicted Label = Red, True Label = Green
Test Image SLVU4655444-4-_361x480_jpg.rf.ef0efb313898251aa8577464de2fb121.jpg: Predicted Label = Red, True Label = Green
Test Image VOLU2125296-4-_jpg.rf.c93343cc344ae2dfd60bb6712c2734be.jpg: Predicted Label = Green, True Label = Green
Test Image SLVU4656841-5-_jpg.rf.2aee0af12f188c46d42be97316e9d080.jpg: Predic

In [18]:
# Evaluate the KNN classifier on the held-out test split.
k = 11  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: label -> {"correct": hits, "total": samples seen}.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the prediction and the ground truth.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its true label's total; hits also count
    # towards the overall number of correct predictions.
    tally = label_accuracy[test_labels[i]]
    tally["total"] += 1
    if predicted_label == test_labels[i]:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy as a percentage of all test samples.
accuracy = (correct_predictions / len(test_labels)) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image VOLU2107100-5-_jpg.rf.19f19fe346a34df8aee60e312db8d6be.jpg: Predicted Label = Green, True Label = Green
Test Image IMG163_jpg.rf.fea2833584d2188641332467a806b320.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2162999-7-_jpg.rf.c7292f12682a4708ffb3c5c33ab89bf0.jpg: Predicted Label = Green, True Label = Green
Test Image 1-155749001-OCR-AH-A01_jpg.rf.640b5b08c1e688f699230a6ac549861d.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2118804-4-_jpg.rf.af072cde26606140f35b191f95787ea7.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2135756-3-_jpg.rf.dbca7bcf22850c0f8f9e480af7eb7daa.jpg: Predicted Label = Blue, True Label = Green
Test Image SLVU4655444-4-_361x480_jpg.rf.ef0efb313898251aa8577464de2fb121.jpg: Predicted Label = Red, True Label = Green
Test Image VOLU2125296-4-_jpg.rf.c93343cc344ae2dfd60bb6712c2734be.jpg: Predicted Label = Green, True Label = Green
Test Image SLVU4656841-5-_jpg.rf.2aee0af12f188c46d42be97316e9d080.jpg: Predi