In [26]:
import os
import json
from collections import defaultdict
import random
from skimage import io
from skimage.transform import resize

# Directory holding the image files; each annotation's file name is joined onto
# this path when the data is loaded.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
image_folder = 'D:/Serba Serbi Kuliah/MATERI KULIAH SMT 5/AI/UAS_AI/Container-Detector-Use-KNN/data new'
# JSON annotation file that lists the images and their colour labels.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
json_file = 'D:/Serba Serbi Kuliah/MATERI KULIAH SMT 5/AI/UAS_AI/Container-Detector-Use-KNN/data_new.json'


In [27]:
def _as_three_channels(image):
    """Return ``image`` with exactly three colour channels where possible.

    Grayscale input (2-D, or 3-D with one or two channels) has its first
    channel replicated three times; a four-channel image has its last
    (alpha) channel dropped.  Anything else is returned unchanged.
    """
    if image.ndim == 2:
        return image[:, :, None].repeat(3, axis=2)
    if image.ndim == 3 and image.shape[2] in (1, 2):
        # Single channel, or gray + alpha: keep the gray channel only.
        return image[:, :, :1].repeat(3, axis=2)
    if image.ndim == 3 and image.shape[2] == 4:
        return image[:, :, :3]
    return image


def load_data_from_json_and_images(image_folder, json_file):
    """Load every annotated image as a flat feature vector with its label.

    The annotation file must be a JSON list whose items provide
    ``item['file_name']`` and ``item['annotations']['color_label']``.
    Each image is read from ``image_folder``, normalised to three colour
    channels, resized to 64x64 and flattened.  Normalising the channels
    first guarantees that grayscale, RGB and RGBA files all yield vectors
    of the same length (64 * 64 * 3); without it the vectors would differ
    in length and a zip-based distance would silently truncate them.

    Args:
        image_folder: Directory that contains the image files.
        json_file: Path to the JSON annotation file.

    Returns:
        A tuple ``(images, labels, filenames)`` of three parallel lists:
        flattened pixel values (plain Python lists), colour labels, and
        the image file names as written in the annotation file.
    """
    images = []
    labels = []
    filenames = []

    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding.  The top-level JSON value is a list, not a dictionary.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        image_filename = item['file_name']
        label = item['annotations']['color_label']

        # Read the image from the image folder.
        image_path = os.path.join(image_folder, image_filename)
        image = _as_three_channels(io.imread(image_path))

        # Bring every image to the same spatial size so vectors are comparable.
        image_resized = resize(image, (64, 64))

        # Flatten to a 1-D plain Python list.
        image_flattened = image_resized.flatten().tolist()

        images.append(image_flattened)
        labels.append(label)
        filenames.append(image_filename)

    return images, labels, filenames


In [28]:
def euclidean_distance(vec1, vec2):
    """Return the Euclidean (L2) distance between two numeric sequences.

    Components are paired positionally; if the inputs differ in length the
    surplus components of the longer one are ignored.
    """
    squared_diffs = map(lambda a, b: (a - b) ** 2, vec1, vec2)
    return sum(squared_diffs) ** 0.5


In [29]:
def knn_classify(test_data, train_data, train_labels, k=3):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        test_data: Feature vector of the sample to classify.
        train_data: Sequence of training feature vectors.
        train_labels: Labels parallel to ``train_data``.
        k: Number of nearest training samples that vote (default 3).  If it
            exceeds the number of training samples, all of them vote.

    Returns:
        The label with the most votes.  On a tie, the label whose closest
        member is nearest to ``test_data`` wins.

    Raises:
        ValueError: If ``k`` is smaller than 1, if ``train_data`` is empty,
            or if ``train_data`` and ``train_labels`` differ in length.
    """
    # A negative k would otherwise silently slice "all but the last |k|"
    # neighbours, and k == 0 would fail with an unhelpful max() error.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(train_data) == 0:
        raise ValueError("train_data is empty; nothing to compare against")
    if len(train_data) != len(train_labels):
        raise ValueError(
            f"train_data has {len(train_data)} samples but "
            f"train_labels has {len(train_labels)} labels"
        )

    distances = [
        (euclidean_distance(test_data, sample), label)
        for sample, label in zip(train_data, train_labels)
    ]
    # Sort on distance only; the stable sort keeps training order among
    # equally distant samples.
    distances.sort(key=lambda pair: pair[0])

    label_votes = {}
    for _, label in distances[:k]:
        label_votes[label] = label_votes.get(label, 0) + 1

    # max() returns the first key with the highest count, and the dict keeps
    # insertion order (nearest first), which gives the tie-break above.
    return max(label_votes, key=label_votes.get)


In [30]:
# Load the feature vectors, labels and file names.
images, labels, filenames = load_data_from_json_and_images(image_folder, json_file)

# Split configuration.  A dedicated, seeded generator makes the train/test
# split - and therefore every accuracy figure computed from it - reproducible
# across kernel restarts.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8  # 80% train, 20% test within each label
split_rng = random.Random(SPLIT_SEED)

# Stratification: group the sample indices by label.
label_to_indices = defaultdict(list)
for i, label in enumerate(labels):
    label_to_indices[label].append(i)

# Split each label's indices separately so both sets keep the label proportions.
# NOTE: a label with a single sample gets split_point == 0 and therefore ends
# up only in the test set.
train_indices, test_indices = [], []
for label, indices in label_to_indices.items():
    split_rng.shuffle(indices)  # shuffle within the label
    split_point = int(TRAIN_FRACTION * len(indices))
    train_indices.extend(indices[:split_point])
    test_indices.extend(indices[split_point:])

# Every sample must land in exactly one of the two sets.
assert len(train_indices) + len(test_indices) == len(labels)

# Materialise the train and test sets from the chosen indices.
train_data = [images[i] for i in train_indices]
train_labels = [labels[i] for i in train_indices]
train_filenames = [filenames[i] for i in train_indices]

test_data = [images[i] for i in test_indices]
test_labels = [labels[i] for i in test_indices]
test_filenames = [filenames[i] for i in test_indices]

In [31]:
# KNN evaluation on the test set using only the single nearest neighbour.
k = 1  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: correct predictions and number of test samples seen.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    true_label = test_labels[i]
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the predicted label and the true label.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its label's total; hits also count as correct.
    tally = label_accuracy[true_label]
    tally["total"] += 1
    if predicted_label == true_label:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy, in percent of all test samples.
accuracy = correct_predictions / len(test_labels) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image Container-687_jpg.rf.a5d21aa91f47321dd718762cda856ffd.jpg: Predicted Label = Blue, True Label = Green
Test Image CIMG17627413008_jpeg_jpg.rf.8fed38560b4d335998d2dd77b3d1bb05.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2125383-6-_jpg.rf.3213cc0b121b2b2dbde53051d3d6a7a9.jpg: Predicted Label = Green, True Label = Green
Test Image Container-212_jpg.rf.c79aeaadcb78b9475d15ebaaecefd7df.jpg: Predicted Label = Blue, True Label = Green
Test Image Container-106_jpg.rf.10df732b375d1ec24139b5547473259d.jpg: Predicted Label = Blue, True Label = Green
Test Image 1-122720001-OCR-RF-D01_jpg.rf.9a472e57bd5ae1b2b247623c42dfdb12.jpg: Predicted Label = Green, True Label = Green
Test Image 1-123009001-OCR-LB-C02_jpg.rf.821ba7c9792be2c1850cfdda503ee959.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2154036-3-_360x480_jpg.rf.6b91449c7953af08e8b0237c20b2ee6c.jpg: Predicted Label = Blue, True Label = Green
Test Image IMG148_jpg.rf.eec8b36959203d444bfe8c1d0f70108d.

In [32]:
# KNN evaluation on the test set with three voting neighbours.
k = 3  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: correct predictions and number of test samples seen.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    true_label = test_labels[i]
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the predicted label and the true label.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its label's total; hits also count as correct.
    tally = label_accuracy[true_label]
    tally["total"] += 1
    if predicted_label == true_label:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy, in percent of all test samples.
accuracy = correct_predictions / len(test_labels) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image Container-687_jpg.rf.a5d21aa91f47321dd718762cda856ffd.jpg: Predicted Label = Blue, True Label = Green
Test Image CIMG17627413008_jpeg_jpg.rf.8fed38560b4d335998d2dd77b3d1bb05.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2125383-6-_jpg.rf.3213cc0b121b2b2dbde53051d3d6a7a9.jpg: Predicted Label = Green, True Label = Green
Test Image Container-212_jpg.rf.c79aeaadcb78b9475d15ebaaecefd7df.jpg: Predicted Label = Blue, True Label = Green
Test Image Container-106_jpg.rf.10df732b375d1ec24139b5547473259d.jpg: Predicted Label = Blue, True Label = Green
Test Image 1-122720001-OCR-RF-D01_jpg.rf.9a472e57bd5ae1b2b247623c42dfdb12.jpg: Predicted Label = Green, True Label = Green
Test Image 1-123009001-OCR-LB-C02_jpg.rf.821ba7c9792be2c1850cfdda503ee959.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2154036-3-_360x480_jpg.rf.6b91449c7953af08e8b0237c20b2ee6c.jpg: Predicted Label = Blue, True Label = Green
Test Image IMG148_jpg.rf.eec8b36959203d444bfe8c1d0f70108d.

In [33]:
# KNN evaluation on the test set with five voting neighbours.
k = 5  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: correct predictions and number of test samples seen.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    true_label = test_labels[i]
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the predicted label and the true label.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its label's total; hits also count as correct.
    tally = label_accuracy[true_label]
    tally["total"] += 1
    if predicted_label == true_label:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy, in percent of all test samples.
accuracy = correct_predictions / len(test_labels) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image Container-687_jpg.rf.a5d21aa91f47321dd718762cda856ffd.jpg: Predicted Label = Blue, True Label = Green
Test Image CIMG17627413008_jpeg_jpg.rf.8fed38560b4d335998d2dd77b3d1bb05.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2125383-6-_jpg.rf.3213cc0b121b2b2dbde53051d3d6a7a9.jpg: Predicted Label = Green, True Label = Green
Test Image Container-212_jpg.rf.c79aeaadcb78b9475d15ebaaecefd7df.jpg: Predicted Label = Blue, True Label = Green
Test Image Container-106_jpg.rf.10df732b375d1ec24139b5547473259d.jpg: Predicted Label = Blue, True Label = Green
Test Image 1-122720001-OCR-RF-D01_jpg.rf.9a472e57bd5ae1b2b247623c42dfdb12.jpg: Predicted Label = Green, True Label = Green
Test Image 1-123009001-OCR-LB-C02_jpg.rf.821ba7c9792be2c1850cfdda503ee959.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2154036-3-_360x480_jpg.rf.6b91449c7953af08e8b0237c20b2ee6c.jpg: Predicted Label = Green, True Label = Green
Test Image IMG148_jpg.rf.eec8b36959203d444bfe8c1d0f70108d

In [34]:
# KNN evaluation on the test set with eleven voting neighbours.
k = 11  # number of nearest neighbours that vote
correct_predictions = 0

# Per-label tally: correct predictions and number of test samples seen.
label_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for i in range(len(test_data)):
    true_label = test_labels[i]
    predicted_label = knn_classify(test_data[i], train_data, train_labels, k)

    # Report the file name, the predicted label and the true label.
    print(f"Test Image {test_filenames[i]}: Predicted Label = {predicted_label}, True Label = {test_labels[i]}")

    # Every sample counts towards its label's total; hits also count as correct.
    tally = label_accuracy[true_label]
    tally["total"] += 1
    if predicted_label == true_label:
        tally["correct"] += 1
        correct_predictions += 1

# Overall accuracy, in percent of all test samples.
accuracy = correct_predictions / len(test_labels) * 100
print(f"Overall Accuracy: {accuracy:.2f}%")

# Accuracy broken down by true label.
print("\nAccuracy Per Label:")
for label in label_accuracy:
    stats = label_accuracy[label]
    label_accuracy_percentage = (stats["correct"] / stats["total"]) * 100
    print(f"{label}: {label_accuracy_percentage:.2f}% ({stats['correct']}/{stats['total']})")


Test Image Container-687_jpg.rf.a5d21aa91f47321dd718762cda856ffd.jpg: Predicted Label = Blue, True Label = Green
Test Image CIMG17627413008_jpeg_jpg.rf.8fed38560b4d335998d2dd77b3d1bb05.jpg: Predicted Label = Blue, True Label = Green
Test Image VOLU2125383-6-_jpg.rf.3213cc0b121b2b2dbde53051d3d6a7a9.jpg: Predicted Label = Green, True Label = Green
Test Image Container-212_jpg.rf.c79aeaadcb78b9475d15ebaaecefd7df.jpg: Predicted Label = Blue, True Label = Green
Test Image Container-106_jpg.rf.10df732b375d1ec24139b5547473259d.jpg: Predicted Label = Blue, True Label = Green
Test Image 1-122720001-OCR-RF-D01_jpg.rf.9a472e57bd5ae1b2b247623c42dfdb12.jpg: Predicted Label = Green, True Label = Green
Test Image 1-123009001-OCR-LB-C02_jpg.rf.821ba7c9792be2c1850cfdda503ee959.jpg: Predicted Label = Green, True Label = Green
Test Image VOLU2154036-3-_360x480_jpg.rf.6b91449c7953af08e8b0237c20b2ee6c.jpg: Predicted Label = Blue, True Label = Green
Test Image IMG148_jpg.rf.eec8b36959203d444bfe8c1d0f70108d.