In [8]:
import cv2
# Number of clusters: passed as k to kmeans_manual and used as the loop bound
# when the per-cluster image lists are printed in the later cells.
cluster = 4
def preprocess_image(image_path, size=(100, 100)):
    """Read an image from disk and resize it to a fixed size.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(100, 100)``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unsupported file): {image_path!r}"
        )
    return cv2.resize(img, size)


In [9]:
import cv2
import numpy as np
import matplotlib.pyplot as plt


def extract_and_show_histogram(hsv_img):
    """Show one histogram per channel of an HSV image in a 1x3 figure.

    Args:
        hsv_img: Image indexed as ``hsv_img[:, :, c]``; channel 0 is plotted
            as Hue, channel 1 as Saturation and channel 2 as Value.
    """
    # One entry per channel: (title, x-axis label, bin count / upper bound, colour).
    channel_specs = (
        ('Hue Channel', 'Hue', 180, 'r'),
        ('Saturation Channel', 'Saturation', 256, 'g'),
        ('Value Channel', 'Value', 256, 'b'),
    )

    plt.figure(figsize=(12, 4))

    for channel, (title, axis_label, upper, colour) in enumerate(channel_specs):
        # Subplot positions are 1-based, channel indices 0-based.
        plt.subplot(1, 3, channel + 1)
        plt.title(title)
        plt.hist(hsv_img[:, :, channel].ravel(), bins=upper, range=[0, upper], color=colour)
        plt.xlabel(axis_label)
        plt.ylabel('Frequency')

    # Render the finished figure.
    plt.tight_layout()
    plt.show()

def extract_features(image, bins=(8, 8, 8), show_histogram=True):
    """Compute a normalized, flattened 3-D HSV colour histogram of an image.

    Args:
        image: Image converted with ``cv2.COLOR_BGR2HSV``, so BGR channel
            order is expected.
        bins: Number of histogram bins for the H, S and V channels.
            Defaults to ``(8, 8, 8)``.
        show_histogram: When True (the default), also plot the per-channel
            histograms via ``extract_and_show_histogram``. Pass False to
            extract features for many images without opening a figure for
            each one.

    Returns:
        A 1-D array with ``bins[0] * bins[1] * bins[2]`` entries.
    """
    hsv_img = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Joint histogram over all three channels; hue is binned over [0, 180),
    # saturation and value over [0, 256).
    hist = cv2.calcHist([hsv_img], [0, 1, 2], None, list(bins), [0, 180, 0, 256, 0, 256])
    # Normalize in place (dst is the same array), using cv2.normalize's defaults.
    cv2.normalize(hist, hist)
    if show_histogram:
        extract_and_show_histogram(hsv_img)
    return hist.flatten()


In [10]:
import numpy as np

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points."""
    delta = point1 - point2
    squared_sum = np.sum(np.square(delta))
    return np.sqrt(squared_sum)

def _assign_to_nearest(features, centroids):
    """Return, for every sample, the index of its nearest centroid (Euclidean)."""
    # Broadcast to (n_samples, k, ...) so each sample is compared with each centroid.
    diff = features[:, None, ...] - centroids[None, ...]
    squared = diff ** 2
    # Reduce over the feature axes only; for 1-D inputs there are none to reduce.
    distances = np.sqrt(squared.sum(axis=tuple(range(2, squared.ndim))))
    # argmin keeps the first minimum, so ties go to the lowest centroid index.
    return np.argmin(distances, axis=1)


def kmeans_manual(features, k, centroids, max_iters=100):
    """Run K-Means (Lloyd's iterations) from caller-supplied initial centroids.

    Args:
        features: Array-like whose first axis indexes the samples.
        k: Number of clusters; must equal the number of initial centroids.
        centroids: Array-like of ``k`` initial centroids, each shaped like
            one sample. The input is copied, never modified.
        max_iters: Maximum number of centroid updates. With 0 the samples
            are only assigned to the initial centroids.

    Returns:
        A ``(labels, centroids)`` tuple. ``labels`` is an integer array
        holding the index of the nearest returned centroid for each sample;
        ``centroids`` is a float array of the final centroids.

    Raises:
        ValueError: If the number of initial centroids differs from ``k``.
    """
    features = np.asarray(features, dtype=float)
    centroids = np.array(centroids, dtype=float)
    if centroids.shape[0] != k:
        raise ValueError(
            f"expected {k} initial centroids, got {centroids.shape[0]}"
        )

    # Assign once up front so labels exist even when max_iters is 0.
    labels = _assign_to_nearest(features, centroids)

    for _ in range(max_iters):
        # Start from the current centroids so a cluster that lost all of its
        # members keeps its position instead of collapsing to the origin.
        new_centroids = centroids.copy()
        for j in range(k):
            members = features[labels == j]
            if members.shape[0] > 0:
                new_centroids[j] = members.mean(axis=0)

        # Converged: the update left every centroid unchanged.
        if np.array_equal(new_centroids, centroids):
            break

        centroids = new_centroids
        # Re-assign so the returned labels always match the returned centroids.
        labels = _assign_to_nearest(features, centroids)

    return labels, centroids

In [11]:
import numpy as np

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points.

    NOTE(review): this repeats the definition from the K-Means cell above;
    whichever cell ran last provides the binding.
    """
    difference = point1 - point2
    return np.sqrt(np.sum(np.square(difference)))

def calculate_distance_matrix(features):
    """Build the symmetric matrix of pairwise Euclidean distances.

    Args:
        features: Array whose first axis indexes the points.

    Returns:
        An ``(n, n)`` float array with zeros on the diagonal, where entry
        ``[i, j]`` is the distance between points ``i`` and ``j``.
    """
    n = features.shape[0]
    upper = np.zeros((n, n))

    # Fill only the strict upper triangle, one row at a time.
    for row in range(n - 1):
        upper[row, row + 1:] = [
            euclidean_distance(features[row], features[col])
            for col in range(row + 1, n)
        ]

    # Mirror the upper triangle into the lower one; the diagonal stays zero.
    return upper + upper.T

def find_closest_clusters(distance_matrix, active_clusters):
    """Find the pair of active clusters with the smallest distance.

    Args:
        distance_matrix: 2-D array indexed as ``[i, j]`` by cluster index.
        active_clusters: Indices of the clusters to consider.

    Returns:
        ``((i, j), distance)`` with ``i < j``. On ties the first pair in
        iteration order wins. If no pair qualifies, ``((-1, -1), np.inf)``.
    """
    best_pair = (-1, -1)
    best_distance = np.inf

    # Consider each unordered pair once by requiring first < second.
    ordered_pairs = (
        (first, second)
        for first in active_clusters
        for second in active_clusters
        if first < second
    )
    for first, second in ordered_pairs:
        candidate = distance_matrix[first, second]
        if candidate < best_distance:
            best_distance = candidate
            best_pair = (first, second)

    return best_pair, best_distance

def update_distance_matrix(distance_matrix, cluster_a, cluster_b):
    """Fold cluster_b into cluster_a in the distance matrix (complete linkage).

    The row and column of ``cluster_a`` are updated in place on the array
    passed in; the returned array is a new one with the row and column of
    ``cluster_b`` removed.

    Args:
        distance_matrix: Square array of pairwise cluster distances.
        cluster_a: Row/column index of the cluster that survives the merge.
        cluster_b: Row/column index of the cluster that is removed.

    Returns:
        A copy of the updated matrix without row and column ``cluster_b``.
    """
    merged = (cluster_a, cluster_b)
    for other in range(distance_matrix.shape[0]):
        if other in merged:
            continue
        # Complete linkage: the merged cluster is as far from `other` as the
        # farther of its two parts.
        farthest = max(distance_matrix[cluster_a, other], distance_matrix[cluster_b, other])
        distance_matrix[cluster_a, other] = farthest
        distance_matrix[other, cluster_a] = farthest

    without_row = np.delete(distance_matrix, cluster_b, axis=0)
    return np.delete(without_row, cluster_b, axis=1)

def hierarchical_clustering(features, num_clusters, verbose=True):
    """Agglomerative hierarchical clustering.

    Every point starts as its own cluster; the two closest clusters (per
    ``find_closest_clusters``) are merged repeatedly, with distances updated
    by ``update_distance_matrix``, until ``num_clusters`` clusters remain.

    Args:
        features: Array whose first axis indexes the points.
        num_clusters: Number of clusters at which merging stops.
        verbose: When True (the default), print the cluster dictionary after
            every merge.

    Returns:
        A dict mapping a cluster key (the smallest original point index that
        seeded the cluster) to the list of point indices it contains.
    """
    num_points = features.shape[0]

    # Initially every point is its own cluster, keyed by the point index.
    clusters = {i: [i] for i in range(num_points)}

    # update_distance_matrix removes the merged cluster's row and column, so
    # matrix positions shift after every merge. cluster_ids[p] records which
    # cluster key currently sits at row/column p of the matrix.
    cluster_ids = list(clusters.keys())

    distance_matrix = calculate_distance_matrix(features)

    while len(cluster_ids) > num_clusters:
        # Search by matrix position, not by original id.
        positions = range(len(cluster_ids))
        (pos_a, pos_b), _ = find_closest_clusters(distance_matrix, positions)

        # (-1, -1) means no pair qualified (a single cluster is left, or the
        # distances are not comparable); stop instead of looping forever.
        if pos_a < 0 or pos_b < 0:
            break

        cluster_a = cluster_ids[pos_a]
        cluster_b = cluster_ids[pos_b]

        # Merge cluster_b into cluster_a.
        clusters[cluster_a].extend(clusters.pop(cluster_b))

        # Drop cluster_b from the matrix and from the position map together
        # so the two stay aligned.
        distance_matrix = update_distance_matrix(distance_matrix, pos_a, pos_b)
        del cluster_ids[pos_b]

        if verbose:
            print(clusters)

    return clusters

In [None]:
from sklearn.cluster import KMeans
import numpy as np
import os
from sklearn.cluster import AgglomerativeClustering
import numpy as np

features = []
data_folder = 'data/'
labelimg = []

# os.listdir returns entries in arbitrary order, and the initial centroids
# below are chosen by position, so sort the listing to make the run
# reproducible across machines and file systems.
for file_name in sorted(os.listdir(data_folder)):
    # Match the extension case-insensitively so files such as "IMG.JPG" are kept.
    if file_name.lower().endswith(('.jpg', '.png', '.jpeg')):
        img_path = os.path.join(data_folder, file_name)
        labelimg.append(file_name)
        img = preprocess_image(img_path)
        feature = extract_features(img)
        features.append(feature)

features = np.array(features)
# Label = the part of the file name before the first underscore; sorted so the
# list order is stable between runs.
unique_labels = sorted({name.split('_')[0] for name in labelimg})

# Seed one centroid from every 4th image: indices 0, 4, 8, 12 when cluster == 4.
# NOTE(review): the stride of 4 reproduces the original hard-coded indices;
# confirm it matches how the images are grouped in the data folder.
initial_indices = [4 * i for i in range(cluster)]
if len(features) <= max(initial_indices, default=-1):
    raise ValueError(
        f"Need at least {max(initial_indices, default=-1) + 1} images in "
        f"{data_folder!r} to seed {cluster} centroids, found {len(features)}."
    )
centroids = features[initial_indices]
labels, centroids = kmeans_manual(features, k=cluster, centroids=centroids, max_iters=10000)

print(labels)
print(centroids)


In [None]:
# Print the image file names that were assigned to each cluster index.
for i in range(cluster):
    cluster_images = [name for idx, name in enumerate(labelimg) if labels[idx] == i]
    print(f"Cluster {i}: {cluster_images}")

In [None]:
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt


# The silhouette score is only defined when the number of distinct labels is
# between 2 and n_samples - 1; outside that range scikit-learn raises. Clusters
# can end up empty, so check first and report NaN rather than crash.
n_found_clusters = len(np.unique(labels))
if 2 <= n_found_clusters <= len(features) - 1:
    sil_score = silhouette_score(features, labels)
else:
    sil_score = float('nan')
    print(
        f"Silhouette score is undefined for {n_found_clusters} cluster(s) "
        f"over {len(features)} samples."
    )

print(f"Silhouette Score: {sil_score:.3f}")

# Only the first two feature dimensions are drawn, so this is a partial view
# of the feature space rather than a picture of the full cluster structure.
plt.scatter(features[:, 0], features[:, 1], c=labels, cmap='viridis')
plt.title(f'Clustering Results (Silhouette Score: {sil_score:.3f})')
plt.xlabel('Feature 0')
plt.ylabel('Feature 1')
plt.show()
