In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score, silhouette_samples
from sklearn.manifold import TSNE
from skimage.feature import hog
import cv2
from sklearn.preprocessing import StandardScaler
import os

In [None]:
# Root folder of the training images (one sub-folder per category).
# Set the DATASET_PATH environment variable to point somewhere else; the
# original local path stays as the fallback so existing setups keep working.
DATASET_PATH = os.environ.get("DATASET_PATH", r"D:\Download\archive\seg_train\seg_train")

In [None]:
def load_dataset(dataset_path, img_size=(64, 64)):
    """Read every image below ``dataset_path`` into memory.

    ``dataset_path`` is expected to contain one sub-directory per category;
    entries that are not directories are ignored. Each readable file is loaded
    with ``cv2.imread`` and resized to ``img_size``. Files OpenCV cannot
    decode are skipped and reported once through a warning.

    Args:
        dataset_path: Directory whose sub-directories hold the image files.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays where ``labels[i]`` is the
        name of the sub-directory ``images[i]`` was read from.
    """
    import warnings

    images = []
    labels = []
    skipped = 0

    # os.listdir gives no ordering guarantee; sorting keeps the sample order
    # (and therefore everything seeded downstream) reproducible.
    for category in sorted(os.listdir(dataset_path)):
        category_path = os.path.join(dataset_path, category)
        if not os.path.isdir(category_path):
            continue

        for file in sorted(os.listdir(category_path)):
            file_path = os.path.join(category_path, file)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None rather than raising; resizing it would crash the load.
                skipped += 1
                continue
            images.append(cv2.resize(img, img_size))
            labels.append(category)

    if skipped:
        warnings.warn(
            f"load_dataset skipped {skipped} unreadable file(s) under {dataset_path!r}"
        )

    return np.array(images), np.array(labels)

In [None]:
# load_dataset returns an (images, labels) pair; unpack it so X is the image
# array itself rather than a 2-tuple. y_true keeps the folder name per image.
X, y_true = load_dataset(DATASET_PATH)

In [None]:
def extract_features(images):
    """Compute one HOG descriptor per image and stack them into an array.

    Each image is converted from BGR to grayscale before ``hog`` is applied
    with 9 orientations, 8x8-pixel cells and 2x2-cell blocks.

    Args:
        images: Iterable of images accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        ``np.array`` built from the per-image HOG feature vectors.
    """
    def _describe(bgr_image):
        # HOG is computed on intensity only, so drop the colour channels first.
        grayscale = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        return hog(
            grayscale,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            feature_vector=True,
        )

    return np.array([_describe(image) for image in images])

In [None]:
# Replace the contents of X with one HOG descriptor per item.
# NOTE(review): X is rebound from the loaded data to the feature array here,
# so re-running this cell on a live kernel feeds features back into
# extract_features; re-run the loading cell first.
X = extract_features(X)

In [None]:
# Fit the scaler on the HOG features, then standardise them with it.
# The fitted scaler is kept so the same transform can be reapplied later.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [None]:
def plot_sample_images(X, num_samples=10):
    """Show a random selection of images in a grid, five per row.

    Args:
        X: Sequence of BGR images (anything ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2RGB``).
        num_samples: Number of images to draw without replacement. Clamped to
            ``len(X)`` so a small collection does not raise.

    Returns:
        None. Nothing is drawn when there is no image to show.
    """
    num_samples = min(num_samples, len(X))
    if num_samples <= 0:
        return

    # Keep the original 2x5 layout for up to ten images and add rows beyond
    # that, instead of failing on the eleventh subplot.
    n_cols = 5
    n_rows = max(2, -(-num_samples // n_cols))

    plt.figure(figsize=(12, 3 * n_rows))
    indices = np.random.choice(len(X), num_samples, replace=False)

    for i, idx in enumerate(indices):
        plt.subplot(n_rows, n_cols, i + 1)
        # OpenCV stores channels as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(X[idx], cv2.COLOR_BGR2RGB))
        plt.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
def plot_hog_feature(img):
    """Display an image next to the visualisation of its HOG descriptor.

    Args:
        img: BGR image; it is converted to grayscale for ``hog`` and to RGB
            for display.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # visualize=True makes hog return (features, rendering); only the
    # rendering is needed for the figure.
    _, hog_rendering = hog(
        grayscale,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=True,
    )

    _, (left, right) = plt.subplots(1, 2, figsize=(10, 5))
    panels = (
        (left, cv2.cvtColor(img, cv2.COLOR_BGR2RGB), "Original Image", {}),
        (right, hog_rendering, "HOG Features", {"cmap": "gray"}),
    )
    for axis, picture, title, imshow_kwargs in panels:
        axis.imshow(picture, **imshow_kwargs)
        axis.set_title(title)
        axis.axis("off")

    plt.show()

In [None]:
# By this point X has been overwritten with standardised HOG feature vectors,
# which cv2.cvtColor / imshow cannot render as pictures. Read the raw images
# again for the visual checks (index 0 of load_dataset's result is the images).
raw_images = load_dataset(DATASET_PATH)[0]
plot_sample_images(raw_images, num_samples=10)
plot_hog_feature(raw_images[0])

In [None]:
# Model selection: fit a Gaussian mixture for every (component count,
# covariance type) pair and keep the pair with the lowest BIC.
n_components_range = range(3, 15)
covariance_types = ['spherical', 'diag', 'tied', 'full']

best_n, best_cov_type, best_bic = None, None, np.inf

search_grid = [
    (n, cov_type)
    for n in n_components_range
    for cov_type in covariance_types
]

for n, cov_type in search_grid:
    gmm = GaussianMixture(
        n_components=n,
        covariance_type=cov_type,
        random_state=42,
    ).fit(X)
    bic = gmm.bic(X)

    # Strict comparison: on a tie the earlier candidate is kept.
    if bic < best_bic:
        best_bic, best_n, best_cov_type = bic, n, cov_type

print(best_n)
print(best_cov_type)


In [None]:
# Final clustering with the configuration selected by the BIC search.
# NOTE(review): max_iter=40 and tol=0.01 are set here but not in the search
# loop, which relies on the library defaults; confirm that is intended.
model = GaussianMixture(
    n_components=best_n,
    covariance_type=best_cov_type,
    random_state=42,
    max_iter=40,
    tol=0.01,
)
labels = model.fit_predict(X)

In [None]:
# Per-sample silhouette coefficients for the current clustering.
silhouette_vals = silhouette_samples(X, labels)
# silhouette_score is defined as the mean of the per-sample coefficients, so
# average the values already computed instead of paying for a second pass
# over all pairwise distances.
avg_score = silhouette_vals.mean()

plt.bar(range(len(silhouette_vals)), silhouette_vals, color='blue', alpha=0.6)
# Dashed red line marks the average silhouette coefficient.
plt.axhline(y=avg_score, color='red', linestyle='--')
plt.xlabel('Data Points')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Plot')
plt.show()

In [None]:
# Internal validation metrics for the clustering stored in `labels`.
silhouette_avg = silhouette_score(X, labels)
print(silhouette_avg)

# The results get their own names: assigning them to davies_bouldin_score /
# calinski_harabasz_score would shadow the imported functions and make this
# cell fail with "object is not callable" the second time it is run.
db_index = davies_bouldin_score(X, labels)
print(db_index)

ch_index = calinski_harabasz_score(X, labels)
print(ch_index)

In [None]:
# For each t-SNE perplexity: embed X in 2-D, cluster the embedding with a
# Gaussian mixture and record the silhouette score of that clustering.
perplexities = [20, 30, 40, 50, 60]
silhouette_scores = []

for perplexity in perplexities:
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    X_tsne = tsne.fit_transform(X)

    # Loop-local names: reusing `model` / `labels` here would overwrite the
    # clustering fitted on the full feature space, and later cells would then
    # silently use the labels of the last embedding instead.
    tsne_gmm = GaussianMixture(n_components=best_n, covariance_type=best_cov_type, random_state=42, max_iter=40, tol=0.01)
    tsne_labels = tsne_gmm.fit_predict(X_tsne)

    if len(set(tsne_labels)) > 1:
        score = silhouette_score(X_tsne, tsne_labels)
        silhouette_scores.append(score)
    else:
        # The silhouette score is undefined for a single cluster; -1 marks it.
        silhouette_scores.append(-1)

In [None]:
# Silhouette score of the clustering obtained at each t-SNE perplexity.
plt.plot(perplexities, silhouette_scores, marker='o', linestyle='--')
plt.xlabel('Perplexity')
plt.ylabel('Silhouette Score')
# The clusters come from GaussianMixture, not KMeans, so the title says GMM.
plt.title('Evaluasi Perplexity untuk GMM dengan Silhouette Score')
plt.show()

In [None]:
# 2-D t-SNE embedding of the features, coloured by cluster label.
# random_state is fixed like everywhere else in the notebook so the figure is
# the same on every run.
X_tsne = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(X)
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=labels, cmap='viridis', alpha=0.5)
plt.title('Clustering T-SNE')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.show()