# In [None]:
from PIL import Image
import os
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

def extract_features(image_path, size=None):
    """Load an image and return its pixels as a flat 1-D feature vector.

    Args:
        image_path: Path of the image file to read.
        size: Optional ``(width, height)`` tuple. When given, the image is
            resized to it before flattening so that images of different
            dimensions yield vectors of the same length. Defaults to ``None``
            (keep the original dimensions).

    Returns:
        A 1-D ``numpy.ndarray`` holding the flattened RGB pixel values.
    """
    # Use the image as a context manager so the underlying file handle is
    # released as soon as the pixels have been read.
    with Image.open(image_path) as img:
        # Force three channels: grayscale, palette and RGBA files would
        # otherwise produce vectors with a different length and layout.
        img = img.convert("RGB")
        if size is not None:
            img = img.resize(size)
        # Flatten the (height, width, 3) pixel array into a single vector.
        return np.array(img).flatten()

def compute_cosine_similarity(features):
    """Return the pairwise cosine-similarity matrix of ``features`` with itself.

    Args:
        features: Collection of equally sized feature vectors, one per sample.

    Returns:
        The matrix produced by ``sklearn``'s ``cosine_similarity`` when the
        same collection is supplied as both operands.
    """
    return cosine_similarity(features, features)

def get_all_image_paths(root_dir):
    """Recursively collect the paths of all JPEG/PNG files under ``root_dir``.

    Args:
        root_dir: Directory to walk.

    Returns:
        A list of file paths (``dirpath`` joined with the file name) whose
        extension is ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).
        Directories are visited, and files listed, in sorted order so the
        result does not depend on the filesystem's enumeration order.
    """
    # Include the leading dot: a bare 'jpg' suffix would also match names
    # such as 'fakejpg' or 'archive.mpng'.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order.
        dirnames.sort()
        image_paths.extend(
            os.path.join(dirpath, filename)
            for filename in sorted(filenames)
            if filename.lower().endswith(image_extensions)
        )
    return image_paths

def cluster_images(image_dir, num_clusters):
    """Group the images found under ``image_dir`` into K-means clusters.

    Each image is turned into a feature vector, the pairwise cosine
    similarity matrix of those vectors is computed, and K-means is fitted on
    the rows of that matrix (one row per image).

    Args:
        image_dir: Root directory that is searched recursively for images.
        num_clusters: Number of clusters passed to ``KMeans``.

    Returns:
        A dict mapping each cluster label (plain ``int``) to the list of
        image paths assigned to that cluster.

    Raises:
        ValueError: If no images are found, or if the images do not all
            produce feature vectors of the same length.
    """
    image_paths = get_all_image_paths(image_dir)
    if not image_paths:
        # Fail early with a readable message instead of an opaque
        # array-shape error from inside scikit-learn.
        raise ValueError(f"No images found under {image_dir!r}")

    all_features = [extract_features(image_path) for image_path in image_paths]

    # Cosine similarity needs vectors of equal length; differently sized
    # images would otherwise fail deep inside numpy/scikit-learn.
    feature_lengths = {len(features) for features in all_features}
    if len(feature_lengths) > 1:
        raise ValueError(
            "Images produce feature vectors of different lengths "
            f"{sorted(feature_lengths)}; resize them to a common size first"
        )

    # Compute the cosine similarity matrix.
    similarity_matrix = compute_cosine_similarity(all_features)

    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    kmeans.fit(similarity_matrix)

    # Store the image paths per cluster; cast the numpy label to a plain int
    # so the keys print and serialize like ordinary integers.
    clusters = defaultdict(list)
    for image_path, label in zip(image_paths, kmeans.labels_):
        clusters[int(label)].append(image_path)

    return dict(clusters)

# Set the image directory and the number of clusters, then run the clustering.
image_directory = "your_image_directory"  # placeholder: enter the path of your image directory
num_clusters = 5  # number of clusters; set this to the desired value

image_clusters = cluster_images(image_directory, num_clusters)

#------------------------------------------------------------------------------------------#
from sklearn.metrics import confusion_matrix

# Compare the K-means clustering result with the class labels implied by the image folders.
def compare_labels(kmeans_labels, folder_labels):
    """Return the confusion matrix of cluster labels versus folder labels.

    ``kmeans_labels`` is passed as the first argument of
    ``sklearn.metrics.confusion_matrix`` and ``folder_labels`` as the second.

    Args:
        kmeans_labels: Label assigned to each image by K-means.
        folder_labels: Label of each image according to its folder, in the
            same order as ``kmeans_labels``.

    Returns:
        The array returned by ``confusion_matrix``.
    """
    # NOTE(review): presumably both sequences must use the same label type
    # (all ints or all strings) — confirm against the callers.
    return confusion_matrix(kmeans_labels, folder_labels)

# Rebuild the list of clustered image paths from the clustering result: the
# `image_paths` variable inside cluster_images() is local to that function and
# is not available at module level.
image_paths = [path for paths in image_clusters.values() for path in paths]

# Labels predicted from the K-means clustering result, one per image.
# NOTE(review): get_predicted_label and get_true_label are not defined in the
# visible part of this file — confirm they are provided before this code runs.
kmeans_predicted_labels = [get_predicted_label(image_path, image_clusters) for image_path in image_paths]
# True class labels according to each image's folder.
folder_true_labels = [get_true_label(image_path) for image_path in image_paths]

# Build and print the confusion matrix.
confusion_matrix_result = compare_labels(kmeans_predicted_labels, folder_true_labels)
print("Confusion Matrix:")
print(confusion_matrix_result)
