# clustering similar images 

In [3]:
# Import necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Function to load images from a directory
def load_images_from_directory(directory_path, image_size=(64, 64)):
    images = []
    for filename in os.listdir(directory_path):
        img_path = os.path.join(directory_path, filename)
        img = cv2.imread(img_path)
        if img is not None:
            img = cv2.resize(img, image_size)  # Resize to keep dimensions consistent
            images.append(img)
    return np.array(images)

# Load images from two directories
Mix_testData = load_images_from_directory(r"D:\Model_eval\test_data\case\結合データセット\images")
v360_testData = load_images_from_directory(r"\\wsl.localhost\Debian\home\akashanil\Projects\yolov5_hyouka\test_datasets\new\v360_test_dataset\images")

# Combine the images from both sets for comparison
all_images = np.concatenate((Mix_testData, v360_testData))

# Preprocess the images: Flatten the images (turn into 1D vectors)
all_images_flattened = all_images.reshape(len(all_images), -1)

# Step 1: Apply KMeans Clustering to the flattened images
n_clusters = 5  # Number of clusters to group images
kmeans = KMeans(n_clusters=n_clusters)
kmeans_labels = kmeans.fit_predict(all_images_flattened)

# Step 2: Reduce dimensionality with PCA or t-SNE for visualization

# Option 1: PCA
pca = PCA(n_components=2)
images_pca = pca.fit_transform(all_images_flattened)

# Option 2: t-SNE (slower but sometimes gives better separation)
# tsne = TSNE(n_components=2, perplexity=30, random_state=42)
# images_tsne = tsne.fit_transform(all_images_flattened)

# Visualize the clusters with PCA
plt.figure(figsize=(10, 6))
plt.scatter(images_pca[:, 0], images_pca[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('Image Clustering with PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(label='Cluster')
plt.show()

# You can repeat the above with t-SNE if you want to compare the difference:
# Visualize the clusters with t-SNE
# plt.figure(figsize=(10, 6))
# plt.scatter(images_tsne[:, 0], images_tsne[:, 1], c=kmeans_labels, cmap='viridis')
# plt.title('Image Clustering with t-SNE')
# plt.xlabel('t-SNE Component 1')
# plt.ylabel('t-SNE Component 2')
# plt.colorbar(label='Cluster')
# plt.show()


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 4 dimension(s)