# Images
## TSNE

In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import random


In [None]:
def load_images_and_labels(base_path, target_size=(64, 64)):
    """Load PNG images from per-class sub-directories of ``base_path``.

    Every sub-directory of ``base_path`` is treated as one class. Classes are
    numbered 0, 1, 2, ... in sorted directory-name order, and each file whose
    name ends with ".png" is opened, resized to ``target_size`` and converted
    to a NumPy array. Entries of ``base_path`` that are not directories, and
    files without the ".png" suffix, are skipped.

    Args:
        base_path: Directory containing one sub-directory per class.
        target_size: Size passed to ``Image.resize`` for every image.

    Returns:
        A tuple ``(images, labels, label_dict)`` where ``images`` is
        ``np.array`` of the per-image arrays, ``labels`` is the array of
        integer class ids (one per image, same order), and ``label_dict``
        maps each sub-directory name to its class id.

    NOTE(review): images are not converted to a common mode, so the stacked
    array presumably only has a regular shape when all files share the same
    channel layout -- confirm against the dataset.
    """
    images = []
    labels = []
    label_dict = {}
    for folder in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder)
        if not os.path.isdir(folder_path):
            continue
        # A directory gets the next free id even when it holds no PNG files.
        current_label = len(label_dict)
        label_dict[folder] = current_label
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith(".png"):
                continue
            img_path = os.path.join(folder_path, filename)
            # The context manager releases the underlying file handle as soon
            # as the resized pixel data has been copied into the array.
            with Image.open(img_path) as img:
                images.append(np.asarray(img.resize(target_size)))
            labels.append(current_label)
    return np.array(images), np.array(labels), label_dict


In [None]:
# Root of the image dataset: one sub-directory per class.
base_path = "/root/shiym_proj/DiffLook/notebooks/img_datasets"
images, labels, label_dict = load_images_and_labels(base_path)

print(images.shape)

# Flatten every image into a single feature vector. Image arrays are laid out
# rows first, so the second axis is the height and the third is the width.
n_samples, height, width, channels = images.shape
X = images.reshape((n_samples, height * width * channels))
# Scale to [0, 1]; assumes 8-bit pixel values -- TODO confirm for this dataset.
X = X / 255.0

# Randomly select a subset. The permutation is seeded so that the chosen
# samples (and their order) are reproducible, matching the fixed random_state
# given to t-SNE below.
rng = np.random.default_rng(0)
indices = rng.permutation(len(X))
n_select = 1000000  # choose according to the available compute
X_subset = X[indices[:n_select]]
y_subset = labels[indices[:n_select]]

# Embed the selected samples into two dimensions with t-SNE.
tsne = TSNE(n_components=2, random_state=0)
X_2d = tsne.fit_transform(X_subset)



In [None]:
def scatter(x, colors):
    """Scatter-plot a 2-D embedding, one colour and one text label per class.

    Args:
        x: Array of points; columns 0 and 1 are used as the plot coordinates.
        colors: Class label of every row of ``x``. Labels need not be the
            contiguous range 0..k-1; each distinct value gets its own colour.

    Returns:
        A tuple ``(f, ax, sc, txts)``: the figure, its axes, the scatter
        artist and the list of per-class text labels.
    """
    colors = np.asarray(colors)
    # Remap the label values onto 0..k-1 so the palette lookup stays in range
    # even when some class ids are missing from the plotted subset.
    classes, class_idx = np.unique(colors, return_inverse=True)
    palette = np.array(plt.cm.jet(np.linspace(0, 1, len(classes))))

    f = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    sc = ax.scatter(x[:, 0], x[:, 1], lw=0, s=40, c=palette[class_idx])

    # NOTE(review): the later ax.axis('tight') call presumably re-autoscales
    # the view, so these fixed limits may have no lasting effect -- confirm.
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    ax.axis('off')
    ax.axis('tight')

    # Add one label per class at the median position of its points.
    txts = []
    for cls in classes:
        xtext, ytext = np.median(x[colors == cls, :], axis=0)
        txt = ax.text(xtext, ytext, str(cls), fontsize=24)
        # White outline keeps the label readable on top of the markers.
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])
        txts.append(txt)

    return f, ax, sc, txts

# Plot the t-SNE embedding, coloured by the class labels of the subset.
scatter(X_2d, y_subset)
plt.show()


## UMAP

In [None]:
import os
import numpy as np
from PIL import Image
import umap
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import random


In [None]:
# Embed the same flattened image subset (X_subset) with UMAP using a fixed
# seed. This overwrites the X_2d produced by the t-SNE section.
reducer = umap.UMAP(random_state=42)
X_2d = reducer.fit_transform(X_subset)

def scatter(x, colors):
    """Scatter-plot a 2-D embedding, one colour and one text label per class.

    Args:
        x: Array of points; columns 0 and 1 are used as the plot coordinates.
        colors: Class label of every row of ``x``. Labels need not be the
            contiguous range 0..k-1; each distinct value gets its own colour.

    Returns:
        A tuple ``(f, ax, sc, txts)``: the figure, its axes, the scatter
        artist and the list of per-class text labels.
    """
    colors = np.asarray(colors)
    # Remap the label values onto 0..k-1 so the palette lookup stays in range
    # even when some class ids are missing from the plotted subset.
    classes, class_idx = np.unique(colors, return_inverse=True)
    palette = np.array(plt.cm.jet(np.linspace(0, 1, len(classes))))

    f = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    sc = ax.scatter(x[:, 0], x[:, 1], lw=0, s=40, c=palette[class_idx])

    # NOTE(review): the later ax.axis('tight') call presumably re-autoscales
    # the view, so these fixed limits may have no lasting effect -- confirm.
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    ax.axis('off')
    ax.axis('tight')

    # Add one label per class at the median position of its points.
    txts = []
    for cls in classes:
        xtext, ytext = np.median(x[colors == cls, :], axis=0)
        txt = ax.text(xtext, ytext, str(cls), fontsize=24)
        # White outline keeps the label readable on top of the markers.
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])
        txts.append(txt)

    return f, ax, sc, txts

# Plot the UMAP embedding, coloured by the class labels of the subset.
scatter(X_2d, y_subset)
plt.show()
