1. 载入数据

In [None]:
import numpy as np
import pandas as pd

IMAGE_DATA_FILE = "../data/caltech-256_features.npz"
CLASS_NAME_FILE = "../data/256_ObjectCategories_map_ZH.csv"

# Open the .npz archive in a ``with`` block so the underlying file is closed
# as soon as the arrays have been read. The arrays themselves stay valid
# afterwards; only ``image_data`` must not be indexed again past this point.
# Keys are read with ``[]`` so that a missing entry fails right here with a
# KeyError naming the key, rather than surfacing later on ``.shape``.
with np.load(IMAGE_DATA_FILE) as image_data:
    # print(f"Components of image_data: {list(image_data.keys())}")
    X_vit = image_data["vit_features"]
    X_clip = image_data["clip_features"]
    y = image_data["labels"]

print(f"The shape of X_vit is {X_vit.shape}")
print(f"The shape of X_clip is {X_clip.shape}")
print(f"The shape of y is {y.shape}")

# Lookup table built from the CSV: value of the "class" column -> value of
# the "handle" column of the same row.
class_name_df = pd.read_csv(CLASS_NAME_FILE)
class_name_map = class_name_df.set_index("class")["handle"].to_dict()

2. 数据标准化：分别针对 ViT 和 CLIP 两个模型的输出特征，在待分析（聚类或可视化）的样本集合上，尝试多种强度（即对原始特征的影响程度）不同的标准化手段：
    - 无标准化。
    - 按样本点 L2 范数的均值进行单位化，即仅对两个模型的输出做整体放缩，使全部待分析样本点分布在单位球面附近。
    - 各特征的 Standard Scaler，使全部待分析样本点近似呈单位立方体分布，可使用 scipy.cluster.vq.whiten 或 sklearn.preprocessing.StandardScaler。
    - PCA/ZCA Whitening。

In [None]:
from standardization import get_standard_data

# Standardisation method handed to get_standard_data for both feature sets.
# Values that have been toggled in this notebook:
#   None, "l2_norm", "feature_standard", "PCA_whiten", "ZCA_whiten"
STANDARD_METHOD = "feature_standard"


def _standardize(features):
    """Apply the notebook-wide STANDARD_METHOD to one feature matrix."""
    return get_standard_data(features, STANDARD_METHOD)


# NOTE(review): this runs on the full sample set, i.e. before the catch-all
# class is removed further down, so any statistics get_standard_data
# estimates presumably include those samples too -- confirm this is intended.
X_vit = _standardize(X_vit)
X_clip = _standardize(X_clip)

3. 获取各类别的表示
    - 先在数据中剔除“其它”类别的样本。
    - 以各类别全部样本点的重心（特征均值）作为该类别的表示。

In [None]:
labels = np.unique(y)
# The last category is the catch-all "other" class; it must be excluded from
# the subsequent analysis.
# NOTE(review): this relies on that class being the last entry of the sorted
# np.unique output, and the cell is not idempotent -- re-running it on the
# already filtered data would drop one more class. Confirm the label name and
# consider matching on it explicitly.
clutter_label = labels[-1]
class_labels = labels[:-1]
print(f"The last category is \"{clutter_label}\".")
not_clutter_index = np.where(y != clutter_label)
X_vit = X_vit[not_clutter_index]
X_clip = X_clip[not_clutter_index]
y = y[not_clutter_index]
print(f"Samples of Category \"{clutter_label}\" are removed.")

# One representative per remaining class: the centroid (feature-wise mean)
# of all samples carrying that label. Row i of each matrix corresponds to
# y_class[i].
X_vit_class = np.array([X_vit[y == label].mean(axis=0)
                        for label in class_labels])
X_clip_class = np.array([X_clip[y == label].mean(axis=0)
                         for label in class_labels])
y_class = class_labels.copy()

In [None]:
from sklearn.manifold import TSNE

# Which sample set t-SNE is fitted on. In both modes only the embedded class
# centroids are kept for plotting.
#   "image": fit on all images stacked with the class centroids
#   "class": fit on the class centroids only
# FIT_LEVEL = "image"
FIT_LEVEL = "class"

# METRIC = "euclidean"
METRIC = "cityblock"

N_ITER = 10000
VERBOSE = 1
N_JOBS = 12

# Perplexity used for each fit level.
_PERPLEXITY_BY_LEVEL = {"image": 30, "class": 10}

# Validate before doing any work. The original built the ValueError without
# raising it, so an unsupported level was silently ignored and only showed up
# later as undefined embedding variables.
if FIT_LEVEL not in _PERPLEXITY_BY_LEVEL:
    raise ValueError(
        f"Target level {FIT_LEVEL} not available, only image or class.")

PERPLEXITY = _PERPLEXITY_BY_LEVEL[FIT_LEVEL]

# NOTE(review): recent scikit-learn releases rename ``n_iter`` to ``max_iter``
# -- check against the installed version before upgrading.
transformer = TSNE(
    n_components=2,
    perplexity=PERPLEXITY,
    early_exaggeration=12,
    learning_rate="auto",
    n_iter=N_ITER,
    n_iter_without_progress=300,
    metric=METRIC,
    init="pca",
    verbose=VERBOSE,
    random_state=0,
    method="barnes_hut",
    angle=0.5,
    n_jobs=N_JOBS
)

if FIT_LEVEL == "image":
    # The centroids are appended after the images, so the trailing rows of
    # the embedding are the centroids' coordinates.
    X_vit_class_embedded = transformer.fit_transform(
        np.vstack((X_vit, X_vit_class)))[-len(X_vit_class):]
    X_clip_class_embedded = transformer.fit_transform(
        np.vstack((X_clip, X_clip_class)))[-len(X_clip_class):]
else:  # FIT_LEVEL == "class"
    X_vit_class_embedded = transformer.fit_transform(X_vit_class)
    X_clip_class_embedded = transformer.fit_transform(X_clip_class)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import colormaps
# from sklearn.preprocessing import MinMaxScaler
# from IPython.display import display, clear_output

# Output directory and base file name (no extension) for the saved figure.
# The name records the settings that produced the embedding.
FIGURE_PATH = "../results/t-SNE/"
FIGURE_FILE = "_&_".join([
    f"fit_{FIT_LEVEL}_transform_class",
    f"{STANDARD_METHOD}_preprocess",
    f"{METRIC}_metric",
    f"{N_ITER}_iters",
])

# Two equally wide panels side by side, one per feature extractor.
fig, axes = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(70, 30),
    gridspec_kw=dict(width_ratios=[1, 1]),
)


def plot_embedding(X, y, ax, title):
    """Scatter a 2-D embedding on ``ax`` and annotate every point.

    Points are coloured per distinct label. Each point is annotated with
    ``"<code>.<name>"``, where ``<code>`` is the part of its label before the
    first ``"."`` and ``<name>`` is looked up in the module-level
    ``class_name_map`` using the part after it.

    Parameters
    ----------
    X : array-like
        Embedded coordinates; ``X[i][0]`` and ``X[i][1]`` are used as the
        position of sample ``i``.
    y : array-like of str
        Label of each sample, formatted as ``"<code>.<english_name>"``.
    ax : matplotlib axes
        Axes the points, annotations and title are drawn on.
    title : str
        Title of the panel; also used in the printed class count.

    Raises
    ------
    ValueError
        If a label contains no ``"."``.
    KeyError
        If the name part of a label is missing from ``class_name_map``.
    """
    # X = MinMaxScaler().fit_transform(X)

    labels = np.unique(y)
    print(f"There are {len(labels)} classes in {title} data.")

    cmap = colormaps.get_cmap("rainbow")
    colors = cmap(np.linspace(0, 1, len(labels)))
    # Shuffle so classes adjacent in label order do not get near-identical
    # colours. A local, fixed-seed generator gives the same order as seeding
    # the global NumPy RNG with 0, without resetting the global state.
    np.random.RandomState(0).shuffle(colors)

    for label, color in zip(labels, colors):
        ax.scatter(
            *X[y == label].T,
            color=color
        )
        # display(fig)
        # clear_output(wait = True)

    for point, label in zip(X, y):
        # Split on the first "." only, so a name that itself contains a dot
        # still unpacks into exactly two parts.
        y_code, y_en = label.split(".", 1)
        y_zh = class_name_map[y_en]
        ax.annotate(y_code+"."+y_zh,
                    (point[0], point[1]), fontproperties="SimHei")

    ax.set_title(title, fontsize=30)


import os

# Left panel: ViT class centroids; right panel: CLIP class centroids.
plot_embedding(X_vit_class_embedded, y_class, axes[0], "vit_class")
plot_embedding(X_clip_class_embedded, y_class, axes[1], "clip_class")


plt.suptitle(FIGURE_FILE, x=0.5, y=0.98, fontsize=40)
# Create the output directory first so a fresh checkout does not fail at the
# very last step, after the embedding has already been computed.
os.makedirs(FIGURE_PATH, exist_ok=True)
plt.savefig(FIGURE_PATH+FIGURE_FILE+".pdf", format="pdf")