In [1]:
import torch
import numpy as np
import os
from sklearn.cluster import KMeans


## 直接合并

In [2]:
def load_and_merge_features(base_path):
    # 读取目录并按数值排序
    categories = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    categories.sort(key=int)  # 将目录名转换为整数进行排序
    print(categories)
    all_features = []
    
    for category in categories:
        category_path = os.path.join(base_path, category)
        features_files = [f for f in os.listdir(category_path) if f.endswith('.npy')]
        category_features = [np.load(os.path.join(category_path, f)) for f in features_files]
        
        if category_features:
            category_features = np.concatenate(category_features, axis=0)
            all_features.append(category_features)
    
    if all_features:
        # 使用 np.stack 添加新的维度来区分不同的类别
        all_features = np.stack(all_features, axis=0)
    
    return all_features

In [3]:
coco_attribute_path = "../exemplar_prototype/coco/ins100_attribute/embeddings/"
all_features = load_and_merge_features(coco_attribute_path)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65']


In [4]:
all_features.shape

(65, 500, 2048)

In [5]:
coco_attribute_save_path = "../exemplar_prototype/coco/coco_ins100_attribute_perimg.pth"
tensor = torch.from_numpy(all_features)
torch.save(tensor, coco_attribute_save_path)

## 单类聚类合并

In [2]:
import numpy as np
import os
from sklearn.cluster import KMeans

# 设置数据目录
data_directory = '/raid/mqsen/CKDet/exemplar_prototype/coco/ins100_attribute/embeddings/2'

# 加载所有.npy文件
files = [f for f in os.listdir(data_directory) if f.endswith('.npy')]
data = [np.load(os.path.join(data_directory, f)) for f in files]
combined_data = np.concatenate(data, axis=0)
print("combined_data.shape: ", combined_data.shape)
# 使用k-means++进行聚类
kmeans = KMeans(n_clusters=10, init='k-means++')
kmeans.fit(combined_data)

# 输出聚类结果
print("Cluster centers:", kmeans.cluster_centers_)
print("Labels:", kmeans.labels_)
print("kmeans.cluster_centers_.shape: ", kmeans.cluster_centers_.shape)

FileNotFoundError: [Errno 2] No such file or directory: '/raid/mqsen/CKDet/exemplar_prototype/coco/ins100_attribute/embeddings/2'

In [None]:
import numpy as np
import os
from sklearn.mixture import GaussianMixture  # 修改这里

# 设置数据目录
data_directory = '/raid/mqsen/CKDet/exemplar_prototype/coco/ins100_attribute/embeddings/2'

# 加载所有.npy文件
files = [f for f in os.listdir(data_directory) if f.endswith('.npy')]
data = [np.load(os.path.join(data_directory, f)) for f in files]
combined_data = np.concatenate(data, axis=0)
print("combined_data.shape: ", combined_data.shape)

# 使用高斯混合模型（GMM）进行聚类，指定聚类数目
gmm = GaussianMixture(n_components=10, init_params='kmeans')  # 替换 KMeans 为 GaussianMixture
gmm.fit(combined_data)

# 输出聚类结果
print("Cluster centers:", gmm.means_)  # GMM 中的簇中心是 gmm.means_
print("Labels:", gmm.predict(combined_data))  # 使用 predict 得到数据的簇标签
print("gmm.means_.shape: ", gmm.means_.shape)

In [10]:
import numpy as np
import os
from sklearn.cluster import KMeans, MiniBatchKMeans, AgglomerativeClustering, Birch
from sklearn.mixture import GaussianMixture

def load_and_merge_features(base_path, n_clusters, method='kmeans'):
    # 读取目录并按数值排序
    categories = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    categories.sort(key=int)  # 将目录名转换为整数进行排序
    print(categories)
    all_features = []
    
    for category in categories:
        category_path = os.path.join(base_path, category)
        features_files = [f for f in os.listdir(category_path) if f.endswith('.npy')]
        half_length = len(features_files) // 2 # 消融实验用，之后使用要注释掉
        features_files = features_files[:half_length] # 消融实验用，之后使用要注释掉
        
        category_features = [np.load(os.path.join(category_path, f)) for f in features_files]
     
        if category_features:
            category_features = np.concatenate(category_features, axis=0)
            
            if method == 'kmeans':
                kmeans = KMeans(n_clusters=n_clusters, init='k-means++')
                kmeans.fit(category_features)
                all_features.append(kmeans.cluster_centers_)
                
            elif method == 'gmm':
                gmm = GaussianMixture(n_components=n_clusters, init_params='kmeans')
                gmm.fit(category_features)
                all_features.append(gmm.means_)
                
            elif method == 'kmeans++':
                kmeans_plus = KMeans(n_clusters=n_clusters, init='k-means++')
                kmeans_plus.fit(category_features)
                all_features.append(kmeans_plus.cluster_centers_)
            elif method == 'agglomerative':
                model = AgglomerativeClustering(n_clusters=n_clusters)
                model.fit(category_features)
                # 注意，AgglomerativeClustering没有 cluster_centers_，这里取每个簇的平均值作为中心
                centers = [category_features[model.labels_ == i].mean(axis=0) for i in range(n_clusters)]
                all_features.append(np.array(centers))
            elif method == 'birch':
                model = Birch(n_clusters=n_clusters)
                model.fit(category_features)
                # Birch 中的簇中心
                all_features.append(model.subcluster_centers_)
    
    if all_features:
        # 使用 np.stack 添加新的维度来区分不同的类别
        all_features = np.stack(all_features, axis=0)
    
    return all_features

# 使用示例
coco_attribute_path = "../exemplar_prototype/coco/ins100_attribute/embeddings/"
n_clusters = 15


all_features = load_and_merge_features(coco_attribute_path, n_clusters, method='kmeans++')

# 可以选择不同的聚类方法
# all_features_kmeans = load_and_merge_features(coco_attribute_path, n_clusters, method='kmeans')
# all_features_gmm = load_and_merge_features(coco_attribute_path, n_clusters, method='gmm')
# all_features_kmeans_plus = load_and_merge_features(coco_attribute_path, n_clusters, method='kmeans++')
# all_features_mini_batch = load_and_merge_features(coco_attribute_path, n_clusters, method='mini_batch_kmeans') # list形式，不难直接用
# all_features_agglomerative = load_and_merge_features(coco_attribute_path, n_clusters, method='agglomerative') # all input arrays must have the same shape
# all_features_birch = load_and_merge_features(coco_attribute_path, n_clusters, method='birch')

['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65']


In [12]:
print("all_features.shape: ", all_features.shape)

all_features.shape:  (65, 20, 2048)


In [13]:
print("all_features_birch.shape: ", all_features_birch.shape)

all_features_agglomerative.shape:  (65, 15, 2048)


In [None]:
print("all_features_kmeans.shape: ", all_features_kmeans.shape)
print("all_features_kmeans_plus.shape: ", all_features_kmeans.shape)
print("all_features_gmm.shape: ", all_features_gmm.shape)

In [5]:
method = "kmeans"
coco_attribute_save_path = f"../exemplar_prototype/coco/coco_ins100_attribute_comparision_{method}.pth"
tensor = torch.from_numpy(all_features_kmeans)
torch.save(tensor, coco_attribute_save_path)

In [14]:

method = "agglomerative"
coco_attribute_save_path = f"../exemplar_prototype/coco/coco_ins100_attribute_comparision_{method}.pth"
tensor = torch.from_numpy(all_features_agglomerative)
torch.save(tensor, coco_attribute_save_path)

In [11]:
coco_attribute_save_path = f"../exemplar_prototype/coco/coco_ins50_attribute_kmeans{n_clusters}_scale23.pth"
tensor = torch.from_numpy(all_features)
torch.save(tensor, coco_attribute_save_path)