In [3]:
import linecache
import random

import h5py
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.cluster import v_measure_score
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [4]:
# Dataset used for Siamese-network feature extraction.
class Eval_Dataset(Dataset):
    """Dataset that loads images listed one path per line in a text file.

    Args:
        txt: Path of a text file holding one image path per line.
        transform: Optional callable applied to the PIL image before it is
            returned.
        initial: When False (the default) each image is converted to mode
            "L" (single-channel grayscale); when True the image is returned
            in the mode it was opened with.
    """

    def __init__(self, txt, transform=None, initial=False):
        self.transform = transform
        self.txt = txt
        self.initial = initial
        # linecache never re-reads a file by itself, so drop any cached copy
        # that no longer matches the file on disk (e.g. the listing was
        # regenerated earlier in the same kernel).
        linecache.checkcache(self.txt)

    def __getitem__(self, index):
        """Return the image stored at 0-based position ``index``.

        Raises:
            IndexError: If the listing has no path at that position (index
                out of range, negative, or an empty line).
        """
        # linecache is 1-based and returns '' for any line it cannot find.
        line = linecache.getline(self.txt, index + 1).strip('\n')
        if not line:
            # Raising IndexError (instead of letting Image.open('') fail)
            # lets plain iteration over the dataset terminate correctly.
            raise IndexError(
                'no image path at index {} in {!r}'.format(index, self.txt))

        img = Image.open(line)

        if not self.initial:
            img = img.convert("L")

        if self.transform is not None:
            img = self.transform(img)

        return img

    def __len__(self):
        """Return the number of lines in the listing file."""
        # Count lines while streaming rather than loading them all at once.
        with open(self.txt, 'r') as f:
            return sum(1 for _ in f)

In [121]:
# Generate the text file that lists every image path.
def generate_img_txt(root, num_classes=3, per_class=900, out_path='./images.txt'):
    """Write one image path per line for each class sub-folder under ``root``.

    Every line has the form ``<root><class>/images<NNNNNN>.jpg`` where
    ``<class>`` runs over ``0 .. num_classes - 1`` and ``<NNNNNN>`` is the
    1-based image number zero-padded to six digits.

    Args:
        root: Prefix placed verbatim in front of the class index; no path
            separator is inserted, so it should normally end with one.
        num_classes: Number of class sub-folders to enumerate.
        per_class: Number of images listed for each class.
        out_path: Destination text file; it is overwritten if it exists.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(out_path, 'w') as f:
        for i in range(num_classes):
            for j in range(1, per_class + 1):
                f.write('{}{}/images{:06d}.jpg\n'.format(root, i, j))

# Dimensionality reduction plus clustering.
def feature2label(features, dim_reduction='pca', n_clusters=5, random_state=None):
    """Project features to 2-D and assign a k-means cluster to every sample.

    Args:
        features: Feature vectors, one per sample, in any form accepted by
            scikit-learn's ``fit_transform`` / ``fit_predict``.
        dim_reduction: ``'tsne'`` selects t-SNE; any other value selects PCA.
        n_clusters: Number of k-means clusters (previously fixed at 5).
        random_state: Optional seed forwarded to the reducer and to k-means
            so results can be reproduced; ``None`` keeps the unseeded
            behaviour.

    Returns:
        A tuple ``(X, label)``: ``X`` is the 2-component projection and
        ``label`` is the cluster index of every sample.
    """
    if dim_reduction == 'tsne':
        reducer = TSNE(n_components=2, random_state=random_state)
    else:
        reducer = PCA(n_components=2, random_state=random_state)
    X = reducer.fit_transform(features)

    # Clustering runs on the original features, not on the 2-D projection X.
    # NOTE(review): presumably X is only meant for plotting - confirm.
    label = KMeans(n_clusters=n_clusters, random_state=random_state).fit_predict(features)
    return X, label

# Extract features with a pretrained model.
def pretrain(dataloader, device=None):
    """Run every batch of ``dataloader`` through a pretrained ResNet-18.

    The network's final ``fc`` layer is replaced by a ReLU, so the model
    outputs the activations that would otherwise feed the classifier.

    Args:
        dataloader: Iterable yielding image tensors accepted by ResNet-18.
        device: Device to run on. When omitted, a module-level ``device``
            is used if one exists; otherwise CUDA is used when available
            and the CPU if not.

    Returns:
        A list with one NumPy array per batch (size-1 dimensions squeezed).
    """
    if device is None:
        # Existing notebooks relied on a global ``device``; honour it when it
        # is defined, but do not fail with NameError on a fresh kernel.
        device = globals().get('device')
        if device is None:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): newer torchvision releases prefer ``weights=`` over
    # ``pretrained=True``; kept as-is for compatibility - confirm the version.
    pre_model = models.resnet18(pretrained=True).to(device)
    # Model surgery and eval mode are loop-invariant, so do them once.
    pre_model.fc = nn.ReLU()
    pre_model.eval()

    result = []
    with torch.no_grad():
        for img in dataloader:
            feature = pre_model(img.to(device)).cpu().numpy().squeeze()
            result.append(feature)
    return result

In [58]:
def pre_work():
    """Cluster the images listed in ``images.txt`` and return their labels.

    The images are loaded unconverted (``initial=True``) as tensors, passed
    through ``pretrain`` to obtain features, and handed to ``feature2label``;
    only the label part of its result is returned.
    """
    loader = DataLoader(
        Eval_Dataset('images.txt', transforms.ToTensor(), initial=True)
    )
    projection_and_labels = feature2label(pretrain(loader))
    # Element 0 is the 2-D projection, element 1 the cluster labels.
    return projection_and_labels[1]

In [125]:
# Write ./images.txt with the image paths located under the dataset folder.
generate_img_txt(root='./5gjq_snr_0_1_2_new/')

In [127]:
# Dataset over the paths listed in ./images.txt. initial=True skips the
# grayscale conversion in Eval_Dataset; ToTensor() is applied to each image.
init_dataset = Eval_Dataset('./images.txt',transforms.ToTensor(),initial=True)
# No batch_size / shuffle arguments are passed, so DataLoader defaults apply.
init_dataloader = DataLoader(init_dataset)

In [126]:
# Flatten everything the loader yields into one pixel vector per item.
temp = [
    np.reshape(batch.numpy(), (1, -1)).squeeze()
    for batch in init_dataloader
]

In [101]:
# Display the first flattened vector as a quick sanity check.
(temp[0])

array([0.45490196, 0.4509804 , 0.32941177, ..., 0.7137255 , 0.43529412,
       0.44705883], dtype=float32)

In [91]:
# Cluster the vectors in `temp` into 3 groups with k-means.
# NOTE(review): no random_state is passed, so labels may differ between runs.
label = KMeans(3).fit_predict(temp)

In [29]:
# Reference labels: 900 consecutive entries for each of the classes 0, 1, 2.
label_true = [class_id for class_id in range(3) for _ in range(900)]

In [24]:
# Open the HDF5 file read-only; later cells read its 'X' dataset through `f`.
# NOTE(review): this handle is never explicitly closed in the visible cells.
f = h5py.File('./5k0y_snr_0_1_2_new.h5','r')

In [25]:
# Flatten every entry of the 'X' dataset into a single vector.
temp = [np.reshape(sample, (1, -1)).squeeze() for sample in f['X']]

In [26]:
# Cluster the vectors in `temp` into 3 groups with k-means and show the labels.
# NOTE(review): no random_state is passed, so labels may differ between runs.
label = KMeans(3).fit_predict(temp)
label

array([1, 1, 1, ..., 2, 0, 0], dtype=int32)

In [27]:
# Print '1' if any sample was assigned cluster index 4.
# NOTE(review): looks like a leftover check from a run with more clusters -
# with KMeans(3) the labels should only be 0..2; confirm and remove.
if 4 in label:
    print('1')

In [30]:
# Score the k-means assignment against the reference labels. V-measure does
# not depend on how the cluster indices are numbered.
# (v_measure_score is imported in the notebook's top import cell.)
v_measure_score(label_true, label)

0.3382663931639901

In [31]:
# Split the sample indices by assigned cluster; anything that is neither
# cluster 0 nor cluster 1 goes into label_2.
n_samples = len(f['X'])
label_0 = [idx for idx in range(n_samples) if label[idx] == 0]
label_1 = [idx for idx in range(n_samples) if label[idx] == 1]
label_2 = [idx for idx in range(n_samples) if label[idx] not in (0, 1)]

In [32]:
# Save the per-cluster sample indices, one dataset per cluster id.
# The context manager closes the file even if one of the writes fails.
# `f` is rebound to the output file here, exactly as before.
with h5py.File('./5k0y_kmeans_label.h5', 'w') as f:
    f.create_dataset('0', data=label_0)
    f.create_dataset('1', data=label_1)
    f.create_dataset('2', data=label_2)