# In [None]:
## Extract images and labels

import pandas as pd
import os
import numpy as np
import cv2
from PIL import Image

image_size = 224          # side length (pixels) every image is resized to
path = './Animals_with_Attributes2/'   # root directory the dataset files are read from

# Two lookup tables for the label information, built from classes.txt:
# row index -> class name, and class name -> row index.
classname = pd.read_csv(path+'classes.txt',header=None,sep = '\t')
dic_class2name = dict(zip(classname.index, classname[1]))
dic_name2class = {name: idx for idx, name in dic_class2name.items()}

def load_Img(imgDir,read_num = 'max'):
    """Load the images of one class directory into a single float32 array.

    Files are read in sorted filename order. Each image is converted to
    3-channel RGB, zero-padded on its shorter axis until it is square, and
    resized to ``image_size`` x ``image_size``.

    Args:
        imgDir: Directory holding the image files of one class.
        read_num: ``'max'`` to read every file, otherwise the maximum number
            of files to read. Values larger than the number of files in the
            directory are clamped to that number.

    Returns:
        A tuple ``(data, imgNum)``: ``data`` is a float32 array of shape
        ``(imgNum, image_size, image_size, 3)`` holding unscaled pixel
        values, and ``imgNum`` is the number of images actually loaded.
    """
    imgs = sorted(os.listdir(imgDir))
    if read_num == 'max':
        imgNum = len(imgs)
    else:
        # Clamp so a request for more images than exist cannot index past
        # the end of the directory listing.
        imgNum = min(int(read_num), len(imgs))
    data = np.empty((imgNum,image_size,image_size,3),dtype="float32")
    print(imgDir, ' : ', imgNum)
    for i, img_name in enumerate(imgs[:imgNum]):
        # The context manager closes the file handle; convert('RGB') makes
        # grayscale, palette, RGBA and CMYK files all come out as 3 channels.
        with Image.open(imgDir+"/"+img_name) as img:
            arr = np.asarray(img.convert('RGB'),dtype="float32")

        # Pad the shorter axis with zeros so the image is exactly square.
        # The remainder of an odd difference goes to the second side.
        height, width = arr.shape[:2]
        diff = abs(width - height)
        pad_before = diff // 2
        pad_after = diff - pad_before
        if width > height:
            arr = cv2.copyMakeBorder(arr,pad_before,pad_after,0,0,cv2.BORDER_CONSTANT,value=0)
        else:
            arr = cv2.copyMakeBorder(arr,0,0,pad_before,pad_after,cv2.BORDER_CONSTANT,value=0)

        data[i] = cv2.resize(arr,(image_size,image_size))
    return data,imgNum

def load_data(all_classes,num):
    """Load every class and split each one into train and test parts.

    For each class name in the first column of ``all_classes`` the images
    under ``path + 'JPEGImages/' + name`` are loaded with ``load_Img``. The
    first 60% of a class's images (in the order ``load_Img`` returns them)
    go to the training set and the remainder to the test set.

    Args:
        all_classes: DataFrame whose first column holds the class
            (directory) names to load.
        num: Forwarded to ``load_Img`` as ``read_num``: ``'max'`` or the
            number of images to read per class.

    Returns:
        A tuple ``(traindata, trainlabel, testdata, testlabel)``. The data
        arrays stack the per-class image arrays along axis 0; the label
        arrays hold the ``dic_name2class`` id of each image's class.
    """
    traindata_list = []
    trainlabel_list = []
    testdata_list = []
    testlabel_list = []

    for item in all_classes.iloc[:,0].values.tolist():
        class_data, class_count = load_Img(path+'JPEGImages/'+item,read_num=num)
        class_id = dic_name2class[item]

        # 60/40 split by position within the class.
        trainNum = int(class_count * 0.6)
        testNum = class_count - trainNum

        traindata_list.append(class_data[:trainNum])
        trainlabel_list += [class_id]*trainNum

        testdata_list.append(class_data[trainNum:])
        testlabel_list += [class_id]*testNum

    # np.vstack replaces the deprecated alias np.row_stack; for these 4-D
    # arrays both concatenate along axis 0.
    np1 = np.vstack(traindata_list)
    np2 = np.array(trainlabel_list)
    np3 = np.vstack(testdata_list)
    np4 = np.array(testlabel_list)

    return np1,np2,np3,np4

# List of class names to load, read without a header row.
all_classes = pd.read_csv(path+'allclasses.txt',header=None)

traindata,trainlabel,testdata,testlabel = load_data(all_classes,num='max')

print(*(split.shape for split in (traindata,trainlabel,testdata,testlabel)))

# Save the images and labels as NumPy arrays so they can be loaded directly next time.
for out_name, out_array in (
    ('AWA2_224_traindata.npy', traindata),
    ('AWA2_224_testdata.npy', testdata),
    ('AWA2_trainlabel.npy', trainlabel),
    ('AWA2_testlabel.npy', testlabel),
):
    np.save(path+out_name, out_array)


# In [None]:
# Extract features with resnet50 and save them

import torch
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.nn as nn

class AwADataset(Dataset):
    """In-memory dataset of images scaled to [0, 1] with integer labels.

    Args:
        data: Array-like of pixel values in the 0-255 range with shape
            ``(N, 3, H, W)`` (channels first).
        label: Array-like of ``N`` integer class ids.
        is_normalize: When true, additionally normalise each channel with
            the ImageNet mean and standard deviation.

    Each item is a ``(image, label)`` pair where ``image`` is a float32
    tensor of shape ``(3, H, W)`` and ``label`` is an int8 tensor of shape
    ``(1,)``.
    """

    def __init__(self, data, label, is_normalize=True):
        self.data = torch.tensor(data, dtype=torch.float32) / 255.0
        # int8 is kept for compatibility with previously saved labels; it
        # can only represent class ids up to 127.
        self.label = torch.tensor(label, dtype=torch.int8).unsqueeze(-1)

        if is_normalize:
            # Per-channel (x - mean) / std, the same operation torchvision's
            # Normalize applies, done in place on the tensor created above
            # to avoid another full-size copy. The result must land in
            # self.data, which is what __getitem__ serves.
            mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
            std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
            self.data.sub_(mean).div_(std)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        return self.data[index], self.label[index]

# Load the saved arrays and move the channel axis forward
# (N, H, W, C) -> (N, C, H, W), the layout the convolutional model consumes.
traindata = np.load('./Animals_with_Attributes2/AWA2_224_traindata.npy')
traindata = traindata.transpose(0,3,1,2)
print(traindata.shape)
trainlabel = np.load('./Animals_with_Attributes2/AWA2_trainlabel.npy')
testdata = np.load('./Animals_with_Attributes2/AWA2_224_testdata.npy')
testdata = testdata.transpose(0,3,1,2)
print(testdata.shape)
testlabel = np.load('./Animals_with_Attributes2/AWA2_testlabel.npy')

# NOTE(review): is_normalize=False means the images are only scaled to
# [0, 1]; pretrained ImageNet weights are normally fed mean/std-normalised
# inputs. Confirm that skipping the normalisation is intended.
train_dataset = AwADataset(traindata, trainlabel, is_normalize=False)
train_dataLoader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False)

test_dataset = AwADataset(testdata, testlabel, is_normalize=False)
test_dataLoader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

# Sanity check that the dataset loads correctly
# data_iter = iter(train_dataLoader)
# for j in range(10):
#     for i in range(1000):
#         data_iter.__next__()
#     data, label = data_iter.__next__()
#     print('label: ', dic_class2name[label[0][0].item()])
#     print(data[0].shape)
#     display(transforms.ToPILImage()(data[0]))


# Fall back to the CPU when no CUDA device is present instead of crashing.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(device)

resnet.fc = nn.Identity() # drop the classifier so the model outputs the 2048-d image feature
# Inference mode: without this, BatchNorm normalises with the statistics of
# each (single-image) batch and keeps updating its running averages.
resnet.eval()
# print(resnet)


def _extract_features(dataLoader):
    """Run every batch of ``dataLoader`` through ``resnet``.

    Returns two lists with one entry per sample: the feature vector as a
    NumPy array, and the label as a NumPy array.
    """
    features = []
    labels = []
    with torch.no_grad():
        for data, label in dataLoader:
            output = resnet(data.to(device))
            # extend() adds one row per sample, so any batch size works.
            features.extend(output.to('cpu').numpy())
            labels.extend(label.numpy())
    return features, labels


train_features, train_labels = _extract_features(train_dataLoader)
count = len(train_features)
print('train count : ', count)

test_features, test_labels = _extract_features(test_dataLoader)
count = len(test_features)
print('test count : ', count)

train_features = np.array(train_features)
train_labels = np.array(train_labels)
print(train_features.shape)
print(train_labels.shape)
np.save('./train_features.npy', train_features)
np.save('./train_labels.npy', train_labels)

test_features = np.array(test_features)
test_labels = np.array(test_labels)
print(test_features.shape)
print(test_labels.shape)
np.save('./test_features.npy', test_features)
np.save('./test_labels.npy', test_labels)