# 计算测试集图像语义特征

## 导入工具包

In [1]:
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch

import cv2
from PIL import Image

# Silence library warnings so they do not clutter the cell outputs.
import warnings
warnings.filterwarnings("ignore")

# Use the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device', device)

device cuda:0


## 图像预处理

In [2]:
from torchvision import transforms

# # 训练集图像预处理：缩放裁剪、图像增强、转 Tensor、归一化
# train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
#                                       transforms.RandomHorizontalFlip(),
#                                       transforms.ToTensor(),
#                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#                                      ])

# Test-set preprocessing pipeline, applied in order:
# resize -> center crop -> convert to tensor -> normalize.
test_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Positional arguments are (mean, std), one value per colour channel.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

## 导入训练好的模型

In [4]:
# Load the trained model object stored in the checkpoint file.
# map_location=device remaps the stored tensors onto the device selected earlier,
# so a checkpoint written on a GPU machine can still be opened when only the CPU
# is available (without it, torch.load tries to restore onto the original device).
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which rejects
# fully pickled model objects like this one. If that error appears, pass
# weights_only=False here. Confirm against the installed PyTorch version.
model = torch.load('checkpoint/animal_pytorch_C1.pth', map_location=device)
# Switch to evaluation mode and make sure the model lives on the chosen device.
model = model.eval().to(device)

## 抽取模型中间层输出结果作为语义特征

In [5]:
from torchvision.models.feature_extraction import create_feature_extractor

In [6]:
# Wrap the model so that a forward pass returns a dict whose 'semantic_feature'
# entry is the output of the node named 'avgpool'.
model_trunc = create_feature_extractor(
    model=model,
    return_nodes={'avgpool': 'semantic_feature'},
)

## 计算单张图像的语义特征

In [8]:
img_path = 'dataset_split/val/乌龟/106.jpg'
# Convert to 3-channel RGB, as the batch loop later in this notebook does, so that
# grayscale / RGBA / palette images match the 3-channel normalization.
# The context manager closes the underlying file handle once the pixels are copied.
with Image.open(img_path) as img_file:
    img_pil = img_file.convert('RGB')
# Preprocess, add a leading batch dimension, and move to the compute device.
input_img = test_transform(img_pil).unsqueeze(0).to(device)
# Inference only: skip autograd bookkeeping during the forward pass.
# The result is a dict; the requested intermediate output is under 'semantic_feature'.
with torch.no_grad():
    pred_logits = model_trunc(input_img)

In [9]:
# Shape of the extracted feature after dropping all size-1 dimensions.
tuple(pred_logits['semantic_feature'].squeeze().shape)

(512,)

In [10]:
# pred_logits['semantic_feature'].squeeze().detach().cpu().numpy()

## 载入测试集图像分类结果

In [11]:
# Load the per-image prediction table; its '图像路径' column supplies the image
# paths iterated over when computing features below.
df = pd.read_csv(filepath_or_buffer='测试集预测结果.csv')

In [12]:
# Preview the first five rows of the prediction table.
df.head(n=5)

Unnamed: 0,图像路径,标注类别ID,标注类别名称,top-1-预测ID,top-1-预测名称,top-2-预测ID,top-2-预测名称,top-3-预测ID,top-3-预测名称,top-n预测正确,乌龟-预测置信度,仓鼠-预测置信度,兔子-预测置信度,狗-预测置信度,猫-预测置信度,金鱼-预测置信度,鹦鹉-预测置信度
0,dataset_split/val/乌龟/0.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,3.0,狗,1.0,0.99979,1.3e-05,5.688636e-07,1.9e-05,1.5e-05,0.000151,1.2e-05
1,dataset_split/val/乌龟/106.jpg,0,乌龟,0.0,乌龟,6.0,鹦鹉,5.0,金鱼,1.0,0.970566,0.000354,0.0009084896,0.000648,0.001596,0.005402,0.020526
2,dataset_split/val/乌龟/115.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,1.0,仓鼠,1.0,0.963122,0.004735,0.000522849,0.002156,0.000909,0.027109,0.001446
3,dataset_split/val/乌龟/123.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,6.0,鹦鹉,1.0,0.991517,0.000895,0.0002685588,0.000279,8.8e-05,0.003942,0.00301
4,dataset_split/val/乌龟/164.jpg,0,乌龟,0.0,乌龟,5.0,金鱼,3.0,狗,1.0,0.999598,1e-05,6.147398e-05,8.8e-05,2.2e-05,0.000191,3e-05


## 计算测试集每张图像的语义特征

In [13]:
encoding_array = []   # one feature vector per image, in the order of the table
img_path_list = []    # image paths, in the same order as encoding_array

# Inference only: disabling autograd avoids building a computation graph for
# every forward pass, which saves memory and time across the whole test set.
with torch.no_grad():
    for img_path in tqdm(df['图像路径']):
        img_path_list.append(img_path)
        # Convert to 3-channel RGB; the context manager closes each file handle
        # promptly instead of leaving it to the garbage collector.
        with Image.open(img_path) as img_file:
            img_pil = img_file.convert('RGB')
        # Preprocess, add a batch dimension, and move to the compute device.
        input_img = test_transform(img_pil).unsqueeze(0).to(device)
        # Forward pass; take the 'semantic_feature' output, drop size-1 dimensions,
        # and bring it back to host memory as a NumPy array.
        feature = model_trunc(input_img)['semantic_feature'].squeeze().cpu().numpy()
        encoding_array.append(feature)
# Stack the per-image vectors into a single array (one row per image).
encoding_array = np.array(encoding_array)

100%|██████████| 576/576 [00:08<00:00, 67.37it/s]


In [14]:
# Dimensions of the stacked feature array.
np.shape(encoding_array)

(576, 512)

## 保存为本地的.npy文件

In [15]:
# Persist the feature array to a local .npy file.
np.save(file='测试集语义特征.npy', arr=encoding_array)