# 计算测试集图像语义特征

抽取 PyTorch 训练得到的图像分类模型中间层的输出特征，作为输入图像的语义特征。

计算测试集所有图像的语义特征，使用t-SNE和UMAP两种降维方法降维至二维和三维，可视化。

分析不同类别的语义距离、异常数据、细粒度分类、高维数据结构。

同济子豪兄：https://space.bilibili.com/1900783

[代码运行云GPU环境](https://featurize.cn/?s=d7ce99f842414bfcaea5662a97581bd1)：GPU RTX 3060、CUDA v11.2

## 导入工具包

In [1]:
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch

import cv2
from PIL import Image

# Silence all Python warnings so they do not clutter the notebook output
import warnings
warnings.filterwarnings("ignore")

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print('device', device)

device cuda:0


## 图像预处理

In [2]:
from torchvision import transforms

# # 训练集图像预处理：缩放裁剪、图像增强、转 Tensor、归一化
# train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
#                                       transforms.RandomHorizontalFlip(),
#                                       transforms.ToTensor(),
#                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#                                      ])

# Test-set preprocessing (RCTN): Resize -> CenterCrop -> ToTensor -> Normalize.
# The image is resized to 256, center-cropped to 224, converted to a tensor,
# and normalized per channel with the mean/std values given below.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

## 导入训练好的模型

In [3]:
# Load the whole pickled model object (not just a state_dict) from disk.
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU machine still loads when only the CPU is available.
# NOTE: torch.load unpickles arbitrary Python objects; only load checkpoint
# files that come from a trusted source.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model pickles; pass weights_only=False there if needed.
model = torch.load('best-0.839.pth', map_location=device)
# Switch to evaluation mode and make sure the model lives on `device`
model = model.eval().to(device)

## 抽取模型中间层输出结果作为语义特征

In [4]:
from torchvision.models.feature_extraction import create_feature_extractor

In [5]:
# Wrap the classifier so that a forward pass returns the output of its
# 'avgpool' node, exposed under the key 'semantic_feature' in the result dict.
model_trunc = create_feature_extractor(
    model,
    return_nodes=dict(avgpool='semantic_feature'),
)

## 计算单张图像的语义特征

In [6]:
img_path = 'D:/dataset/c100sp/val/山/3.jpg'
# Force 3 channels: grayscale or RGBA files would otherwise not match the
# 3-channel mean/std used by Normalize in test_transform.
img_pil = Image.open(img_path).convert('RGB')
input_img = test_transform(img_pil)  # preprocessing
input_img = input_img.unsqueeze(0).to(device)  # add a batch dimension, move to device
# Inference only: disable autograd so no computation graph is built.
with torch.no_grad():
    # Forward pass through the truncated model. Despite its name, pred_logits
    # is the dict returned by the feature extractor; the avgpool output is
    # stored under the key 'semantic_feature'.
    pred_logits = model_trunc(input_img)

In [7]:
# Show the shape of the extracted feature after dropping size-1 dimensions
# and converting it to a NumPy array on the CPU
pred_logits['semantic_feature'].squeeze().detach().cpu().numpy().shape

(512,)

In [19]:
# pred_logits['semantic_feature'].squeeze().detach().cpu().numpy()

## 载入测试集图像分类结果

In [8]:
# Load the per-image test-set prediction table; its '图像路径' column holds the
# image file paths used for feature extraction.
# NOTE(review): the 'Unnamed: 0' column in the preview suggests the CSV was
# written together with its index - confirm before relying on column positions.
df = pd.read_csv('测试集预测结果.csv')

In [9]:
# Preview the first rows of the prediction table
df.head()

Unnamed: 0,图像路径,标注类别ID,标注类别名称,top-1-预测ID,top-1-预测名称,top-2-预测ID,top-2-预测名称,top-3-预测ID,top-3-预测名称,top-n预测正确,...,马路-预测置信度,骆驼-预测置信度,鲨鱼-预测置信度,鲸鱼-预测置信度,鳄鱼-预测置信度,鳐-预测置信度,鳟鱼-预测置信度,黑猩猩-预测置信度,鼩鼱-预测置信度,龙虾-预测置信度
0,D:\dataset\c100sp\val\云\1.jpg,0,云,0,云,7,大海,14,平原,True,...,6e-05,4e-05,1.181372e-07,4e-06,2.977387e-09,6.304667e-09,3.189405e-09,4.727857e-11,2.193562e-09,2.824035e-09
1,D:\dataset\c100sp\val\云\104.jpg,0,云,14,平原,0,云,90,马路,True,...,0.069697,0.004808,0.0001293536,0.001736,8.536536e-05,4.171297e-05,6.39651e-05,3.752041e-06,4.593624e-05,7.94698e-06
2,D:\dataset\c100sp\val\云\114.jpg,0,云,0,云,14,平原,31,棕榈树,True,...,0.022053,0.000182,1.216493e-05,3.6e-05,2.341005e-06,2.928424e-07,2.839105e-06,2.938487e-09,1.435803e-07,2.018639e-07
3,D:\dataset\c100sp\val\云\117.jpg,0,云,12,山,32,森林,53,狼,False,...,0.000292,1e-05,0.0005225038,0.000428,8.715899e-06,5.395877e-05,0.0003008884,2.87189e-07,1.960987e-05,1.221918e-07
4,D:\dataset\c100sp\val\云\119.jpg,0,云,0,云,7,大海,31,棕榈树,True,...,6.8e-05,1e-05,7.296929e-08,4.4e-05,1.468976e-08,1.890725e-07,1.472791e-08,2.531981e-10,1.882827e-08,3.410407e-08


## 计算测试集每张图像的语义特征

In [10]:
# Collect one avgpool feature vector per test image, in the row order of df
encoding_array = []
img_path_list = []

# Inference only: disabling autograd avoids building a computation graph for
# every image, which saves memory and time over the whole test set.
with torch.no_grad():
    for img_path in tqdm(df['图像路径']):
        img_path_list.append(img_path)
        # Force 3 channels so every image matches the 3-channel Normalize
        img_pil = Image.open(img_path).convert('RGB')
        input_img = test_transform(img_pil).unsqueeze(0).to(device)  # preprocess, add batch dim
        # Forward pass; take the avgpool output stored under 'semantic_feature'
        feature = model_trunc(input_img)['semantic_feature']
        # Drop the size-1 dimensions and move the vector to host memory
        # (no .detach() needed: tensors created under no_grad carry no graph)
        encoding_array.append(feature.squeeze().cpu().numpy())
# Stack the per-image vectors into one 2-D array (one row per image)
encoding_array = np.array(encoding_array)

100%|██████████| 9978/9978 [02:36<00:00, 63.88it/s]


In [11]:
# Check the shape of the stacked feature array (one row per image)
encoding_array.shape

(9978, 512)

## 保存为本地的.npy文件

In [12]:
# Save the feature array to a local .npy file
np.save('测试集语义特征.npy', encoding_array)