# 计算测试集图像语义特征

抽取 PyTorch 训练得到的图像分类模型中间层的输出特征，作为输入图像的语义特征。

计算测试集所有图像的语义特征，使用 t-SNE 和 UMAP 两种降维方法将其降至二维和三维，并进行可视化。

分析不同类别的语义距离、异常数据、细粒度分类、高维数据结构。


## 导入工具包

In [1]:
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch

import cv2
from PIL import Image

# 忽略烦人的红色提示
import warnings
warnings.filterwarnings("ignore")

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device', device)

device cuda:0


## 图像预处理

In [2]:
from torchvision import transforms

# # 训练集图像预处理：缩放裁剪、图像增强、转 Tensor、归一化
# train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
#                                       transforms.RandomHorizontalFlip(),
#                                       transforms.ToTensor(),
#                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#                                      ])

# Test-set preprocessing ("RCTN"): Resize -> CenterCrop -> ToTensor -> Normalize.
# The per-channel mean/std are presumably the standard ImageNet statistics
# used when the classifier was trained -- confirm against the training notebook.
test_transform = transforms.Compose([
    transforms.Resize(256),        # resize to 256 before cropping
    transforms.CenterCrop(224),    # keep the central 224x224 region
    transforms.ToTensor(),         # PIL image -> tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

## 导入训练好的模型

In [3]:
# Load the trained classifier that was saved as a whole pickled model object.
# map_location=device remaps the stored tensors onto the device selected earlier,
# so a checkpoint saved on a GPU machine still loads when only the CPU is available.
# SECURITY: torch.load unpickles arbitrary Python objects -- only load checkpoints
# you trust.
# NOTE(review): newer PyTorch releases default to weights_only=True, which rejects
# whole-model pickles; pass weights_only=False there if loading fails -- confirm
# against the installed PyTorch version.
model = torch.load('Particle Figures_pytorch_C1.pth', map_location=device)
# eval() switches layers such as BatchNorm to inference behaviour.
model = model.eval().to(device)

## 抽取模型中间层输出结果作为语义特征

In [4]:
from torchvision.models.feature_extraction import create_feature_extractor

In [5]:
# Display the model's layer structure so the name of the layer to tap for features can be looked up.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
# Wrap the trained network so that a forward pass returns a dict whose
# 'semantic_feature' entry holds the output of the 'avgpool' node.
model_trunc = create_feature_extractor(
    model,
    return_nodes={'avgpool': 'semantic_feature'},
)

## 计算单张图像的语义特征

In [10]:
# Compute the semantic feature of one validation image as a sanity check.
img_path = 'Particle Figures_split/val/Subrounded/Subrounded (118).jpg'
# Force 3-channel RGB: test_transform normalizes with 3-channel mean/std, so a
# grayscale or RGBA file would otherwise fail. The context manager closes the
# file handle; convert() returns an independent in-memory image.
with Image.open(img_path) as img_file:
    img_pil = img_file.convert('RGB')
input_img = test_transform(img_pil)  # preprocess into a normalized tensor
input_img = input_img.unsqueeze(0).to(device)  # add the batch dimension and move to the device
# Forward pass through the truncated model. Inference only, so autograd is
# disabled to avoid building a computation graph.
# The result is a dict; the feature is stored under 'semantic_feature'.
with torch.no_grad():
    pred_logits = model_trunc(input_img)

In [11]:
# Shape of the extracted feature after dropping singleton dimensions and converting to a NumPy array.
pred_logits['semantic_feature'].squeeze().detach().cpu().numpy().shape

(512,)

In [12]:
# Inspect the values of the 512-dimensional feature vector.
pred_logits['semantic_feature'].squeeze().detach().cpu().numpy()

array([3.57622802e-01, 2.74432158e+00, 4.54366624e-01, 3.13249946e-01,
       9.51173246e-01, 1.93364418e+00, 5.63114166e-01, 1.25701833e+00,
       1.69599224e-02, 7.80059472e-02, 7.16230035e-01, 1.56417751e+00,
       2.15641665e+00, 3.20910499e-03, 2.95293522e+00, 2.87022412e-01,
       2.70752096e+00, 2.04778123e+00, 9.03674185e-01, 9.58989412e-02,
       5.83214343e-01, 2.62369633e-01, 4.29129273e-01, 5.19005954e-01,
       7.05936670e-01, 1.49380192e-02, 1.42471874e+00, 3.71838689e-01,
       9.17355716e-01, 1.18289679e-01, 1.34165871e+00, 2.54763573e-01,
       5.40444613e-01, 4.50425774e-01, 9.50839758e-01, 3.81728113e-01,
       0.00000000e+00, 3.13819671e+00, 8.45684856e-02, 1.23655641e+00,
       9.80314910e-01, 6.35334134e-01, 6.64814472e-01, 5.87586701e-01,
       6.59596264e-01, 2.05589056e-01, 1.95308760e-01, 1.78505585e-01,
       2.19146776e+00, 1.34618962e+00, 1.02341783e+00, 1.11081815e+00,
       7.30819702e-01, 1.86079895e+00, 9.73689258e-01, 2.49367118e+00,
      

## 载入测试集图像分类结果

In [15]:
# Load the per-image test-set prediction table; its 'Image path' column drives the feature loop.
# NOTE(review): the 'Unnamed: 0' column in the preview suggests the CSV was written with its
# index; consider index_col=0 if no downstream code relies on that column -- confirm.
df = pd.read_csv('测试集预测结果.csv')

In [16]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,Image path,Labeling categories ID,Labeling categories name,top-1-Predictions ID,top-1-Predictions name,top-2-Predictions ID,top-2-Predictions name,top-3-Predictions ID,top-3-Predictions name,top-n Predictions correction,Angular-Predictions Confidence Level,Rounded-Predictions Confidence Level,Subangular-Predictions Confidence Level,Subrounded-Predictions Confidence Level,Very angular-Predictions Confidence Level,Well rounded-Predictions Confidence Level
0,Particle Figures_split/val/Angular/Angular (10...,0,Angular,0,Angular,4,Very angular,2,Subangular,True,0.574018,0.003499,0.111645,0.016418,0.29403,0.00039
1,Particle Figures_split/val/Angular/Angular (10...,0,Angular,3,Subrounded,2,Subangular,0,Angular,True,0.020437,0.010734,0.284462,0.672124,0.012037,0.000206
2,Particle Figures_split/val/Angular/Angular (11...,0,Angular,0,Angular,2,Subangular,4,Very angular,True,0.510571,0.087094,0.166346,0.10469,0.122172,0.009128
3,Particle Figures_split/val/Angular/Angular (13...,0,Angular,0,Angular,2,Subangular,4,Very angular,True,0.425345,0.004741,0.424306,0.027567,0.117619,0.000422
4,Particle Figures_split/val/Angular/Angular (13...,0,Angular,0,Angular,2,Subangular,4,Very angular,True,0.682034,0.010761,0.177304,0.050383,0.074711,0.004807


## 计算测试集每张图像的语义特征

In [17]:
# Compute the semantic feature of every image listed in the prediction table.
encoding_array = []   # one feature vector per image, stacked into an array at the end
img_path_list = []    # image paths, in the same order as the rows of encoding_array

# Inference only: disabling autograd avoids building a computation graph for
# each image, which saves memory and time without changing the feature values.
with torch.no_grad():
    for img_path in tqdm(df['Image path']):
        img_path_list.append(img_path)
        # Close each file promptly; convert('RGB') returns an in-memory copy with
        # the 3 channels that test_transform's Normalize step expects.
        with Image.open(img_path) as img_file:
            img_pil = img_file.convert('RGB')
        input_img = test_transform(img_pil).unsqueeze(0).to(device)  # preprocess and add batch dim
        # Forward pass; take the 'semantic_feature' (avgpool) output as a flat NumPy vector.
        feature = model_trunc(input_img)['semantic_feature'].squeeze().detach().cpu().numpy()
        encoding_array.append(feature)
# Stack the per-image vectors into a single array with one row per image.
encoding_array = np.array(encoding_array)

100%|██████████| 248/248 [00:05<00:00, 45.02it/s]


In [18]:
# Check the shape of the stacked feature array (one row per image).
encoding_array.shape

(248, 512)

## 保存为本地的.npy文件

In [19]:
# Persist the feature array to a local .npy file so it can be reloaded later.
np.save(file='测试集语义特征.npy', arr=encoding_array)