# 图像相似度比较

## 载入套件

In [1]:
import torch
from torchvision import models
from torch import nn
from torchsummary import summary
import numpy as np

## 检查 GPU

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
"cuda" if torch.cuda.is_available() else "cpu"

'cuda'

## 载入VGG 16 模型

In [11]:
model = models.vgg16(pretrained=True)
model._modules

OrderedDict([('features',
              Sequential(
                (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (1): ReLU(inplace=True)
                (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (3): ReLU(inplace=True)
                (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
                (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (6): ReLU(inplace=True)
                (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (8): ReLU(inplace=True)
                (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
                (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (11): ReLU(inplace=True)
                (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (13): ReL

## 移除 avgpool 后面的神经层

In [12]:
class new_model(nn.Module):
    def __init__(self, pretrained, output_layer):
        super().__init__()
        self.output_layer = output_layer
        self.pretrained = pretrained
        self.children_list = []
        # 依序取得每一层
        for n,c in self.pretrained.named_children():
            self.children_list.append(c)
            # 找到特定层即终止
            if n == self.output_layer:
                print('found !!')
                break

        # 建构新模型
        self.net = nn.Sequential(*self.children_list)
        self.pretrained = None
        
    def forward(self,x):
        x = self.net(x)
        return x
    
model = new_model(model, 'avgpool')
model = model.to(device)   
model._modules

found !!


OrderedDict([('pretrained', None),
             ('net', Sequential(
                (0): Sequential(
                  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                  (1): ReLU(inplace=True)
                  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                  (3): ReLU(inplace=True)
                  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
                  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                  (6): ReLU(inplace=True)
                  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                  (8): ReLU(inplace=True)
                  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
                  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                  (11): ReLU(inplace=True)
                  (12): Conv2d(256, 256, k

In [29]:
# 任选一张图片，例如老虎侧面照，取得图档的特征向量
from PIL import Image
from torchvision import transforms

filename = './images_test/tiger2.jpg'
input_image = Image.open(filename)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])
])
input_tensor = transform(input_image)
input_batch = input_tensor.unsqueeze(0).to(device) # 增加一维(笔数)

# 预测
model.eval()
with torch.no_grad():
    output = model(input_batch)
output    

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.2543, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [2.1993, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.2719],
          [1.5349, 0.0000, 0.0000,  ..., 0.0000, 1.7577, 5.2424],
          [0.0000, 0.0000, 0.0000,  ..., 0.4238, 2.0388, 5.9582]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.6102, 1.0531],
          [0.7704, 0.0000, 0.0000,  ..., 0.0000, 2.1455, 2.3483],
          [2.3654, 0.4831, 0.0000,  ..., 0.0000, 0.0000, 1.4597],
          ...,
          [0.0000, 2.7323, 5.3333,  ..., 1.9977, 2.2498, 1.4196],
          [0.0000, 3.2158, 3.4539,  ..., 0.5091, 1.0910, 0.5525],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 2.0101,  ..., 0

In [30]:
print(output.shape)

torch.Size([1, 512, 7, 7])


# 使用 cosine_similarity 比较特征向量

### 步骤 1. 取得 images_test 目录下所有 .jpg 档案名称

In [31]:
from os import listdir
from os.path import isfile, join

# 取得 images_test 目录下所有 .jpg 档案名称
img_path = './images_test/'
image_files = np.array([f for f in listdir(img_path) 
        if isfile(join(img_path, f)) and f[-3:] == 'jpg'])
image_files

array(['astronaut.jpg', 'bird.jpg', 'bird2.jpg', 'cat.jpg', 'daisy1.jpg',
       'daisy2.jpg', 'deer.jpg', 'elephant.jpg', 'elephant2.jpg',
       'lion1.jpg', 'lion2.jpg', 'panda1.jpg', 'panda2.jpg', 'panda3.jpg',
       'rose2.jpg', 'tiger1.jpg', 'tiger2.jpg', 'tiger3.jpg'],
      dtype='<U13')

### 步骤 2. 取得 images_test 目录下所有 .jpg 档案的像素

In [34]:
import os

# 合并所有图档
model.eval()
X = torch.tensor([])
for filename in image_files:
    input_image = Image.open(os.path.join(img_path, filename))
    input_tensor = transform(input_image)
    input_batch = input_tensor.unsqueeze(0).to(device) # 增加一维(笔数)
    if len(X.shape) == 1:
        # print(input_batch.shape)
        X = input_batch
    else:
        # print(input_batch.shape)
        X = torch.cat((X, input_batch), dim=0)

### 步骤 3. 取得所有图档的特征向量

In [38]:
# 预测所有图档
with torch.no_grad():
    features = model(X)
features.shape

torch.Size([18, 512, 7, 7])

### 步骤 4. 使用 cosine_similarity 函数比较特征向量

In [40]:
from sklearn.metrics.pairwise import cosine_similarity

# 比较 Tiger2.jpg 与其他图档特征向量
no=-2
print(image_files[no])

# 转为二维向量，类似扁平层(Flatten)
features2 = features.cpu().reshape((features.shape[0], -1))

# 排除 Tiger2.jpg 的其他图档特征向量
other_features = np.concatenate((features2[:no], features2[no+1:]))

# 使用 cosine_similarity 计算 Cosine 函数
similar_list = cosine_similarity(features2[no:no+1], other_features, 
                                 dense_output=False)

# 显示相似度，由大排到小
print(np.sort(similar_list[0])[::-1])

# 依相似度，由大排到小，显示档名
image_files2 = np.delete(image_files, no)
image_files2[np.argsort(similar_list[0])[::-1]]

tiger2.jpg
[0.28911456 0.2833875  0.23362085 0.18441461 0.17196876 0.16713579
 0.14983664 0.12871663 0.11995038 0.11563288 0.10740422 0.09983709
 0.09405126 0.08491081 0.08096127 0.06599604 0.04436902]


array(['tiger1.jpg', 'tiger3.jpg', 'lion1.jpg', 'lion2.jpg',
       'elephant2.jpg', 'cat.jpg', 'elephant.jpg', 'panda1.jpg',
       'bird2.jpg', 'panda3.jpg', 'bird.jpg', 'panda2.jpg', 'deer.jpg',
       'daisy2.jpg', 'rose2.jpg', 'astronaut.jpg', 'daisy1.jpg'],
      dtype='<U13')

### 其他图档比较

In [41]:
# 比较对象：bird.jpg
no=1
print(image_files[no])


# 使用 cosine_similarity 计算 Cosine 函数
other_features = np.concatenate((features2[:no], features2[no+1:]))
similar_list = cosine_similarity(features2[no:no+1], other_features, 
                                 dense_output=False)

# 显示相似度，由大排到小
print(np.sort(similar_list[0])[::-1])

# 依相似度，由大排到小，显示档名
image_files2 = np.delete(image_files, no)
image_files2[np.argsort(similar_list[0])[::-1]]

bird.jpg
[0.21015988 0.17159882 0.14975291 0.1455198  0.1438895  0.10740422
 0.09881952 0.09107401 0.08833732 0.08464108 0.07035439 0.06275161
 0.06064527 0.05807994 0.0558636  0.05117093 0.04122205]


array(['panda3.jpg', 'bird2.jpg', 'deer.jpg', 'cat.jpg', 'panda1.jpg',
       'tiger2.jpg', 'lion2.jpg', 'tiger1.jpg', 'rose2.jpg',
       'elephant2.jpg', 'tiger3.jpg', 'panda2.jpg', 'lion1.jpg',
       'astronaut.jpg', 'daisy2.jpg', 'elephant.jpg', 'daisy1.jpg'],
      dtype='<U13')