In [2]:
'''
    采用部分模型,只提取特征,不做辨识。如根据图片找到相似的图文件

    步骤:
    上传要查找的图片 --> 加载VGG16部分模型 --> 前置处理  --> 模型预测提取特征 --> 相似度比较  --> 找出相似的模型
'''
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

# 1.加载模型,模型只包含卷积层(不包含最后的三个辨识层
model = VGG16(weights='imagenet', include_top=False)
model.summary()

# 2.提取特征
img_path = './img/elephant1.png'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

## 取得图片的特征向量
features = model.predict(x)
print(features[0])

# 3.相似度比较: 使用cosine similarity比较特征向量


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5: None -- [WinError 10054] 远程主机强迫关闭了一个现有的连接。

In [8]:
# 4.先取得img_test目录下所有.jpg文件
from os import listdir
from os.path import isfile, join
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

img_test_path = "./img_test"
img_files = np.array([f for f in listdir(img_test_path)
                    if isfile(join(img_test_path, f)) and f.endswith('.jpg')
            ])

##合并所有元素的像素
X = np.array([])
for img_file in img_files:
    img_path = join(img_test_path, img_file)
    image1 = image.load_img(img_path, target_size=(224, 224))
    image1 = image.img_to_array(image1)
    image1 = np.expand_dims(image1, axis=0)
    if len(X.shape) == 1:
        X = image1
    else:
        X = np.concatenate((X,image1),axis=0)

X = preprocess_input(X)

# 5.取得所有文件的特征向量
features = model.predict(X)
print(features.shape)
print(X.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 592ms/step
(6, 7, 7, 512)
(6, 224, 224, 3)


In [1]:
'''
    使用cosine_similarity函数比较特征向量相似度
'''
from sklearn.metrics.pairwise import cosine_similarity
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from os import listdir
from os.path import isfile, join
import numpy as np

# 1. 加载模型（只包含卷积层，不包含最后的全连接层）
model = VGG16(weights='imagenet', include_top=False)

# 2. 提取 tiger.jpg 的特征向量
img_tiger_path = './img_test/tiger.jpg'
img_tiger = image.load_img(img_tiger_path, target_size=(224, 224))
img_tiger_array = image.img_to_array(img_tiger)
img_tiger_array = np.expand_dims(img_tiger_array, axis=0)
img_tiger_preprocessed = preprocess_input(img_tiger_array)
features_tiger = model.predict(img_tiger_preprocessed)

# 展平特征向量 (1, 7, 7, 512) -> (1, 25088)
features_tiger_flat = features_tiger.reshape((features_tiger.shape[0], -1))

# 3. 获取 img_test 目录下所有 .jpg 文件
img_test_path = "./img_test"
img_files = [f for f in listdir(img_test_path) if isfile(join(img_test_path, f)) and f.endswith('.jpg')]

# 4. 遍历每张图片，提取特征并计算相似度
similarity_list = []

for img_file in img_files:
    img_path = join(img_test_path, img_file)
    # 图像预处理
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_preprocessed = preprocess_input(img_array)

    # 提取特征
    features = model.predict(img_preprocessed)

    # 展平特征向量
    features_flat = features.reshape((features.shape[0], -1))

    # 计算余弦相似度
    sim = cosine_similarity(features_tiger_flat, features_flat)[0][0]
    similarity_list.append((img_file, sim))

# 5. 按照相似度从高到低排序输出
similarity_list.sort(key=lambda x: x[1], reverse=True)

# 输出结果
print("与 tiger.jpg 的相似度排序：")
print(np.sort(similarity_list[0][::-1]))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 984ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 286ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406ms/step
与 tiger.jpg 的相似度排序：
['1.0' 'tiger.jpg']
