In [1]:
import os
import numpy as np
import cv2
import torch
from PIL import Image
from torchvision import models, transforms
from facenet_pytorch import MTCNN, InceptionResnetV1
from scipy.spatial.distance import cosine, euclidean
import mediapipe as mp
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ======================
# 1. 加载数据集
# ======================

# 设定数据集路径
train_dir = 'ImgNationalGalleryOfArt' #存放国家美术馆数据集的目录路径
image_size = (224, 224)  # 统一尺寸， 因为 ResNet50 和 FaceNet 预期的输入尺寸是 224x224
batch_size = 32 #批处理大小

# 获取所有图片路径
image_paths = [os.path.join(train_dir, fname) for fname in os.listdir(train_dir) if fname.endswith('.jpg')]

In [3]:
# ======================
# 2. 预处理
# ======================

# 预处理：归一化 & 颜色调整
preprocess = transforms.Compose([
    #transforms.ToPILImage(), # 把 NumPy 数组转换为 PIL 格式 #########!!!!!!
    transforms.Resize(image_size), #调整所有图片大小，确保输入网络的尺寸一致
    # 绘画作品的风格可能不同，所以颜色抖动适应不同风格
    transforms.ColorJitter(
                        brightness=0.2,  #在 ±20% 范围内随机调整亮度
                        contrast=0.2, #在 ±20% 范围内随机调整对比度
                        saturation=0.2, #在 ±20% 范围内随机调整颜色饱和度
                        hue=0.1 #在 ±10% 范围内随机调整色调
                        ),  
    transforms.ToTensor(), #将 PIL.Image 转换为 PyTorch 张量，并将像素值归一化到 [0,1]
    transforms.Normalize( #ImageNet 预训练模型的标准化参数
                        mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]
                        ),
])

In [4]:
# ==============================================
# 3. 加载模型 (MTCNN) 人脸检测
# ==============================================

# 用于检测图片中的人脸（返回多个检测到的面部）
# MTCNN 适用于检测绘画中的人脸，比 YOLO 更适合非真实照片
mtcnn = MTCNN(keep_all=True)

In [5]:
# ==============================================
# 4. 加载模型 (FaceNet) 人脸特征提取 
# ==============================================

# FaceNet，计算人脸特征向量
# VGGFace2，大规模人脸数据集，预训练模型能够提取高质量的面部特征
facenet = InceptionResnetV1(pretrained='vggface2').eval()

In [6]:
# ==============================================
# 5. 加载模型 (ResNet) 整体风格特征提取
# ==============================================

# ResNet50，深度残差网络，
# 提取图片的整体视觉特征（风格、颜色、结构。。。
resnet_model = models.resnet50(pretrained=True)
resnet_model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
# ==============================================
# 6. 加载模型 (Mediapipe) 姿态估计
# ==============================================

#  Pose() 检测人体姿态，提取关键点用于计算姿势相似度
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

I0000 00:00:1742069513.238679 6905876 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro


In [8]:
# ===========================
# 7. 特征提取函数： 人脸特征提取
# ===========================

def extract_face_features(image):
    """ 提取人脸特征 """
    img_cvt = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    faces = mtcnn(img_cvt)  # 人脸检测
    if faces is None:
        return None  # 没检测到人脸，返回空
    
    # 取第一张脸（如果有多个）
    face = faces[0].unsqueeze(0)
    
    # 提取特征
    with torch.no_grad():
        face_embedding = facenet(face) #提取人脸特征向量
    
    return face_embedding.numpy().flatten() #展平为一维数组，便于计算相似度

In [9]:
# ===========================
# 8. 特征提取函数： 姿态特征提取
# ===========================

def extract_pose_features(image):
    """ 提取人体关键点特征 """
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(img_rgb) #计算人体关键点（17 个）
    
    if results.pose_landmarks:
        keypoints = np.array([[lm.x, lm.y] for lm in results.pose_landmarks.landmark])
        return keypoints.flatten() #展平成一维数组，用于相似度计算
    
    return None  # 没检测到人体，返回空


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [10]:
def extract_image_features(image):
    """ 提取整张图片的视觉特征 """
    img_resized = preprocess(Image.fromarray(image)).unsqueeze(0)  # 预处理
    with torch.no_grad():
        features = resnet_model(img_resized)
    return features.numpy().flatten()


In [11]:
# ======================
# 4. 计算相似性
# ======================

def compute_similarity(feature1, feature2, method='cosine'):
    """ 计算特征相似度 """
    if feature1 is None or feature2 is None:
        return 0  # 没有匹配特征时返回 0 相似度
    # 余弦相似度
    if method == 'cosine':
        return 1 - cosine(feature1, feature2)  # 两个向量的夹角，数值在 [0,1] 之间
    # 欧几里得距离 
    elif method == 'euclidean':
        return 1 / (1 + euclidean(feature1, feature2))  # 两个向量的几何距离，归一化到 [0,1]
    else:
        raise ValueError("Unsupported similarity method!")

In [12]:
# ======================
# 5. 计算数据集中的相似度
# ======================

# 计算所有图片的特征
image_features = []
face_features = []
pose_features = []

for path in image_paths:
    img = cv2.imread(path)
    
    # 提取特征
    image_feat = extract_image_features(img)
    face_feat = extract_face_features(img)
    pose_feat = extract_pose_features(img)
    
    image_features.append(image_feat)
    face_features.append(face_feat)
    pose_features.append(pose_feat)

# 转换为 NumPy 数组
image_features = np.array(image_features)
face_features = np.array(face_features, dtype=object)  # 可能有 None 值
pose_features = np.array(pose_features, dtype=object)

W0000 00:00:1742069513.323819 6906426 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742069513.336186 6906430 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742069521.151832 6906428 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [13]:
# ======================
# 6. 评估相似性
# ======================

def find_most_similar(target_idx, top_k=5):
    """ 在数据集中找到与目标图片最相似的前 K 张 """
    target_img_feat = image_features[target_idx]
    target_face_feat = face_features[target_idx]
    target_pose_feat = pose_features[target_idx]
    
    similarities = []
    
    for i in range(len(image_paths)):
        if i == target_idx:
            continue
        
        img_sim = compute_similarity(target_img_feat, image_features[i], method='cosine')
        face_sim = compute_similarity(target_face_feat, face_features[i], method='cosine')
        pose_sim = compute_similarity(target_pose_feat, pose_features[i], method='euclidean')
        
        # 计算综合相似度 (加权平均)
        total_sim = (0.5 * img_sim) + (0.3 * face_sim) + (0.2 * pose_sim)
        similarities.append((image_paths[i], total_sim))
    
    # 排序并返回最相似的 top_k 图片
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]

In [14]:
# 测试：查询某张图的相似结果
query_idx = 10  # 任选一张图片
similar_images = find_most_similar(query_idx)

print("与目标图片最相似的 5 张图片：")
for img_path, sim_score in similar_images:
    print(f"{img_path} - 相似度: {sim_score:.4f}")

与目标图片最相似的 5 张图片：
ImgNationalGalleryOfArt/1920px-After_Raphael,_The_Deluge,_from_the_Loggia_of_the_Vatican,_NGA_11394.jpg - 相似度: 0.4198
ImgNationalGalleryOfArt/800px-Allan_Ramsay,_Lord_George_Villiers,_NGA_76122.jpg - 相似度: 0.4190
ImgNationalGalleryOfArt/800px-After_Francesco_Salviati,_Saint_Peter,_NGA_11388.jpg - 相似度: 0.4026
ImgNationalGalleryOfArt/After_Pietro_da_Cortona,_Masinissa_and_Sophonisba,_NGA_65744.jpg - 相似度: 0.3990
ImgNationalGalleryOfArt/1280px-Arthur_B._Davies,_Seated_Nude_and_a_Foot,_probably_1920,_NGA_56982.jpg - 相似度: 0.3918
