In [18]:
import os
import numpy as np
import cv2
import torch
from PIL import Image
from torchvision import models, transforms
from facenet_pytorch import MTCNN, InceptionResnetV1
from scipy.spatial.distance import cosine, euclidean
import mediapipe as mp
from sklearn.metrics.pairwise import cosine_similarity

# ======================
# 1. 加载数据集 & 预处理
# ======================

# 设定数据集路径
train_dir = 'ImgNationalGalleryOfArt'
image_size = (224, 224)  # 统一尺寸
batch_size = 32

# 获取所有图片路径
image_paths = [os.path.join(train_dir, fname) for fname in os.listdir(train_dir) if fname.endswith('.jpg')]

# 预处理：归一化 & 颜色调整
preprocess = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # 颜色抖动适应不同风格
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ======================
# 2. 加载模型 (MTCNN, FaceNet, ResNet, Mediapipe)
# ======================

# 人脸检测
mtcnn = MTCNN(keep_all=True)

# 人脸特征提取 (FaceNet)
facenet = InceptionResnetV1(pretrained='vggface2').eval()

# 整体风格特征提取 (ResNet50)
resnet_model = models.resnet50(pretrained=True)
resnet_model.eval()

# 姿态估计 (Mediapipe)
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# ======================
# 3. 特征提取函数
# ======================

def extract_face_features(image):
    """ 提取人脸特征 """
    img_cvt = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    faces = mtcnn(img_cvt)  # 进行人脸检测
    if faces is None:
        return None  # 没检测到人脸，返回空
    
    # 取第一张脸（如果有多个）
    face = faces[0].unsqueeze(0)
    
    # 提取特征
    with torch.no_grad():
        face_embedding = facenet(face)
    
    return face_embedding.numpy().flatten()

def extract_pose_features(image):
    """ 提取人体关键点特征 """
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(img_rgb)
    
    if results.pose_landmarks:
        keypoints = np.array([[lm.x, lm.y] for lm in results.pose_landmarks.landmark])
        return keypoints.flatten()
    
    return None  # 没检测到人体，返回空

def extract_image_features(image):
    """ 提取整张图片的视觉特征 """
    img_resized = preprocess(Image.fromarray(image)).unsqueeze(0)  # 预处理
    with torch.no_grad():
        features = resnet_model(img_resized)
    return features.numpy().flatten()

# ======================
# 4. 计算相似性
# ======================

def compute_similarity(feature1, feature2, method='cosine'):
    """ 计算特征相似度 """
    if feature1 is None or feature2 is None:
        return 0  # 没有匹配特征时返回 0 相似度

    if method == 'cosine':
        return 1 - cosine(feature1, feature2)  # 余弦相似度
    elif method == 'euclidean':
        return 1 / (1 + euclidean(feature1, feature2))  # 欧几里得距离，归一化
    else:
        raise ValueError("Unsupported similarity method!")

# ======================
# 5. 计算数据集中的相似度
# ======================

# 计算所有图片的特征
image_features = []
face_features = []
pose_features = []

for path in image_paths:
    img = cv2.imread(path)
    
    # 提取特征
    image_feat = extract_image_features(img)
    face_feat = extract_face_features(img)
    pose_feat = extract_pose_features(img)
    
    image_features.append(image_feat)
    face_features.append(face_feat)
    pose_features.append(pose_feat)

# 转换为 NumPy 数组
image_features = np.array(image_features)
face_features = np.array(face_features, dtype=object)  # 可能有 None 值
pose_features = np.array(pose_features, dtype=object)

# ======================
# 6. 评估相似性
# ======================

def find_most_similar(target_idx, top_k=5):
    """ 在数据集中找到与目标图片最相似的前 K 张 """
    target_img_feat = image_features[target_idx]
    target_face_feat = face_features[target_idx]
    target_pose_feat = pose_features[target_idx]
    
    similarities = []
    
    for i in range(len(image_paths)):
        if i == target_idx:
            continue
        
        img_sim = compute_similarity(target_img_feat, image_features[i], method='cosine')
        face_sim = compute_similarity(target_face_feat, face_features[i], method='cosine')
        pose_sim = compute_similarity(target_pose_feat, pose_features[i], method='euclidean')
        
        # 计算综合相似度 (加权平均)
        total_sim = (0.5 * img_sim) + (0.3 * face_sim) + (0.2 * pose_sim)
        similarities.append((image_paths[i], total_sim))
    
    # 排序并返回最相似的 top_k 图片
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]

# 测试：查询某张图的相似结果
query_idx = 10  # 任选一张图片
similar_images = find_most_similar(query_idx)

print("与目标图片最相似的 5 张图片：")
for img_path, sim_score in similar_images:
    print(f"{img_path} - 相似度: {sim_score:.4f}")


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 107M/107M [00:32<00:00, 3.43MB/s] 
I0000 00:00:1742057642.393285 6758927 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1742057642.465932 6765302 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742057642.477523 6765302 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742057646.838639 6765306 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


与目标图片最相似的 5 张图片：
ImgNationalGalleryOfArt/800px-Allan_Ramsay,_Lord_George_Villiers,_NGA_76122.jpg - 相似度: 0.4323
ImgNationalGalleryOfArt/1024px-Attributed_to_Hugo_van_der_Goes,_Saint_George_and_the_Dragon,_NGA_39733.jpg - 相似度: 0.3980
ImgNationalGalleryOfArt/After_Pietro_da_Cortona,_Masinissa_and_Sophonisba,_NGA_65744.jpg - 相似度: 0.3951
ImgNationalGalleryOfArt/800px-After_Francesco_Salviati,_Saint_Peter,_NGA_11388 (1).jpg - 相似度: 0.3942
ImgNationalGalleryOfArt/1280px-Arthur_B._Davies,_Seated_Nude_and_a_Foot,_probably_1920,_NGA_56982.jpg - 相似度: 0.3879


In [1]:
import os
import numpy as np
from tensorflow.keras import layers, models
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ConvLSTM2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Model
from scipy.spatial import distance
from sklearn.preprocessing import normalize
from PIL import Image
from scipy.spatial.distance import cdist # 创建函数来查找测试图像的最近邻
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
import types
from tensorflow.keras.models import load_model
from PIL import Image
Image.MAX_IMAGE_PIXELS = None  # 取消大小限制
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from tensorflow.keras.utils import Sequence
import torch
import cv2
from scipy.spatial.distance import cosine

In [10]:
import os
import cv2
import torch
import numpy as np
import mediapipe as mp
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from torchvision import models, transforms
from scipy.spatial.distance import cosine
from sklearn.decomposition import PCA


In [2]:
# 设置路径和参数
train_dir = 'ImgNationalGalleryOfArt'
image_size = (64, 64)
batch_size = 32

In [11]:
# 1. 载入数据
def load_images(train_dir, image_size=(64, 64)):
    image_paths = [os.path.join(train_dir, f) for f in os.listdir(train_dir) if f.endswith('.jpg')]
    images = []
    for path in image_paths:
        img = load_img(path, target_size=image_size)
        img = img_to_array(img) / 255.0  # 归一化
        images.append(img)
    return np.array(images), image_paths


In [12]:
# 2. 目标检测（YOLOv5 检测人物）
def detect_person(image):
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
    results = model(image)
    results.render()
    return results

In [13]:
# 3. 姿态估计（Mediapipe）
def extract_pose(image):
    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose()
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(img_rgb)
    if results.pose_landmarks:
        return np.array([[lm.x, lm.y] for lm in results.pose_landmarks.landmark])
    return None


In [14]:
# 4. CNN 特征提取（ResNet50）
def extract_features(image):
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img_resized = preprocess(image).unsqueeze(0)  
    resnet_model = models.resnet50(pretrained=True)
    resnet_model.eval()
    with torch.no_grad():
        features = resnet_model(img_resized)
    return features.numpy().flatten()


In [15]:
# 5. 计算相似度（Cosine Similarity）
def compute_similarity(features1, features2):
    return 1 - cosine(features1, features2)


In [16]:
# 示例
train_dir = 'ImgNationalGalleryOfArt'
images, paths = load_images(train_dir)

# 取两张图进行比较
image1 = cv2.imread(paths[0])
image2 = cv2.imread(paths[1])

# 提取特征
feat1 = extract_features(image1)
feat2 = extract_features(image2)

# 计算相似度
similarity = compute_similarity(feat1, feat2)
print(f"Cosine Similarity: {similarity}")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/bocongzhao/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:20<00:00, 5.03MB/s]


Cosine Similarity: 0.7652250109297196


In [2]:
# 设置路径和参数
train_dir = 'ImgNationalGalleryOfArt'
image_size = (64, 64)
batch_size = 32

# 获取所有图片路径
image_paths = [os.path.join(train_dir, fname) for fname in os.listdir(train_dir) if fname.endswith('.jpg')]

# 手动加载图片并转成数组
images = []
for path in image_paths:
    img = load_img(path, target_size=image_size)  # 调整图片大小
    img = img_to_array(img) / 255.0  # 转为数组并归一化
    images.append(img)

# 转换为 NumPy 数组
images = np.array(images)

# 使用 flow 方法创建生成器
datagen = ImageDataGenerator(
    rotation_range=30, # 随机旋转
    width_shift_range=0.2, # 水平平移
    height_shift_range=0.2, # 垂直平移
    shear_range=0.2, # 随机错切变换
    zoom_range=0.2, # 随机缩放
    horizontal_flip=True # 随机水平翻转
)

train_generator = datagen.flow(
    images, # 数据集路径
    batch_size=batch_size,  # 批量大小
    shuffle=True, # 是否打乱数据，通常无监督学习需要打乱
    #class_mode=None        # 无监督学习不需要标签
)

print(f'已加载图片数量: {len(images)}')


已加载图片数量: 64
