<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/1*4-0206.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the zip archives read by the later
# cells live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Unpack the anchor images from Drive into /content/anchor (shell escape).
!unzip /content/drive/MyDrive/UrbanASIFpro/Anchor.zip -d /content/anchor

Archive:  /content/drive/MyDrive/UrbanASIFpro/Anchor.zip
  inflating: /content/anchor/1007.png  
  inflating: /content/anchor/1037.png  
  inflating: /content/anchor/1039.png  
  inflating: /content/anchor/1052.png  
  inflating: /content/anchor/1094.png  
  inflating: /content/anchor/1095.png  
  inflating: /content/anchor/1097.png  
  inflating: /content/anchor/1104.png  
  inflating: /content/anchor/111.png  
  inflating: /content/anchor/1112.png  
  inflating: /content/anchor/1117.png  
  inflating: /content/anchor/1140.png  
  inflating: /content/anchor/1142.png  
  inflating: /content/anchor/115.png  
  inflating: /content/anchor/1162.png  
  inflating: /content/anchor/1172.png  
  inflating: /content/anchor/118.png  
  inflating: /content/anchor/1193.png  
  inflating: /content/anchor/1194.png  
  inflating: /content/anchor/1201.png  
  inflating: /content/anchor/1202.png  
  inflating: /content/anchor/1206.png  
  inflating: /content/anchor/1216.png  
  inflating: /content/anch

In [None]:
pip install transformers



In [10]:
import os
import torch
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from tqdm import tqdm

# 获取脚本所在的目录路径并切换当前工作目录
# abspath = os.path.abspath(__file__)  # 获取脚本文件的绝对路径
# dname = os.path.dirname(abspath)     # 提取脚本所在目录的路径
# os.chdir(dname)                      # 切换当前工作目录到脚本所在的目录

# Load the pretrained google/vit-base-patch16-224-in21k checkpoint.
# The model is moved to the GPU when one is available, otherwise it stays on the CPU.
# NOTE(review): ViTFeatureExtractor is reported as deprecated in favour of
# ViTImageProcessor in recent transformers releases — confirm against the
# installed version before upgrading.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k").to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# Split an image into its four quadrants.
def get_image_quarters(image):
    """Crop ``image`` into four quadrants around its integer midpoint.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, upper, right, lower))`` method (e.g. a PIL image).

    Returns:
        Tuple of four crops in the order
        (left upper, left lower, right upper, right lower).
        For odd dimensions the right / lower quadrants get the extra pixel
        because the midpoint uses floor division.
    """
    full_w, full_h = image.size
    mid_x = full_w // 2
    mid_y = full_h // 2

    boxes = (
        (0, 0, mid_x, mid_y),            # left upper
        (0, mid_y, mid_x, full_h),       # left lower
        (mid_x, 0, full_w, mid_y),       # right upper
        (mid_x, mid_y, full_w, full_h),  # right lower
    )
    return tuple(image.crop(box) for box in boxes)

# Feature extraction over a folder of numbered images.
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked result.

    For each image, the [CLS] embedding of the full image and of its four
    quadrants (see ``get_image_quarters``) is computed with the module-level
    ``model`` / ``feature_extractor``. The quadrant whose embedding has the
    highest cosine similarity to the full-image embedding is selected; its
    embedding is concatenated after the full-image embedding and the
    concatenation is L2-normalised.

    Args:
        image_folder: Directory containing ``<integer>.jpg`` / ``<integer>.png``
            files. Files are processed in ascending numeric order of their stem.
            Image files whose stem is not an integer are skipped with a message.
        output_file: Destination passed to ``torch.save``. The saved tensor has
            one row per successfully processed image and twice the width of a
            single [CLS] embedding.

    Returns:
        None. Progress and errors are printed; nothing is written when no image
        could be processed.
    """
    # Collect (number, path) pairs. A non-numeric stem (e.g. the "._12.png"
    # resource-fork files found in macOS zips) is skipped instead of aborting
    # the whole run with a ValueError from int().
    numbered_paths = []
    for file_name in os.listdir(image_folder):
        if not file_name.lower().endswith(('.jpg', '.png')):
            continue
        stem = os.path.splitext(file_name)[0]
        try:
            numbered_paths.append((int(stem), os.path.join(image_folder, file_name)))
        except ValueError:
            print(f"Skipping {file_name}: file name is not an integer")
    # Numeric order (1, 2, 10, ...), with the path as a deterministic tie-break.
    numbered_paths.sort()
    image_paths = [path for _, path in numbered_paths]

    image_features_list = []
    failed_paths = []

    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force three channels so that
            # RGBA / palette / greyscale files go through the same preprocessing.
            with Image.open(image_path) as opened:
                image = opened.convert("RGB")

            # Full image first, then the four quadrants, in one batched forward pass.
            views = [image, *get_image_quarters(image)]
            inputs = feature_extractor(images=views, return_tensors="pt").to(device)
            with torch.no_grad():  # inference only, no gradients needed
                cls_tokens = model(**inputs).last_hidden_state[:, 0]  # [CLS] token per view
            cls_tokens = cls_tokens / cls_tokens.norm(dim=-1, keepdim=True)

            image_features = cls_tokens[:1]    # shape (1, hidden)
            quarter_features = cls_tokens[1:]  # shape (4, hidden)

            # One similarity per quadrant as a 1-D tensor. Ranking must run over
            # the quadrant axis; ranking a (4, 1) tensor along its last axis
            # would always yield index 0.
            similarities = torch.cosine_similarity(image_features, quarter_features, dim=-1)
            best_quarter = int(torch.argmax(similarities))
            best_quarter_features = quarter_features[best_quarter:best_quarter + 1]

            # Concatenate full-image and best-quadrant embeddings, then re-normalise.
            final_embedding = torch.cat([image_features, best_quarter_features], dim=-1)
            final_embedding = final_embedding / final_embedding.norm(dim=-1, keepdim=True)

            image_features_list.append(final_embedding.cpu())

        except Exception as e:
            # Best effort: report the failure and keep going with the other images.
            print(f"Error processing {image_path}: {e}")
            failed_paths.append(image_path)

    if failed_paths:
        # Skipped images shift every later row, so make the misalignment visible.
        print(
            f"Warning: {len(failed_paths)} image(s) failed and were skipped; "
            f"rows in {output_file} will not line up one-to-one with the sorted file list"
        )

    # Stack the per-image embeddings into a single matrix and save it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Extract the embeddings of the anchor images and save them to anchor_image.pt.
# NOTE(review): './anchor' is a relative path while the archive was unpacked to
# /content/anchor, so this presumably relies on the working directory being
# /content — confirm.
extract_features('./anchor', 'anchor_image.pt')

Processing ./anchor: 100%|██████████| 152/152 [00:08<00:00, 18.41it/s]

Features saved to anchor_image.pt





In [5]:
# Unpack the prediction images from Drive into /content/predict (shell escape).
!unzip /content/drive/MyDrive/UrbanASIFpro/Predict_image.zip -d /content/predict

Archive:  /content/drive/MyDrive/UrbanASIFpro/Predict_image.zip
  inflating: /content/predict/0.png  
  inflating: /content/predict/1.png  
  inflating: /content/predict/10.png  
  inflating: /content/predict/100.png  
  inflating: /content/predict/1000.png  
  inflating: /content/predict/1001.png  
  inflating: /content/predict/1002.png  
  inflating: /content/predict/1003.png  
  inflating: /content/predict/1004.png  
  inflating: /content/predict/1005.png  
  inflating: /content/predict/1006.png  
  inflating: /content/predict/1008.png  
  inflating: /content/predict/1009.png  
  inflating: /content/predict/101.png  
  inflating: /content/predict/1010.png  
  inflating: /content/predict/1011.png  
  inflating: /content/predict/1012.png  
  inflating: /content/predict/1013.png  
  inflating: /content/predict/1014.png  
  inflating: /content/predict/1015.png  
  inflating: /content/predict/1016.png  
  inflating: /content/predict/1017.png  
  inflating: /content/predict/1018.png  
  i

In [11]:
import os
import torch
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from tqdm import tqdm

# 获取脚本所在的目录路径并切换当前工作目录
# abspath = os.path.abspath(__file__)  # 获取脚本文件的绝对路径
# dname = os.path.dirname(abspath)     # 提取脚本所在目录的路径
# os.chdir(dname)                      # 切换当前工作目录到脚本所在的目录

# Load the pretrained google/vit-base-patch16-224-in21k checkpoint.
# This repeats the model setup of the anchor extraction cell, so the checkpoint
# is loaded a second time when both cells run in the same session.
# The model is moved to the GPU when one is available, otherwise it stays on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k").to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# Split an image into its four quadrants.
def get_image_quarters(image):
    """Crop ``image`` into four quadrants around its integer midpoint.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, upper, right, lower))`` method (e.g. a PIL image).

    Returns:
        Tuple of four crops in the order
        (left upper, left lower, right upper, right lower).
        For odd dimensions the right / lower quadrants get the extra pixel
        because the midpoint uses floor division.
    """
    full_w, full_h = image.size
    mid_x = full_w // 2
    mid_y = full_h // 2

    boxes = (
        (0, 0, mid_x, mid_y),            # left upper
        (0, mid_y, mid_x, full_h),       # left lower
        (mid_x, 0, full_w, mid_y),       # right upper
        (mid_x, mid_y, full_w, full_h),  # right lower
    )
    return tuple(image.crop(box) for box in boxes)

# Feature extraction over a folder of numbered images.
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked result.

    For each image, the [CLS] embedding of the full image and of its four
    quadrants (see ``get_image_quarters``) is computed with the module-level
    ``model`` / ``feature_extractor``. The quadrant whose embedding has the
    highest cosine similarity to the full-image embedding is selected; its
    embedding is concatenated after the full-image embedding and the
    concatenation is L2-normalised.

    Args:
        image_folder: Directory containing ``<integer>.jpg`` / ``<integer>.png``
            files. Files are processed in ascending numeric order of their stem.
            Image files whose stem is not an integer are skipped with a message.
        output_file: Destination passed to ``torch.save``. The saved tensor has
            one row per successfully processed image and twice the width of a
            single [CLS] embedding.

    Returns:
        None. Progress and errors are printed; nothing is written when no image
        could be processed.
    """
    # Collect (number, path) pairs. A non-numeric stem (e.g. the "._12.png"
    # resource-fork files found in macOS zips) is skipped instead of aborting
    # the whole run with a ValueError from int().
    numbered_paths = []
    for file_name in os.listdir(image_folder):
        if not file_name.lower().endswith(('.jpg', '.png')):
            continue
        stem = os.path.splitext(file_name)[0]
        try:
            numbered_paths.append((int(stem), os.path.join(image_folder, file_name)))
        except ValueError:
            print(f"Skipping {file_name}: file name is not an integer")
    # Numeric order (1, 2, 10, ...), with the path as a deterministic tie-break.
    numbered_paths.sort()
    image_paths = [path for _, path in numbered_paths]

    image_features_list = []
    failed_paths = []

    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force three channels so that
            # RGBA / palette / greyscale files go through the same preprocessing.
            with Image.open(image_path) as opened:
                image = opened.convert("RGB")

            # Full image first, then the four quadrants, in one batched forward pass.
            views = [image, *get_image_quarters(image)]
            inputs = feature_extractor(images=views, return_tensors="pt").to(device)
            with torch.no_grad():  # inference only, no gradients needed
                cls_tokens = model(**inputs).last_hidden_state[:, 0]  # [CLS] token per view
            cls_tokens = cls_tokens / cls_tokens.norm(dim=-1, keepdim=True)

            image_features = cls_tokens[:1]    # shape (1, hidden)
            quarter_features = cls_tokens[1:]  # shape (4, hidden)

            # One similarity per quadrant as a 1-D tensor. Ranking must run over
            # the quadrant axis; ranking a (4, 1) tensor along its last axis
            # would always yield index 0.
            similarities = torch.cosine_similarity(image_features, quarter_features, dim=-1)
            best_quarter = int(torch.argmax(similarities))
            best_quarter_features = quarter_features[best_quarter:best_quarter + 1]

            # Concatenate full-image and best-quadrant embeddings, then re-normalise.
            final_embedding = torch.cat([image_features, best_quarter_features], dim=-1)
            final_embedding = final_embedding / final_embedding.norm(dim=-1, keepdim=True)

            image_features_list.append(final_embedding.cpu())

        except Exception as e:
            # Best effort: report the failure and keep going with the other images.
            print(f"Error processing {image_path}: {e}")
            failed_paths.append(image_path)

    if failed_paths:
        # Skipped images shift every later row, so make the misalignment visible.
        print(
            f"Warning: {len(failed_paths)} image(s) failed and were skipped; "
            f"rows in {output_file} will not line up one-to-one with the sorted file list"
        )

    # Stack the per-image embeddings into a single matrix and save it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Extract the embeddings of the prediction images and save them to predict_image.pt.
# NOTE(review): './predict' is a relative path while the archive was unpacked to
# /content/predict, so this presumably relies on the working directory being
# /content — confirm.
extract_features('./predict', 'predict_image.pt')

Processing ./predict: 100%|██████████| 1366/1366 [01:14<00:00, 18.30it/s]

Features saved to predict_image.pt



