<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/add_function_vit_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/UrbanASIF/unique/anchor_Shenzhen.zip -d /content/anchor

Archive:  /content/drive/MyDrive/UrbanASIF/unique/anchor_Shenzhen.zip
  inflating: /content/anchor/0.png   
  inflating: /content/anchor/1.png   
  inflating: /content/anchor/100.png  
  inflating: /content/anchor/1021.png  
  inflating: /content/anchor/106.png  
  inflating: /content/anchor/1062.png  
  inflating: /content/anchor/107.png  
  inflating: /content/anchor/1073.png  
  inflating: /content/anchor/108.png  
  inflating: /content/anchor/1087.png  
  inflating: /content/anchor/1110.png  
  inflating: /content/anchor/1111.png  
  inflating: /content/anchor/1166.png  
  inflating: /content/anchor/1175.png  
  inflating: /content/anchor/118.png  
  inflating: /content/anchor/1180.png  
  inflating: /content/anchor/119.png  
  inflating: /content/anchor/121.png  
  inflating: /content/anchor/1219.png  
  inflating: /content/anchor/1232.png  
  inflating: /content/anchor/1233.png  
  inflating: /content/anchor/1246.png  
  inflating: /content/anchor/125.png  
  inflating: /content/

In [5]:
import os
import torch
import pandas as pd
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from tqdm import tqdm
import re  # 导入正则表达式模块

# 加载预训练的 google/vit-base-patch16-224-in21k 模型
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k").to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# 加载 anchor_Shenzhen.csv 并将其转换为 DataFrame
csv_file = './anchor_Shenzhen.csv'
df = pd.read_csv(csv_file)
print(f"Loaded {len(df)} rows from {csv_file}")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    image_features_list = []

    # 读取 image_folder 中所有扩展名为 .jpg 或 .png 的文件，并将文件名按自然数顺序排序（例如，1.jpg、2.jpg、3.jpg 顺序排列）
    image_paths = sorted(
        [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.lower().endswith(('.jpg', '.png'))],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )
    print(f"Found {len(image_paths)} images in {image_folder}")

    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 获取图片ID
            image_id = os.path.splitext(os.path.basename(image_path))[0]

            # 从 CSV 文件中获取对应的 primary_function 列的第一个单词
            primary_function = df.loc[df['ID'] == int(image_id), 'primary_function'].values
            if primary_function.size == 0:
                print(f"Warning: No primary_function found for image ID {image_id}, skipping.")
                continue  # 如果没有找到对应的行，跳过该图片

            # 使用正则表达式去除标点符号，提取第一个单词
            # 正则表达式 '[^\w\s]' 用于去除所有非字母和空格字符
            first_word = re.sub(r'[^\w\s]', '', primary_function[0]).split()[0]  # 去掉标点符号并取第一个单词
            print(f"Image {image_id}: Primary function is {primary_function[0]}, using {first_word} as the embedding keyword.")

            # 加载并预处理图像
            image = Image.open(image_path)
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)  # 使用 ViTFeatureExtractor 进行预处理

            # 获取图像特征
            with torch.no_grad():  # 禁用了梯度计算（不需要反向传播）
                outputs = model(**inputs)  # 获取模型输出
                image_features = outputs.last_hidden_state[:, 0]  # 取 [CLS] token 的特征
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化处理
                image_features *= 0.8  # 对图像特征向量进行 0.8 的缩放
                print(f"Image {image_id}: Extracted features and scaled by 0.8")

                # 加载与第一个单词对应的嵌入文件（.pt）
                embedding_file = f'./{first_word}.pt'
                if os.path.exists(embedding_file):
                    word_embedding = torch.load(embedding_file).to(device)
                    image_features += 0.2 * word_embedding  # 加权叠加
                    print(f"Image {image_id}: Added embedding from {first_word}.pt with weight 0.2")
                else:
                    print(f"Warning: Embedding file {embedding_file} not found, skipping embedding addition.")

                image_features_list.append(image_features.cpu())  # 将特征向量移动到 CPU ，然后将其追加到列表中

        except Exception as e:
            print(f"Error processing {image_path}: {e}")

    # 将所有图像特征保存为矩阵
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # 将存储在 image_features_list 中的所有图像特征向量拼接成一个大的张量
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# 调用特征提取函数，提取并保存每个城市的特征
extract_features('./anchor', './imgs_anchor')





Loaded 143 rows from ./anchor_Shenzhen.csv
Found 143 images in ./anchor


  word_embedding = torch.load(embedding_file).to(device)
Processing ./anchor:   4%|▍         | 6/143 [00:00<00:02, 54.84it/s]

Image 0: Primary function is civic, governmental and cultural, using civic as the embedding keyword.
Image 0: Extracted features and scaled by 0.8
Image 0: Added embedding from civic.pt with weight 0.2
Image 1: Primary function is transportation, using transportation as the embedding keyword.
Image 1: Extracted features and scaled by 0.8
Image 1: Added embedding from transportation.pt with weight 0.2
Image 2: Primary function is commercial, using commercial as the embedding keyword.
Image 2: Extracted features and scaled by 0.8
Image 2: Added embedding from commercial.pt with weight 0.2
Image 3: Primary function is commercial, using commercial as the embedding keyword.
Image 3: Extracted features and scaled by 0.8
Image 3: Added embedding from commercial.pt with weight 0.2
Image 4: Primary function is residential, using residential as the embedding keyword.
Image 4: Extracted features and scaled by 0.8
Image 4: Added embedding from residential.pt with weight 0.2
Image 6: Primary functi

Processing ./anchor:  13%|█▎        | 18/143 [00:00<00:02, 55.87it/s]

Image 15: Extracted features and scaled by 0.8
Image 15: Added embedding from civic.pt with weight 0.2
Image 17: Primary function is commercial, using commercial as the embedding keyword.
Image 17: Extracted features and scaled by 0.8
Image 17: Added embedding from commercial.pt with weight 0.2
Image 21: Primary function is residential, using residential as the embedding keyword.
Image 21: Extracted features and scaled by 0.8
Image 21: Added embedding from residential.pt with weight 0.2
Image 23: Primary function is hotel, using hotel as the embedding keyword.
Image 23: Extracted features and scaled by 0.8
Image 23: Added embedding from hotel.pt with weight 0.2
Image 25: Primary function is sports and recreation, using sports as the embedding keyword.
Image 25: Extracted features and scaled by 0.8
Image 25: Added embedding from sports.pt with weight 0.2
Image 26: Primary function is residential, using residential as the embedding keyword.
Image 26: Extracted features and scaled by 0.8


Processing ./anchor:  21%|██        | 30/143 [00:00<00:01, 56.96it/s]

Image 55: Extracted features and scaled by 0.8
Image 55: Added embedding from sports.pt with weight 0.2
Image 57: Primary function is transportation, using transportation as the embedding keyword.
Image 57: Extracted features and scaled by 0.8
Image 57: Added embedding from transportation.pt with weight 0.2
Image 65: Primary function is education, using education as the embedding keyword.
Image 65: Extracted features and scaled by 0.8
Image 65: Added embedding from education.pt with weight 0.2
Image 70: Primary function is health care, using health as the embedding keyword.
Image 70: Extracted features and scaled by 0.8
Image 70: Added embedding from health.pt with weight 0.2
Image 71: Primary function is hotel, using hotel as the embedding keyword.
Image 71: Extracted features and scaled by 0.8
Image 71: Added embedding from hotel.pt with weight 0.2
Image 72: Primary function is transportation, using transportation as the embedding keyword.
Image 72: Extracted features and scaled by 0

Processing ./anchor:  29%|██▉       | 42/143 [00:00<00:01, 57.35it/s]

Image 98: Extracted features and scaled by 0.8
Image 98: Added embedding from commercial.pt with weight 0.2
Image 99: Primary function is transportation, using transportation as the embedding keyword.
Image 99: Extracted features and scaled by 0.8
Image 99: Added embedding from transportation.pt with weight 0.2
Image 100: Primary function is commercial, using commercial as the embedding keyword.
Image 100: Extracted features and scaled by 0.8
Image 100: Added embedding from commercial.pt with weight 0.2
Image 106: Primary function is commercial, using commercial as the embedding keyword.
Image 106: Extracted features and scaled by 0.8
Image 106: Added embedding from commercial.pt with weight 0.2
Image 107: Primary function is industrial, using industrial as the embedding keyword.
Image 107: Extracted features and scaled by 0.8
Image 107: Added embedding from industrial.pt with weight 0.2
Image 108: Primary function is commercial, using commercial as the embedding keyword.
Image 108: Ex

Processing ./anchor:  38%|███▊      | 54/143 [00:00<00:01, 57.07it/s]

Image 133: Extracted features and scaled by 0.8
Image 133: Added embedding from residential.pt with weight 0.2
Image 135: Primary function is residential, using residential as the embedding keyword.
Image 135: Extracted features and scaled by 0.8
Image 135: Added embedding from residential.pt with weight 0.2
Image 144: Primary function is transportation, using transportation as the embedding keyword.
Image 144: Extracted features and scaled by 0.8
Image 144: Added embedding from transportation.pt with weight 0.2
Image 157: Primary function is commercial, using commercial as the embedding keyword.
Image 157: Extracted features and scaled by 0.8
Image 157: Added embedding from commercial.pt with weight 0.2
Image 159: Primary function is residential, using residential as the embedding keyword.
Image 159: Extracted features and scaled by 0.8
Image 159: Added embedding from residential.pt with weight 0.2
Image 167: Primary function is industrial, using industrial as the embedding keyword.
I

Processing ./anchor:  46%|████▌     | 66/143 [00:01<00:01, 56.85it/s]

Image 196: Extracted features and scaled by 0.8
Image 196: Added embedding from sports.pt with weight 0.2
Image 198: Primary function is civic, governmental and cultural, using civic as the embedding keyword.
Image 198: Extracted features and scaled by 0.8
Image 198: Added embedding from civic.pt with weight 0.2
Image 200: Primary function is health care, using health as the embedding keyword.
Image 200: Extracted features and scaled by 0.8
Image 200: Added embedding from health.pt with weight 0.2
Image 205: Primary function is outdoors and natural, using outdoors as the embedding keyword.
Image 205: Extracted features and scaled by 0.8
Image 205: Added embedding from outdoors.pt with weight 0.2
Image 208: Primary function is residential, using residential as the embedding keyword.
Image 208: Extracted features and scaled by 0.8
Image 208: Added embedding from residential.pt with weight 0.2
Image 210: Primary function is residential, using residential as the embedding keyword.
Image 21

Processing ./anchor:  55%|█████▍    | 78/143 [00:01<00:01, 57.09it/s]

Image 285: Extracted features and scaled by 0.8
Image 285: Added embedding from commercial.pt with weight 0.2
Image 293: Primary function is commercial, using commercial as the embedding keyword.
Image 293: Extracted features and scaled by 0.8
Image 293: Added embedding from commercial.pt with weight 0.2
Image 295: Primary function is residential, using residential as the embedding keyword.
Image 295: Extracted features and scaled by 0.8
Image 295: Added embedding from residential.pt with weight 0.2
Image 301: Primary function is commercial, using commercial as the embedding keyword.
Image 301: Extracted features and scaled by 0.8
Image 301: Added embedding from commercial.pt with weight 0.2
Image 304: Primary function is transportation, using transportation as the embedding keyword.
Image 304: Extracted features and scaled by 0.8
Image 304: Added embedding from transportation.pt with weight 0.2
Image 316: Primary function is education, using education as the embedding keyword.
Image 3

Processing ./anchor:  63%|██████▎   | 90/143 [00:01<00:00, 56.43it/s]

Image 392: Extracted features and scaled by 0.8
Image 392: Added embedding from commercial.pt with weight 0.2
Image 411: Primary function is commercial, using commercial as the embedding keyword.
Image 411: Extracted features and scaled by 0.8
Image 411: Added embedding from commercial.pt with weight 0.2
Image 422: Primary function is outdoors and natural, using outdoors as the embedding keyword.
Image 422: Extracted features and scaled by 0.8
Image 422: Added embedding from outdoors.pt with weight 0.2
Image 454: Primary function is health care, using health as the embedding keyword.
Image 454: Extracted features and scaled by 0.8
Image 454: Added embedding from health.pt with weight 0.2
Image 466: Primary function is outdoors and natural, using outdoors as the embedding keyword.
Image 466: Extracted features and scaled by 0.8
Image 466: Added embedding from outdoors.pt with weight 0.2
Image 468: Primary function is education, using education as the embedding keyword.
Image 468: Extrac

Processing ./anchor:  71%|███████▏  | 102/143 [00:01<00:00, 57.15it/s]

Image 571: Extracted features and scaled by 0.8
Image 571: Added embedding from sports.pt with weight 0.2
Image 589: Primary function is commercial, using commercial as the embedding keyword.
Image 589: Extracted features and scaled by 0.8
Image 589: Added embedding from commercial.pt with weight 0.2
Image 590: Primary function is industrial, using industrial as the embedding keyword.
Image 590: Extracted features and scaled by 0.8
Image 590: Added embedding from industrial.pt with weight 0.2
Image 593: Primary function is outdoors and natural, using outdoors as the embedding keyword.
Image 593: Extracted features and scaled by 0.8
Image 593: Added embedding from outdoors.pt with weight 0.2
Image 616: Primary function is commercial, using commercial as the embedding keyword.
Image 616: Extracted features and scaled by 0.8
Image 616: Added embedding from commercial.pt with weight 0.2
Image 621: Primary function is education, using education as the embedding keyword.
Image 621: Extracted

Processing ./anchor:  80%|███████▉  | 114/143 [00:02<00:00, 56.80it/s]

Image 722: Extracted features and scaled by 0.8
Image 722: Added embedding from outdoors.pt with weight 0.2
Image 751: Primary function is commercial, using commercial as the embedding keyword.
Image 751: Extracted features and scaled by 0.8
Image 751: Added embedding from commercial.pt with weight 0.2
Image 752: Primary function is residential, using residential as the embedding keyword.
Image 752: Extracted features and scaled by 0.8
Image 752: Added embedding from residential.pt with weight 0.2
Image 769: Primary function is industrial, using industrial as the embedding keyword.
Image 769: Extracted features and scaled by 0.8
Image 769: Added embedding from industrial.pt with weight 0.2
Image 781: Primary function is industrial, using industrial as the embedding keyword.
Image 781: Extracted features and scaled by 0.8
Image 781: Added embedding from industrial.pt with weight 0.2
Image 802: Primary function is education, using education as the embedding keyword.
Image 802: Extracted 

Processing ./anchor:  88%|████████▊ | 126/143 [00:02<00:00, 56.64it/s]

Image 946: Extracted features and scaled by 0.8
Image 946: Added embedding from commercial.pt with weight 0.2
Image 983: Primary function is commercial, using commercial as the embedding keyword.
Image 983: Extracted features and scaled by 0.8
Image 983: Added embedding from commercial.pt with weight 0.2
Image 1021: Primary function is transportation, using transportation as the embedding keyword.
Image 1021: Extracted features and scaled by 0.8
Image 1021: Added embedding from transportation.pt with weight 0.2
Image 1062: Primary function is commercial, using commercial as the embedding keyword.
Image 1062: Extracted features and scaled by 0.8
Image 1062: Added embedding from commercial.pt with weight 0.2
Image 1073: Primary function is residential, using residential as the embedding keyword.
Image 1073: Extracted features and scaled by 0.8
Image 1073: Added embedding from residential.pt with weight 0.2
Image 1087: Primary function is education, using education as the embedding keywor

Processing ./anchor:  97%|█████████▋| 138/143 [00:02<00:00, 56.25it/s]

Image 1232: Extracted features and scaled by 0.8
Image 1232: Added embedding from transportation.pt with weight 0.2
Image 1233: Primary function is residential, using residential as the embedding keyword.
Image 1233: Extracted features and scaled by 0.8
Image 1233: Added embedding from residential.pt with weight 0.2
Image 1246: Primary function is civic, governmental and cultural, using civic as the embedding keyword.
Image 1246: Extracted features and scaled by 0.8
Image 1246: Added embedding from civic.pt with weight 0.2
Image 1265: Primary function is outdoors and natural, using outdoors as the embedding keyword.
Image 1265: Extracted features and scaled by 0.8
Image 1265: Added embedding from outdoors.pt with weight 0.2
Image 1286: Primary function is civic, governmental and cultural, using civic as the embedding keyword.
Image 1286: Extracted features and scaled by 0.8
Image 1286: Added embedding from civic.pt with weight 0.2
Image 1291: Primary function is civic, governmental and

Processing ./anchor: 100%|██████████| 143/143 [00:02<00:00, 56.61it/s]

Image 1461: Extracted features and scaled by 0.8
Image 1461: Added embedding from education.pt with weight 0.2





Features saved to ./imgs_anchor
