<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/emb0206.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/UrbanASIFpr/Anchor.zip -d /content/anchor

Archive:  /content/drive/MyDrive/UrbanASIFpr/Anchor.zip
  inflating: /content/anchor/1014.png  
  inflating: /content/anchor/1027.png  
  inflating: /content/anchor/1049.png  
  inflating: /content/anchor/1052.png  
  inflating: /content/anchor/1075.png  
  inflating: /content/anchor/1080.png  
  inflating: /content/anchor/1081.png  
  inflating: /content/anchor/1094.png  
  inflating: /content/anchor/1102.png  
  inflating: /content/anchor/1108.png  
  inflating: /content/anchor/111.png  
  inflating: /content/anchor/1111.png  
  inflating: /content/anchor/1116.png  
  inflating: /content/anchor/1117.png  
  inflating: /content/anchor/1136.png  
  inflating: /content/anchor/1140.png  
  inflating: /content/anchor/1143.png  
  inflating: /content/anchor/1162.png  
  inflating: /content/anchor/1198.png  
  inflating: /content/anchor/1209.png  
  inflating: /content/anchor/1216.png  
  inflating: /content/anchor/1220.png  
  inflating: /content/anchor/1246.png  
  inflating: /content/anc

In [4]:
import os
import torch
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from tqdm import tqdm

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained google/vit-base-patch16-224-in21k model onto the chosen device,
# then the preprocessor published with the same checkpoint.
# NOTE(review): ViTFeatureExtractor is deprecated in recent transformers releases in favour
# of ViTImageProcessor — consider migrating when the pinned version allows it.
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
model = model.to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked features.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are processed in
    ascending order of their integer file stem. Each image is run through the
    module-level ``feature_extractor`` and ``model`` on ``device``; the hidden
    state at sequence position 0 (the [CLS] token) is L2-normalised and kept as
    that image's feature row.

    Args:
        image_folder: Directory containing the images.
        output_file: Path passed to ``torch.save`` for the stacked tensor
            (one row per successfully processed image).

    Returns:
        None. Progress, per-image errors and the final status are printed.

    Raises:
        ValueError: If a matching file has a non-integer stem (raised by the sort key).
    """
    # Order by the integer value of the stem (1, 2, 10) rather than lexicographically (1, 10, 2).
    image_paths = sorted(
        (
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith(('.jpg', '.png'))
        ),
        key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
    )

    image_features_list = []
    skipped_paths = []
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force 3-channel RGB: RGBA, palette or
            # grayscale files may otherwise be rejected by the preprocessor and dropped.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)
                cls_features = outputs.last_hidden_state[:, 0]
                # Out-of-place normalisation: the slice above is a view, so an in-place
                # division would write into the hidden state and, on CPU, every stored
                # row would keep the full hidden-state tensor alive.
                image_features = torch.nn.functional.normalize(cls_features, p=2, dim=-1)
                image_features_list.append(image_features.cpu())
        except Exception as e:
            # Best effort: report the failure and continue with the remaining images.
            skipped_paths.append(image_path)
            print(f"Error processing {image_path}: {e}")

    if skipped_paths:
        # Downstream code that indexes rows by sorted file position needs to know this.
        print(
            f"Warning: {len(skipped_paths)} image(s) were skipped; rows in the saved "
            f"tensor no longer map one-to-one onto the sorted file list."
        )

    # Stack the per-image rows into one matrix and persist it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Run the feature extraction on the ./anchor images and save the result to anchor_image.pt
extract_features('./anchor', 'anchor_image.pt')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Processing ./anchor: 100%|██████████| 152/152 [00:03<00:00, 44.62it/s]

Features saved to anchor_image.pt





In [5]:
!unzip /content/drive/MyDrive/UrbanASIFpr/Predict.zip -d /content/predict

Archive:  /content/drive/MyDrive/UrbanASIFpr/Predict.zip
  inflating: /content/predict/0.png  
  inflating: /content/predict/1.png  
  inflating: /content/predict/10.png  
  inflating: /content/predict/100.png  
  inflating: /content/predict/1000.png  
  inflating: /content/predict/1001.png  
  inflating: /content/predict/1002.png  
  inflating: /content/predict/1003.png  
  inflating: /content/predict/1004.png  
  inflating: /content/predict/1005.png  
  inflating: /content/predict/1006.png  
  inflating: /content/predict/1007.png  
  inflating: /content/predict/1008.png  
  inflating: /content/predict/1009.png  
  inflating: /content/predict/101.png  
  inflating: /content/predict/1010.png  
  inflating: /content/predict/1011.png  
  inflating: /content/predict/1012.png  
  inflating: /content/predict/1013.png  
  inflating: /content/predict/1015.png  
  inflating: /content/predict/1016.png  
  inflating: /content/predict/1017.png  
  inflating: /content/predict/1018.png  
  inflatin

In [7]:
import os
import torch
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from tqdm import tqdm

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained google/vit-base-patch16-224-in21k model onto the chosen device,
# then the preprocessor published with the same checkpoint.
# NOTE(review): ViTFeatureExtractor is deprecated in recent transformers releases in favour
# of ViTImageProcessor — consider migrating when the pinned version allows it.
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
model = model.to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked features.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are processed in
    ascending order of their integer file stem. Each image is run through the
    module-level ``feature_extractor`` and ``model`` on ``device``; the hidden
    state at sequence position 0 (the [CLS] token) is L2-normalised and kept as
    that image's feature row.

    Args:
        image_folder: Directory containing the images.
        output_file: Path passed to ``torch.save`` for the stacked tensor
            (one row per successfully processed image).

    Returns:
        None. Progress, per-image errors and the final status are printed.

    Raises:
        ValueError: If a matching file has a non-integer stem (raised by the sort key).
    """
    # Order by the integer value of the stem (1, 2, 10) rather than lexicographically (1, 10, 2).
    image_paths = sorted(
        (
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith(('.jpg', '.png'))
        ),
        key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
    )

    image_features_list = []
    skipped_paths = []
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force 3-channel RGB: RGBA, palette or
            # grayscale files may otherwise be rejected by the preprocessor and dropped.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)
                cls_features = outputs.last_hidden_state[:, 0]
                # Out-of-place normalisation: the slice above is a view, so an in-place
                # division would write into the hidden state and, on CPU, every stored
                # row would keep the full hidden-state tensor alive.
                image_features = torch.nn.functional.normalize(cls_features, p=2, dim=-1)
                image_features_list.append(image_features.cpu())
        except Exception as e:
            # Best effort: report the failure and continue with the remaining images.
            skipped_paths.append(image_path)
            print(f"Error processing {image_path}: {e}")

    if skipped_paths:
        # Downstream code that indexes rows by sorted file position needs to know this.
        print(
            f"Warning: {len(skipped_paths)} image(s) were skipped; rows in the saved "
            f"tensor no longer map one-to-one onto the sorted file list."
        )

    # Stack the per-image rows into one matrix and persist it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Run the feature extraction on the ./predict images and save the result to predict_image.pt
extract_features('./predict', 'predict_image.pt')

Processing ./predict: 100%|██████████| 1366/1366 [00:22<00:00, 60.98it/s]

Features saved to predict_image.pt





In [8]:
import os
import torch
from transformers import SwinForImageClassification, AutoImageProcessor
from PIL import Image
from tqdm import tqdm

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained microsoft/swin-base-patch4-window7-224 classifier onto the chosen
# device, then the image processor published with the same checkpoint.
# Note: this rebinds the notebook-wide `model` and `feature_extractor` names.
model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224")
model = model.to(device)
feature_extractor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked features.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are processed in
    ascending order of their integer file stem. Each image is run through the
    module-level ``feature_extractor`` and ``model`` on ``device``; the model's
    ``logits`` output is L2-normalised and kept as that image's feature row.

    Args:
        image_folder: Directory containing the images.
        output_file: Path passed to ``torch.save`` for the stacked tensor
            (one row per successfully processed image).

    Returns:
        None. Progress, per-image errors and the final status are printed.

    Raises:
        ValueError: If a matching file has a non-integer stem (raised by the sort key).
    """
    # Order by the integer value of the stem (1, 2, 10) rather than lexicographically (1, 10, 2).
    image_paths = sorted(
        (
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith(('.jpg', '.png'))
        ),
        key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
    )

    image_features_list = []
    skipped_paths = []
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force 3-channel RGB: RGBA, palette or
            # grayscale files may otherwise be rejected by the preprocessor and dropped.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)
                # NOTE(review): logits are the classification-head scores, not a backbone
                # embedding — confirm this is the representation downstream code expects.
                logits = outputs.logits
                # Out-of-place L2 normalisation leaves the model output untouched.
                image_features = torch.nn.functional.normalize(logits, p=2, dim=-1)
                image_features_list.append(image_features.cpu())
        except Exception as e:
            # Best effort: report the failure and continue with the remaining images.
            skipped_paths.append(image_path)
            print(f"Error processing {image_path}: {e}")

    if skipped_paths:
        # Downstream code that indexes rows by sorted file position needs to know this.
        print(
            f"Warning: {len(skipped_paths)} image(s) were skipped; rows in the saved "
            f"tensor no longer map one-to-one onto the sorted file list."
        )

    # Stack the per-image rows into one matrix and persist it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Run the feature extraction on the ./anchor images and save the result to anchor_image_swin.pt
extract_features('./anchor', 'anchor_image_swin.pt')

config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

Processing ./anchor: 100%|██████████| 152/152 [00:06<00:00, 22.27it/s]

Features saved to anchor_image_swin.pt





In [None]:
import os
import torch
from transformers import SwinForImageClassification, AutoImageProcessor
from PIL import Image
from tqdm import tqdm

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained microsoft/swin-base-patch4-window7-224 classifier onto the chosen
# device, then the image processor published with the same checkpoint.
# Note: this rebinds the notebook-wide `model` and `feature_extractor` names.
model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224")
model = model.to(device)
feature_extractor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked features.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are processed in
    ascending order of their integer file stem. Each image is run through the
    module-level ``feature_extractor`` and ``model`` on ``device``; the model's
    ``logits`` output is L2-normalised and kept as that image's feature row.

    Args:
        image_folder: Directory containing the images.
        output_file: Path passed to ``torch.save`` for the stacked tensor
            (one row per successfully processed image).

    Returns:
        None. Progress, per-image errors and the final status are printed.

    Raises:
        ValueError: If a matching file has a non-integer stem (raised by the sort key).
    """
    # Order by the integer value of the stem (1, 2, 10) rather than lexicographically (1, 10, 2).
    image_paths = sorted(
        (
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith(('.jpg', '.png'))
        ),
        key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
    )

    image_features_list = []
    skipped_paths = []
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force 3-channel RGB: RGBA, palette or
            # grayscale files may otherwise be rejected by the preprocessor and dropped.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)
                # NOTE(review): logits are the classification-head scores, not a backbone
                # embedding — confirm this is the representation downstream code expects.
                logits = outputs.logits
                # Out-of-place L2 normalisation leaves the model output untouched.
                image_features = torch.nn.functional.normalize(logits, p=2, dim=-1)
                image_features_list.append(image_features.cpu())
        except Exception as e:
            # Best effort: report the failure and continue with the remaining images.
            skipped_paths.append(image_path)
            print(f"Error processing {image_path}: {e}")

    if skipped_paths:
        # Downstream code that indexes rows by sorted file position needs to know this.
        print(
            f"Warning: {len(skipped_paths)} image(s) were skipped; rows in the saved "
            f"tensor no longer map one-to-one onto the sorted file list."
        )

    # Stack the per-image rows into one matrix and persist it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Run the feature extraction on the ./anchor images and save the result to anchor_image_swin.pt
# NOTE(review): this cell repeats the preceding Swin anchor cell and writes the same output
# file — consider removing one of the two.
extract_features('./anchor', 'anchor_image_swin.pt')

In [9]:
import os
import torch
from transformers import SwinForImageClassification, AutoImageProcessor
from PIL import Image
from tqdm import tqdm

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained microsoft/swin-base-patch4-window7-224 classifier onto the chosen
# device, then the image processor published with the same checkpoint.
# Note: this rebinds the notebook-wide `model` and `feature_extractor` names.
model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224")
model = model.to(device)
feature_extractor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224")

# 定义特征提取函数 extract_features
def extract_features(image_folder, output_file):
    """Embed every numbered image in ``image_folder`` and save the stacked features.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are processed in
    ascending order of their integer file stem. Each image is run through the
    module-level ``feature_extractor`` and ``model`` on ``device``; the model's
    ``logits`` output is L2-normalised and kept as that image's feature row.

    Args:
        image_folder: Directory containing the images.
        output_file: Path passed to ``torch.save`` for the stacked tensor
            (one row per successfully processed image).

    Returns:
        None. Progress, per-image errors and the final status are printed.

    Raises:
        ValueError: If a matching file has a non-integer stem (raised by the sort key).
    """
    # Order by the integer value of the stem (1, 2, 10) rather than lexicographically (1, 10, 2).
    image_paths = sorted(
        (
            os.path.join(image_folder, name)
            for name in os.listdir(image_folder)
            if name.lower().endswith(('.jpg', '.png'))
        ),
        key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
    )

    image_features_list = []
    skipped_paths = []
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # Close the file handle promptly and force 3-channel RGB: RGBA, palette or
            # grayscale files may otherwise be rejected by the preprocessor and dropped.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)
                # NOTE(review): logits are the classification-head scores, not a backbone
                # embedding — confirm this is the representation downstream code expects.
                logits = outputs.logits
                # Out-of-place L2 normalisation leaves the model output untouched.
                image_features = torch.nn.functional.normalize(logits, p=2, dim=-1)
                image_features_list.append(image_features.cpu())
        except Exception as e:
            # Best effort: report the failure and continue with the remaining images.
            skipped_paths.append(image_path)
            print(f"Error processing {image_path}: {e}")

    if skipped_paths:
        # Downstream code that indexes rows by sorted file position needs to know this.
        print(
            f"Warning: {len(skipped_paths)} image(s) were skipped; rows in the saved "
            f"tensor no longer map one-to-one onto the sorted file list."
        )

    # Stack the per-image rows into one matrix and persist it.
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")

# Run the feature extraction on the ./predict images and save the result to predict_image_swin.pt
extract_features('./predict', 'predict_image_swin.pt')

Processing ./predict: 100%|██████████| 1366/1366 [00:57<00:00, 23.74it/s]

Features saved to predict_image_swin.pt





In [11]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.preprocessing import normalize
import numpy as np
import os

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Load the pretrained Sentence Transformer model.
# Note: this rebinds the notebook-wide `model` name used by earlier cells.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the anchor table; each row supplies one text in the 'semantic_description' column.
# NOTE(review): row i of anchor_text.pt is presumably meant to pair with row i of the
# anchor image features — confirm the CSV row order matches the image order.
df = pd.read_csv('./anchor.csv')

# Fail early with a clear message instead of an obscure error from the encoder or torch.
descriptions = df['semantic_description']
if descriptions.empty:
    raise ValueError("./anchor.csv contains no rows to embed")
if descriptions.isna().any():
    missing_rows = descriptions[descriptions.isna()].index.tolist()
    raise ValueError(f"Missing 'semantic_description' in rows: {missing_rows}")

# Encode all descriptions in one batched call instead of one model call per row;
# the returned rows are in the same order as the input list.
sentence_embeddings = model.encode(descriptions.astype(str).tolist())

# L2-normalise each embedding row.
normalized_embeddings = normalize(sentence_embeddings, axis=1)

# Convert to a float32 PyTorch tensor. Shape: [num_samples, embedding_dim]
embedding_tensor = torch.tensor(normalized_embeddings, dtype=torch.float32)

# Save to a .pt file.
torch.save(embedding_tensor, 'anchor_text.pt')

In [13]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.preprocessing import normalize
import json
import os

# (Disabled) Switch the working directory to the folder that contains this script.
# abspath = os.path.abspath(__file__)  # absolute path of the script file
# dname = os.path.dirname(abspath)     # directory that contains the script
# os.chdir(dname)                      # make that directory the working directory

# Load the pretrained Sentence Transformer model.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Updated urbanclip prompt templates; each has two positional `{}` slots filled via str.format.
urbanclip_templates = [
    "{} area featuring {}.",
    "{} area featuring {} with cars.",
    "{} area featuring {} with facade.",
    "{} area featuring {} on the road.",
    "{} area featuring {} with many trees.",
    "{} area featuring {} in city."
]

# Read urban_taxonomy.json: a dict whose keys are functional category names and whose
# values are the lists of object types under that category.
# JSON is UTF-8 by specification, so do not rely on the platform's default encoding.
with open('./urban_taxonomy.json', 'r', encoding='utf-8') as f:
    urban_taxonomy = json.load(f)

# One (object type, category) pair per output row, in taxonomy order.
label_pairs = [
    (uot, category)
    for category, uots in urban_taxonomy.items()
    for uot in uots
]
if not label_pairs:
    raise ValueError("./urban_taxonomy.json contains no object types to embed")

# Fill every template for every pair; the sentences of one pair stay contiguous.
# NOTE(review): format(uot, category) yields "<object type> area featuring <category>" —
# confirm this is the intended slot order.
sentences = [
    template.format(uot, category)
    for uot, category in label_pairs
    for template in urbanclip_templates
]

# Encode all sentences in one batched call instead of one model call per object type;
# the returned rows are in the same order as the input list.
sentence_embeddings = model.encode(sentences)

# Normalise each sentence embedding (following the approach of UrbanCLIP's zeroshot.py).
normalized_embeddings = normalize(sentence_embeddings, axis=1)

# Average the normalised template embeddings of each pair, then re-normalise the average.
per_label_embeddings = normalized_embeddings.reshape(
    len(label_pairs), len(urbanclip_templates), -1
)
avg_embeddings = per_label_embeddings.mean(axis=1)
final_embeddings = normalize(avg_embeddings, axis=1)

# Convert to a float32 PyTorch tensor. Shape: [num_samples, embedding_dim]
embedding_tensor = torch.tensor(final_embeddings, dtype=torch.float32)

# Save to a .pt file.
torch.save(embedding_tensor, 'predict_text.pt')