<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/sim0211.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so the archive stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip /content/drive/MyDrive/UrbanASIFpr/data.zip -d /content/data

Archive:  /content/drive/MyDrive/UrbanASIFpr/data.zip
   creating: /content/data/industrial/
  inflating: /content/data/industrial/1000.png  
  inflating: /content/data/industrial/1001.png  
  inflating: /content/data/industrial/1004.png  
  inflating: /content/data/industrial/1008.png  
  inflating: /content/data/industrial/1009.png  
  inflating: /content/data/industrial/1013.png  
  inflating: /content/data/industrial/1017.png  
  inflating: /content/data/industrial/1020.png  
  inflating: /content/data/industrial/1022.png  
  inflating: /content/data/industrial/1026.png  
  inflating: /content/data/industrial/1033.png  
  inflating: /content/data/industrial/1051.png  
  inflating: /content/data/industrial/1054.png  
  inflating: /content/data/industrial/1055.png  
  inflating: /content/data/industrial/1059.png  
  inflating: /content/data/industrial/1063.png  
  inflating: /content/data/industrial/1064.png  
  inflating: /content/data/industrial/1067.png  
  inflating: /content/dat

In [6]:
import os
import torch
from torchvision import models, transforms
from PIL import Image
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from transformers import ViTFeatureExtractor, ViTModel
from tqdm import tqdm  # 导入tqdm

# Step 1: load the category descriptions from the CSV file.
# The file must provide the two columns read below.
description_df = pd.read_csv('./description.csv')
class_descriptions = {
    category: description
    for category, description in zip(
        description_df['urban_function_category'],
        description_df['semantic_description'],
    )
}

# Step 2: embed every description with SentenceTransformer.
# The embeddings come back in the iteration order of `class_descriptions`.
text_encoder = SentenceTransformer('all-mpnet-base-v2')
class_embeddings = text_encoder.encode([*class_descriptions.values()])

# Step 3: load the ViT image encoder, on the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
vit_feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
vit_model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k').to(device)

# Image preprocessing (the scaling and normalisation ViT needs) is delegated to the feature extractor.
def extract_image_features(image_path):
    """Extract a feature vector for the image stored at ``image_path``.

    The image is opened, converted to RGB, preprocessed by the module-level
    ``vit_feature_extractor`` and passed through ``vit_model`` on ``device``
    with gradient tracking disabled.

    Returns:
        A flattened NumPy array: the model's ``last_hidden_state`` averaged
        over dim 1 (presumably the token/patch axis -- confirm).
    """
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = vit_feature_extractor(images=rgb_image, return_tensors="pt").to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        model_outputs = vit_model(**model_inputs)

    pooled = model_outputs.last_hidden_state.mean(dim=1)
    return pooled.cpu().numpy().flatten()

# Step 4: load the images of every category folder and average their features.
image_features = {}
# Root folder holding one sub-folder per category.
# NOTE(review): relative path -- the archive is extracted to /content/data, so this
# presumably relies on the working directory being /content; confirm.
image_folder = "./data"

# Fixes the category order shared by the text embeddings and every matrix built below.
categories = list(class_descriptions.keys())

# Feature extraction is the slow part of the notebook, so the progress bar goes here.
for class_name in tqdm(categories, desc="Extracting image features", unit="class"):
    class_folder = os.path.join(image_folder, class_name)
    class_features = []
    # Sorted listing: os.listdir order is arbitrary, and a fixed order keeps the
    # floating-point mean reproducible between runs.
    for img_name in sorted(os.listdir(class_folder)):
        img_path = os.path.join(class_folder, img_name)
        # Only process image files; compare case-insensitively so .PNG/.JPG are not skipped.
        if img_name.lower().endswith(('.jpg', '.png')):
            features = extract_image_features(img_path)
            class_features.append(features)
    if not class_features:
        # np.mean([]) would silently yield NaN and only fail later with an unclear error.
        raise ValueError(
            f"No .jpg/.png images found in {class_folder!r}; "
            f"cannot compute a mean feature for category {class_name!r}"
        )
    # One vector per category: the mean over all of its images.
    image_features[class_name] = np.mean(class_features, axis=0)

# Steps 5-6: build one embedding matrix per modality and its class-by-class cosine similarity.
# Rows (and therefore the rows/columns of both similarity matrices) follow `categories`.

# Text modality: embeddings of the category descriptions.
text_embeddings = np.array(class_embeddings)
text_sim = cosine_similarity(text_embeddings)

# Image modality: the per-category mean image features.
image_embeddings = np.array(list(map(image_features.__getitem__, categories)))
image_sim = cosine_similarity(image_embeddings)

# Step 7: blend the image and text similarity matrices for a sweep of alpha values.
alpha_values = np.linspace(0, 1, 11)  # 11 evenly spaced weights from 0 to 1

# Row/column order used in the exported matrices.
desired_order = [
    "residential",
    "commercial",
    "hotel",
    "industrial",
    "education",
    "health care",
    "civic, governmental, and cultural",
    "sports and recreation",
    "outdoors and natural",
    "transportation"
]

# The similarity matrices are indexed in `categories` order (the CSV order). Both lists
# must contain exactly the same labels, otherwise the relabelling below is meaningless.
if len(desired_order) != len(categories) or set(desired_order) != set(categories):
    raise ValueError(
        "desired_order and the categories loaded from the CSV must contain the same labels; "
        f"only in desired_order: {sorted(set(desired_order) - set(categories))}, "
        f"only in categories: {sorted(set(categories) - set(desired_order))}"
    )

# reorder_idx[i] is the position in `categories` of the label that must end up at
# position i of the output. (Indexing with desired_order.index(cat) for cat in categories
# would apply the inverse permutation and attach wrong labels to rows and columns.)
# It does not depend on alpha, so it is computed once.
reorder_idx = np.array([categories.index(cat) for cat in desired_order])

for alpha in tqdm(alpha_values, desc="Calculating similarity matrices", unit="alpha"):
    # alpha = 0 -> text similarity only; alpha = 1 -> image similarity only.
    final_similarity = alpha * image_sim + (1 - alpha) * text_sim

    # Step 8: reorder rows and columns together into `desired_order`.
    sorted_final_similarity = final_similarity[np.ix_(reorder_idx, reorder_idx)]
    final_similarity_df = pd.DataFrame(sorted_final_similarity, index=desired_order, columns=desired_order)

    # Save one CSV per alpha; the file name carries the alpha value.
    filename = f"final_similarity_matrix_alpha_{alpha:.2f}.csv"
    final_similarity_df.to_csv(filename)

    # Show the matrix that was just written.
    print(f"Similarity matrix for alpha = {alpha:.2f}:")
    print(final_similarity_df)


Calculating similarity matrices: 100%|██████████| 11/11 [00:00<00:00, 214.39alpha/s]

Similarity matrix for alpha = 0.00:
                                   residential  commercial     hotel  \
residential                           1.000000    0.519053  0.447989   
commercial                            0.519053    1.000000  0.444192   
hotel                                 0.447989    0.444192  1.000000   
industrial                            0.644263    0.513296  0.425852   
education                             0.527115    0.388816  0.432450   
health care                           0.463484    0.339296  0.364665   
civic, governmental, and cultural     0.482874    0.513420  0.409632   
sports and recreation                 0.562200    0.399357  0.317322   
outdoors and natural                  0.544050    0.442455  0.443796   
transportation                        0.535151    0.416212  0.463486   

                                   industrial  education  health care  \
residential                          0.644263   0.527115     0.463484   
commercial               


