<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/function1229.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/CommonFeatures/correct_function.zip -d /content/Data

Archive:  /content/drive/MyDrive/CommonFeatures/correct_function.zip
   creating: /content/Data/civic, governmental and cultural/
  inflating: /content/Data/civic, governmental and cultural/0.png  
  inflating: /content/Data/civic, governmental and cultural/1035.png  
  inflating: /content/Data/civic, governmental and cultural/1056.png  
  inflating: /content/Data/civic, governmental and cultural/1219.png  
  inflating: /content/Data/civic, governmental and cultural/1246.png  
  inflating: /content/Data/civic, governmental and cultural/1266.png  
  inflating: /content/Data/civic, governmental and cultural/1272.png  
  inflating: /content/Data/civic, governmental and cultural/1286.png  
  inflating: /content/Data/civic, governmental and cultural/1287.png  
  inflating: /content/Data/civic, governmental and cultural/1288.png  
  inflating: /content/Data/civic, governmental and cultural/1289.png  
  inflating: /content/Data/civic, governmental and cultural/1290.png  
  inflating: /content

In [3]:
!ls Data

'civic, governmental and cultural'   education	    industrial		   'sports and recreation'
 commercial			    'health care'  'outdoors and natural'   transportation
 correct_function.py		     hotel	    residential


In [4]:
import os
import torch
from transformers import AutoFeatureExtractor, AutoModel
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 facebook/vit-large-patch14-336 模型和特征提取器
model_name = "facebook/vit-large-patch14-336"
print(f"Loading model and feature extractor for {model_name}...")
model = AutoModel.from_pretrained(model_name).to(device)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
model.eval()  # 设置模型为评估模式

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"Found {len(image_paths)} images in {image_folder}.")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并确保图像为 RGB 格式
            image = Image.open(image_path).convert("RGB")

            # 使用特征提取器进行预处理
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                outputs = model(**inputs)

                # 提取 [CLS] token 的特征
                image_features = outputs.last_hidden_state[:, 0]

                # 归一化特征向量
                image_features /= image_features.norm(dim=-1, keepdim=True)

                # 将特征移动到 CPU 并添加到列表中
                image_features_list.append(image_features.cpu())
                image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 确保特征已经归一化
    # 余弦相似度可以通过特征矩阵与其转置的点积计算
    similarity_matrix = torch.mm(features, features.t()).numpy()

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

# 主流程
if __name__ == "__main__":
    # 定义图像文件夹和输出文件
    image_folder = './Data/civic, governmental and cultural'
    feature_output = './imgs_vit_1.pt'
    csv_output = './cosine_similarity_matrix.csv'

    # 提取特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output)


Using device: cuda
Loading model and feature extractor for facebook/vit-large-patch14-336...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


OSError: facebook/vit-large-patch14-336 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [5]:
 pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-kw85g05g
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-kw85g05g
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=60fb964f90981a8aff8ff3b55aff3f4becbdcfaaa49f1520189692a3f450eea0
  Stored in directory: /tmp/pip-ephem-wheel-cache-m0mu_3n7/wheels/da/2b/4c/d6691fa9597aac8bb

In [4]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 确保你已经安装了 CLIP 库。如果尚未安装，可以使用以下命令：
# pip install git+https://github.com/openai/CLIP.git

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"在文件夹 {image_folder} 中找到 {len(image_paths)} 张图像。")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())
            image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 余弦相似度可以通过特征矩阵与其转置的点积计算（假设特征已经归一化）
    similarity_matrix = torch.mm(features, features.t()).numpy()  # [num_images, num_images]

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

# 主流程
if __name__ == "__main__":
    # 定义图像文件夹和输出文件路径
    image_folder = './Data/commercial'
    feature_output = './imgs_vit_commercial.pt'
    csv_output = './cosine_similarity_matrix_commercial.csv'

    # 确保输出目录存在
    os.makedirs(os.path.dirname(feature_output), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output), exist_ok=True)

    # 提取特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output)


Using device: cuda
Loading CLIP model: ViT-L/14@336px...


100%|███████████████████████████████████████| 891M/891M [00:09<00:00, 95.6MiB/s]


CLIP 模型加载完成。
在文件夹 ./Data/commercial 中找到 208 张图像。


Processing ./Data/commercial: 100%|██████████| 208/208 [00:06<00:00, 31.68it/s]


特征已保存到 ./imgs_vit_commercial.pt
计算余弦相似度矩阵...
相似度矩阵大小: (208, 208)
余弦相似度矩阵已保存到 ./cosine_similarity_matrix_commercial.csv


In [5]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 确保你已经安装了 CLIP 库。如果尚未安装，可以使用以下命令：
# pip install git+https://github.com/openai/CLIP.git

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"在文件夹 {image_folder} 中找到 {len(image_paths)} 张图像。")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())
            image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output, mean_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 余弦相似度可以通过特征矩阵与其转置的点积计算（假设特征已经归一化）
    similarity_matrix = torch.mm(features, features.t()).numpy()  # [num_images, num_images]

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 计算每列的均值并添加为最后一行
    column_means = df.mean(axis=0)  # 计算每列均值
    df.loc['Mean', :] = column_means  # 添加均值作为最后一行

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

    # 找出均值大于 0.8 的列名并保存
    high_similarity_columns = column_means[column_means > 0.8].index.tolist()
    with open(mean_output, 'w') as f:
        for col in high_similarity_columns:
            f.write(f"{col}\n")
    print(f"均值大于 0.8 的列名已保存到 {mean_output}")

# 主流程
if __name__ == "__main__":
    # 定义图像文件夹和输出文件路径
    image_folder = './Data/commercial'
    feature_output = './imgs_vit_commercial.pt'
    csv_output = './cosine_similarity_matrix_commercial.csv'
    mean_output = './commercial.txt'  # 保存均值大于 0.8 的列名

    # 确保输出目录存在
    os.makedirs(os.path.dirname(feature_output), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output), exist_ok=True)
    os.makedirs(os.path.dirname(mean_output), exist_ok=True)

    # 提取特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output, mean_output)


Using device: cuda
Loading CLIP model: ViT-L/14@336px...
CLIP 模型加载完成。
在文件夹 ./Data/commercial 中找到 208 张图像。


Processing ./Data/commercial: 100%|██████████| 208/208 [00:05<00:00, 36.56it/s]


特征已保存到 ./imgs_vit_commercial.pt
计算余弦相似度矩阵...
相似度矩阵大小: (208, 208)
余弦相似度矩阵已保存到 ./cosine_similarity_matrix_commercial.csv
均值大于 0.8 的列名已保存到 ./commercial.txt


In [6]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 确保你已经安装了 CLIP 库。如果尚未安装，可以使用以下命令：
# pip install git+https://github.com/openai/CLIP.git

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 从 high_similarity_columns.txt 读取图片文件名
def load_high_similarity_images(file_path):
    with open(file_path, 'r') as f:
        image_filenames = [line.strip() for line in f.readlines()]
    return image_filenames

# 提取选定图像的特征
def extract_selected_features(image_folder, selected_filenames):
    image_features_list = []

    # 处理每个选定的图像
    for image_filename in tqdm(selected_filenames, desc="Processing selected images"):
        image_path = os.path.join(image_folder, image_filename)

        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        return image_features
    else:
        print(f"未找到有效的图像")
        return None

# 计算特征的均值
def compute_feature_mean(features):
    if features is None:
        print("没有有效的特征可用于计算均值。")
        return None
    feature_mean = features.mean(dim=0)  # 计算所有图像特征的均值
    return feature_mean

# 计算余弦相似度
def compute_cosine_similarity(feature_mean, all_features):
    # 计算特征均值和所有嵌入的余弦相似度
    cosine_similarities = torch.matmul(all_features, feature_mean)  # 点积（余弦相似度，假设已经归一化）
    return cosine_similarities

# 主流程
if __name__ == "__main__":
    # 定义文件路径
    image_folder = './Data/commercial'  # 图像所在文件夹路径
    high_similarity_file = './commercial.txt'  # 包含高相似度图像的文件路径
    feature_file = './imgs_vit_commercial.pt'  # 所有图像的特征文件路径

    # 读取 high_similarity_columns.txt 文件中的图像文件名
    selected_filenames = load_high_similarity_images(high_similarity_file)

    # 提取选定图像的特征
    selected_features = extract_selected_features(image_folder, selected_filenames)

    # 计算特征均值
    feature_mean = compute_feature_mean(selected_features)

    if feature_mean is not None:
        print(f"计算得到的特征均值：{feature_mean}")

        # 加载 imgs_vit_commercial.pt 中保存的所有特征
        all_features = torch.load(feature_file)
        print(f"加载所有图像的特征，形状为: {all_features.shape}")

        # 计算余弦相似度
        cosine_similarities = compute_cosine_similarity(feature_mean, all_features)

        # 将相似度转换为 DataFrame 并保存为 CSV
        similarity_df = pd.DataFrame(cosine_similarities.numpy(), columns=["Cosine Similarity"])
        similarity_df["Image Filename"] = os.listdir(image_folder)  # 添加文件名列

        # 按相似度排序并保存到 CSV
        similarity_df = similarity_df.sort_values(by="Cosine Similarity", ascending=False)
        similarity_df.to_csv('./cosine_similarity_with_mean.csv', index=False)

        print("余弦相似度计算完成，结果已保存到 cosine_similarity_with_mean.csv")
    else:
        print("没有有效的图像特征可用于计算均值。")

Using device: cuda
Loading CLIP model: ViT-L/14@336px...
CLIP 模型加载完成。


Processing selected images: 100%|██████████| 109/109 [00:02<00:00, 37.14it/s]

计算得到的特征均值：tensor([ 2.0981e-02,  3.9764e-02,  1.8112e-02,  2.4582e-02, -1.0033e-02,
         1.6434e-02,  1.9516e-02, -2.9358e-02,  3.7231e-02, -3.5980e-02,
        -1.5366e-02, -1.6769e-02, -1.6876e-02,  3.1799e-02, -7.8430e-03,
         2.4902e-02, -1.6174e-02, -5.2071e-04,  5.3162e-02, -1.9531e-02,
        -1.3123e-02,  2.2354e-03, -1.2657e-02,  3.2867e-02, -4.1656e-03,
        -1.0452e-02,  3.3722e-02, -3.7441e-03, -4.1138e-02,  1.0544e-02,
         4.0436e-03,  1.3916e-02,  4.9477e-03, -2.7313e-02, -1.3763e-02,
         3.5828e-02, -3.1036e-02, -2.1881e-02,  5.4693e-04, -1.1475e-02,
        -1.2726e-02, -3.0098e-03,  1.5381e-02, -1.7792e-02,  3.4790e-02,
         2.1027e-02,  3.0746e-02, -1.0727e-02,  9.2316e-03, -1.5732e-02,
         1.8143e-02, -7.8430e-03,  1.4763e-02,  1.8616e-02, -8.7280e-03,
        -2.8133e-03,  1.9577e-02,  4.2877e-03, -2.4185e-02, -2.3224e-02,
         1.7899e-02, -2.9129e-02,  1.6632e-02,  6.3858e-03,  2.5543e-02,
         7.6904e-03, -1.5869e-03,  9.1095


  all_features = torch.load(feature_file)


In [19]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 确保你已经安装了 CLIP 库。如果尚未安装，可以使用以下命令：
# pip install git+https://github.com/openai/CLIP.git

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"在文件夹 {image_folder} 中找到 {len(image_paths)} 张图像。")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())
            image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output, mean_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 余弦相似度可以通过特征矩阵与其转置的点积计算（假设特征已经归一化）
    similarity_matrix = torch.mm(features, features.t()).numpy()  # [num_images, num_images]

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 计算每列的均值并添加为最后一行
    column_means = df.mean(axis=0)  # 计算每列均值
    df.loc['Mean', :] = column_means  # 添加均值作为最后一行

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

    # 找出均值大于 0.8 的列名并保存
    high_similarity_columns = column_means[column_means > 0.8].index.tolist()
    with open(mean_output, 'w') as f:
        for col in high_similarity_columns:
            f.write(f"{col}\n")
    print(f"均值大于 0.8 的列名已保存到 {mean_output}")

# 从 high_similarity_columns.txt 读取图片文件名
def load_high_similarity_images(file_path):
    if not os.path.exists(file_path):
        print(f"文件 {file_path} 不存在。")
        return []
    with open(file_path, 'r') as f:
        image_filenames = [line.strip() for line in f.readlines()]
    print(f"从 {file_path} 加载了 {len(image_filenames)} 个高相似度图像文件名。")
    return image_filenames

# 提取选定图像的特征
def extract_selected_features(image_folder, selected_filenames):
    image_features_list = []

    print(f"提取选定图像的特征，共 {len(selected_filenames)} 张图像。")

    # 处理每个选定的图像
    for image_filename in tqdm(selected_filenames, desc="Processing selected images"):
        image_path = os.path.join(image_folder, image_filename)

        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, './selected_imgs_vit_commercial.pt')  # 可选：保存选定图像的特征
        print(f"选定图像的特征已提取并保存。")
        return image_features
    else:
        print(f"未找到有效的选定图像。")
        return None

# 计算特征的均值
def compute_feature_mean(features):
    if features is None:
        print("没有有效的特征可用于计算均值。")
        return None
    feature_mean = features.mean(dim=0)  # 计算所有图像特征的均值
    return feature_mean

# 计算余弦相似度
def compute_cosine_similarity(feature_mean, all_features):
    # 计算特征均值和所有嵌入的余弦相似度
    cosine_similarities = torch.matmul(all_features, feature_mean)  # 点积（余弦相似度，假设已经归一化）
    return cosine_similarities

# 主流程
def main():
    # 定义图像文件夹和输出文件路径
    image_folder = './Data/transportation'
    feature_output = './imgs_vit_commercial.pt'
    csv_output = './cosine_similarity_matrix_commercial.csv'
    mean_output = './commercial.txt'  # 保存均值大于 0.8 的列名
    high_similarity_file = mean_output  # 与 mean_output 相同
    feature_mean_output = './transportation.pt'

    # 输出文件夹路径
    output_dirs = [
        os.path.dirname(feature_output),
        os.path.dirname(csv_output),
        os.path.dirname(mean_output),
        os.path.dirname(feature_mean_output)
    ]

    # 确保输出目录存在
    for dir_path in output_dirs:
        if dir_path:  # 避免空字符串
            os.makedirs(dir_path, exist_ok=True)

    # 提取所有图像的特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output, mean_output)

    # 加载高相似度图像文件名
    selected_filenames = load_high_similarity_images(high_similarity_file)

    if not selected_filenames:
        print("没有高相似度的图像文件名可处理。")
        return

    # 提取选定图像的特征
    selected_features = extract_selected_features(image_folder, selected_filenames)

    # 计算特征均值
    feature_mean = compute_feature_mean(selected_features)

    if feature_mean is not None:
        print(f"计算得到的特征均值：{feature_mean}")

        # Save feature_mean to a .pt file
        torch.save(feature_mean, feature_mean_output)
        print(f"Feature mean saved to {feature_mean_output}")

        # 加载 imgs_vit_commercial.pt 中保存的所有特征
        if not os.path.exists(feature_output):
            print(f"特征文件 {feature_output} 不存在。无法加载所有图像的特征。")
            return
        all_features = torch.load(feature_output)
        print(f"加载所有图像的特征，形状为: {all_features.shape}")

        # 确保所有特征都是归一化的
        all_features = all_features / all_features.norm(dim=-1, keepdim=True)

        # 计算余弦相似度
        cosine_similarities = compute_cosine_similarity(feature_mean, all_features)

        # 将相似度转换为 DataFrame 并保存为 CSV
        similarity_df = pd.DataFrame({
            "Image Filename": filenames,
            "Cosine Similarity": cosine_similarities.numpy()
        })

        # 按相似度排序并保存到 CSV
        similarity_df = similarity_df.sort_values(by="Cosine Similarity", ascending=False)
        similarity_df.to_csv('./cosine_similarity_with_mean.csv', index=False)

        print("余弦相似度计算完成，结果已保存到 cosine_similarity_with_mean.csv")
    else:
        print("没有有效的图像特征可用于计算均值。")

if __name__ == "__main__":
    main()


Using device: cuda
Loading CLIP model: ViT-L/14@336px...
CLIP 模型加载完成。
在文件夹 ./Data/transportation 中找到 47 张图像。


Processing ./Data/transportation: 100%|██████████| 47/47 [00:01<00:00, 36.76it/s]


特征已保存到 ./imgs_vit_commercial.pt
计算余弦相似度矩阵...
相似度矩阵大小: (47, 47)
余弦相似度矩阵已保存到 ./cosine_similarity_matrix_commercial.csv
均值大于 0.8 的列名已保存到 ./commercial.txt
从 ./commercial.txt 加载了 1 个高相似度图像文件名。
提取选定图像的特征，共 1 张图像。


Processing selected images: 100%|██████████| 1/1 [00:00<00:00, 36.56it/s]

选定图像的特征已提取并保存。
计算得到的特征均值：tensor([ 1.9943e-02,  4.9652e-02,  5.6061e-02,  3.1067e-02, -8.9111e-03,
         2.6215e-02,  2.4109e-02, -3.8727e-02,  2.9312e-02, -2.2003e-02,
        -4.0054e-04, -1.0071e-02, -1.3031e-02,  4.1779e-02,  1.3725e-02,
         4.1595e-02, -1.5419e-02,  5.6458e-04,  6.9580e-02,  4.0092e-03,
         1.2901e-02,  6.3362e-03, -1.8021e-02,  2.9663e-02, -1.4961e-02,
        -1.5472e-02,  5.4443e-02,  8.7357e-03, -1.7670e-02,  5.3177e-03,
        -1.0201e-02,  2.8854e-02,  1.1971e-02, -4.0924e-02, -9.9335e-03,
         3.5187e-02, -4.5563e-02, -2.2156e-02,  9.3002e-03, -3.8177e-02,
        -1.3840e-02,  1.4877e-02,  1.5312e-02, -1.9989e-02,  4.3671e-02,
         3.7018e-02,  2.4658e-02, -2.4017e-02, -1.4168e-02,  2.4071e-03,
         3.1616e-02, -1.5373e-03,  1.0330e-02,  9.2363e-04,  6.1646e-03,
        -5.7640e-03,  3.4119e-02, -1.3359e-02, -4.3701e-02, -3.5950e-02,
         2.8610e-02, -8.6899e-03,  1.2825e-02,  1.0544e-02,  1.8143e-02,
         1.3748e-02,  1.03


  all_features = torch.load(feature_output)


In [7]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# Ensure you have installed the CLIP library. If not, install it using:
# pip install git+https://github.com/openai/CLIP.git

# Set device to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the pre-trained CLIP model and preprocessing function
model_name = "ViT-L/14@336px"  # Ensure this model name is available in the CLIP library
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # Set model to evaluation mode
print("CLIP model loaded successfully.")

# Define the feature extraction function
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # Get and sort image paths. Adjust sorting if filenames are not purely numeric.
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.jpeg', '.png'))
        ],
        key=lambda x: os.path.splitext(os.path.basename(x))[0]  # Sort by filename without extension
    )

    print(f"Found {len(image_paths)} images in folder: {image_folder}")

    # Process each image
    for image_path in tqdm(image_paths, desc=f"Processing {os.path.basename(image_folder)}"):
        try:
            # Load and preprocess image
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # Add batch dimension and move to device

            # Disable gradient calculation for efficiency
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # Encode image
                image_features /= image_features.norm(dim=-1, keepdim=True)  # Normalize feature vector

            # Move features to CPU and append to list
            image_features_list.append(image_features.cpu())
            image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"Error processing {image_path}: {e}")

    # Concatenate all features into a single tensor
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # Shape: [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"Features saved to {output_file}")
    else:
        print(f"No valid images found in {image_folder}")
        return None, None

    return image_features, image_filenames

# Define the function to compute and save cosine similarity matrix (optional)
def compute_and_save_cosine_similarity(features, filenames, csv_output, mean_output):
    if features is None or filenames is None:
        print("Features or filenames are empty. Cannot compute cosine similarity.")
        return

    print("Calculating cosine similarity matrix...")

    # Cosine similarity via dot product (features are already normalized)
    similarity_matrix = torch.mm(features, features.t()).numpy()  # Shape: [num_images, num_images]

    print(f"Similarity matrix size: {similarity_matrix.shape}")

    # Convert similarity matrix to DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # Calculate mean of each column and add as the last row
    column_means = df.mean(axis=0)  # Mean of each column
    df.loc['Mean', :] = column_means  # Add mean as the last row

    # Save the similarity matrix to CSV
    df.to_csv(csv_output)
    print(f"Cosine similarity matrix saved to {csv_output}")

    # Identify and save column names with mean similarity > 0.8
    high_similarity_columns = column_means[column_means > 0.8].index.tolist()
    with open(mean_output, 'w') as f:
        for col in high_similarity_columns:
            f.write(f"{col}\n")
    print(f"Column names with mean similarity > 0.8 saved to {mean_output}")

# Define the function to compute the mean feature
def compute_feature_mean(features):
    if features is None:
        print("No valid features available to compute mean.")
        return None
    feature_mean = features.mean(dim=0)  # Compute mean across all image features
    return feature_mean

# Main processing function
def main():
    # Define the root data directory
    data_root = './Data'

    # Define the list of folders to process
    folders_to_process = [
        'civic, governmental and cultural',
        'education',
        'industrial',
        'sports and recreation',
        'commercial',
        'health care',
        'outdoors and natural',
        'transportation',
        'hotel',
        'residential'
    ]

    # Define directories to save features and mean features
    features_dir = './features'
    mean_features_dir = './mean_features'

    # Create directories if they don't exist
    os.makedirs(features_dir, exist_ok=True)
    os.makedirs(mean_features_dir, exist_ok=True)

    # Iterate over each folder and process
    for folder in folders_to_process:
        folder_path = os.path.join(data_root, folder)

        # Check if the folder exists
        if not os.path.isdir(folder_path):
            print(f"Folder {folder_path} does not exist. Skipping...")
            continue

        print(f"\nProcessing folder: {folder}")

        # Define output file paths
        feature_output = os.path.join(features_dir, f"{folder.replace(' ', '_')}_features.pt")
        mean_output = os.path.join(mean_features_dir, f"{folder.replace(' ', '_')}_mean.pt")

        # Extract features
        features, filenames = extract_features(folder_path, feature_output)

        if features is None:
            print(f"Skipping mean computation for {folder} due to no features.")
            continue

        # Compute mean feature
        feature_mean = compute_feature_mean(features)

        if feature_mean is not None:
            # Save the mean feature
            torch.save(feature_mean, mean_output)
            print(f"Mean feature saved to {mean_output}")
        else:
            print(f"Failed to compute mean feature for {folder}.")

        # Optional: Compute and save cosine similarity matrix
        # Uncomment the following lines if you want to compute similarity matrices
        """
        csv_output = os.path.join(features_dir, f"{folder.replace(' ', '_')}_cosine_similarity.csv")
        high_similarity_output = os.path.join(features_dir, f"{folder.replace(' ', '_')}_high_similarity.txt")
        compute_and_save_cosine_similarity(features, filenames, csv_output, high_similarity_output)
        """

    print("\nAll specified folders have been processed.")

if __name__ == "__main__":
    main()


Using device: cuda
Loading CLIP model: ViT-L/14@336px...
CLIP model loaded successfully.

Processing folder: civic, governmental and cultural
Found 36 images in folder: ./Data/civic, governmental and cultural


Processing civic, governmental and cultural: 100%|██████████| 36/36 [00:01<00:00, 33.30it/s]


Features saved to ./features/civic,_governmental_and_cultural_features.pt
Mean feature saved to ./mean_features/civic,_governmental_and_cultural_mean.pt

Processing folder: education
Found 99 images in folder: ./Data/education


Processing education: 100%|██████████| 99/99 [00:02<00:00, 36.34it/s]


Features saved to ./features/education_features.pt
Mean feature saved to ./mean_features/education_mean.pt

Processing folder: industrial
Found 145 images in folder: ./Data/industrial


Processing industrial: 100%|██████████| 145/145 [00:03<00:00, 36.56it/s]


Features saved to ./features/industrial_features.pt
Mean feature saved to ./mean_features/industrial_mean.pt

Processing folder: sports and recreation
Found 28 images in folder: ./Data/sports and recreation


Processing sports and recreation: 100%|██████████| 28/28 [00:00<00:00, 36.96it/s]


Features saved to ./features/sports_and_recreation_features.pt
Mean feature saved to ./mean_features/sports_and_recreation_mean.pt

Processing folder: commercial
Found 208 images in folder: ./Data/commercial


Processing commercial: 100%|██████████| 208/208 [00:05<00:00, 36.41it/s]


Features saved to ./features/commercial_features.pt
Mean feature saved to ./mean_features/commercial_mean.pt

Processing folder: health care
Found 27 images in folder: ./Data/health care


Processing health care: 100%|██████████| 27/27 [00:00<00:00, 35.58it/s]


Features saved to ./features/health_care_features.pt
Mean feature saved to ./mean_features/health_care_mean.pt

Processing folder: outdoors and natural
Found 38 images in folder: ./Data/outdoors and natural


Processing outdoors and natural: 100%|██████████| 38/38 [00:01<00:00, 36.66it/s]


Features saved to ./features/outdoors_and_natural_features.pt
Mean feature saved to ./mean_features/outdoors_and_natural_mean.pt

Processing folder: transportation
Found 47 images in folder: ./Data/transportation


Processing transportation: 100%|██████████| 47/47 [00:01<00:00, 36.66it/s]


Features saved to ./features/transportation_features.pt
Mean feature saved to ./mean_features/transportation_mean.pt

Processing folder: hotel
Found 30 images in folder: ./Data/hotel


Processing hotel: 100%|██████████| 30/30 [00:00<00:00, 36.86it/s]


Features saved to ./features/hotel_features.pt
Mean feature saved to ./mean_features/hotel_mean.pt

Processing folder: residential
Found 335 images in folder: ./Data/residential


Processing residential: 100%|██████████| 335/335 [00:09<00:00, 35.98it/s]

Features saved to ./features/residential_features.pt
Mean feature saved to ./mean_features/residential_mean.pt

All specified folders have been processed.





In [4]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd
import random

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 定义特征提取函数
def extract_features(image_folders, output_file, num_images=10):
    image_features_list = []
    image_filenames = []

    # 处理每个文件夹
    for image_folder in image_folders:
        # 获取图像路径并按文件名中的数字顺序排序
        image_paths = sorted(
            [
                os.path.join(image_folder, img)
                for img in os.listdir(image_folder)
                if img.lower().endswith(('.jpg', '.png'))
            ],
            key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
        )

        # 如果图像数量不足 num_images, 选取所有图像
        if len(image_paths) > num_images:
            image_paths = random.sample(image_paths, num_images)  # 随机选择 num_images 张图像

        print(f"从文件夹 {image_folder} 中选择 {len(image_paths)} 张图像。")

        # 处理每一张图像
        for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
            try:
                # 加载并预处理图像
                image = Image.open(image_path).convert("RGB")
                image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

                # 禁用梯度计算，提高效率
                with torch.no_grad():
                    image_features = model.encode_image(image_input)  # 编码图像
                    image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

                # 将特征移动到 CPU 并添加到列表中
                image_features_list.append(image_features.cpu())
                image_filenames.append(os.path.basename(image_path))

            except Exception as e:
                print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 余弦相似度可以通过特征矩阵与其转置的点积计算（假设特征已经归一化）
    similarity_matrix = torch.mm(features, features.t()).numpy()  # [num_images, num_images]

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

# 主流程
if __name__ == "__main__":
    # 定义文件夹路径和输出文件路径
    image_folders = ['./Data/commercial', './Data/residential', './Data/industrial']  # 可添加更多文件夹
    feature_output = './features/combined_features.pt'
    csv_output = './cosine_similarity_combined.csv'

    # 确保输出目录存在
    os.makedirs(os.path.dirname(feature_output), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output), exist_ok=True)

    # 提取特征
    features, filenames = extract_features(image_folders, feature_output, num_images=10)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output)

Using device: cuda
Loading CLIP model: ViT-L/14@336px...


100%|███████████████████████████████████████| 891M/891M [01:41<00:00, 9.19MiB/s]


CLIP 模型加载完成。
从文件夹 ./Data/commercial 中选择 10 张图像。


Processing ./Data/commercial: 100%|██████████| 10/10 [00:01<00:00,  7.96it/s]


从文件夹 ./Data/residential 中选择 10 张图像。


Processing ./Data/residential: 100%|██████████| 10/10 [00:00<00:00, 37.10it/s]


从文件夹 ./Data/industrial 中选择 10 张图像。


Processing ./Data/industrial: 100%|██████████| 10/10 [00:00<00:00, 35.26it/s]

特征已保存到 ./features/combined_features.pt
计算余弦相似度矩阵...
相似度矩阵大小: (30, 30)
余弦相似度矩阵已保存到 ./cosine_similarity_combined.csv



